{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34278, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9173230643561467e-05, "grad_norm": 54.045951829468514, "learning_rate": 9.718172983479106e-09, "loss": 3.307, "step": 1 }, { "epoch": 5.8346461287122933e-05, "grad_norm": 59.45590508211042, "learning_rate": 1.943634596695821e-08, "loss": 3.4011, "step": 2 }, { "epoch": 8.751969193068441e-05, "grad_norm": 56.232314342880564, "learning_rate": 2.915451895043732e-08, "loss": 3.3368, "step": 3 }, { "epoch": 0.00011669292257424587, "grad_norm": 57.02148351734131, "learning_rate": 3.887269193391642e-08, "loss": 3.349, "step": 4 }, { "epoch": 0.00014586615321780734, "grad_norm": 55.620710576506895, "learning_rate": 4.8590864917395535e-08, "loss": 3.2952, "step": 5 }, { "epoch": 0.00017503938386136881, "grad_norm": 60.152501431945254, "learning_rate": 5.830903790087464e-08, "loss": 3.4352, "step": 6 }, { "epoch": 0.0002042126145049303, "grad_norm": 63.889700682468856, "learning_rate": 6.802721088435375e-08, "loss": 3.493, "step": 7 }, { "epoch": 0.00023338584514849173, "grad_norm": 50.90441079107094, "learning_rate": 7.774538386783285e-08, "loss": 3.2058, "step": 8 }, { "epoch": 0.00026255907579205323, "grad_norm": 56.09070079855593, "learning_rate": 8.746355685131196e-08, "loss": 3.293, "step": 9 }, { "epoch": 0.0002917323064356147, "grad_norm": 63.61971462398412, "learning_rate": 9.718172983479107e-08, "loss": 3.46, "step": 10 }, { "epoch": 0.00032090553707917613, "grad_norm": 60.29565511961232, "learning_rate": 1.0689990281827017e-07, "loss": 3.4424, "step": 11 }, { "epoch": 0.00035007876772273763, "grad_norm": 52.44151203167023, "learning_rate": 1.1661807580174928e-07, "loss": 3.22, "step": 12 }, { "epoch": 0.0003792519983662991, "grad_norm": 58.19796349152196, "learning_rate": 1.263362487852284e-07, "loss": 3.4233, "step": 13 }, { "epoch": 0.0004084252290098606, "grad_norm": 61.45277974737471, "learning_rate": 1.360544217687075e-07, "loss": 3.4018, "step": 14 }, { "epoch": 0.000437598459653422, "grad_norm": 51.96697093650594, "learning_rate": 1.457725947521866e-07, "loss": 3.206, "step": 15 }, { "epoch": 0.00046677169029698347, "grad_norm": 53.54038814993851, "learning_rate": 1.554907677356657e-07, "loss": 3.2888, "step": 16 }, { "epoch": 0.000495944920940545, "grad_norm": 57.73223559145958, "learning_rate": 1.6520894071914482e-07, "loss": 3.3793, "step": 17 }, { "epoch": 0.0005251181515841065, "grad_norm": 60.66841021414202, "learning_rate": 1.7492711370262392e-07, "loss": 3.3604, "step": 18 }, { "epoch": 0.0005542913822276679, "grad_norm": 48.39521870204028, "learning_rate": 1.8464528668610302e-07, "loss": 3.149, "step": 19 }, { "epoch": 0.0005834646128712294, "grad_norm": 55.805932545996086, "learning_rate": 1.9436345966958214e-07, "loss": 3.3099, "step": 20 }, { "epoch": 0.0006126378435147909, "grad_norm": 55.822401672096156, "learning_rate": 2.0408163265306121e-07, "loss": 3.3476, "step": 21 }, { "epoch": 0.0006418110741583523, "grad_norm": 48.00227667036085, "learning_rate": 2.1379980563654034e-07, "loss": 3.1326, "step": 22 }, { "epoch": 0.0006709843048019138, "grad_norm": 46.23671337498092, "learning_rate": 2.2351797862001946e-07, "loss": 3.0579, "step": 23 }, { "epoch": 0.0007001575354454753, "grad_norm": 49.215337803463065, "learning_rate": 2.3323615160349856e-07, "loss": 3.1714, "step": 24 }, { "epoch": 0.0007293307660890366, "grad_norm": 50.0964962852148, "learning_rate": 2.429543245869777e-07, "loss": 3.2463, "step": 25 }, { "epoch": 0.0007585039967325981, "grad_norm": 40.48331009532296, "learning_rate": 2.526724975704568e-07, "loss": 2.8898, "step": 26 }, { "epoch": 0.0007876772273761596, "grad_norm": 43.58796335693116, "learning_rate": 2.623906705539359e-07, "loss": 3.0225, "step": 27 }, { "epoch": 0.0008168504580197211, "grad_norm": 50.27078559700237, "learning_rate": 2.72108843537415e-07, "loss": 3.2008, "step": 28 }, { "epoch": 0.0008460236886632825, "grad_norm": 42.62875679870639, "learning_rate": 2.818270165208941e-07, "loss": 2.9526, "step": 29 }, { "epoch": 0.000875196919306844, "grad_norm": 36.538631056416364, "learning_rate": 2.915451895043732e-07, "loss": 2.7081, "step": 30 }, { "epoch": 0.0009043701499504055, "grad_norm": 37.95410524150656, "learning_rate": 3.0126336248785234e-07, "loss": 2.7338, "step": 31 }, { "epoch": 0.0009335433805939669, "grad_norm": 38.13933215569488, "learning_rate": 3.109815354713314e-07, "loss": 2.7104, "step": 32 }, { "epoch": 0.0009627166112375284, "grad_norm": 35.171458371816755, "learning_rate": 3.2069970845481054e-07, "loss": 2.6817, "step": 33 }, { "epoch": 0.00099188984188109, "grad_norm": 34.74362617495035, "learning_rate": 3.3041788143828963e-07, "loss": 2.652, "step": 34 }, { "epoch": 0.0010210630725246514, "grad_norm": 35.15349620528716, "learning_rate": 3.401360544217688e-07, "loss": 2.6788, "step": 35 }, { "epoch": 0.001050236303168213, "grad_norm": 34.67235666928212, "learning_rate": 3.4985422740524783e-07, "loss": 2.5883, "step": 36 }, { "epoch": 0.0010794095338117742, "grad_norm": 32.38172424222036, "learning_rate": 3.5957240038872693e-07, "loss": 2.5411, "step": 37 }, { "epoch": 0.0011085827644553357, "grad_norm": 33.1882021752588, "learning_rate": 3.6929057337220603e-07, "loss": 2.573, "step": 38 }, { "epoch": 0.0011377559950988972, "grad_norm": 32.09588599180164, "learning_rate": 3.790087463556852e-07, "loss": 2.5205, "step": 39 }, { "epoch": 0.0011669292257424587, "grad_norm": 30.64571408525671, "learning_rate": 3.887269193391643e-07, "loss": 2.3672, "step": 40 }, { "epoch": 0.0011961024563860202, "grad_norm": 27.741409245103288, "learning_rate": 3.984450923226434e-07, "loss": 2.2191, "step": 41 }, { "epoch": 0.0012252756870295817, "grad_norm": 28.002704456507587, "learning_rate": 4.0816326530612243e-07, "loss": 2.1296, "step": 42 }, { "epoch": 0.0012544489176731432, "grad_norm": 27.01102683162873, "learning_rate": 4.1788143828960163e-07, "loss": 2.0846, "step": 43 }, { "epoch": 0.0012836221483167045, "grad_norm": 27.30622428117336, "learning_rate": 4.275996112730807e-07, "loss": 2.0225, "step": 44 }, { "epoch": 0.001312795378960266, "grad_norm": 30.44286494588882, "learning_rate": 4.373177842565598e-07, "loss": 1.9974, "step": 45 }, { "epoch": 0.0013419686096038275, "grad_norm": 41.43761884824672, "learning_rate": 4.4703595724003893e-07, "loss": 1.9598, "step": 46 }, { "epoch": 0.001371141840247389, "grad_norm": 83.41599647174692, "learning_rate": 4.5675413022351803e-07, "loss": 1.8443, "step": 47 }, { "epoch": 0.0014003150708909505, "grad_norm": 109.62322489197544, "learning_rate": 4.6647230320699713e-07, "loss": 1.8672, "step": 48 }, { "epoch": 0.001429488301534512, "grad_norm": 112.88366758582197, "learning_rate": 4.7619047619047623e-07, "loss": 1.7683, "step": 49 }, { "epoch": 0.0014586615321780733, "grad_norm": 96.52880293362055, "learning_rate": 4.859086491739554e-07, "loss": 1.7578, "step": 50 }, { "epoch": 0.0014878347628216348, "grad_norm": 51.58173404640064, "learning_rate": 4.956268221574345e-07, "loss": 1.6946, "step": 51 }, { "epoch": 0.0015170079934651963, "grad_norm": 98.63939811986286, "learning_rate": 5.053449951409136e-07, "loss": 1.624, "step": 52 }, { "epoch": 0.0015461812241087578, "grad_norm": 392.52631989640827, "learning_rate": 5.150631681243927e-07, "loss": 1.6404, "step": 53 }, { "epoch": 0.0015753544547523193, "grad_norm": 36.83694097250143, "learning_rate": 5.247813411078718e-07, "loss": 1.621, "step": 54 }, { "epoch": 0.0016045276853958808, "grad_norm": 37.944539281225644, "learning_rate": 5.344995140913509e-07, "loss": 1.5761, "step": 55 }, { "epoch": 0.0016337009160394423, "grad_norm": 41.05549932657623, "learning_rate": 5.4421768707483e-07, "loss": 1.6411, "step": 56 }, { "epoch": 0.0016628741466830036, "grad_norm": 42.30992951788921, "learning_rate": 5.539358600583091e-07, "loss": 1.5739, "step": 57 }, { "epoch": 0.001692047377326565, "grad_norm": 38.06215707218943, "learning_rate": 5.636540330417882e-07, "loss": 1.4234, "step": 58 }, { "epoch": 0.0017212206079701266, "grad_norm": 29.2367743854925, "learning_rate": 5.733722060252673e-07, "loss": 1.4642, "step": 59 }, { "epoch": 0.001750393838613688, "grad_norm": 25.18278161139425, "learning_rate": 5.830903790087464e-07, "loss": 1.3755, "step": 60 }, { "epoch": 0.0017795670692572496, "grad_norm": 18.831005340291743, "learning_rate": 5.928085519922256e-07, "loss": 1.2975, "step": 61 }, { "epoch": 0.001808740299900811, "grad_norm": 19.21243668324547, "learning_rate": 6.025267249757047e-07, "loss": 1.2641, "step": 62 }, { "epoch": 0.0018379135305443726, "grad_norm": 18.402347207240606, "learning_rate": 6.122448979591837e-07, "loss": 1.2559, "step": 63 }, { "epoch": 0.0018670867611879339, "grad_norm": 10.38479986233958, "learning_rate": 6.219630709426628e-07, "loss": 1.2621, "step": 64 }, { "epoch": 0.0018962599918314954, "grad_norm": 11.240970680762699, "learning_rate": 6.316812439261419e-07, "loss": 1.2273, "step": 65 }, { "epoch": 0.0019254332224750569, "grad_norm": 9.30095359552287, "learning_rate": 6.413994169096211e-07, "loss": 1.1881, "step": 66 }, { "epoch": 0.0019546064531186184, "grad_norm": 8.727234288494008, "learning_rate": 6.511175898931002e-07, "loss": 1.201, "step": 67 }, { "epoch": 0.00198377968376218, "grad_norm": 8.448178343565178, "learning_rate": 6.608357628765793e-07, "loss": 1.199, "step": 68 }, { "epoch": 0.0020129529144057414, "grad_norm": 7.605640693479639, "learning_rate": 6.705539358600584e-07, "loss": 1.1633, "step": 69 }, { "epoch": 0.002042126145049303, "grad_norm": 7.763773063041981, "learning_rate": 6.802721088435376e-07, "loss": 1.157, "step": 70 }, { "epoch": 0.0020712993756928644, "grad_norm": 7.83234540673427, "learning_rate": 6.899902818270166e-07, "loss": 1.127, "step": 71 }, { "epoch": 0.002100472606336426, "grad_norm": 7.751860059488426, "learning_rate": 6.997084548104957e-07, "loss": 1.1411, "step": 72 }, { "epoch": 0.0021296458369799874, "grad_norm": 7.221011120013558, "learning_rate": 7.094266277939748e-07, "loss": 1.1508, "step": 73 }, { "epoch": 0.0021588190676235484, "grad_norm": 7.249742675328467, "learning_rate": 7.191448007774539e-07, "loss": 1.1322, "step": 74 }, { "epoch": 0.00218799229826711, "grad_norm": 7.41932368070449, "learning_rate": 7.288629737609331e-07, "loss": 1.1169, "step": 75 }, { "epoch": 0.0022171655289106714, "grad_norm": 6.84055899908655, "learning_rate": 7.385811467444121e-07, "loss": 1.1396, "step": 76 }, { "epoch": 0.002246338759554233, "grad_norm": 7.371084361682207, "learning_rate": 7.482993197278913e-07, "loss": 1.1648, "step": 77 }, { "epoch": 0.0022755119901977944, "grad_norm": 7.28952170579296, "learning_rate": 7.580174927113704e-07, "loss": 1.0947, "step": 78 }, { "epoch": 0.002304685220841356, "grad_norm": 6.60461471141226, "learning_rate": 7.677356656948494e-07, "loss": 1.0815, "step": 79 }, { "epoch": 0.0023338584514849174, "grad_norm": 6.855555588321384, "learning_rate": 7.774538386783286e-07, "loss": 1.0779, "step": 80 }, { "epoch": 0.002363031682128479, "grad_norm": 7.464498057697863, "learning_rate": 7.871720116618077e-07, "loss": 1.0906, "step": 81 }, { "epoch": 0.0023922049127720404, "grad_norm": 6.971944178717365, "learning_rate": 7.968901846452868e-07, "loss": 1.0776, "step": 82 }, { "epoch": 0.002421378143415602, "grad_norm": 6.441529733700574, "learning_rate": 8.066083576287659e-07, "loss": 1.0514, "step": 83 }, { "epoch": 0.0024505513740591634, "grad_norm": 6.06148020983748, "learning_rate": 8.163265306122449e-07, "loss": 1.0736, "step": 84 }, { "epoch": 0.002479724604702725, "grad_norm": 6.444048939489742, "learning_rate": 8.260447035957241e-07, "loss": 1.0877, "step": 85 }, { "epoch": 0.0025088978353462865, "grad_norm": 6.181458160460038, "learning_rate": 8.357628765792033e-07, "loss": 1.0835, "step": 86 }, { "epoch": 0.0025380710659898475, "grad_norm": 5.922607770428193, "learning_rate": 8.454810495626823e-07, "loss": 1.0658, "step": 87 }, { "epoch": 0.002567244296633409, "grad_norm": 5.542619628627248, "learning_rate": 8.551992225461614e-07, "loss": 1.0226, "step": 88 }, { "epoch": 0.0025964175272769705, "grad_norm": 5.747595143803563, "learning_rate": 8.649173955296406e-07, "loss": 1.0332, "step": 89 }, { "epoch": 0.002625590757920532, "grad_norm": 5.402496013260269, "learning_rate": 8.746355685131196e-07, "loss": 1.0308, "step": 90 }, { "epoch": 0.0026547639885640935, "grad_norm": 5.562058138188112, "learning_rate": 8.843537414965988e-07, "loss": 1.059, "step": 91 }, { "epoch": 0.002683937219207655, "grad_norm": 5.361866485657315, "learning_rate": 8.940719144800779e-07, "loss": 1.0442, "step": 92 }, { "epoch": 0.0027131104498512165, "grad_norm": 5.351336135979188, "learning_rate": 9.037900874635569e-07, "loss": 1.0374, "step": 93 }, { "epoch": 0.002742283680494778, "grad_norm": 4.839716796476288, "learning_rate": 9.135082604470361e-07, "loss": 1.0648, "step": 94 }, { "epoch": 0.0027714569111383395, "grad_norm": 4.894278673171512, "learning_rate": 9.23226433430515e-07, "loss": 1.059, "step": 95 }, { "epoch": 0.002800630141781901, "grad_norm": 4.840493645110819, "learning_rate": 9.329446064139943e-07, "loss": 1.036, "step": 96 }, { "epoch": 0.0028298033724254625, "grad_norm": 4.641389959250609, "learning_rate": 9.426627793974734e-07, "loss": 1.0267, "step": 97 }, { "epoch": 0.002858976603069024, "grad_norm": 4.781491164120611, "learning_rate": 9.523809523809525e-07, "loss": 1.0354, "step": 98 }, { "epoch": 0.0028881498337125855, "grad_norm": 4.583366378736798, "learning_rate": 9.620991253644317e-07, "loss": 1.0251, "step": 99 }, { "epoch": 0.0029173230643561466, "grad_norm": 4.825726016243985, "learning_rate": 9.718172983479108e-07, "loss": 1.0144, "step": 100 }, { "epoch": 0.002946496294999708, "grad_norm": 4.556217342908838, "learning_rate": 9.815354713313896e-07, "loss": 0.9955, "step": 101 }, { "epoch": 0.0029756695256432696, "grad_norm": 4.625039174353454, "learning_rate": 9.91253644314869e-07, "loss": 0.9907, "step": 102 }, { "epoch": 0.003004842756286831, "grad_norm": 4.68840635703037, "learning_rate": 1.000971817298348e-06, "loss": 1.0229, "step": 103 }, { "epoch": 0.0030340159869303926, "grad_norm": 4.743602647176867, "learning_rate": 1.0106899902818272e-06, "loss": 1.0059, "step": 104 }, { "epoch": 0.003063189217573954, "grad_norm": 4.731460723266639, "learning_rate": 1.0204081632653063e-06, "loss": 1.0157, "step": 105 }, { "epoch": 0.0030923624482175156, "grad_norm": 4.747413103995433, "learning_rate": 1.0301263362487854e-06, "loss": 1.0059, "step": 106 }, { "epoch": 0.003121535678861077, "grad_norm": 4.8678737085012616, "learning_rate": 1.0398445092322645e-06, "loss": 1.0222, "step": 107 }, { "epoch": 0.0031507089095046386, "grad_norm": 4.836400548026891, "learning_rate": 1.0495626822157436e-06, "loss": 1.0162, "step": 108 }, { "epoch": 0.0031798821401482, "grad_norm": 4.847008660412388, "learning_rate": 1.0592808551992226e-06, "loss": 1.0279, "step": 109 }, { "epoch": 0.0032090553707917616, "grad_norm": 4.987535364124845, "learning_rate": 1.0689990281827017e-06, "loss": 1.0224, "step": 110 }, { "epoch": 0.003238228601435323, "grad_norm": 4.779277015313041, "learning_rate": 1.0787172011661808e-06, "loss": 1.0064, "step": 111 }, { "epoch": 0.0032674018320788846, "grad_norm": 4.903732341827149, "learning_rate": 1.08843537414966e-06, "loss": 1.0306, "step": 112 }, { "epoch": 0.003296575062722446, "grad_norm": 5.067311367747353, "learning_rate": 1.098153547133139e-06, "loss": 0.9965, "step": 113 }, { "epoch": 0.003325748293366007, "grad_norm": 5.147921224622042, "learning_rate": 1.1078717201166181e-06, "loss": 0.9921, "step": 114 }, { "epoch": 0.0033549215240095687, "grad_norm": 5.143403518771765, "learning_rate": 1.1175898931000972e-06, "loss": 0.9971, "step": 115 }, { "epoch": 0.00338409475465313, "grad_norm": 5.219385919357969, "learning_rate": 1.1273080660835763e-06, "loss": 1.006, "step": 116 }, { "epoch": 0.0034132679852966917, "grad_norm": 5.132465962986135, "learning_rate": 1.1370262390670554e-06, "loss": 0.9755, "step": 117 }, { "epoch": 0.003442441215940253, "grad_norm": 5.195001396279438, "learning_rate": 1.1467444120505345e-06, "loss": 0.9852, "step": 118 }, { "epoch": 0.0034716144465838147, "grad_norm": 5.258841826130356, "learning_rate": 1.1564625850340136e-06, "loss": 0.9595, "step": 119 }, { "epoch": 0.003500787677227376, "grad_norm": 5.511324577831448, "learning_rate": 1.1661807580174927e-06, "loss": 0.978, "step": 120 }, { "epoch": 0.0035299609078709377, "grad_norm": 5.615991123547827, "learning_rate": 1.1758989310009718e-06, "loss": 1.0011, "step": 121 }, { "epoch": 0.003559134138514499, "grad_norm": 5.366948772170658, "learning_rate": 1.1856171039844512e-06, "loss": 0.9758, "step": 122 }, { "epoch": 0.0035883073691580607, "grad_norm": 5.615408935080543, "learning_rate": 1.19533527696793e-06, "loss": 0.9831, "step": 123 }, { "epoch": 0.003617480599801622, "grad_norm": 5.754702397510861, "learning_rate": 1.2050534499514093e-06, "loss": 1.0104, "step": 124 }, { "epoch": 0.0036466538304451837, "grad_norm": 5.842477132807181, "learning_rate": 1.2147716229348884e-06, "loss": 0.994, "step": 125 }, { "epoch": 0.003675827061088745, "grad_norm": 6.011047654181344, "learning_rate": 1.2244897959183673e-06, "loss": 0.9818, "step": 126 }, { "epoch": 0.0037050002917323062, "grad_norm": 6.192699227110393, "learning_rate": 1.2342079689018466e-06, "loss": 0.9825, "step": 127 }, { "epoch": 0.0037341735223758677, "grad_norm": 6.216835153057091, "learning_rate": 1.2439261418853255e-06, "loss": 0.9925, "step": 128 }, { "epoch": 0.0037633467530194292, "grad_norm": 6.194530008226965, "learning_rate": 1.2536443148688048e-06, "loss": 0.9706, "step": 129 }, { "epoch": 0.0037925199836629907, "grad_norm": 6.438872617277369, "learning_rate": 1.2633624878522837e-06, "loss": 0.9501, "step": 130 }, { "epoch": 0.0038216932143065522, "grad_norm": 6.408646071205254, "learning_rate": 1.2730806608357628e-06, "loss": 0.9656, "step": 131 }, { "epoch": 0.0038508664449501137, "grad_norm": 6.8926656915994515, "learning_rate": 1.2827988338192421e-06, "loss": 0.9717, "step": 132 }, { "epoch": 0.0038800396755936752, "grad_norm": 6.786347115572355, "learning_rate": 1.2925170068027212e-06, "loss": 0.9648, "step": 133 }, { "epoch": 0.003909212906237237, "grad_norm": 7.347768619737329, "learning_rate": 1.3022351797862003e-06, "loss": 0.955, "step": 134 }, { "epoch": 0.003938386136880798, "grad_norm": 7.829610945379658, "learning_rate": 1.3119533527696792e-06, "loss": 0.9522, "step": 135 }, { "epoch": 0.00396755936752436, "grad_norm": 7.863112355202574, "learning_rate": 1.3216715257531585e-06, "loss": 0.9546, "step": 136 }, { "epoch": 0.003996732598167921, "grad_norm": 8.05567637619532, "learning_rate": 1.3313896987366376e-06, "loss": 0.9559, "step": 137 }, { "epoch": 0.004025905828811483, "grad_norm": 8.437260079464115, "learning_rate": 1.3411078717201167e-06, "loss": 0.9535, "step": 138 }, { "epoch": 0.004055079059455044, "grad_norm": 8.870074634709532, "learning_rate": 1.3508260447035958e-06, "loss": 0.9651, "step": 139 }, { "epoch": 0.004084252290098606, "grad_norm": 9.374992282612821, "learning_rate": 1.3605442176870751e-06, "loss": 0.9671, "step": 140 }, { "epoch": 0.004113425520742167, "grad_norm": 9.475276853310358, "learning_rate": 1.370262390670554e-06, "loss": 0.9546, "step": 141 }, { "epoch": 0.004142598751385729, "grad_norm": 9.937411320393801, "learning_rate": 1.3799805636540331e-06, "loss": 0.9264, "step": 142 }, { "epoch": 0.00417177198202929, "grad_norm": 9.452404801305743, "learning_rate": 1.3896987366375122e-06, "loss": 0.9081, "step": 143 }, { "epoch": 0.004200945212672852, "grad_norm": 10.388070855265655, "learning_rate": 1.3994169096209913e-06, "loss": 0.9031, "step": 144 }, { "epoch": 0.004230118443316413, "grad_norm": 10.395794080232319, "learning_rate": 1.4091350826044706e-06, "loss": 0.954, "step": 145 }, { "epoch": 0.004259291673959975, "grad_norm": 10.814640721322968, "learning_rate": 1.4188532555879495e-06, "loss": 0.8961, "step": 146 }, { "epoch": 0.004288464904603536, "grad_norm": 10.711966415412752, "learning_rate": 1.4285714285714286e-06, "loss": 0.9116, "step": 147 }, { "epoch": 0.004317638135247097, "grad_norm": 10.875004401781785, "learning_rate": 1.4382896015549077e-06, "loss": 0.9104, "step": 148 }, { "epoch": 0.004346811365890659, "grad_norm": 11.176620547920438, "learning_rate": 1.4480077745383868e-06, "loss": 0.9196, "step": 149 }, { "epoch": 0.00437598459653422, "grad_norm": 11.122481269522007, "learning_rate": 1.4577259475218661e-06, "loss": 0.9329, "step": 150 }, { "epoch": 0.004405157827177782, "grad_norm": 10.9934949208899, "learning_rate": 1.4674441205053452e-06, "loss": 0.914, "step": 151 }, { "epoch": 0.004434331057821343, "grad_norm": 11.621354462329585, "learning_rate": 1.4771622934888241e-06, "loss": 0.9149, "step": 152 }, { "epoch": 0.004463504288464905, "grad_norm": 11.18837643736424, "learning_rate": 1.4868804664723032e-06, "loss": 0.8847, "step": 153 }, { "epoch": 0.004492677519108466, "grad_norm": 11.098190668768323, "learning_rate": 1.4965986394557825e-06, "loss": 0.9389, "step": 154 }, { "epoch": 0.004521850749752028, "grad_norm": 11.596961654297534, "learning_rate": 1.5063168124392616e-06, "loss": 0.8751, "step": 155 }, { "epoch": 0.004551023980395589, "grad_norm": 11.940546503160725, "learning_rate": 1.5160349854227407e-06, "loss": 0.8757, "step": 156 }, { "epoch": 0.004580197211039151, "grad_norm": 11.701906764397489, "learning_rate": 1.5257531584062196e-06, "loss": 0.8986, "step": 157 }, { "epoch": 0.004609370441682712, "grad_norm": 11.7691566034306, "learning_rate": 1.5354713313896987e-06, "loss": 0.8803, "step": 158 }, { "epoch": 0.004638543672326274, "grad_norm": 12.287601839750279, "learning_rate": 1.545189504373178e-06, "loss": 0.8606, "step": 159 }, { "epoch": 0.004667716902969835, "grad_norm": 11.789567332957226, "learning_rate": 1.5549076773566571e-06, "loss": 0.9111, "step": 160 }, { "epoch": 0.004696890133613396, "grad_norm": 11.945030037944933, "learning_rate": 1.5646258503401362e-06, "loss": 0.9047, "step": 161 }, { "epoch": 0.004726063364256958, "grad_norm": 12.339148912685063, "learning_rate": 1.5743440233236153e-06, "loss": 0.8572, "step": 162 }, { "epoch": 0.004755236594900519, "grad_norm": 12.183905694288685, "learning_rate": 1.5840621963070942e-06, "loss": 0.8671, "step": 163 }, { "epoch": 0.004784409825544081, "grad_norm": 12.041744890895613, "learning_rate": 1.5937803692905735e-06, "loss": 0.8553, "step": 164 }, { "epoch": 0.004813583056187642, "grad_norm": 11.930518409419967, "learning_rate": 1.6034985422740526e-06, "loss": 0.8616, "step": 165 }, { "epoch": 0.004842756286831204, "grad_norm": 12.3466248439367, "learning_rate": 1.6132167152575317e-06, "loss": 0.8784, "step": 166 }, { "epoch": 0.004871929517474765, "grad_norm": 12.35820643078063, "learning_rate": 1.6229348882410108e-06, "loss": 0.8461, "step": 167 }, { "epoch": 0.004901102748118327, "grad_norm": 12.234820859625453, "learning_rate": 1.6326530612244897e-06, "loss": 0.8581, "step": 168 }, { "epoch": 0.004930275978761888, "grad_norm": 11.976455501465534, "learning_rate": 1.642371234207969e-06, "loss": 0.8453, "step": 169 }, { "epoch": 0.00495944920940545, "grad_norm": 11.42338346944746, "learning_rate": 1.6520894071914481e-06, "loss": 0.8628, "step": 170 }, { "epoch": 0.004988622440049011, "grad_norm": 12.615894588712042, "learning_rate": 1.6618075801749272e-06, "loss": 0.855, "step": 171 }, { "epoch": 0.005017795670692573, "grad_norm": 12.10212199035121, "learning_rate": 1.6715257531584065e-06, "loss": 0.8667, "step": 172 }, { "epoch": 0.005046968901336134, "grad_norm": 12.553028710083943, "learning_rate": 1.6812439261418856e-06, "loss": 0.8406, "step": 173 }, { "epoch": 0.005076142131979695, "grad_norm": 12.344688693467962, "learning_rate": 1.6909620991253645e-06, "loss": 0.8336, "step": 174 }, { "epoch": 0.005105315362623257, "grad_norm": 12.228384478751527, "learning_rate": 1.7006802721088436e-06, "loss": 0.8372, "step": 175 }, { "epoch": 0.005134488593266818, "grad_norm": 12.03649962216464, "learning_rate": 1.7103984450923227e-06, "loss": 0.8211, "step": 176 }, { "epoch": 0.00516366182391038, "grad_norm": 12.123341825107705, "learning_rate": 1.720116618075802e-06, "loss": 0.8637, "step": 177 }, { "epoch": 0.005192835054553941, "grad_norm": 12.068774423302306, "learning_rate": 1.7298347910592811e-06, "loss": 0.8329, "step": 178 }, { "epoch": 0.005222008285197503, "grad_norm": 11.806571153776764, "learning_rate": 1.73955296404276e-06, "loss": 0.8477, "step": 179 }, { "epoch": 0.005251181515841064, "grad_norm": 12.046593563644189, "learning_rate": 1.7492711370262391e-06, "loss": 0.84, "step": 180 }, { "epoch": 0.005280354746484626, "grad_norm": 11.77316188026381, "learning_rate": 1.7589893100097182e-06, "loss": 0.8441, "step": 181 }, { "epoch": 0.005309527977128187, "grad_norm": 11.85419644496021, "learning_rate": 1.7687074829931975e-06, "loss": 0.8339, "step": 182 }, { "epoch": 0.005338701207771749, "grad_norm": 11.944891604233412, "learning_rate": 1.7784256559766766e-06, "loss": 0.8214, "step": 183 }, { "epoch": 0.00536787443841531, "grad_norm": 12.159880560789963, "learning_rate": 1.7881438289601557e-06, "loss": 0.7971, "step": 184 }, { "epoch": 0.005397047669058872, "grad_norm": 11.91412578384619, "learning_rate": 1.7978620019436346e-06, "loss": 0.7957, "step": 185 }, { "epoch": 0.005426220899702433, "grad_norm": 11.911521353949063, "learning_rate": 1.8075801749271137e-06, "loss": 0.808, "step": 186 }, { "epoch": 0.005455394130345994, "grad_norm": 12.062509293974587, "learning_rate": 1.817298347910593e-06, "loss": 0.7909, "step": 187 }, { "epoch": 0.005484567360989556, "grad_norm": 11.600416772936738, "learning_rate": 1.8270165208940721e-06, "loss": 0.8154, "step": 188 }, { "epoch": 0.005513740591633117, "grad_norm": 12.03027410387505, "learning_rate": 1.8367346938775512e-06, "loss": 0.7985, "step": 189 }, { "epoch": 0.005542913822276679, "grad_norm": 11.931153892179784, "learning_rate": 1.84645286686103e-06, "loss": 0.7597, "step": 190 }, { "epoch": 0.00557208705292024, "grad_norm": 11.454083915717893, "learning_rate": 1.8561710398445092e-06, "loss": 0.787, "step": 191 }, { "epoch": 0.005601260283563802, "grad_norm": 11.909385634464144, "learning_rate": 1.8658892128279885e-06, "loss": 0.7678, "step": 192 }, { "epoch": 0.005630433514207363, "grad_norm": 12.009181246049002, "learning_rate": 1.8756073858114676e-06, "loss": 0.7739, "step": 193 }, { "epoch": 0.005659606744850925, "grad_norm": 11.646051235739243, "learning_rate": 1.8853255587949467e-06, "loss": 0.7679, "step": 194 }, { "epoch": 0.005688779975494486, "grad_norm": 11.64121860283326, "learning_rate": 1.895043731778426e-06, "loss": 0.7798, "step": 195 }, { "epoch": 0.005717953206138048, "grad_norm": 11.567810719508575, "learning_rate": 1.904761904761905e-06, "loss": 0.7368, "step": 196 }, { "epoch": 0.005747126436781609, "grad_norm": 12.010866229070107, "learning_rate": 1.914480077745384e-06, "loss": 0.7623, "step": 197 }, { "epoch": 0.005776299667425171, "grad_norm": 11.883917388929667, "learning_rate": 1.9241982507288633e-06, "loss": 0.7268, "step": 198 }, { "epoch": 0.005805472898068732, "grad_norm": 11.72346628079956, "learning_rate": 1.933916423712342e-06, "loss": 0.7368, "step": 199 }, { "epoch": 0.005834646128712293, "grad_norm": 12.110677939017009, "learning_rate": 1.9436345966958215e-06, "loss": 0.7469, "step": 200 }, { "epoch": 0.005863819359355855, "grad_norm": 11.783926774204087, "learning_rate": 1.9533527696793004e-06, "loss": 0.7279, "step": 201 }, { "epoch": 0.005892992589999416, "grad_norm": 11.60066904064106, "learning_rate": 1.9630709426627793e-06, "loss": 0.7358, "step": 202 }, { "epoch": 0.005922165820642978, "grad_norm": 11.201533202218338, "learning_rate": 1.9727891156462586e-06, "loss": 0.7407, "step": 203 }, { "epoch": 0.005951339051286539, "grad_norm": 11.767916494241318, "learning_rate": 1.982507288629738e-06, "loss": 0.7033, "step": 204 }, { "epoch": 0.005980512281930101, "grad_norm": 11.57038811045555, "learning_rate": 1.992225461613217e-06, "loss": 0.7102, "step": 205 }, { "epoch": 0.006009685512573662, "grad_norm": 12.001815514280002, "learning_rate": 2.001943634596696e-06, "loss": 0.6806, "step": 206 }, { "epoch": 0.006038858743217224, "grad_norm": 11.79524440382437, "learning_rate": 2.011661807580175e-06, "loss": 0.7051, "step": 207 }, { "epoch": 0.006068031973860785, "grad_norm": 12.383127140344047, "learning_rate": 2.0213799805636543e-06, "loss": 0.6937, "step": 208 }, { "epoch": 0.006097205204504347, "grad_norm": 11.84018887052034, "learning_rate": 2.031098153547133e-06, "loss": 0.6978, "step": 209 }, { "epoch": 0.006126378435147908, "grad_norm": 11.447041483789565, "learning_rate": 2.0408163265306125e-06, "loss": 0.6793, "step": 210 }, { "epoch": 0.00615555166579147, "grad_norm": 11.658341946520713, "learning_rate": 2.050534499514092e-06, "loss": 0.6889, "step": 211 }, { "epoch": 0.006184724896435031, "grad_norm": 12.123436638014988, "learning_rate": 2.0602526724975707e-06, "loss": 0.684, "step": 212 }, { "epoch": 0.006213898127078592, "grad_norm": 11.504916523431982, "learning_rate": 2.0699708454810496e-06, "loss": 0.7034, "step": 213 }, { "epoch": 0.006243071357722154, "grad_norm": 11.538239186493822, "learning_rate": 2.079689018464529e-06, "loss": 0.6744, "step": 214 }, { "epoch": 0.006272244588365715, "grad_norm": 11.980938305971051, "learning_rate": 2.089407191448008e-06, "loss": 0.6989, "step": 215 }, { "epoch": 0.006301417819009277, "grad_norm": 11.945898901146133, "learning_rate": 2.099125364431487e-06, "loss": 0.6497, "step": 216 }, { "epoch": 0.006330591049652838, "grad_norm": 11.739472956996947, "learning_rate": 2.1088435374149664e-06, "loss": 0.6457, "step": 217 }, { "epoch": 0.0063597642802964, "grad_norm": 11.384022274679449, "learning_rate": 2.1185617103984453e-06, "loss": 0.6612, "step": 218 }, { "epoch": 0.006388937510939961, "grad_norm": 11.40822270680767, "learning_rate": 2.128279883381924e-06, "loss": 0.678, "step": 219 }, { "epoch": 0.006418110741583523, "grad_norm": 11.553583332185259, "learning_rate": 2.1379980563654035e-06, "loss": 0.6624, "step": 220 }, { "epoch": 0.006447283972227084, "grad_norm": 11.939577591021372, "learning_rate": 2.147716229348883e-06, "loss": 0.6379, "step": 221 }, { "epoch": 0.006476457202870646, "grad_norm": 11.7091863365697, "learning_rate": 2.1574344023323617e-06, "loss": 0.619, "step": 222 }, { "epoch": 0.006505630433514207, "grad_norm": 11.346639699567936, "learning_rate": 2.1671525753158406e-06, "loss": 0.6678, "step": 223 }, { "epoch": 0.006534803664157769, "grad_norm": 11.758854403205179, "learning_rate": 2.17687074829932e-06, "loss": 0.6105, "step": 224 }, { "epoch": 0.00656397689480133, "grad_norm": 11.473014914788202, "learning_rate": 2.1865889212827988e-06, "loss": 0.6417, "step": 225 }, { "epoch": 0.006593150125444892, "grad_norm": 12.027836450653655, "learning_rate": 2.196307094266278e-06, "loss": 0.6289, "step": 226 }, { "epoch": 0.006622323356088453, "grad_norm": 11.884689757159778, "learning_rate": 2.2060252672497574e-06, "loss": 0.5889, "step": 227 }, { "epoch": 0.006651496586732014, "grad_norm": 11.468961426808615, "learning_rate": 2.2157434402332363e-06, "loss": 0.6089, "step": 228 }, { "epoch": 0.006680669817375576, "grad_norm": 11.446624636013864, "learning_rate": 2.225461613216715e-06, "loss": 0.5994, "step": 229 }, { "epoch": 0.006709843048019137, "grad_norm": 11.66395812991183, "learning_rate": 2.2351797862001945e-06, "loss": 0.6043, "step": 230 }, { "epoch": 0.006739016278662699, "grad_norm": 11.332469127007451, "learning_rate": 2.244897959183674e-06, "loss": 0.5903, "step": 231 }, { "epoch": 0.00676818950930626, "grad_norm": 11.471237317246185, "learning_rate": 2.2546161321671527e-06, "loss": 0.6114, "step": 232 }, { "epoch": 0.006797362739949822, "grad_norm": 11.850247614856915, "learning_rate": 2.264334305150632e-06, "loss": 0.5855, "step": 233 }, { "epoch": 0.006826535970593383, "grad_norm": 11.82730517467475, "learning_rate": 2.274052478134111e-06, "loss": 0.5949, "step": 234 }, { "epoch": 0.006855709201236945, "grad_norm": 11.485264437660307, "learning_rate": 2.28377065111759e-06, "loss": 0.5551, "step": 235 }, { "epoch": 0.006884882431880506, "grad_norm": 11.405145187429145, "learning_rate": 2.293488824101069e-06, "loss": 0.5722, "step": 236 }, { "epoch": 0.006914055662524068, "grad_norm": 11.660399592734256, "learning_rate": 2.3032069970845484e-06, "loss": 0.5748, "step": 237 }, { "epoch": 0.006943228893167629, "grad_norm": 11.439655601904152, "learning_rate": 2.3129251700680273e-06, "loss": 0.5499, "step": 238 }, { "epoch": 0.006972402123811191, "grad_norm": 11.708246461632152, "learning_rate": 2.3226433430515066e-06, "loss": 0.559, "step": 239 }, { "epoch": 0.007001575354454752, "grad_norm": 11.53076305043417, "learning_rate": 2.3323615160349855e-06, "loss": 0.5765, "step": 240 }, { "epoch": 0.007030748585098313, "grad_norm": 11.210280964251877, "learning_rate": 2.342079689018465e-06, "loss": 0.549, "step": 241 }, { "epoch": 0.007059921815741875, "grad_norm": 11.496267401715498, "learning_rate": 2.3517978620019437e-06, "loss": 0.5216, "step": 242 }, { "epoch": 0.007089095046385436, "grad_norm": 11.055364756441806, "learning_rate": 2.361516034985423e-06, "loss": 0.5352, "step": 243 }, { "epoch": 0.007118268277028998, "grad_norm": 11.642585506110569, "learning_rate": 2.3712342079689023e-06, "loss": 0.5391, "step": 244 }, { "epoch": 0.007147441507672559, "grad_norm": 11.407627642817618, "learning_rate": 2.380952380952381e-06, "loss": 0.5056, "step": 245 }, { "epoch": 0.007176614738316121, "grad_norm": 11.082367037594938, "learning_rate": 2.39067055393586e-06, "loss": 0.5396, "step": 246 }, { "epoch": 0.007205787968959682, "grad_norm": 11.441394058724788, "learning_rate": 2.4003887269193394e-06, "loss": 0.5244, "step": 247 }, { "epoch": 0.007234961199603244, "grad_norm": 11.296712351311688, "learning_rate": 2.4101068999028187e-06, "loss": 0.5261, "step": 248 }, { "epoch": 0.007264134430246805, "grad_norm": 10.950446712681304, "learning_rate": 2.4198250728862976e-06, "loss": 0.5049, "step": 249 }, { "epoch": 0.007293307660890367, "grad_norm": 11.19605427329169, "learning_rate": 2.429543245869777e-06, "loss": 0.5067, "step": 250 }, { "epoch": 0.007322480891533928, "grad_norm": 11.412942617701502, "learning_rate": 2.4392614188532558e-06, "loss": 0.4921, "step": 251 }, { "epoch": 0.00735165412217749, "grad_norm": 11.100669247591972, "learning_rate": 2.4489795918367347e-06, "loss": 0.48, "step": 252 }, { "epoch": 0.007380827352821051, "grad_norm": 11.16500214937961, "learning_rate": 2.458697764820214e-06, "loss": 0.4881, "step": 253 }, { "epoch": 0.0074100005834646125, "grad_norm": 10.921514466016799, "learning_rate": 2.4684159378036933e-06, "loss": 0.496, "step": 254 }, { "epoch": 0.007439173814108174, "grad_norm": 11.13552901328187, "learning_rate": 2.478134110787172e-06, "loss": 0.4621, "step": 255 }, { "epoch": 0.0074683470447517355, "grad_norm": 10.988723529484387, "learning_rate": 2.487852283770651e-06, "loss": 0.4794, "step": 256 }, { "epoch": 0.007497520275395297, "grad_norm": 10.592447527785755, "learning_rate": 2.4975704567541304e-06, "loss": 0.4642, "step": 257 }, { "epoch": 0.0075266935060388585, "grad_norm": 11.09648035063937, "learning_rate": 2.5072886297376097e-06, "loss": 0.4454, "step": 258 }, { "epoch": 0.00755586673668242, "grad_norm": 10.789756784936234, "learning_rate": 2.5170068027210886e-06, "loss": 0.484, "step": 259 }, { "epoch": 0.0075850399673259815, "grad_norm": 10.505112737452256, "learning_rate": 2.5267249757045675e-06, "loss": 0.4742, "step": 260 }, { "epoch": 0.007614213197969543, "grad_norm": 10.198260105944357, "learning_rate": 2.5364431486880468e-06, "loss": 0.463, "step": 261 }, { "epoch": 0.0076433864286131045, "grad_norm": 10.58117157693541, "learning_rate": 2.5461613216715257e-06, "loss": 0.4485, "step": 262 }, { "epoch": 0.007672559659256666, "grad_norm": 10.057462462246159, "learning_rate": 2.5558794946550054e-06, "loss": 0.4607, "step": 263 }, { "epoch": 0.0077017328899002275, "grad_norm": 10.244038062005783, "learning_rate": 2.5655976676384843e-06, "loss": 0.4314, "step": 264 }, { "epoch": 0.007730906120543789, "grad_norm": 10.243916239162006, "learning_rate": 2.575315840621963e-06, "loss": 0.4033, "step": 265 }, { "epoch": 0.0077600793511873505, "grad_norm": 10.133218763856435, "learning_rate": 2.5850340136054425e-06, "loss": 0.4201, "step": 266 }, { "epoch": 0.0077892525818309116, "grad_norm": 9.992581101051586, "learning_rate": 2.5947521865889214e-06, "loss": 0.4381, "step": 267 }, { "epoch": 0.007818425812474473, "grad_norm": 9.799748051020952, "learning_rate": 2.6044703595724007e-06, "loss": 0.4254, "step": 268 }, { "epoch": 0.007847599043118035, "grad_norm": 10.097500004633288, "learning_rate": 2.6141885325558796e-06, "loss": 0.3934, "step": 269 }, { "epoch": 0.007876772273761596, "grad_norm": 9.710073580467833, "learning_rate": 2.6239067055393585e-06, "loss": 0.4059, "step": 270 }, { "epoch": 0.007905945504405158, "grad_norm": 9.505183836821201, "learning_rate": 2.633624878522838e-06, "loss": 0.3774, "step": 271 }, { "epoch": 0.00793511873504872, "grad_norm": 9.1929482166285, "learning_rate": 2.643343051506317e-06, "loss": 0.3996, "step": 272 }, { "epoch": 0.007964291965692281, "grad_norm": 9.364972993584722, "learning_rate": 2.6530612244897964e-06, "loss": 0.3946, "step": 273 }, { "epoch": 0.007993465196335842, "grad_norm": 8.870779249745748, "learning_rate": 2.6627793974732753e-06, "loss": 0.3545, "step": 274 }, { "epoch": 0.008022638426979404, "grad_norm": 8.735956325839938, "learning_rate": 2.6724975704567546e-06, "loss": 0.3629, "step": 275 }, { "epoch": 0.008051811657622965, "grad_norm": 8.576383992093225, "learning_rate": 2.6822157434402335e-06, "loss": 0.3931, "step": 276 }, { "epoch": 0.008080984888266527, "grad_norm": 8.555201112815267, "learning_rate": 2.6919339164237124e-06, "loss": 0.3627, "step": 277 }, { "epoch": 0.008110158118910088, "grad_norm": 7.984072846166019, "learning_rate": 2.7016520894071917e-06, "loss": 0.3729, "step": 278 }, { "epoch": 0.00813933134955365, "grad_norm": 8.08509797796882, "learning_rate": 2.7113702623906706e-06, "loss": 0.3879, "step": 279 }, { "epoch": 0.008168504580197211, "grad_norm": 7.80702870942227, "learning_rate": 2.7210884353741503e-06, "loss": 0.3499, "step": 280 }, { "epoch": 0.008197677810840772, "grad_norm": 7.574259010158371, "learning_rate": 2.730806608357629e-06, "loss": 0.3568, "step": 281 }, { "epoch": 0.008226851041484334, "grad_norm": 7.222573175239761, "learning_rate": 2.740524781341108e-06, "loss": 0.3348, "step": 282 }, { "epoch": 0.008256024272127896, "grad_norm": 6.879930759003031, "learning_rate": 2.7502429543245874e-06, "loss": 0.3409, "step": 283 }, { "epoch": 0.008285197502771458, "grad_norm": 6.978015023639886, "learning_rate": 2.7599611273080663e-06, "loss": 0.3651, "step": 284 }, { "epoch": 0.008314370733415018, "grad_norm": 6.358843281212613, "learning_rate": 2.7696793002915456e-06, "loss": 0.3603, "step": 285 }, { "epoch": 0.00834354396405858, "grad_norm": 6.320642016108672, "learning_rate": 2.7793974732750245e-06, "loss": 0.3388, "step": 286 }, { "epoch": 0.008372717194702142, "grad_norm": 6.32099186551206, "learning_rate": 2.7891156462585034e-06, "loss": 0.3689, "step": 287 }, { "epoch": 0.008401890425345704, "grad_norm": 5.807201388060067, "learning_rate": 2.7988338192419827e-06, "loss": 0.3409, "step": 288 }, { "epoch": 0.008431063655989264, "grad_norm": 5.647941332066215, "learning_rate": 2.8085519922254615e-06, "loss": 0.3326, "step": 289 }, { "epoch": 0.008460236886632826, "grad_norm": 5.453137115098798, "learning_rate": 2.8182701652089413e-06, "loss": 0.343, "step": 290 }, { "epoch": 0.008489410117276388, "grad_norm": 5.12504239664104, "learning_rate": 2.82798833819242e-06, "loss": 0.3198, "step": 291 }, { "epoch": 0.00851858334791995, "grad_norm": 5.063327780982386, "learning_rate": 2.837706511175899e-06, "loss": 0.2995, "step": 292 }, { "epoch": 0.00854775657856351, "grad_norm": 4.675748098209446, "learning_rate": 2.8474246841593784e-06, "loss": 0.3412, "step": 293 }, { "epoch": 0.008576929809207072, "grad_norm": 4.66246409593304, "learning_rate": 2.8571428571428573e-06, "loss": 0.2962, "step": 294 }, { "epoch": 0.008606103039850634, "grad_norm": 4.919686861392447, "learning_rate": 2.8668610301263366e-06, "loss": 0.3284, "step": 295 }, { "epoch": 0.008635276270494194, "grad_norm": 4.431848908970389, "learning_rate": 2.8765792031098155e-06, "loss": 0.3167, "step": 296 }, { "epoch": 0.008664449501137756, "grad_norm": 4.4158568891954335, "learning_rate": 2.8862973760932948e-06, "loss": 0.3108, "step": 297 }, { "epoch": 0.008693622731781318, "grad_norm": 3.998923406763035, "learning_rate": 2.8960155490767737e-06, "loss": 0.314, "step": 298 }, { "epoch": 0.00872279596242488, "grad_norm": 3.944104628252764, "learning_rate": 2.9057337220602525e-06, "loss": 0.3043, "step": 299 }, { "epoch": 0.00875196919306844, "grad_norm": 3.7899421781554365, "learning_rate": 2.9154518950437323e-06, "loss": 0.3225, "step": 300 }, { "epoch": 0.008781142423712002, "grad_norm": 3.552107196564264, "learning_rate": 2.925170068027211e-06, "loss": 0.2831, "step": 301 }, { "epoch": 0.008810315654355564, "grad_norm": 3.5812651862591265, "learning_rate": 2.9348882410106905e-06, "loss": 0.3414, "step": 302 }, { "epoch": 0.008839488884999126, "grad_norm": 3.5160124869214493, "learning_rate": 2.9446064139941694e-06, "loss": 0.3229, "step": 303 }, { "epoch": 0.008868662115642686, "grad_norm": 3.6089633308193254, "learning_rate": 2.9543245869776482e-06, "loss": 0.3126, "step": 304 }, { "epoch": 0.008897835346286248, "grad_norm": 3.5453234107237632, "learning_rate": 2.9640427599611276e-06, "loss": 0.3235, "step": 305 }, { "epoch": 0.00892700857692981, "grad_norm": 3.5035962731904386, "learning_rate": 2.9737609329446064e-06, "loss": 0.3105, "step": 306 }, { "epoch": 0.00895618180757337, "grad_norm": 3.7501205724538558, "learning_rate": 2.983479105928086e-06, "loss": 0.3153, "step": 307 }, { "epoch": 0.008985355038216932, "grad_norm": 3.394941486669191, "learning_rate": 2.993197278911565e-06, "loss": 0.3196, "step": 308 }, { "epoch": 0.009014528268860494, "grad_norm": 3.241172414342122, "learning_rate": 3.002915451895044e-06, "loss": 0.2779, "step": 309 }, { "epoch": 0.009043701499504056, "grad_norm": 3.1934920458602507, "learning_rate": 3.0126336248785233e-06, "loss": 0.3087, "step": 310 }, { "epoch": 0.009072874730147616, "grad_norm": 3.2268698451322657, "learning_rate": 3.022351797862002e-06, "loss": 0.2996, "step": 311 }, { "epoch": 0.009102047960791178, "grad_norm": 3.1635587205391626, "learning_rate": 3.0320699708454815e-06, "loss": 0.3178, "step": 312 }, { "epoch": 0.00913122119143474, "grad_norm": 3.0093458116750313, "learning_rate": 3.0417881438289604e-06, "loss": 0.2968, "step": 313 }, { "epoch": 0.009160394422078302, "grad_norm": 2.741170367470087, "learning_rate": 3.0515063168124392e-06, "loss": 0.2976, "step": 314 }, { "epoch": 0.009189567652721862, "grad_norm": 2.9455145470142714, "learning_rate": 3.0612244897959185e-06, "loss": 0.2977, "step": 315 }, { "epoch": 0.009218740883365424, "grad_norm": 2.7230192545396723, "learning_rate": 3.0709426627793974e-06, "loss": 0.3148, "step": 316 }, { "epoch": 0.009247914114008986, "grad_norm": 2.6993941016970457, "learning_rate": 3.080660835762877e-06, "loss": 0.3047, "step": 317 }, { "epoch": 0.009277087344652548, "grad_norm": 2.5951026957119185, "learning_rate": 3.090379008746356e-06, "loss": 0.304, "step": 318 }, { "epoch": 0.009306260575296108, "grad_norm": 2.754713376574571, "learning_rate": 3.1000971817298354e-06, "loss": 0.2758, "step": 319 }, { "epoch": 0.00933543380593967, "grad_norm": 2.608129232034051, "learning_rate": 3.1098153547133143e-06, "loss": 0.3091, "step": 320 }, { "epoch": 0.009364607036583232, "grad_norm": 2.3588671050032035, "learning_rate": 3.119533527696793e-06, "loss": 0.2734, "step": 321 }, { "epoch": 0.009393780267226792, "grad_norm": 2.576246544800748, "learning_rate": 3.1292517006802725e-06, "loss": 0.3143, "step": 322 }, { "epoch": 0.009422953497870354, "grad_norm": 2.442115567378784, "learning_rate": 3.1389698736637513e-06, "loss": 0.3121, "step": 323 }, { "epoch": 0.009452126728513916, "grad_norm": 2.5068164870128395, "learning_rate": 3.1486880466472307e-06, "loss": 0.3202, "step": 324 }, { "epoch": 0.009481299959157478, "grad_norm": 2.606783819651253, "learning_rate": 3.1584062196307095e-06, "loss": 0.2937, "step": 325 }, { "epoch": 0.009510473189801038, "grad_norm": 2.1178884108003984, "learning_rate": 3.1681243926141884e-06, "loss": 0.271, "step": 326 }, { "epoch": 0.0095396464204446, "grad_norm": 2.0652110483668404, "learning_rate": 3.177842565597668e-06, "loss": 0.2843, "step": 327 }, { "epoch": 0.009568819651088162, "grad_norm": 2.204974423982823, "learning_rate": 3.187560738581147e-06, "loss": 0.2592, "step": 328 }, { "epoch": 0.009597992881731724, "grad_norm": 2.0655925911611805, "learning_rate": 3.1972789115646264e-06, "loss": 0.2789, "step": 329 }, { "epoch": 0.009627166112375284, "grad_norm": 1.9531825316794622, "learning_rate": 3.2069970845481052e-06, "loss": 0.2545, "step": 330 }, { "epoch": 0.009656339343018846, "grad_norm": 2.1590601507754106, "learning_rate": 3.216715257531584e-06, "loss": 0.2606, "step": 331 }, { "epoch": 0.009685512573662408, "grad_norm": 2.0642596187813056, "learning_rate": 3.2264334305150634e-06, "loss": 0.2892, "step": 332 }, { "epoch": 0.009714685804305968, "grad_norm": 1.945148069761541, "learning_rate": 3.2361516034985423e-06, "loss": 0.2509, "step": 333 }, { "epoch": 0.00974385903494953, "grad_norm": 2.0957514084341544, "learning_rate": 3.2458697764820216e-06, "loss": 0.2663, "step": 334 }, { "epoch": 0.009773032265593092, "grad_norm": 2.094914155968183, "learning_rate": 3.2555879494655005e-06, "loss": 0.2792, "step": 335 }, { "epoch": 0.009802205496236654, "grad_norm": 2.153157228626012, "learning_rate": 3.2653061224489794e-06, "loss": 0.2866, "step": 336 }, { "epoch": 0.009831378726880214, "grad_norm": 1.8843072897963975, "learning_rate": 3.275024295432459e-06, "loss": 0.2772, "step": 337 }, { "epoch": 0.009860551957523776, "grad_norm": 1.9095121160251654, "learning_rate": 3.284742468415938e-06, "loss": 0.2631, "step": 338 }, { "epoch": 0.009889725188167338, "grad_norm": 2.1063348860832543, "learning_rate": 3.2944606413994174e-06, "loss": 0.2663, "step": 339 }, { "epoch": 0.0099188984188109, "grad_norm": 1.640192998600247, "learning_rate": 3.3041788143828962e-06, "loss": 0.3219, "step": 340 }, { "epoch": 0.00994807164945446, "grad_norm": 1.8783143584476323, "learning_rate": 3.3138969873663755e-06, "loss": 0.2576, "step": 341 }, { "epoch": 0.009977244880098022, "grad_norm": 1.8743606251045954, "learning_rate": 3.3236151603498544e-06, "loss": 0.3015, "step": 342 }, { "epoch": 0.010006418110741584, "grad_norm": 2.3756269137859913, "learning_rate": 3.3333333333333333e-06, "loss": 0.3041, "step": 343 }, { "epoch": 0.010035591341385146, "grad_norm": 1.820683881601083, "learning_rate": 3.343051506316813e-06, "loss": 0.2436, "step": 344 }, { "epoch": 0.010064764572028706, "grad_norm": 1.6527189980635084, "learning_rate": 3.352769679300292e-06, "loss": 0.3129, "step": 345 }, { "epoch": 0.010093937802672268, "grad_norm": 2.0328740369563074, "learning_rate": 3.3624878522837713e-06, "loss": 0.2767, "step": 346 }, { "epoch": 0.01012311103331583, "grad_norm": 1.7729144762059441, "learning_rate": 3.37220602526725e-06, "loss": 0.2849, "step": 347 }, { "epoch": 0.01015228426395939, "grad_norm": 1.6319492499213792, "learning_rate": 3.381924198250729e-06, "loss": 0.3011, "step": 348 }, { "epoch": 0.010181457494602952, "grad_norm": 1.5627342952620744, "learning_rate": 3.3916423712342083e-06, "loss": 0.2881, "step": 349 }, { "epoch": 0.010210630725246514, "grad_norm": 2.1436140364775365, "learning_rate": 3.4013605442176872e-06, "loss": 0.3088, "step": 350 }, { "epoch": 0.010239803955890076, "grad_norm": 1.5432322995400232, "learning_rate": 3.4110787172011665e-06, "loss": 0.2619, "step": 351 }, { "epoch": 0.010268977186533636, "grad_norm": 1.4919599215439503, "learning_rate": 3.4207968901846454e-06, "loss": 0.288, "step": 352 }, { "epoch": 0.010298150417177198, "grad_norm": 1.588909354418498, "learning_rate": 3.4305150631681243e-06, "loss": 0.2656, "step": 353 }, { "epoch": 0.01032732364782076, "grad_norm": 1.7238025208697898, "learning_rate": 3.440233236151604e-06, "loss": 0.2971, "step": 354 }, { "epoch": 0.010356496878464322, "grad_norm": 1.3741848014034796, "learning_rate": 3.449951409135083e-06, "loss": 0.2524, "step": 355 }, { "epoch": 0.010385670109107882, "grad_norm": 1.6950266671469552, "learning_rate": 3.4596695821185622e-06, "loss": 0.264, "step": 356 }, { "epoch": 0.010414843339751444, "grad_norm": 1.620413829771043, "learning_rate": 3.469387755102041e-06, "loss": 0.2825, "step": 357 }, { "epoch": 0.010444016570395006, "grad_norm": 1.4843445148445067, "learning_rate": 3.47910592808552e-06, "loss": 0.2524, "step": 358 }, { "epoch": 0.010473189801038568, "grad_norm": 1.4282873977233463, "learning_rate": 3.4888241010689993e-06, "loss": 0.2446, "step": 359 }, { "epoch": 0.010502363031682128, "grad_norm": 1.5458831165492712, "learning_rate": 3.4985422740524782e-06, "loss": 0.2602, "step": 360 }, { "epoch": 0.01053153626232569, "grad_norm": 1.9803172614901905, "learning_rate": 3.5082604470359575e-06, "loss": 0.3021, "step": 361 }, { "epoch": 0.010560709492969252, "grad_norm": 1.7412676481476632, "learning_rate": 3.5179786200194364e-06, "loss": 0.29, "step": 362 }, { "epoch": 0.010589882723612812, "grad_norm": 1.4824319966265418, "learning_rate": 3.527696793002916e-06, "loss": 0.2645, "step": 363 }, { "epoch": 0.010619055954256374, "grad_norm": 1.6505631113298946, "learning_rate": 3.537414965986395e-06, "loss": 0.3022, "step": 364 }, { "epoch": 0.010648229184899936, "grad_norm": 1.5184315725990505, "learning_rate": 3.547133138969874e-06, "loss": 0.2778, "step": 365 }, { "epoch": 0.010677402415543498, "grad_norm": 1.8930868281870492, "learning_rate": 3.5568513119533532e-06, "loss": 0.2707, "step": 366 }, { "epoch": 0.010706575646187058, "grad_norm": 1.462237554708469, "learning_rate": 3.566569484936832e-06, "loss": 0.2454, "step": 367 }, { "epoch": 0.01073574887683062, "grad_norm": 1.6865796691913675, "learning_rate": 3.5762876579203114e-06, "loss": 0.2669, "step": 368 }, { "epoch": 0.010764922107474182, "grad_norm": 1.5496968461346172, "learning_rate": 3.5860058309037903e-06, "loss": 0.2817, "step": 369 }, { "epoch": 0.010794095338117744, "grad_norm": 2.0249897064534523, "learning_rate": 3.595724003887269e-06, "loss": 0.2597, "step": 370 }, { "epoch": 0.010823268568761304, "grad_norm": 1.5958207293933246, "learning_rate": 3.6054421768707485e-06, "loss": 0.2521, "step": 371 }, { "epoch": 0.010852441799404866, "grad_norm": 1.4351172330015676, "learning_rate": 3.6151603498542274e-06, "loss": 0.2493, "step": 372 }, { "epoch": 0.010881615030048428, "grad_norm": 1.3779667132396949, "learning_rate": 3.624878522837707e-06, "loss": 0.2746, "step": 373 }, { "epoch": 0.010910788260691988, "grad_norm": 1.686772218077411, "learning_rate": 3.634596695821186e-06, "loss": 0.2697, "step": 374 }, { "epoch": 0.01093996149133555, "grad_norm": 1.63760834587086, "learning_rate": 3.644314868804665e-06, "loss": 0.3058, "step": 375 }, { "epoch": 0.010969134721979112, "grad_norm": 1.1955134036184547, "learning_rate": 3.6540330417881442e-06, "loss": 0.2521, "step": 376 }, { "epoch": 0.010998307952622674, "grad_norm": 1.2622028647576145, "learning_rate": 3.663751214771623e-06, "loss": 0.2619, "step": 377 }, { "epoch": 0.011027481183266234, "grad_norm": 1.5249266049659616, "learning_rate": 3.6734693877551024e-06, "loss": 0.2717, "step": 378 }, { "epoch": 0.011056654413909796, "grad_norm": 1.6900785662024531, "learning_rate": 3.6831875607385813e-06, "loss": 0.2573, "step": 379 }, { "epoch": 0.011085827644553358, "grad_norm": 1.211142533860906, "learning_rate": 3.69290573372206e-06, "loss": 0.2607, "step": 380 }, { "epoch": 0.01111500087519692, "grad_norm": 1.2637964860665716, "learning_rate": 3.70262390670554e-06, "loss": 0.2549, "step": 381 }, { "epoch": 0.01114417410584048, "grad_norm": 1.5521314677876958, "learning_rate": 3.7123420796890184e-06, "loss": 0.2704, "step": 382 }, { "epoch": 0.011173347336484042, "grad_norm": 1.4219165908310176, "learning_rate": 3.722060252672498e-06, "loss": 0.2822, "step": 383 }, { "epoch": 0.011202520567127604, "grad_norm": 1.3945148177559197, "learning_rate": 3.731778425655977e-06, "loss": 0.2554, "step": 384 }, { "epoch": 0.011231693797771166, "grad_norm": 1.4800200894833206, "learning_rate": 3.7414965986394563e-06, "loss": 0.2653, "step": 385 }, { "epoch": 0.011260867028414726, "grad_norm": 1.7467062544533822, "learning_rate": 3.7512147716229352e-06, "loss": 0.2694, "step": 386 }, { "epoch": 0.011290040259058288, "grad_norm": 1.4124869764924537, "learning_rate": 3.760932944606414e-06, "loss": 0.2681, "step": 387 }, { "epoch": 0.01131921348970185, "grad_norm": 1.4230904605810348, "learning_rate": 3.7706511175898934e-06, "loss": 0.288, "step": 388 }, { "epoch": 0.01134838672034541, "grad_norm": 1.357060034334733, "learning_rate": 3.7803692905733723e-06, "loss": 0.2743, "step": 389 }, { "epoch": 0.011377559950988972, "grad_norm": 1.372100485282738, "learning_rate": 3.790087463556852e-06, "loss": 0.2562, "step": 390 }, { "epoch": 0.011406733181632534, "grad_norm": 1.3167946418471566, "learning_rate": 3.799805636540331e-06, "loss": 0.2613, "step": 391 }, { "epoch": 0.011435906412276096, "grad_norm": 1.2965892630475417, "learning_rate": 3.80952380952381e-06, "loss": 0.3135, "step": 392 }, { "epoch": 0.011465079642919656, "grad_norm": 1.3897749697059423, "learning_rate": 3.819241982507289e-06, "loss": 0.2629, "step": 393 }, { "epoch": 0.011494252873563218, "grad_norm": 1.3108126440033776, "learning_rate": 3.828960155490768e-06, "loss": 0.2503, "step": 394 }, { "epoch": 0.01152342610420678, "grad_norm": 1.2565582473203867, "learning_rate": 3.838678328474247e-06, "loss": 0.2643, "step": 395 }, { "epoch": 0.011552599334850342, "grad_norm": 1.3845977026331957, "learning_rate": 3.848396501457727e-06, "loss": 0.257, "step": 396 }, { "epoch": 0.011581772565493902, "grad_norm": 1.0733200186856116, "learning_rate": 3.858114674441205e-06, "loss": 0.2543, "step": 397 }, { "epoch": 0.011610945796137464, "grad_norm": 1.3431895406111862, "learning_rate": 3.867832847424684e-06, "loss": 0.2564, "step": 398 }, { "epoch": 0.011640119026781026, "grad_norm": 1.5303229422311955, "learning_rate": 3.877551020408164e-06, "loss": 0.2493, "step": 399 }, { "epoch": 0.011669292257424586, "grad_norm": 1.5256524022733309, "learning_rate": 3.887269193391643e-06, "loss": 0.2632, "step": 400 }, { "epoch": 0.011698465488068148, "grad_norm": 1.3465739038902813, "learning_rate": 3.8969873663751215e-06, "loss": 0.2529, "step": 401 }, { "epoch": 0.01172763871871171, "grad_norm": 1.6741775016426725, "learning_rate": 3.906705539358601e-06, "loss": 0.2718, "step": 402 }, { "epoch": 0.011756811949355272, "grad_norm": 1.5926160920030783, "learning_rate": 3.91642371234208e-06, "loss": 0.2522, "step": 403 }, { "epoch": 0.011785985179998832, "grad_norm": 1.2376052927951111, "learning_rate": 3.926141885325559e-06, "loss": 0.2862, "step": 404 }, { "epoch": 0.011815158410642394, "grad_norm": 1.3319212228022448, "learning_rate": 3.935860058309039e-06, "loss": 0.253, "step": 405 }, { "epoch": 0.011844331641285956, "grad_norm": 1.3629418138465732, "learning_rate": 3.945578231292517e-06, "loss": 0.2689, "step": 406 }, { "epoch": 0.011873504871929518, "grad_norm": 1.2912180310714865, "learning_rate": 3.9552964042759965e-06, "loss": 0.2525, "step": 407 }, { "epoch": 0.011902678102573078, "grad_norm": 1.5134083021404499, "learning_rate": 3.965014577259476e-06, "loss": 0.2796, "step": 408 }, { "epoch": 0.01193185133321664, "grad_norm": 1.531948495145115, "learning_rate": 3.974732750242954e-06, "loss": 0.2531, "step": 409 }, { "epoch": 0.011961024563860202, "grad_norm": 1.4702809793293405, "learning_rate": 3.984450923226434e-06, "loss": 0.3054, "step": 410 }, { "epoch": 0.011990197794503764, "grad_norm": 1.3305350002771992, "learning_rate": 3.994169096209913e-06, "loss": 0.2884, "step": 411 }, { "epoch": 0.012019371025147324, "grad_norm": 1.4697924000376428, "learning_rate": 4.003887269193392e-06, "loss": 0.26, "step": 412 }, { "epoch": 0.012048544255790886, "grad_norm": 1.3735374659502786, "learning_rate": 4.013605442176871e-06, "loss": 0.2664, "step": 413 }, { "epoch": 0.012077717486434448, "grad_norm": 1.2954656043771686, "learning_rate": 4.02332361516035e-06, "loss": 0.2782, "step": 414 }, { "epoch": 0.012106890717078008, "grad_norm": 1.1448721497856589, "learning_rate": 4.033041788143829e-06, "loss": 0.2296, "step": 415 }, { "epoch": 0.01213606394772157, "grad_norm": 1.3316732945761685, "learning_rate": 4.042759961127309e-06, "loss": 0.2468, "step": 416 }, { "epoch": 0.012165237178365132, "grad_norm": 1.5767206129208087, "learning_rate": 4.052478134110788e-06, "loss": 0.2444, "step": 417 }, { "epoch": 0.012194410409008694, "grad_norm": 1.4368737411520363, "learning_rate": 4.062196307094266e-06, "loss": 0.2723, "step": 418 }, { "epoch": 0.012223583639652254, "grad_norm": 1.1620650429155768, "learning_rate": 4.071914480077746e-06, "loss": 0.2678, "step": 419 }, { "epoch": 0.012252756870295816, "grad_norm": 1.4174843135919835, "learning_rate": 4.081632653061225e-06, "loss": 0.2393, "step": 420 }, { "epoch": 0.012281930100939378, "grad_norm": 1.2567250097238387, "learning_rate": 4.0913508260447035e-06, "loss": 0.2574, "step": 421 }, { "epoch": 0.01231110333158294, "grad_norm": 1.3670647007335115, "learning_rate": 4.101068999028184e-06, "loss": 0.2528, "step": 422 }, { "epoch": 0.0123402765622265, "grad_norm": 1.5587689474510382, "learning_rate": 4.110787172011662e-06, "loss": 0.2748, "step": 423 }, { "epoch": 0.012369449792870062, "grad_norm": 1.3272715078796335, "learning_rate": 4.120505344995141e-06, "loss": 0.2565, "step": 424 }, { "epoch": 0.012398623023513624, "grad_norm": 1.2378761782370675, "learning_rate": 4.130223517978621e-06, "loss": 0.2631, "step": 425 }, { "epoch": 0.012427796254157185, "grad_norm": 1.1665020326387956, "learning_rate": 4.139941690962099e-06, "loss": 0.263, "step": 426 }, { "epoch": 0.012456969484800746, "grad_norm": 1.3182377098074065, "learning_rate": 4.1496598639455785e-06, "loss": 0.259, "step": 427 }, { "epoch": 0.012486142715444308, "grad_norm": 1.1516319554886278, "learning_rate": 4.159378036929058e-06, "loss": 0.2442, "step": 428 }, { "epoch": 0.01251531594608787, "grad_norm": 1.0877267499954546, "learning_rate": 4.169096209912537e-06, "loss": 0.2456, "step": 429 }, { "epoch": 0.01254448917673143, "grad_norm": 1.4293359511150157, "learning_rate": 4.178814382896016e-06, "loss": 0.2448, "step": 430 }, { "epoch": 0.012573662407374992, "grad_norm": 1.383345105895979, "learning_rate": 4.188532555879495e-06, "loss": 0.2564, "step": 431 }, { "epoch": 0.012602835638018554, "grad_norm": 1.3677681652027938, "learning_rate": 4.198250728862974e-06, "loss": 0.2483, "step": 432 }, { "epoch": 0.012632008868662116, "grad_norm": 1.3290166256189182, "learning_rate": 4.2079689018464535e-06, "loss": 0.2602, "step": 433 }, { "epoch": 0.012661182099305677, "grad_norm": 1.332224555434403, "learning_rate": 4.217687074829933e-06, "loss": 0.2335, "step": 434 }, { "epoch": 0.012690355329949238, "grad_norm": 1.3313100417461197, "learning_rate": 4.227405247813411e-06, "loss": 0.2392, "step": 435 }, { "epoch": 0.0127195285605928, "grad_norm": 1.2006719353729596, "learning_rate": 4.237123420796891e-06, "loss": 0.2504, "step": 436 }, { "epoch": 0.012748701791236362, "grad_norm": 1.0581534964792212, "learning_rate": 4.24684159378037e-06, "loss": 0.249, "step": 437 }, { "epoch": 0.012777875021879923, "grad_norm": 1.1171627090319456, "learning_rate": 4.256559766763848e-06, "loss": 0.2482, "step": 438 }, { "epoch": 0.012807048252523484, "grad_norm": 1.2435943357855226, "learning_rate": 4.266277939747328e-06, "loss": 0.2691, "step": 439 }, { "epoch": 0.012836221483167046, "grad_norm": 1.2748446085082195, "learning_rate": 4.275996112730807e-06, "loss": 0.2961, "step": 440 }, { "epoch": 0.012865394713810607, "grad_norm": 1.4193221476812536, "learning_rate": 4.2857142857142855e-06, "loss": 0.2467, "step": 441 }, { "epoch": 0.012894567944454169, "grad_norm": 1.1594012561569815, "learning_rate": 4.295432458697766e-06, "loss": 0.2271, "step": 442 }, { "epoch": 0.01292374117509773, "grad_norm": 1.4913336551053733, "learning_rate": 4.305150631681244e-06, "loss": 0.2665, "step": 443 }, { "epoch": 0.012952914405741292, "grad_norm": 1.2669816916780523, "learning_rate": 4.314868804664723e-06, "loss": 0.2363, "step": 444 }, { "epoch": 0.012982087636384853, "grad_norm": 1.3234504651952432, "learning_rate": 4.324586977648203e-06, "loss": 0.2656, "step": 445 }, { "epoch": 0.013011260867028415, "grad_norm": 1.6579390405095555, "learning_rate": 4.334305150631681e-06, "loss": 0.2455, "step": 446 }, { "epoch": 0.013040434097671976, "grad_norm": 1.349419654484547, "learning_rate": 4.3440233236151605e-06, "loss": 0.248, "step": 447 }, { "epoch": 0.013069607328315538, "grad_norm": 1.2588927126523761, "learning_rate": 4.35374149659864e-06, "loss": 0.2478, "step": 448 }, { "epoch": 0.013098780558959099, "grad_norm": 1.357180967939837, "learning_rate": 4.363459669582119e-06, "loss": 0.2391, "step": 449 }, { "epoch": 0.01312795378960266, "grad_norm": 1.3862470942315106, "learning_rate": 4.3731778425655976e-06, "loss": 0.2532, "step": 450 }, { "epoch": 0.013157127020246222, "grad_norm": 1.1208491806284107, "learning_rate": 4.382896015549078e-06, "loss": 0.249, "step": 451 }, { "epoch": 0.013186300250889784, "grad_norm": 1.4265465775294321, "learning_rate": 4.392614188532556e-06, "loss": 0.2598, "step": 452 }, { "epoch": 0.013215473481533345, "grad_norm": 1.3011849065304812, "learning_rate": 4.4023323615160355e-06, "loss": 0.2615, "step": 453 }, { "epoch": 0.013244646712176907, "grad_norm": 1.1182457723949686, "learning_rate": 4.412050534499515e-06, "loss": 0.2468, "step": 454 }, { "epoch": 0.013273819942820468, "grad_norm": 1.08008703791803, "learning_rate": 4.421768707482993e-06, "loss": 0.2381, "step": 455 }, { "epoch": 0.013302993173464029, "grad_norm": 1.2908888359703325, "learning_rate": 4.431486880466473e-06, "loss": 0.2302, "step": 456 }, { "epoch": 0.01333216640410759, "grad_norm": 1.3553893078632153, "learning_rate": 4.441205053449952e-06, "loss": 0.2389, "step": 457 }, { "epoch": 0.013361339634751153, "grad_norm": 1.2980071707761118, "learning_rate": 4.45092322643343e-06, "loss": 0.2318, "step": 458 }, { "epoch": 0.013390512865394714, "grad_norm": 1.3181871945419834, "learning_rate": 4.4606413994169105e-06, "loss": 0.2551, "step": 459 }, { "epoch": 0.013419686096038275, "grad_norm": 1.238630790097823, "learning_rate": 4.470359572400389e-06, "loss": 0.2586, "step": 460 }, { "epoch": 0.013448859326681837, "grad_norm": 1.3362103010683621, "learning_rate": 4.480077745383868e-06, "loss": 0.2602, "step": 461 }, { "epoch": 0.013478032557325399, "grad_norm": 1.0962828772234607, "learning_rate": 4.489795918367348e-06, "loss": 0.243, "step": 462 }, { "epoch": 0.01350720578796896, "grad_norm": 1.2585926563412242, "learning_rate": 4.499514091350826e-06, "loss": 0.2683, "step": 463 }, { "epoch": 0.01353637901861252, "grad_norm": 1.4080036055157774, "learning_rate": 4.509232264334305e-06, "loss": 0.314, "step": 464 }, { "epoch": 0.013565552249256083, "grad_norm": 1.2666446602777268, "learning_rate": 4.518950437317785e-06, "loss": 0.2601, "step": 465 }, { "epoch": 0.013594725479899645, "grad_norm": 1.183584288132124, "learning_rate": 4.528668610301264e-06, "loss": 0.2677, "step": 466 }, { "epoch": 0.013623898710543205, "grad_norm": 1.2559272580191174, "learning_rate": 4.5383867832847425e-06, "loss": 0.2623, "step": 467 }, { "epoch": 0.013653071941186767, "grad_norm": 1.2137655364874729, "learning_rate": 4.548104956268222e-06, "loss": 0.3135, "step": 468 }, { "epoch": 0.013682245171830329, "grad_norm": 1.0815132249305057, "learning_rate": 4.557823129251701e-06, "loss": 0.2281, "step": 469 }, { "epoch": 0.01371141840247389, "grad_norm": 1.1782532527157554, "learning_rate": 4.56754130223518e-06, "loss": 0.2367, "step": 470 }, { "epoch": 0.01374059163311745, "grad_norm": 1.4533580084977513, "learning_rate": 4.57725947521866e-06, "loss": 0.2484, "step": 471 }, { "epoch": 0.013769764863761013, "grad_norm": 1.3583985599668706, "learning_rate": 4.586977648202138e-06, "loss": 0.2664, "step": 472 }, { "epoch": 0.013798938094404575, "grad_norm": 1.2708757427546593, "learning_rate": 4.5966958211856175e-06, "loss": 0.2355, "step": 473 }, { "epoch": 0.013828111325048137, "grad_norm": 1.198494775196898, "learning_rate": 4.606413994169097e-06, "loss": 0.2299, "step": 474 }, { "epoch": 0.013857284555691697, "grad_norm": 1.5538009585829942, "learning_rate": 4.616132167152575e-06, "loss": 0.2457, "step": 475 }, { "epoch": 0.013886457786335259, "grad_norm": 1.2367763455447653, "learning_rate": 4.6258503401360546e-06, "loss": 0.2527, "step": 476 }, { "epoch": 0.01391563101697882, "grad_norm": 1.143066225777548, "learning_rate": 4.635568513119534e-06, "loss": 0.2428, "step": 477 }, { "epoch": 0.013944804247622383, "grad_norm": 1.1629123562353916, "learning_rate": 4.645286686103013e-06, "loss": 0.2441, "step": 478 }, { "epoch": 0.013973977478265943, "grad_norm": 1.2925898615315257, "learning_rate": 4.6550048590864925e-06, "loss": 0.262, "step": 479 }, { "epoch": 0.014003150708909505, "grad_norm": 1.300482449456053, "learning_rate": 4.664723032069971e-06, "loss": 0.24, "step": 480 }, { "epoch": 0.014032323939553067, "grad_norm": 1.4025190963822738, "learning_rate": 4.67444120505345e-06, "loss": 0.2551, "step": 481 }, { "epoch": 0.014061497170196627, "grad_norm": 1.335127065360521, "learning_rate": 4.68415937803693e-06, "loss": 0.235, "step": 482 }, { "epoch": 0.014090670400840189, "grad_norm": 1.0738846808453941, "learning_rate": 4.693877551020409e-06, "loss": 0.2362, "step": 483 }, { "epoch": 0.01411984363148375, "grad_norm": 1.2366364177512028, "learning_rate": 4.703595724003887e-06, "loss": 0.2267, "step": 484 }, { "epoch": 0.014149016862127313, "grad_norm": 1.568015498853348, "learning_rate": 4.713313896987367e-06, "loss": 0.2549, "step": 485 }, { "epoch": 0.014178190092770873, "grad_norm": 0.99074606036659, "learning_rate": 4.723032069970846e-06, "loss": 0.2484, "step": 486 }, { "epoch": 0.014207363323414435, "grad_norm": 1.2545910290085318, "learning_rate": 4.7327502429543244e-06, "loss": 0.2631, "step": 487 }, { "epoch": 0.014236536554057997, "grad_norm": 1.3838940278828762, "learning_rate": 4.742468415937805e-06, "loss": 0.2561, "step": 488 }, { "epoch": 0.014265709784701559, "grad_norm": 1.3331219246766512, "learning_rate": 4.752186588921283e-06, "loss": 0.2545, "step": 489 }, { "epoch": 0.014294883015345119, "grad_norm": 1.5306031190582838, "learning_rate": 4.761904761904762e-06, "loss": 0.2667, "step": 490 }, { "epoch": 0.01432405624598868, "grad_norm": 1.1863511505688096, "learning_rate": 4.771622934888242e-06, "loss": 0.245, "step": 491 }, { "epoch": 0.014353229476632243, "grad_norm": 1.2939705851848529, "learning_rate": 4.78134110787172e-06, "loss": 0.2342, "step": 492 }, { "epoch": 0.014382402707275803, "grad_norm": 1.2050040257403767, "learning_rate": 4.7910592808551995e-06, "loss": 0.2233, "step": 493 }, { "epoch": 0.014411575937919365, "grad_norm": 1.2670366063806675, "learning_rate": 4.800777453838679e-06, "loss": 0.2548, "step": 494 }, { "epoch": 0.014440749168562927, "grad_norm": 1.4186837559720642, "learning_rate": 4.810495626822158e-06, "loss": 0.2634, "step": 495 }, { "epoch": 0.014469922399206489, "grad_norm": 1.2092502080757692, "learning_rate": 4.820213799805637e-06, "loss": 0.245, "step": 496 }, { "epoch": 0.014499095629850049, "grad_norm": 1.2523718195794546, "learning_rate": 4.829931972789116e-06, "loss": 0.2804, "step": 497 }, { "epoch": 0.01452826886049361, "grad_norm": 1.2254633096437888, "learning_rate": 4.839650145772595e-06, "loss": 0.231, "step": 498 }, { "epoch": 0.014557442091137173, "grad_norm": 1.2755412540876685, "learning_rate": 4.8493683187560745e-06, "loss": 0.2798, "step": 499 }, { "epoch": 0.014586615321780735, "grad_norm": 1.6972872186718777, "learning_rate": 4.859086491739554e-06, "loss": 0.2423, "step": 500 }, { "epoch": 0.014615788552424295, "grad_norm": 1.49574324230808, "learning_rate": 4.868804664723032e-06, "loss": 0.2727, "step": 501 }, { "epoch": 0.014644961783067857, "grad_norm": 1.4688837322711812, "learning_rate": 4.8785228377065116e-06, "loss": 0.2799, "step": 502 }, { "epoch": 0.014674135013711419, "grad_norm": 1.3040968475741357, "learning_rate": 4.888241010689991e-06, "loss": 0.2468, "step": 503 }, { "epoch": 0.01470330824435498, "grad_norm": 1.399788268983685, "learning_rate": 4.897959183673469e-06, "loss": 0.2456, "step": 504 }, { "epoch": 0.014732481474998541, "grad_norm": 1.198851293120145, "learning_rate": 4.9076773566569495e-06, "loss": 0.2221, "step": 505 }, { "epoch": 0.014761654705642103, "grad_norm": 1.2491295874169632, "learning_rate": 4.917395529640428e-06, "loss": 0.2491, "step": 506 }, { "epoch": 0.014790827936285665, "grad_norm": 1.4679610151184677, "learning_rate": 4.927113702623907e-06, "loss": 0.2495, "step": 507 }, { "epoch": 0.014820001166929225, "grad_norm": 1.5826969791489809, "learning_rate": 4.936831875607387e-06, "loss": 0.2608, "step": 508 }, { "epoch": 0.014849174397572787, "grad_norm": 1.3592760935131265, "learning_rate": 4.946550048590865e-06, "loss": 0.2283, "step": 509 }, { "epoch": 0.014878347628216349, "grad_norm": 1.2970047256431163, "learning_rate": 4.956268221574344e-06, "loss": 0.2588, "step": 510 }, { "epoch": 0.01490752085885991, "grad_norm": 1.2880449054790777, "learning_rate": 4.965986394557824e-06, "loss": 0.2641, "step": 511 }, { "epoch": 0.014936694089503471, "grad_norm": 1.0672005591191107, "learning_rate": 4.975704567541302e-06, "loss": 0.2479, "step": 512 }, { "epoch": 0.014965867320147033, "grad_norm": 1.2408070495317607, "learning_rate": 4.9854227405247814e-06, "loss": 0.237, "step": 513 }, { "epoch": 0.014995040550790595, "grad_norm": 1.4600327611566248, "learning_rate": 4.995140913508261e-06, "loss": 0.2462, "step": 514 }, { "epoch": 0.015024213781434157, "grad_norm": 1.0322934537143222, "learning_rate": 5.00485908649174e-06, "loss": 0.2282, "step": 515 }, { "epoch": 0.015053387012077717, "grad_norm": 1.17271083022763, "learning_rate": 5.014577259475219e-06, "loss": 0.2335, "step": 516 }, { "epoch": 0.015082560242721279, "grad_norm": 1.2339027239877591, "learning_rate": 5.024295432458698e-06, "loss": 0.2605, "step": 517 }, { "epoch": 0.01511173347336484, "grad_norm": 1.343196723311313, "learning_rate": 5.034013605442177e-06, "loss": 0.2563, "step": 518 }, { "epoch": 0.015140906704008401, "grad_norm": 1.2042926311020554, "learning_rate": 5.0437317784256565e-06, "loss": 0.2351, "step": 519 }, { "epoch": 0.015170079934651963, "grad_norm": 1.2392118146441158, "learning_rate": 5.053449951409135e-06, "loss": 0.2681, "step": 520 }, { "epoch": 0.015199253165295525, "grad_norm": 1.1642156874413723, "learning_rate": 5.063168124392614e-06, "loss": 0.2342, "step": 521 }, { "epoch": 0.015228426395939087, "grad_norm": 1.0936206726276263, "learning_rate": 5.0728862973760935e-06, "loss": 0.2354, "step": 522 }, { "epoch": 0.015257599626582647, "grad_norm": 1.1784572696574878, "learning_rate": 5.082604470359572e-06, "loss": 0.2315, "step": 523 }, { "epoch": 0.015286772857226209, "grad_norm": 1.4018108993214766, "learning_rate": 5.092322643343051e-06, "loss": 0.2559, "step": 524 }, { "epoch": 0.015315946087869771, "grad_norm": 1.4868977642568852, "learning_rate": 5.1020408163265315e-06, "loss": 0.2738, "step": 525 }, { "epoch": 0.015345119318513333, "grad_norm": 1.0975374744514939, "learning_rate": 5.111758989310011e-06, "loss": 0.2424, "step": 526 }, { "epoch": 0.015374292549156893, "grad_norm": 1.1451405719954026, "learning_rate": 5.121477162293489e-06, "loss": 0.2563, "step": 527 }, { "epoch": 0.015403465779800455, "grad_norm": 1.2648820762237045, "learning_rate": 5.1311953352769686e-06, "loss": 0.2718, "step": 528 }, { "epoch": 0.015432639010444017, "grad_norm": 1.0871343834254525, "learning_rate": 5.140913508260448e-06, "loss": 0.2541, "step": 529 }, { "epoch": 0.015461812241087579, "grad_norm": 1.3389746879539115, "learning_rate": 5.150631681243926e-06, "loss": 0.2263, "step": 530 }, { "epoch": 0.015490985471731139, "grad_norm": 1.3609250813513585, "learning_rate": 5.160349854227406e-06, "loss": 0.2366, "step": 531 }, { "epoch": 0.015520158702374701, "grad_norm": 1.3349704582365165, "learning_rate": 5.170068027210885e-06, "loss": 0.2535, "step": 532 }, { "epoch": 0.015549331933018263, "grad_norm": 1.3591505569965345, "learning_rate": 5.179786200194364e-06, "loss": 0.2533, "step": 533 }, { "epoch": 0.015578505163661823, "grad_norm": 1.0546896668336805, "learning_rate": 5.189504373177843e-06, "loss": 0.2312, "step": 534 }, { "epoch": 0.015607678394305385, "grad_norm": 1.1642247748102443, "learning_rate": 5.199222546161322e-06, "loss": 0.2533, "step": 535 }, { "epoch": 0.015636851624948947, "grad_norm": 1.2734641669395699, "learning_rate": 5.208940719144801e-06, "loss": 0.2498, "step": 536 }, { "epoch": 0.015666024855592507, "grad_norm": 1.0357302408386122, "learning_rate": 5.21865889212828e-06, "loss": 0.2141, "step": 537 }, { "epoch": 0.01569519808623607, "grad_norm": 1.0304599709978406, "learning_rate": 5.228377065111759e-06, "loss": 0.2341, "step": 538 }, { "epoch": 0.01572437131687963, "grad_norm": 1.4047972859558309, "learning_rate": 5.2380952380952384e-06, "loss": 0.2325, "step": 539 }, { "epoch": 0.01575354454752319, "grad_norm": 1.3253297357545974, "learning_rate": 5.247813411078717e-06, "loss": 0.2495, "step": 540 }, { "epoch": 0.015782717778166755, "grad_norm": 1.0493019209181853, "learning_rate": 5.257531584062196e-06, "loss": 0.2275, "step": 541 }, { "epoch": 0.015811891008810315, "grad_norm": 1.3123986483884977, "learning_rate": 5.267249757045676e-06, "loss": 0.2218, "step": 542 }, { "epoch": 0.01584106423945388, "grad_norm": 1.515493491087858, "learning_rate": 5.276967930029156e-06, "loss": 0.2488, "step": 543 }, { "epoch": 0.01587023747009744, "grad_norm": 1.1272225662027693, "learning_rate": 5.286686103012634e-06, "loss": 0.2349, "step": 544 }, { "epoch": 0.015899410700741, "grad_norm": 1.1164426331892674, "learning_rate": 5.2964042759961135e-06, "loss": 0.261, "step": 545 }, { "epoch": 0.015928583931384563, "grad_norm": 1.2763851002221152, "learning_rate": 5.306122448979593e-06, "loss": 0.2466, "step": 546 }, { "epoch": 0.015957757162028123, "grad_norm": 1.2014379229596053, "learning_rate": 5.315840621963071e-06, "loss": 0.2335, "step": 547 }, { "epoch": 0.015986930392671683, "grad_norm": 1.2100206293147397, "learning_rate": 5.3255587949465505e-06, "loss": 0.2506, "step": 548 }, { "epoch": 0.016016103623315247, "grad_norm": 1.3275336886079625, "learning_rate": 5.33527696793003e-06, "loss": 0.2634, "step": 549 }, { "epoch": 0.016045276853958807, "grad_norm": 1.3174878416843403, "learning_rate": 5.344995140913509e-06, "loss": 0.2299, "step": 550 }, { "epoch": 0.016074450084602367, "grad_norm": 0.9008258507259723, "learning_rate": 5.354713313896988e-06, "loss": 0.2384, "step": 551 }, { "epoch": 0.01610362331524593, "grad_norm": 1.3084802157410997, "learning_rate": 5.364431486880467e-06, "loss": 0.2366, "step": 552 }, { "epoch": 0.01613279654588949, "grad_norm": 1.2136094468462453, "learning_rate": 5.374149659863946e-06, "loss": 0.2624, "step": 553 }, { "epoch": 0.016161969776533055, "grad_norm": 1.4392730033686674, "learning_rate": 5.383867832847425e-06, "loss": 0.2801, "step": 554 }, { "epoch": 0.016191143007176615, "grad_norm": 1.2542445458376885, "learning_rate": 5.393586005830904e-06, "loss": 0.2458, "step": 555 }, { "epoch": 0.016220316237820175, "grad_norm": 1.34420603075901, "learning_rate": 5.403304178814383e-06, "loss": 0.2392, "step": 556 }, { "epoch": 0.01624948946846374, "grad_norm": 1.3102475035554522, "learning_rate": 5.413022351797862e-06, "loss": 0.2572, "step": 557 }, { "epoch": 0.0162786626991073, "grad_norm": 1.2479299081465867, "learning_rate": 5.422740524781341e-06, "loss": 0.2454, "step": 558 }, { "epoch": 0.01630783592975086, "grad_norm": 1.4107698889438265, "learning_rate": 5.4324586977648204e-06, "loss": 0.2442, "step": 559 }, { "epoch": 0.016337009160394423, "grad_norm": 1.4361667300580239, "learning_rate": 5.442176870748301e-06, "loss": 0.2634, "step": 560 }, { "epoch": 0.016366182391037983, "grad_norm": 1.072946103830777, "learning_rate": 5.451895043731778e-06, "loss": 0.2227, "step": 561 }, { "epoch": 0.016395355621681543, "grad_norm": 1.1858106557560961, "learning_rate": 5.461613216715258e-06, "loss": 0.2203, "step": 562 }, { "epoch": 0.016424528852325107, "grad_norm": 1.1162929363472966, "learning_rate": 5.471331389698738e-06, "loss": 0.2395, "step": 563 }, { "epoch": 0.016453702082968667, "grad_norm": 1.1884848177255527, "learning_rate": 5.481049562682216e-06, "loss": 0.2282, "step": 564 }, { "epoch": 0.01648287531361223, "grad_norm": 1.1283973242586844, "learning_rate": 5.4907677356656954e-06, "loss": 0.2603, "step": 565 }, { "epoch": 0.01651204854425579, "grad_norm": 1.354333277408854, "learning_rate": 5.500485908649175e-06, "loss": 0.2459, "step": 566 }, { "epoch": 0.01654122177489935, "grad_norm": 1.325314031879747, "learning_rate": 5.510204081632653e-06, "loss": 0.2178, "step": 567 }, { "epoch": 0.016570395005542915, "grad_norm": 1.11324996018437, "learning_rate": 5.5199222546161325e-06, "loss": 0.2338, "step": 568 }, { "epoch": 0.016599568236186475, "grad_norm": 1.3652803222148933, "learning_rate": 5.529640427599612e-06, "loss": 0.2141, "step": 569 }, { "epoch": 0.016628741466830035, "grad_norm": 1.5019331891505299, "learning_rate": 5.539358600583091e-06, "loss": 0.2889, "step": 570 }, { "epoch": 0.0166579146974736, "grad_norm": 1.2501896889679502, "learning_rate": 5.54907677356657e-06, "loss": 0.2358, "step": 571 }, { "epoch": 0.01668708792811716, "grad_norm": 1.295296482077547, "learning_rate": 5.558794946550049e-06, "loss": 0.25, "step": 572 }, { "epoch": 0.01671626115876072, "grad_norm": 1.1903028277520935, "learning_rate": 5.568513119533528e-06, "loss": 0.2442, "step": 573 }, { "epoch": 0.016745434389404283, "grad_norm": 1.308995566767906, "learning_rate": 5.578231292517007e-06, "loss": 0.2548, "step": 574 }, { "epoch": 0.016774607620047843, "grad_norm": 1.3248921047172681, "learning_rate": 5.587949465500486e-06, "loss": 0.2714, "step": 575 }, { "epoch": 0.016803780850691407, "grad_norm": 1.2266592847317053, "learning_rate": 5.597667638483965e-06, "loss": 0.2275, "step": 576 }, { "epoch": 0.016832954081334967, "grad_norm": 1.399214573599055, "learning_rate": 5.6073858114674455e-06, "loss": 0.2321, "step": 577 }, { "epoch": 0.016862127311978527, "grad_norm": 1.22807791517516, "learning_rate": 5.617103984450923e-06, "loss": 0.2557, "step": 578 }, { "epoch": 0.01689130054262209, "grad_norm": 1.020201564208349, "learning_rate": 5.626822157434403e-06, "loss": 0.2544, "step": 579 }, { "epoch": 0.01692047377326565, "grad_norm": 1.15048463646873, "learning_rate": 5.6365403304178826e-06, "loss": 0.2258, "step": 580 }, { "epoch": 0.01694964700390921, "grad_norm": 1.3848293352426153, "learning_rate": 5.646258503401361e-06, "loss": 0.2637, "step": 581 }, { "epoch": 0.016978820234552775, "grad_norm": 1.2734601804541426, "learning_rate": 5.65597667638484e-06, "loss": 0.2179, "step": 582 }, { "epoch": 0.017007993465196335, "grad_norm": 1.05233161257467, "learning_rate": 5.66569484936832e-06, "loss": 0.2198, "step": 583 }, { "epoch": 0.0170371666958399, "grad_norm": 1.2250757368811662, "learning_rate": 5.675413022351798e-06, "loss": 0.2452, "step": 584 }, { "epoch": 0.01706633992648346, "grad_norm": 1.2896924036488904, "learning_rate": 5.6851311953352774e-06, "loss": 0.2414, "step": 585 }, { "epoch": 0.01709551315712702, "grad_norm": 1.0976720248422727, "learning_rate": 5.694849368318757e-06, "loss": 0.2263, "step": 586 }, { "epoch": 0.017124686387770583, "grad_norm": 1.0181006871906253, "learning_rate": 5.704567541302236e-06, "loss": 0.2573, "step": 587 }, { "epoch": 0.017153859618414143, "grad_norm": 1.2622292761764857, "learning_rate": 5.7142857142857145e-06, "loss": 0.2535, "step": 588 }, { "epoch": 0.017183032849057703, "grad_norm": 1.3264411006970873, "learning_rate": 5.724003887269194e-06, "loss": 0.2532, "step": 589 }, { "epoch": 0.017212206079701267, "grad_norm": 1.3876971135647425, "learning_rate": 5.733722060252673e-06, "loss": 0.2511, "step": 590 }, { "epoch": 0.017241379310344827, "grad_norm": 0.9140296006283072, "learning_rate": 5.743440233236152e-06, "loss": 0.2181, "step": 591 }, { "epoch": 0.017270552540988388, "grad_norm": 1.1627511150725285, "learning_rate": 5.753158406219631e-06, "loss": 0.2426, "step": 592 }, { "epoch": 0.01729972577163195, "grad_norm": 0.9866981750071782, "learning_rate": 5.76287657920311e-06, "loss": 0.2276, "step": 593 }, { "epoch": 0.01732889900227551, "grad_norm": 1.007490986657743, "learning_rate": 5.7725947521865895e-06, "loss": 0.2073, "step": 594 }, { "epoch": 0.017358072232919075, "grad_norm": 1.2474666067068907, "learning_rate": 5.782312925170068e-06, "loss": 0.2378, "step": 595 }, { "epoch": 0.017387245463562635, "grad_norm": 1.279196133454035, "learning_rate": 5.792031098153547e-06, "loss": 0.2607, "step": 596 }, { "epoch": 0.017416418694206195, "grad_norm": 1.1945455195443986, "learning_rate": 5.8017492711370275e-06, "loss": 0.2442, "step": 597 }, { "epoch": 0.01744559192484976, "grad_norm": 1.420540164405751, "learning_rate": 5.811467444120505e-06, "loss": 0.2123, "step": 598 }, { "epoch": 0.01747476515549332, "grad_norm": 1.3801792520800733, "learning_rate": 5.821185617103985e-06, "loss": 0.2866, "step": 599 }, { "epoch": 0.01750393838613688, "grad_norm": 1.4450475245321002, "learning_rate": 5.8309037900874645e-06, "loss": 0.2689, "step": 600 }, { "epoch": 0.017533111616780443, "grad_norm": 1.4434020910980732, "learning_rate": 5.840621963070943e-06, "loss": 0.2461, "step": 601 }, { "epoch": 0.017562284847424003, "grad_norm": 1.170915524215105, "learning_rate": 5.850340136054422e-06, "loss": 0.2304, "step": 602 }, { "epoch": 0.017591458078067564, "grad_norm": 1.184787623641522, "learning_rate": 5.860058309037902e-06, "loss": 0.2216, "step": 603 }, { "epoch": 0.017620631308711127, "grad_norm": 1.4226813433172771, "learning_rate": 5.869776482021381e-06, "loss": 0.266, "step": 604 }, { "epoch": 0.017649804539354687, "grad_norm": 1.1701452340689182, "learning_rate": 5.879494655004859e-06, "loss": 0.2365, "step": 605 }, { "epoch": 0.01767897776999825, "grad_norm": 1.1314185813102013, "learning_rate": 5.889212827988339e-06, "loss": 0.2279, "step": 606 }, { "epoch": 0.01770815100064181, "grad_norm": 1.3417308850278244, "learning_rate": 5.898931000971818e-06, "loss": 0.2297, "step": 607 }, { "epoch": 0.01773732423128537, "grad_norm": 1.1774011258844665, "learning_rate": 5.9086491739552965e-06, "loss": 0.2268, "step": 608 }, { "epoch": 0.017766497461928935, "grad_norm": 1.1483123651885505, "learning_rate": 5.918367346938776e-06, "loss": 0.261, "step": 609 }, { "epoch": 0.017795670692572495, "grad_norm": 1.0659224829575464, "learning_rate": 5.928085519922255e-06, "loss": 0.2557, "step": 610 }, { "epoch": 0.017824843923216056, "grad_norm": 1.3453124406569783, "learning_rate": 5.937803692905734e-06, "loss": 0.2597, "step": 611 }, { "epoch": 0.01785401715385962, "grad_norm": 0.9446040810558779, "learning_rate": 5.947521865889213e-06, "loss": 0.2445, "step": 612 }, { "epoch": 0.01788319038450318, "grad_norm": 1.075287696623533, "learning_rate": 5.957240038872692e-06, "loss": 0.2346, "step": 613 }, { "epoch": 0.01791236361514674, "grad_norm": 1.1094662802964779, "learning_rate": 5.966958211856172e-06, "loss": 0.2217, "step": 614 }, { "epoch": 0.017941536845790303, "grad_norm": 1.4239082627795743, "learning_rate": 5.97667638483965e-06, "loss": 0.2595, "step": 615 }, { "epoch": 0.017970710076433864, "grad_norm": 1.4489530560590527, "learning_rate": 5.98639455782313e-06, "loss": 0.2404, "step": 616 }, { "epoch": 0.017999883307077427, "grad_norm": 1.1926829007742819, "learning_rate": 5.9961127308066094e-06, "loss": 0.2334, "step": 617 }, { "epoch": 0.018029056537720987, "grad_norm": 1.2726694122969338, "learning_rate": 6.005830903790088e-06, "loss": 0.2362, "step": 618 }, { "epoch": 0.018058229768364548, "grad_norm": 1.2354341099058872, "learning_rate": 6.015549076773567e-06, "loss": 0.236, "step": 619 }, { "epoch": 0.01808740299900811, "grad_norm": 1.1685072760117496, "learning_rate": 6.0252672497570465e-06, "loss": 0.2352, "step": 620 }, { "epoch": 0.01811657622965167, "grad_norm": 1.334388794918696, "learning_rate": 6.034985422740526e-06, "loss": 0.2369, "step": 621 }, { "epoch": 0.01814574946029523, "grad_norm": 1.2956117401686313, "learning_rate": 6.044703595724004e-06, "loss": 0.2287, "step": 622 }, { "epoch": 0.018174922690938795, "grad_norm": 1.0273512689823068, "learning_rate": 6.054421768707484e-06, "loss": 0.2498, "step": 623 }, { "epoch": 0.018204095921582356, "grad_norm": 1.2532451149411814, "learning_rate": 6.064139941690963e-06, "loss": 0.2438, "step": 624 }, { "epoch": 0.018233269152225916, "grad_norm": 1.2453640640748649, "learning_rate": 6.073858114674441e-06, "loss": 0.2363, "step": 625 }, { "epoch": 0.01826244238286948, "grad_norm": 1.0258445071379283, "learning_rate": 6.083576287657921e-06, "loss": 0.2254, "step": 626 }, { "epoch": 0.01829161561351304, "grad_norm": 1.1176272783245296, "learning_rate": 6.0932944606414e-06, "loss": 0.2287, "step": 627 }, { "epoch": 0.018320788844156603, "grad_norm": 0.9938821905647645, "learning_rate": 6.1030126336248785e-06, "loss": 0.232, "step": 628 }, { "epoch": 0.018349962074800164, "grad_norm": 1.2124939329163622, "learning_rate": 6.112730806608358e-06, "loss": 0.2254, "step": 629 }, { "epoch": 0.018379135305443724, "grad_norm": 1.2653067642151536, "learning_rate": 6.122448979591837e-06, "loss": 0.2331, "step": 630 }, { "epoch": 0.018408308536087287, "grad_norm": 1.2801920844112518, "learning_rate": 6.132167152575316e-06, "loss": 0.2646, "step": 631 }, { "epoch": 0.018437481766730848, "grad_norm": 1.3997481743012654, "learning_rate": 6.141885325558795e-06, "loss": 0.2271, "step": 632 }, { "epoch": 0.018466654997374408, "grad_norm": 1.0788089008051882, "learning_rate": 6.151603498542274e-06, "loss": 0.231, "step": 633 }, { "epoch": 0.01849582822801797, "grad_norm": 1.1463139231059625, "learning_rate": 6.161321671525754e-06, "loss": 0.2745, "step": 634 }, { "epoch": 0.01852500145866153, "grad_norm": 1.5979156001730184, "learning_rate": 6.171039844509232e-06, "loss": 0.2569, "step": 635 }, { "epoch": 0.018554174689305095, "grad_norm": 1.179192085643085, "learning_rate": 6.180758017492712e-06, "loss": 0.2307, "step": 636 }, { "epoch": 0.018583347919948656, "grad_norm": 0.9235492089736143, "learning_rate": 6.1904761904761914e-06, "loss": 0.2272, "step": 637 }, { "epoch": 0.018612521150592216, "grad_norm": 1.3841914000140947, "learning_rate": 6.200194363459671e-06, "loss": 0.2366, "step": 638 }, { "epoch": 0.01864169438123578, "grad_norm": 1.6345443503321042, "learning_rate": 6.209912536443149e-06, "loss": 0.2582, "step": 639 }, { "epoch": 0.01867086761187934, "grad_norm": 0.991943928840273, "learning_rate": 6.2196307094266285e-06, "loss": 0.2453, "step": 640 }, { "epoch": 0.0187000408425229, "grad_norm": 1.0783813604538777, "learning_rate": 6.229348882410108e-06, "loss": 0.2411, "step": 641 }, { "epoch": 0.018729214073166463, "grad_norm": 1.1270768208689745, "learning_rate": 6.239067055393586e-06, "loss": 0.2298, "step": 642 }, { "epoch": 0.018758387303810024, "grad_norm": 1.5482563859839245, "learning_rate": 6.248785228377066e-06, "loss": 0.2603, "step": 643 }, { "epoch": 0.018787560534453584, "grad_norm": 1.1322433743515374, "learning_rate": 6.258503401360545e-06, "loss": 0.2231, "step": 644 }, { "epoch": 0.018816733765097148, "grad_norm": 1.1655810249844738, "learning_rate": 6.268221574344023e-06, "loss": 0.2245, "step": 645 }, { "epoch": 0.018845906995740708, "grad_norm": 1.302144670429167, "learning_rate": 6.277939747327503e-06, "loss": 0.2509, "step": 646 }, { "epoch": 0.01887508022638427, "grad_norm": 1.3650227046508312, "learning_rate": 6.287657920310982e-06, "loss": 0.2391, "step": 647 }, { "epoch": 0.01890425345702783, "grad_norm": 1.2382916532992942, "learning_rate": 6.297376093294461e-06, "loss": 0.2299, "step": 648 }, { "epoch": 0.018933426687671392, "grad_norm": 1.2406123637674724, "learning_rate": 6.30709426627794e-06, "loss": 0.2179, "step": 649 }, { "epoch": 0.018962599918314955, "grad_norm": 1.1771581053389553, "learning_rate": 6.316812439261419e-06, "loss": 0.228, "step": 650 }, { "epoch": 0.018991773148958516, "grad_norm": 1.1978049273503264, "learning_rate": 6.326530612244899e-06, "loss": 0.2345, "step": 651 }, { "epoch": 0.019020946379602076, "grad_norm": 1.0438236362338311, "learning_rate": 6.336248785228377e-06, "loss": 0.2301, "step": 652 }, { "epoch": 0.01905011961024564, "grad_norm": 1.2392735048781178, "learning_rate": 6.345966958211857e-06, "loss": 0.2241, "step": 653 }, { "epoch": 0.0190792928408892, "grad_norm": 1.2713076329828823, "learning_rate": 6.355685131195336e-06, "loss": 0.2511, "step": 654 }, { "epoch": 0.01910846607153276, "grad_norm": 1.0998516143216948, "learning_rate": 6.365403304178814e-06, "loss": 0.2352, "step": 655 }, { "epoch": 0.019137639302176324, "grad_norm": 1.4780548053617104, "learning_rate": 6.375121477162294e-06, "loss": 0.2953, "step": 656 }, { "epoch": 0.019166812532819884, "grad_norm": 1.0594420018595556, "learning_rate": 6.384839650145773e-06, "loss": 0.2081, "step": 657 }, { "epoch": 0.019195985763463447, "grad_norm": 1.0429732466991684, "learning_rate": 6.394557823129253e-06, "loss": 0.2488, "step": 658 }, { "epoch": 0.019225158994107008, "grad_norm": 1.23883413417218, "learning_rate": 6.404275996112731e-06, "loss": 0.2444, "step": 659 }, { "epoch": 0.019254332224750568, "grad_norm": 1.2195983025524706, "learning_rate": 6.4139941690962105e-06, "loss": 0.2385, "step": 660 }, { "epoch": 0.01928350545539413, "grad_norm": 1.4125414694072445, "learning_rate": 6.42371234207969e-06, "loss": 0.2599, "step": 661 }, { "epoch": 0.01931267868603769, "grad_norm": 1.3614194178922787, "learning_rate": 6.433430515063168e-06, "loss": 0.2275, "step": 662 }, { "epoch": 0.019341851916681252, "grad_norm": 1.2375400770445757, "learning_rate": 6.443148688046648e-06, "loss": 0.2279, "step": 663 }, { "epoch": 0.019371025147324816, "grad_norm": 1.541586033440628, "learning_rate": 6.452866861030127e-06, "loss": 0.2657, "step": 664 }, { "epoch": 0.019400198377968376, "grad_norm": 1.2173335817869875, "learning_rate": 6.462585034013606e-06, "loss": 0.2331, "step": 665 }, { "epoch": 0.019429371608611936, "grad_norm": 0.9510361983667885, "learning_rate": 6.472303206997085e-06, "loss": 0.2238, "step": 666 }, { "epoch": 0.0194585448392555, "grad_norm": 1.2748878906489571, "learning_rate": 6.482021379980564e-06, "loss": 0.2486, "step": 667 }, { "epoch": 0.01948771806989906, "grad_norm": 1.4634602527052192, "learning_rate": 6.491739552964043e-06, "loss": 0.2657, "step": 668 }, { "epoch": 0.019516891300542624, "grad_norm": 0.899820820063537, "learning_rate": 6.501457725947522e-06, "loss": 0.2149, "step": 669 }, { "epoch": 0.019546064531186184, "grad_norm": 1.1159452835771118, "learning_rate": 6.511175898931001e-06, "loss": 0.2526, "step": 670 }, { "epoch": 0.019575237761829744, "grad_norm": 1.8587001243768357, "learning_rate": 6.520894071914481e-06, "loss": 0.2368, "step": 671 }, { "epoch": 0.019604410992473308, "grad_norm": 1.1995907976290137, "learning_rate": 6.530612244897959e-06, "loss": 0.2454, "step": 672 }, { "epoch": 0.019633584223116868, "grad_norm": 1.1396371491198773, "learning_rate": 6.540330417881439e-06, "loss": 0.2436, "step": 673 }, { "epoch": 0.019662757453760428, "grad_norm": 1.0339295633577104, "learning_rate": 6.550048590864918e-06, "loss": 0.2352, "step": 674 }, { "epoch": 0.01969193068440399, "grad_norm": 1.3780473198007388, "learning_rate": 6.559766763848398e-06, "loss": 0.2559, "step": 675 }, { "epoch": 0.019721103915047552, "grad_norm": 1.0840698315294541, "learning_rate": 6.569484936831876e-06, "loss": 0.2264, "step": 676 }, { "epoch": 0.019750277145691116, "grad_norm": 1.1117692534296524, "learning_rate": 6.579203109815355e-06, "loss": 0.2306, "step": 677 }, { "epoch": 0.019779450376334676, "grad_norm": 1.2177199856031593, "learning_rate": 6.588921282798835e-06, "loss": 0.2173, "step": 678 }, { "epoch": 0.019808623606978236, "grad_norm": 1.4263377351884565, "learning_rate": 6.598639455782313e-06, "loss": 0.2632, "step": 679 }, { "epoch": 0.0198377968376218, "grad_norm": 1.1117249106111613, "learning_rate": 6.6083576287657925e-06, "loss": 0.2449, "step": 680 }, { "epoch": 0.01986697006826536, "grad_norm": 1.3031984868239914, "learning_rate": 6.618075801749272e-06, "loss": 0.2645, "step": 681 }, { "epoch": 0.01989614329890892, "grad_norm": 1.3056784859426693, "learning_rate": 6.627793974732751e-06, "loss": 0.2322, "step": 682 }, { "epoch": 0.019925316529552484, "grad_norm": 0.9106923138955538, "learning_rate": 6.6375121477162296e-06, "loss": 0.2255, "step": 683 }, { "epoch": 0.019954489760196044, "grad_norm": 1.154730924057303, "learning_rate": 6.647230320699709e-06, "loss": 0.2543, "step": 684 }, { "epoch": 0.019983662990839604, "grad_norm": 1.040696964643687, "learning_rate": 6.656948493683188e-06, "loss": 0.2235, "step": 685 }, { "epoch": 0.020012836221483168, "grad_norm": 1.1673430548551929, "learning_rate": 6.666666666666667e-06, "loss": 0.2393, "step": 686 }, { "epoch": 0.020042009452126728, "grad_norm": 1.0125759625993465, "learning_rate": 6.676384839650146e-06, "loss": 0.2178, "step": 687 }, { "epoch": 0.02007118268277029, "grad_norm": 1.3676924651707596, "learning_rate": 6.686103012633626e-06, "loss": 0.252, "step": 688 }, { "epoch": 0.020100355913413852, "grad_norm": 1.1636943279474745, "learning_rate": 6.695821185617104e-06, "loss": 0.2342, "step": 689 }, { "epoch": 0.020129529144057412, "grad_norm": 1.2277204714048011, "learning_rate": 6.705539358600584e-06, "loss": 0.2505, "step": 690 }, { "epoch": 0.020158702374700976, "grad_norm": 1.2978532674924002, "learning_rate": 6.715257531584063e-06, "loss": 0.2369, "step": 691 }, { "epoch": 0.020187875605344536, "grad_norm": 1.1769595666714598, "learning_rate": 6.7249757045675425e-06, "loss": 0.2515, "step": 692 }, { "epoch": 0.020217048835988096, "grad_norm": 1.1000942989695253, "learning_rate": 6.734693877551021e-06, "loss": 0.2428, "step": 693 }, { "epoch": 0.02024622206663166, "grad_norm": 0.8371113735131498, "learning_rate": 6.7444120505345e-06, "loss": 0.1993, "step": 694 }, { "epoch": 0.02027539529727522, "grad_norm": 1.120861467251313, "learning_rate": 6.75413022351798e-06, "loss": 0.2342, "step": 695 }, { "epoch": 0.02030456852791878, "grad_norm": 1.0814084397294184, "learning_rate": 6.763848396501458e-06, "loss": 0.2589, "step": 696 }, { "epoch": 0.020333741758562344, "grad_norm": 1.1083675959863164, "learning_rate": 6.773566569484937e-06, "loss": 0.2288, "step": 697 }, { "epoch": 0.020362914989205904, "grad_norm": 1.328044711803676, "learning_rate": 6.783284742468417e-06, "loss": 0.2873, "step": 698 }, { "epoch": 0.020392088219849468, "grad_norm": 1.2457702284333594, "learning_rate": 6.793002915451895e-06, "loss": 0.2384, "step": 699 }, { "epoch": 0.020421261450493028, "grad_norm": 0.9138010870235221, "learning_rate": 6.8027210884353745e-06, "loss": 0.2123, "step": 700 }, { "epoch": 0.020450434681136588, "grad_norm": 1.0760978257264822, "learning_rate": 6.812439261418854e-06, "loss": 0.2563, "step": 701 }, { "epoch": 0.020479607911780152, "grad_norm": 1.3198861326415898, "learning_rate": 6.822157434402333e-06, "loss": 0.2572, "step": 702 }, { "epoch": 0.020508781142423712, "grad_norm": 1.4575455604532646, "learning_rate": 6.8318756073858115e-06, "loss": 0.2379, "step": 703 }, { "epoch": 0.020537954373067272, "grad_norm": 1.109333016897937, "learning_rate": 6.841593780369291e-06, "loss": 0.2112, "step": 704 }, { "epoch": 0.020567127603710836, "grad_norm": 1.0302927524542895, "learning_rate": 6.85131195335277e-06, "loss": 0.2145, "step": 705 }, { "epoch": 0.020596300834354396, "grad_norm": 1.2909537063625136, "learning_rate": 6.861030126336249e-06, "loss": 0.2866, "step": 706 }, { "epoch": 0.020625474064997956, "grad_norm": 1.6093127500609739, "learning_rate": 6.870748299319728e-06, "loss": 0.2486, "step": 707 }, { "epoch": 0.02065464729564152, "grad_norm": 1.2878326694626485, "learning_rate": 6.880466472303208e-06, "loss": 0.2341, "step": 708 }, { "epoch": 0.02068382052628508, "grad_norm": 1.1876576872307236, "learning_rate": 6.890184645286687e-06, "loss": 0.2422, "step": 709 }, { "epoch": 0.020712993756928644, "grad_norm": 1.2053349767690207, "learning_rate": 6.899902818270166e-06, "loss": 0.226, "step": 710 }, { "epoch": 0.020742166987572204, "grad_norm": 1.1621295917851582, "learning_rate": 6.909620991253645e-06, "loss": 0.2573, "step": 711 }, { "epoch": 0.020771340218215764, "grad_norm": 1.0779227597831955, "learning_rate": 6.9193391642371245e-06, "loss": 0.2219, "step": 712 }, { "epoch": 0.020800513448859328, "grad_norm": 1.0600744780537699, "learning_rate": 6.929057337220603e-06, "loss": 0.2435, "step": 713 }, { "epoch": 0.020829686679502888, "grad_norm": 1.1870402162790261, "learning_rate": 6.938775510204082e-06, "loss": 0.2448, "step": 714 }, { "epoch": 0.020858859910146448, "grad_norm": 1.15577933444016, "learning_rate": 6.948493683187562e-06, "loss": 0.241, "step": 715 }, { "epoch": 0.020888033140790012, "grad_norm": 1.0385025420205705, "learning_rate": 6.95821185617104e-06, "loss": 0.2184, "step": 716 }, { "epoch": 0.020917206371433572, "grad_norm": 1.1981330447897032, "learning_rate": 6.967930029154519e-06, "loss": 0.2529, "step": 717 }, { "epoch": 0.020946379602077136, "grad_norm": 1.3893074797081237, "learning_rate": 6.977648202137999e-06, "loss": 0.2406, "step": 718 }, { "epoch": 0.020975552832720696, "grad_norm": 1.3478510367178977, "learning_rate": 6.987366375121478e-06, "loss": 0.2499, "step": 719 }, { "epoch": 0.021004726063364256, "grad_norm": 1.2290443600628422, "learning_rate": 6.9970845481049564e-06, "loss": 0.2249, "step": 720 }, { "epoch": 0.02103389929400782, "grad_norm": 1.223682726594295, "learning_rate": 7.006802721088436e-06, "loss": 0.234, "step": 721 }, { "epoch": 0.02106307252465138, "grad_norm": 1.2519725592077438, "learning_rate": 7.016520894071915e-06, "loss": 0.2158, "step": 722 }, { "epoch": 0.02109224575529494, "grad_norm": 1.0305745510312196, "learning_rate": 7.0262390670553935e-06, "loss": 0.2625, "step": 723 }, { "epoch": 0.021121418985938504, "grad_norm": 1.4914540459963992, "learning_rate": 7.035957240038873e-06, "loss": 0.2494, "step": 724 }, { "epoch": 0.021150592216582064, "grad_norm": 1.4949867206265093, "learning_rate": 7.045675413022353e-06, "loss": 0.2352, "step": 725 }, { "epoch": 0.021179765447225624, "grad_norm": 1.3330272613270544, "learning_rate": 7.055393586005832e-06, "loss": 0.2236, "step": 726 }, { "epoch": 0.021208938677869188, "grad_norm": 1.1526352996187208, "learning_rate": 7.06511175898931e-06, "loss": 0.2502, "step": 727 }, { "epoch": 0.021238111908512748, "grad_norm": 1.166306458061035, "learning_rate": 7.07482993197279e-06, "loss": 0.2195, "step": 728 }, { "epoch": 0.021267285139156312, "grad_norm": 1.2374836997108423, "learning_rate": 7.084548104956269e-06, "loss": 0.2595, "step": 729 }, { "epoch": 0.021296458369799872, "grad_norm": 1.0946137599976848, "learning_rate": 7.094266277939748e-06, "loss": 0.2235, "step": 730 }, { "epoch": 0.021325631600443432, "grad_norm": 1.3542403105266712, "learning_rate": 7.103984450923227e-06, "loss": 0.2517, "step": 731 }, { "epoch": 0.021354804831086996, "grad_norm": 1.3603040073749126, "learning_rate": 7.1137026239067065e-06, "loss": 0.259, "step": 732 }, { "epoch": 0.021383978061730556, "grad_norm": 1.148411391960263, "learning_rate": 7.123420796890185e-06, "loss": 0.2511, "step": 733 }, { "epoch": 0.021413151292374116, "grad_norm": 1.3990863614174234, "learning_rate": 7.133138969873664e-06, "loss": 0.2233, "step": 734 }, { "epoch": 0.02144232452301768, "grad_norm": 1.1289883923040358, "learning_rate": 7.1428571428571436e-06, "loss": 0.2483, "step": 735 }, { "epoch": 0.02147149775366124, "grad_norm": 1.592308270330597, "learning_rate": 7.152575315840623e-06, "loss": 0.2566, "step": 736 }, { "epoch": 0.0215006709843048, "grad_norm": 1.1676915593536734, "learning_rate": 7.162293488824101e-06, "loss": 0.2244, "step": 737 }, { "epoch": 0.021529844214948364, "grad_norm": 1.434217925251431, "learning_rate": 7.172011661807581e-06, "loss": 0.2694, "step": 738 }, { "epoch": 0.021559017445591924, "grad_norm": 1.3398064548223516, "learning_rate": 7.18172983479106e-06, "loss": 0.2445, "step": 739 }, { "epoch": 0.021588190676235488, "grad_norm": 1.0631062210348727, "learning_rate": 7.191448007774538e-06, "loss": 0.2387, "step": 740 }, { "epoch": 0.021617363906879048, "grad_norm": 1.2419787284853856, "learning_rate": 7.201166180758018e-06, "loss": 0.2128, "step": 741 }, { "epoch": 0.02164653713752261, "grad_norm": 1.347170823544848, "learning_rate": 7.210884353741497e-06, "loss": 0.2189, "step": 742 }, { "epoch": 0.021675710368166172, "grad_norm": 1.1052609255450854, "learning_rate": 7.2206025267249755e-06, "loss": 0.2274, "step": 743 }, { "epoch": 0.021704883598809732, "grad_norm": 0.9141740311729317, "learning_rate": 7.230320699708455e-06, "loss": 0.2621, "step": 744 }, { "epoch": 0.021734056829453292, "grad_norm": 1.2036279072941747, "learning_rate": 7.240038872691935e-06, "loss": 0.2294, "step": 745 }, { "epoch": 0.021763230060096856, "grad_norm": 1.1373876492741393, "learning_rate": 7.249757045675414e-06, "loss": 0.248, "step": 746 }, { "epoch": 0.021792403290740416, "grad_norm": 1.03097313866654, "learning_rate": 7.259475218658893e-06, "loss": 0.205, "step": 747 }, { "epoch": 0.021821576521383976, "grad_norm": 1.225664083192016, "learning_rate": 7.269193391642372e-06, "loss": 0.2117, "step": 748 }, { "epoch": 0.02185074975202754, "grad_norm": 1.116765043842244, "learning_rate": 7.278911564625851e-06, "loss": 0.2331, "step": 749 }, { "epoch": 0.0218799229826711, "grad_norm": 0.9894490765307579, "learning_rate": 7.28862973760933e-06, "loss": 0.2103, "step": 750 }, { "epoch": 0.021909096213314664, "grad_norm": 0.9377323370832152, "learning_rate": 7.298347910592809e-06, "loss": 0.2209, "step": 751 }, { "epoch": 0.021938269443958224, "grad_norm": 1.1477138262356188, "learning_rate": 7.3080660835762885e-06, "loss": 0.2073, "step": 752 }, { "epoch": 0.021967442674601784, "grad_norm": 1.1800756399150825, "learning_rate": 7.317784256559768e-06, "loss": 0.2314, "step": 753 }, { "epoch": 0.021996615905245348, "grad_norm": 1.1324552297353292, "learning_rate": 7.327502429543246e-06, "loss": 0.2559, "step": 754 }, { "epoch": 0.022025789135888908, "grad_norm": 1.187821230593947, "learning_rate": 7.3372206025267255e-06, "loss": 0.2116, "step": 755 }, { "epoch": 0.02205496236653247, "grad_norm": 1.2814527100250865, "learning_rate": 7.346938775510205e-06, "loss": 0.2385, "step": 756 }, { "epoch": 0.022084135597176032, "grad_norm": 1.3137757179370568, "learning_rate": 7.356656948493683e-06, "loss": 0.2269, "step": 757 }, { "epoch": 0.022113308827819592, "grad_norm": 1.03965993372393, "learning_rate": 7.366375121477163e-06, "loss": 0.2237, "step": 758 }, { "epoch": 0.022142482058463153, "grad_norm": 1.2122225918284888, "learning_rate": 7.376093294460642e-06, "loss": 0.2602, "step": 759 }, { "epoch": 0.022171655289106716, "grad_norm": 1.1906781306110434, "learning_rate": 7.38581146744412e-06, "loss": 0.2457, "step": 760 }, { "epoch": 0.022200828519750276, "grad_norm": 1.0016455739396728, "learning_rate": 7.3955296404276e-06, "loss": 0.245, "step": 761 }, { "epoch": 0.02223000175039384, "grad_norm": 1.0791268810825152, "learning_rate": 7.40524781341108e-06, "loss": 0.2, "step": 762 }, { "epoch": 0.0222591749810374, "grad_norm": 1.2559405314741636, "learning_rate": 7.414965986394559e-06, "loss": 0.2267, "step": 763 }, { "epoch": 0.02228834821168096, "grad_norm": 1.2986118162267253, "learning_rate": 7.424684159378037e-06, "loss": 0.2468, "step": 764 }, { "epoch": 0.022317521442324524, "grad_norm": 0.9914341493989796, "learning_rate": 7.434402332361517e-06, "loss": 0.2304, "step": 765 }, { "epoch": 0.022346694672968084, "grad_norm": 1.2410376941673065, "learning_rate": 7.444120505344996e-06, "loss": 0.2341, "step": 766 }, { "epoch": 0.022375867903611645, "grad_norm": 1.144974483693795, "learning_rate": 7.453838678328475e-06, "loss": 0.2419, "step": 767 }, { "epoch": 0.022405041134255208, "grad_norm": 1.0885957617854234, "learning_rate": 7.463556851311954e-06, "loss": 0.2379, "step": 768 }, { "epoch": 0.02243421436489877, "grad_norm": 1.1391880660459601, "learning_rate": 7.473275024295433e-06, "loss": 0.2227, "step": 769 }, { "epoch": 0.022463387595542332, "grad_norm": 1.159826237177272, "learning_rate": 7.482993197278913e-06, "loss": 0.2249, "step": 770 }, { "epoch": 0.022492560826185892, "grad_norm": 1.1279372315475507, "learning_rate": 7.492711370262391e-06, "loss": 0.2464, "step": 771 }, { "epoch": 0.022521734056829452, "grad_norm": 1.0538247174746573, "learning_rate": 7.5024295432458704e-06, "loss": 0.2701, "step": 772 }, { "epoch": 0.022550907287473016, "grad_norm": 1.0464884319638112, "learning_rate": 7.51214771622935e-06, "loss": 0.2694, "step": 773 }, { "epoch": 0.022580080518116576, "grad_norm": 1.0341781965712196, "learning_rate": 7.521865889212828e-06, "loss": 0.2483, "step": 774 }, { "epoch": 0.022609253748760137, "grad_norm": 1.1521353006383643, "learning_rate": 7.5315840621963075e-06, "loss": 0.2214, "step": 775 }, { "epoch": 0.0226384269794037, "grad_norm": 1.0305586585976119, "learning_rate": 7.541302235179787e-06, "loss": 0.2137, "step": 776 }, { "epoch": 0.02266760021004726, "grad_norm": 1.2017679461793598, "learning_rate": 7.551020408163265e-06, "loss": 0.2167, "step": 777 }, { "epoch": 0.02269677344069082, "grad_norm": 1.147090709009779, "learning_rate": 7.560738581146745e-06, "loss": 0.2342, "step": 778 }, { "epoch": 0.022725946671334384, "grad_norm": 1.0402610823434146, "learning_rate": 7.570456754130224e-06, "loss": 0.2206, "step": 779 }, { "epoch": 0.022755119901977944, "grad_norm": 1.31274832001162, "learning_rate": 7.580174927113704e-06, "loss": 0.2367, "step": 780 }, { "epoch": 0.022784293132621508, "grad_norm": 1.1955641509643553, "learning_rate": 7.589893100097182e-06, "loss": 0.2567, "step": 781 }, { "epoch": 0.02281346636326507, "grad_norm": 1.3969703270390994, "learning_rate": 7.599611273080662e-06, "loss": 0.2256, "step": 782 }, { "epoch": 0.02284263959390863, "grad_norm": 1.0412933297362201, "learning_rate": 7.609329446064141e-06, "loss": 0.2043, "step": 783 }, { "epoch": 0.022871812824552192, "grad_norm": 1.0444517135384463, "learning_rate": 7.61904761904762e-06, "loss": 0.2195, "step": 784 }, { "epoch": 0.022900986055195752, "grad_norm": 1.3301626783376574, "learning_rate": 7.628765792031099e-06, "loss": 0.2525, "step": 785 }, { "epoch": 0.022930159285839313, "grad_norm": 1.2009624863679822, "learning_rate": 7.638483965014577e-06, "loss": 0.2383, "step": 786 }, { "epoch": 0.022959332516482876, "grad_norm": 1.0323992570020093, "learning_rate": 7.648202137998057e-06, "loss": 0.2256, "step": 787 }, { "epoch": 0.022988505747126436, "grad_norm": 1.1705082357772691, "learning_rate": 7.657920310981536e-06, "loss": 0.2162, "step": 788 }, { "epoch": 0.023017678977769997, "grad_norm": 1.016935911820381, "learning_rate": 7.667638483965015e-06, "loss": 0.2397, "step": 789 }, { "epoch": 0.02304685220841356, "grad_norm": 1.2098682858694556, "learning_rate": 7.677356656948495e-06, "loss": 0.2488, "step": 790 }, { "epoch": 0.02307602543905712, "grad_norm": 1.1292603128213778, "learning_rate": 7.687074829931972e-06, "loss": 0.2336, "step": 791 }, { "epoch": 0.023105198669700684, "grad_norm": 1.063301311419607, "learning_rate": 7.696793002915453e-06, "loss": 0.2286, "step": 792 }, { "epoch": 0.023134371900344244, "grad_norm": 1.49425163247432, "learning_rate": 7.706511175898933e-06, "loss": 0.2195, "step": 793 }, { "epoch": 0.023163545130987805, "grad_norm": 1.2549036667959854, "learning_rate": 7.71622934888241e-06, "loss": 0.204, "step": 794 }, { "epoch": 0.02319271836163137, "grad_norm": 1.4739247197768788, "learning_rate": 7.72594752186589e-06, "loss": 0.2417, "step": 795 }, { "epoch": 0.02322189159227493, "grad_norm": 1.3112302205058461, "learning_rate": 7.735665694849369e-06, "loss": 0.2782, "step": 796 }, { "epoch": 0.02325106482291849, "grad_norm": 1.619905754938196, "learning_rate": 7.745383867832848e-06, "loss": 0.3017, "step": 797 }, { "epoch": 0.023280238053562052, "grad_norm": 1.3563231856399103, "learning_rate": 7.755102040816327e-06, "loss": 0.2599, "step": 798 }, { "epoch": 0.023309411284205613, "grad_norm": 1.327772442289883, "learning_rate": 7.764820213799807e-06, "loss": 0.248, "step": 799 }, { "epoch": 0.023338584514849173, "grad_norm": 1.2852257136338878, "learning_rate": 7.774538386783286e-06, "loss": 0.231, "step": 800 }, { "epoch": 0.023367757745492736, "grad_norm": 1.2482077449290176, "learning_rate": 7.784256559766764e-06, "loss": 0.2229, "step": 801 }, { "epoch": 0.023396930976136297, "grad_norm": 1.1450007187952924, "learning_rate": 7.793974732750243e-06, "loss": 0.272, "step": 802 }, { "epoch": 0.02342610420677986, "grad_norm": 1.0787544506279128, "learning_rate": 7.803692905733722e-06, "loss": 0.224, "step": 803 }, { "epoch": 0.02345527743742342, "grad_norm": 1.6160197667204934, "learning_rate": 7.813411078717202e-06, "loss": 0.2463, "step": 804 }, { "epoch": 0.02348445066806698, "grad_norm": 0.8985221469400513, "learning_rate": 7.823129251700681e-06, "loss": 0.2042, "step": 805 }, { "epoch": 0.023513623898710544, "grad_norm": 1.1277547620984618, "learning_rate": 7.83284742468416e-06, "loss": 0.2648, "step": 806 }, { "epoch": 0.023542797129354105, "grad_norm": 1.0706806725213707, "learning_rate": 7.84256559766764e-06, "loss": 0.233, "step": 807 }, { "epoch": 0.023571970359997665, "grad_norm": 1.2250251136180632, "learning_rate": 7.852283770651117e-06, "loss": 0.2505, "step": 808 }, { "epoch": 0.02360114359064123, "grad_norm": 1.0975135269964724, "learning_rate": 7.862001943634598e-06, "loss": 0.2332, "step": 809 }, { "epoch": 0.02363031682128479, "grad_norm": 1.2494564394211676, "learning_rate": 7.871720116618077e-06, "loss": 0.2133, "step": 810 }, { "epoch": 0.023659490051928352, "grad_norm": 1.3489757437988497, "learning_rate": 7.881438289601555e-06, "loss": 0.2253, "step": 811 }, { "epoch": 0.023688663282571912, "grad_norm": 1.0251269096777629, "learning_rate": 7.891156462585034e-06, "loss": 0.2213, "step": 812 }, { "epoch": 0.023717836513215473, "grad_norm": 0.9716459114793602, "learning_rate": 7.900874635568514e-06, "loss": 0.2236, "step": 813 }, { "epoch": 0.023747009743859036, "grad_norm": 1.0508870648670574, "learning_rate": 7.910592808551993e-06, "loss": 0.2336, "step": 814 }, { "epoch": 0.023776182974502597, "grad_norm": 1.0360282373369818, "learning_rate": 7.920310981535472e-06, "loss": 0.2213, "step": 815 }, { "epoch": 0.023805356205146157, "grad_norm": 1.098218013035958, "learning_rate": 7.930029154518952e-06, "loss": 0.2357, "step": 816 }, { "epoch": 0.02383452943578972, "grad_norm": 1.1575540066522236, "learning_rate": 7.939747327502431e-06, "loss": 0.2421, "step": 817 }, { "epoch": 0.02386370266643328, "grad_norm": 1.4602086588436836, "learning_rate": 7.949465500485909e-06, "loss": 0.2123, "step": 818 }, { "epoch": 0.02389287589707684, "grad_norm": 1.054613072449019, "learning_rate": 7.959183673469388e-06, "loss": 0.2331, "step": 819 }, { "epoch": 0.023922049127720404, "grad_norm": 1.380533444087093, "learning_rate": 7.968901846452867e-06, "loss": 0.2186, "step": 820 }, { "epoch": 0.023951222358363965, "grad_norm": 1.0968081127881506, "learning_rate": 7.978620019436347e-06, "loss": 0.2151, "step": 821 }, { "epoch": 0.02398039558900753, "grad_norm": 1.2678370235797236, "learning_rate": 7.988338192419826e-06, "loss": 0.2447, "step": 822 }, { "epoch": 0.02400956881965109, "grad_norm": 1.1840706113496122, "learning_rate": 7.998056365403305e-06, "loss": 0.2225, "step": 823 }, { "epoch": 0.02403874205029465, "grad_norm": 1.2965871034391179, "learning_rate": 8.007774538386784e-06, "loss": 0.2178, "step": 824 }, { "epoch": 0.024067915280938212, "grad_norm": 1.264222685552627, "learning_rate": 8.017492711370262e-06, "loss": 0.2683, "step": 825 }, { "epoch": 0.024097088511581773, "grad_norm": 1.3527595201863405, "learning_rate": 8.027210884353741e-06, "loss": 0.216, "step": 826 }, { "epoch": 0.024126261742225333, "grad_norm": 1.4038635883884818, "learning_rate": 8.036929057337222e-06, "loss": 0.2665, "step": 827 }, { "epoch": 0.024155434972868896, "grad_norm": 0.9586492703955889, "learning_rate": 8.0466472303207e-06, "loss": 0.2331, "step": 828 }, { "epoch": 0.024184608203512457, "grad_norm": 1.052317215766637, "learning_rate": 8.05636540330418e-06, "loss": 0.2352, "step": 829 }, { "epoch": 0.024213781434156017, "grad_norm": 1.0505145744789148, "learning_rate": 8.066083576287659e-06, "loss": 0.2424, "step": 830 }, { "epoch": 0.02424295466479958, "grad_norm": 1.259839897348867, "learning_rate": 8.075801749271138e-06, "loss": 0.264, "step": 831 }, { "epoch": 0.02427212789544314, "grad_norm": 1.165680747204167, "learning_rate": 8.085519922254617e-06, "loss": 0.2346, "step": 832 }, { "epoch": 0.024301301126086704, "grad_norm": 1.269560461730615, "learning_rate": 8.095238095238097e-06, "loss": 0.2324, "step": 833 }, { "epoch": 0.024330474356730265, "grad_norm": 1.0600569819159396, "learning_rate": 8.104956268221576e-06, "loss": 0.2305, "step": 834 }, { "epoch": 0.024359647587373825, "grad_norm": 1.1810103725868992, "learning_rate": 8.114674441205053e-06, "loss": 0.2636, "step": 835 }, { "epoch": 0.02438882081801739, "grad_norm": 0.9233645643119518, "learning_rate": 8.124392614188533e-06, "loss": 0.2255, "step": 836 }, { "epoch": 0.02441799404866095, "grad_norm": 0.839916756444877, "learning_rate": 8.134110787172012e-06, "loss": 0.1963, "step": 837 }, { "epoch": 0.02444716727930451, "grad_norm": 1.1297884673558336, "learning_rate": 8.143828960155491e-06, "loss": 0.2182, "step": 838 }, { "epoch": 0.024476340509948073, "grad_norm": 1.012255127000924, "learning_rate": 8.15354713313897e-06, "loss": 0.243, "step": 839 }, { "epoch": 0.024505513740591633, "grad_norm": 1.1376812095346318, "learning_rate": 8.16326530612245e-06, "loss": 0.2034, "step": 840 }, { "epoch": 0.024534686971235193, "grad_norm": 1.15627653019019, "learning_rate": 8.17298347910593e-06, "loss": 0.237, "step": 841 }, { "epoch": 0.024563860201878757, "grad_norm": 1.3314035681696466, "learning_rate": 8.182701652089407e-06, "loss": 0.2091, "step": 842 }, { "epoch": 0.024593033432522317, "grad_norm": 1.0392278314289032, "learning_rate": 8.192419825072886e-06, "loss": 0.2321, "step": 843 }, { "epoch": 0.02462220666316588, "grad_norm": 1.1571268182840173, "learning_rate": 8.202137998056367e-06, "loss": 0.242, "step": 844 }, { "epoch": 0.02465137989380944, "grad_norm": 1.0872892444485658, "learning_rate": 8.211856171039845e-06, "loss": 0.2329, "step": 845 }, { "epoch": 0.024680553124453, "grad_norm": 1.1112375767980343, "learning_rate": 8.221574344023324e-06, "loss": 0.2332, "step": 846 }, { "epoch": 0.024709726355096565, "grad_norm": 1.028165264665385, "learning_rate": 8.231292517006804e-06, "loss": 0.2148, "step": 847 }, { "epoch": 0.024738899585740125, "grad_norm": 1.2878773067873275, "learning_rate": 8.241010689990283e-06, "loss": 0.24, "step": 848 }, { "epoch": 0.024768072816383685, "grad_norm": 1.2089653481220402, "learning_rate": 8.250728862973762e-06, "loss": 0.2291, "step": 849 }, { "epoch": 0.02479724604702725, "grad_norm": 1.2961702554882062, "learning_rate": 8.260447035957241e-06, "loss": 0.2293, "step": 850 }, { "epoch": 0.02482641927767081, "grad_norm": 1.1287301452949055, "learning_rate": 8.27016520894072e-06, "loss": 0.2203, "step": 851 }, { "epoch": 0.02485559250831437, "grad_norm": 1.0994200408217525, "learning_rate": 8.279883381924198e-06, "loss": 0.2097, "step": 852 }, { "epoch": 0.024884765738957933, "grad_norm": 1.1749016881150973, "learning_rate": 8.289601554907678e-06, "loss": 0.2324, "step": 853 }, { "epoch": 0.024913938969601493, "grad_norm": 1.459717519565719, "learning_rate": 8.299319727891157e-06, "loss": 0.2252, "step": 854 }, { "epoch": 0.024943112200245057, "grad_norm": 1.455160742778526, "learning_rate": 8.309037900874636e-06, "loss": 0.2333, "step": 855 }, { "epoch": 0.024972285430888617, "grad_norm": 1.023204593740571, "learning_rate": 8.318756073858116e-06, "loss": 0.2271, "step": 856 }, { "epoch": 0.025001458661532177, "grad_norm": 1.1995934918409512, "learning_rate": 8.328474246841595e-06, "loss": 0.2578, "step": 857 }, { "epoch": 0.02503063189217574, "grad_norm": 1.1620257301761994, "learning_rate": 8.338192419825074e-06, "loss": 0.2304, "step": 858 }, { "epoch": 0.0250598051228193, "grad_norm": 1.1110701350031862, "learning_rate": 8.347910592808552e-06, "loss": 0.2199, "step": 859 }, { "epoch": 0.02508897835346286, "grad_norm": 1.4282060642842584, "learning_rate": 8.357628765792031e-06, "loss": 0.226, "step": 860 }, { "epoch": 0.025118151584106425, "grad_norm": 1.1058873150035762, "learning_rate": 8.36734693877551e-06, "loss": 0.2325, "step": 861 }, { "epoch": 0.025147324814749985, "grad_norm": 1.0212208946954713, "learning_rate": 8.37706511175899e-06, "loss": 0.231, "step": 862 }, { "epoch": 0.02517649804539355, "grad_norm": 1.0584289725256428, "learning_rate": 8.386783284742469e-06, "loss": 0.2372, "step": 863 }, { "epoch": 0.02520567127603711, "grad_norm": 1.3915339635059458, "learning_rate": 8.396501457725948e-06, "loss": 0.2155, "step": 864 }, { "epoch": 0.02523484450668067, "grad_norm": 0.9602181741267753, "learning_rate": 8.406219630709426e-06, "loss": 0.2274, "step": 865 }, { "epoch": 0.025264017737324233, "grad_norm": 1.0949774891934438, "learning_rate": 8.415937803692907e-06, "loss": 0.2359, "step": 866 }, { "epoch": 0.025293190967967793, "grad_norm": 1.1976347719025633, "learning_rate": 8.425655976676386e-06, "loss": 0.247, "step": 867 }, { "epoch": 0.025322364198611353, "grad_norm": 1.0966583335751576, "learning_rate": 8.435374149659866e-06, "loss": 0.2243, "step": 868 }, { "epoch": 0.025351537429254917, "grad_norm": 0.999186771932867, "learning_rate": 8.445092322643343e-06, "loss": 0.1996, "step": 869 }, { "epoch": 0.025380710659898477, "grad_norm": 1.1549892838997626, "learning_rate": 8.454810495626823e-06, "loss": 0.2291, "step": 870 }, { "epoch": 0.025409883890542037, "grad_norm": 1.2565325042452247, "learning_rate": 8.464528668610302e-06, "loss": 0.23, "step": 871 }, { "epoch": 0.0254390571211856, "grad_norm": 1.264468498651524, "learning_rate": 8.474246841593781e-06, "loss": 0.2371, "step": 872 }, { "epoch": 0.02546823035182916, "grad_norm": 1.1107458572943656, "learning_rate": 8.48396501457726e-06, "loss": 0.2308, "step": 873 }, { "epoch": 0.025497403582472725, "grad_norm": 1.245594829405502, "learning_rate": 8.49368318756074e-06, "loss": 0.2476, "step": 874 }, { "epoch": 0.025526576813116285, "grad_norm": 1.188427098742183, "learning_rate": 8.503401360544217e-06, "loss": 0.2309, "step": 875 }, { "epoch": 0.025555750043759845, "grad_norm": 1.2916364374413594, "learning_rate": 8.513119533527697e-06, "loss": 0.2275, "step": 876 }, { "epoch": 0.02558492327440341, "grad_norm": 1.1123804597760718, "learning_rate": 8.522837706511176e-06, "loss": 0.2216, "step": 877 }, { "epoch": 0.02561409650504697, "grad_norm": 1.0506988222299276, "learning_rate": 8.532555879494655e-06, "loss": 0.2402, "step": 878 }, { "epoch": 0.02564326973569053, "grad_norm": 1.07888229018488, "learning_rate": 8.542274052478135e-06, "loss": 0.247, "step": 879 }, { "epoch": 0.025672442966334093, "grad_norm": 1.114420567762565, "learning_rate": 8.551992225461614e-06, "loss": 0.2199, "step": 880 }, { "epoch": 0.025701616196977653, "grad_norm": 1.0733804086551966, "learning_rate": 8.561710398445093e-06, "loss": 0.2364, "step": 881 }, { "epoch": 0.025730789427621213, "grad_norm": 1.1515411067974415, "learning_rate": 8.571428571428571e-06, "loss": 0.2129, "step": 882 }, { "epoch": 0.025759962658264777, "grad_norm": 1.043433286716918, "learning_rate": 8.581146744412052e-06, "loss": 0.2355, "step": 883 }, { "epoch": 0.025789135888908337, "grad_norm": 1.0336903893330118, "learning_rate": 8.590864917395531e-06, "loss": 0.2135, "step": 884 }, { "epoch": 0.0258183091195519, "grad_norm": 1.256521542254548, "learning_rate": 8.60058309037901e-06, "loss": 0.2093, "step": 885 }, { "epoch": 0.02584748235019546, "grad_norm": 1.3417628029323483, "learning_rate": 8.610301263362488e-06, "loss": 0.261, "step": 886 }, { "epoch": 0.02587665558083902, "grad_norm": 1.0107801209511784, "learning_rate": 8.620019436345967e-06, "loss": 0.2119, "step": 887 }, { "epoch": 0.025905828811482585, "grad_norm": 1.1406597825944313, "learning_rate": 8.629737609329447e-06, "loss": 0.2322, "step": 888 }, { "epoch": 0.025935002042126145, "grad_norm": 1.3920662585308612, "learning_rate": 8.639455782312926e-06, "loss": 0.215, "step": 889 }, { "epoch": 0.025964175272769705, "grad_norm": 1.1511277459389424, "learning_rate": 8.649173955296405e-06, "loss": 0.2472, "step": 890 }, { "epoch": 0.02599334850341327, "grad_norm": 0.9606071792676083, "learning_rate": 8.658892128279885e-06, "loss": 0.2161, "step": 891 }, { "epoch": 0.02602252173405683, "grad_norm": 1.2710760257723615, "learning_rate": 8.668610301263362e-06, "loss": 0.2287, "step": 892 }, { "epoch": 0.02605169496470039, "grad_norm": 1.168320003779378, "learning_rate": 8.678328474246842e-06, "loss": 0.2431, "step": 893 }, { "epoch": 0.026080868195343953, "grad_norm": 0.8512252460881229, "learning_rate": 8.688046647230321e-06, "loss": 0.1971, "step": 894 }, { "epoch": 0.026110041425987513, "grad_norm": 1.0740925586658399, "learning_rate": 8.6977648202138e-06, "loss": 0.2528, "step": 895 }, { "epoch": 0.026139214656631077, "grad_norm": 1.0778145588806725, "learning_rate": 8.70748299319728e-06, "loss": 0.2134, "step": 896 }, { "epoch": 0.026168387887274637, "grad_norm": 1.0246197372812758, "learning_rate": 8.717201166180759e-06, "loss": 0.2418, "step": 897 }, { "epoch": 0.026197561117918197, "grad_norm": 0.9971908131729528, "learning_rate": 8.726919339164238e-06, "loss": 0.2249, "step": 898 }, { "epoch": 0.02622673434856176, "grad_norm": 0.9847569765468884, "learning_rate": 8.736637512147716e-06, "loss": 0.2177, "step": 899 }, { "epoch": 0.02625590757920532, "grad_norm": 1.223519064180211, "learning_rate": 8.746355685131195e-06, "loss": 0.2563, "step": 900 }, { "epoch": 0.02628508080984888, "grad_norm": 1.1518220206967986, "learning_rate": 8.756073858114676e-06, "loss": 0.2264, "step": 901 }, { "epoch": 0.026314254040492445, "grad_norm": 1.189621075076587, "learning_rate": 8.765792031098155e-06, "loss": 0.2184, "step": 902 }, { "epoch": 0.026343427271136005, "grad_norm": 1.2186169070260915, "learning_rate": 8.775510204081633e-06, "loss": 0.2407, "step": 903 }, { "epoch": 0.02637260050177957, "grad_norm": 1.2635856808123254, "learning_rate": 8.785228377065112e-06, "loss": 0.2262, "step": 904 }, { "epoch": 0.02640177373242313, "grad_norm": 0.9812279936248159, "learning_rate": 8.794946550048592e-06, "loss": 0.2247, "step": 905 }, { "epoch": 0.02643094696306669, "grad_norm": 1.1066437925647374, "learning_rate": 8.804664723032071e-06, "loss": 0.2362, "step": 906 }, { "epoch": 0.026460120193710253, "grad_norm": 1.111992497097666, "learning_rate": 8.81438289601555e-06, "loss": 0.2073, "step": 907 }, { "epoch": 0.026489293424353813, "grad_norm": 0.9645432811176635, "learning_rate": 8.82410106899903e-06, "loss": 0.2296, "step": 908 }, { "epoch": 0.026518466654997373, "grad_norm": 0.8492398097468394, "learning_rate": 8.833819241982507e-06, "loss": 0.2228, "step": 909 }, { "epoch": 0.026547639885640937, "grad_norm": 1.0748246979612344, "learning_rate": 8.843537414965987e-06, "loss": 0.2317, "step": 910 }, { "epoch": 0.026576813116284497, "grad_norm": 1.0053226522490737, "learning_rate": 8.853255587949466e-06, "loss": 0.2302, "step": 911 }, { "epoch": 0.026605986346928057, "grad_norm": 0.9971656931450253, "learning_rate": 8.862973760932945e-06, "loss": 0.2193, "step": 912 }, { "epoch": 0.02663515957757162, "grad_norm": 0.9847409980163035, "learning_rate": 8.872691933916424e-06, "loss": 0.2199, "step": 913 }, { "epoch": 0.02666433280821518, "grad_norm": 1.25498568142084, "learning_rate": 8.882410106899904e-06, "loss": 0.2146, "step": 914 }, { "epoch": 0.026693506038858745, "grad_norm": 0.9161040678664293, "learning_rate": 8.892128279883383e-06, "loss": 0.2379, "step": 915 }, { "epoch": 0.026722679269502305, "grad_norm": 1.2235565726421131, "learning_rate": 8.90184645286686e-06, "loss": 0.25, "step": 916 }, { "epoch": 0.026751852500145865, "grad_norm": 1.406004105551934, "learning_rate": 8.91156462585034e-06, "loss": 0.2239, "step": 917 }, { "epoch": 0.02678102573078943, "grad_norm": 1.1342506113992574, "learning_rate": 8.921282798833821e-06, "loss": 0.2106, "step": 918 }, { "epoch": 0.02681019896143299, "grad_norm": 1.1239375255085295, "learning_rate": 8.931000971817299e-06, "loss": 0.2329, "step": 919 }, { "epoch": 0.02683937219207655, "grad_norm": 0.9457265195547269, "learning_rate": 8.940719144800778e-06, "loss": 0.2118, "step": 920 }, { "epoch": 0.026868545422720113, "grad_norm": 1.0706261685912044, "learning_rate": 8.950437317784257e-06, "loss": 0.2731, "step": 921 }, { "epoch": 0.026897718653363673, "grad_norm": 1.110692695974071, "learning_rate": 8.960155490767737e-06, "loss": 0.2014, "step": 922 }, { "epoch": 0.026926891884007233, "grad_norm": 0.8500774921458645, "learning_rate": 8.969873663751216e-06, "loss": 0.2016, "step": 923 }, { "epoch": 0.026956065114650797, "grad_norm": 1.1852674803784724, "learning_rate": 8.979591836734695e-06, "loss": 0.2098, "step": 924 }, { "epoch": 0.026985238345294357, "grad_norm": 1.1139269298523111, "learning_rate": 8.989310009718175e-06, "loss": 0.2131, "step": 925 }, { "epoch": 0.02701441157593792, "grad_norm": 0.8962746684288225, "learning_rate": 8.999028182701652e-06, "loss": 0.2088, "step": 926 }, { "epoch": 0.02704358480658148, "grad_norm": 1.2521461375304326, "learning_rate": 9.008746355685131e-06, "loss": 0.2539, "step": 927 }, { "epoch": 0.02707275803722504, "grad_norm": 1.1281918008149143, "learning_rate": 9.01846452866861e-06, "loss": 0.2362, "step": 928 }, { "epoch": 0.027101931267868605, "grad_norm": 0.9363948903611427, "learning_rate": 9.02818270165209e-06, "loss": 0.2083, "step": 929 }, { "epoch": 0.027131104498512165, "grad_norm": 1.145122754071775, "learning_rate": 9.03790087463557e-06, "loss": 0.1974, "step": 930 }, { "epoch": 0.027160277729155725, "grad_norm": 1.0865477187764203, "learning_rate": 9.047619047619049e-06, "loss": 0.2341, "step": 931 }, { "epoch": 0.02718945095979929, "grad_norm": 1.3141465162510526, "learning_rate": 9.057337220602528e-06, "loss": 0.2234, "step": 932 }, { "epoch": 0.02721862419044285, "grad_norm": 1.1538744272422732, "learning_rate": 9.067055393586006e-06, "loss": 0.225, "step": 933 }, { "epoch": 0.02724779742108641, "grad_norm": 1.0662250990413884, "learning_rate": 9.076773566569485e-06, "loss": 0.2199, "step": 934 }, { "epoch": 0.027276970651729973, "grad_norm": 1.0405211203137403, "learning_rate": 9.086491739552964e-06, "loss": 0.2134, "step": 935 }, { "epoch": 0.027306143882373533, "grad_norm": 1.167944520650825, "learning_rate": 9.096209912536444e-06, "loss": 0.2542, "step": 936 }, { "epoch": 0.027335317113017097, "grad_norm": 1.2235520404347386, "learning_rate": 9.105928085519923e-06, "loss": 0.2302, "step": 937 }, { "epoch": 0.027364490343660657, "grad_norm": 0.8376879036238295, "learning_rate": 9.115646258503402e-06, "loss": 0.2084, "step": 938 }, { "epoch": 0.027393663574304217, "grad_norm": 1.1672300639818123, "learning_rate": 9.125364431486881e-06, "loss": 0.2129, "step": 939 }, { "epoch": 0.02742283680494778, "grad_norm": 1.238442350287253, "learning_rate": 9.13508260447036e-06, "loss": 0.2273, "step": 940 }, { "epoch": 0.02745201003559134, "grad_norm": 1.1226085765871399, "learning_rate": 9.14480077745384e-06, "loss": 0.2313, "step": 941 }, { "epoch": 0.0274811832662349, "grad_norm": 1.1102788007428515, "learning_rate": 9.15451895043732e-06, "loss": 0.2087, "step": 942 }, { "epoch": 0.027510356496878465, "grad_norm": 1.04976885817183, "learning_rate": 9.164237123420797e-06, "loss": 0.2206, "step": 943 }, { "epoch": 0.027539529727522025, "grad_norm": 1.1268358378697063, "learning_rate": 9.173955296404276e-06, "loss": 0.2234, "step": 944 }, { "epoch": 0.027568702958165586, "grad_norm": 1.0288268415220287, "learning_rate": 9.183673469387756e-06, "loss": 0.2237, "step": 945 }, { "epoch": 0.02759787618880915, "grad_norm": 1.1745349868779273, "learning_rate": 9.193391642371235e-06, "loss": 0.2179, "step": 946 }, { "epoch": 0.02762704941945271, "grad_norm": 1.1508004689734275, "learning_rate": 9.203109815354714e-06, "loss": 0.2146, "step": 947 }, { "epoch": 0.027656222650096273, "grad_norm": 0.9210802873402436, "learning_rate": 9.212827988338194e-06, "loss": 0.2247, "step": 948 }, { "epoch": 0.027685395880739833, "grad_norm": 1.1622540536116515, "learning_rate": 9.222546161321673e-06, "loss": 0.2363, "step": 949 }, { "epoch": 0.027714569111383393, "grad_norm": 1.1425629262142145, "learning_rate": 9.23226433430515e-06, "loss": 0.2479, "step": 950 }, { "epoch": 0.027743742342026957, "grad_norm": 0.9106780037226964, "learning_rate": 9.24198250728863e-06, "loss": 0.2409, "step": 951 }, { "epoch": 0.027772915572670517, "grad_norm": 1.0965125281970576, "learning_rate": 9.251700680272109e-06, "loss": 0.2306, "step": 952 }, { "epoch": 0.027802088803314078, "grad_norm": 1.1201151553658704, "learning_rate": 9.261418853255588e-06, "loss": 0.2198, "step": 953 }, { "epoch": 0.02783126203395764, "grad_norm": 1.0211064270830699, "learning_rate": 9.271137026239068e-06, "loss": 0.2362, "step": 954 }, { "epoch": 0.0278604352646012, "grad_norm": 1.1512785422164178, "learning_rate": 9.280855199222547e-06, "loss": 0.2131, "step": 955 }, { "epoch": 0.027889608495244765, "grad_norm": 1.066555236087168, "learning_rate": 9.290573372206026e-06, "loss": 0.2318, "step": 956 }, { "epoch": 0.027918781725888325, "grad_norm": 1.1943198550527352, "learning_rate": 9.300291545189504e-06, "loss": 0.227, "step": 957 }, { "epoch": 0.027947954956531885, "grad_norm": 1.2512433672693648, "learning_rate": 9.310009718172985e-06, "loss": 0.2331, "step": 958 }, { "epoch": 0.02797712818717545, "grad_norm": 1.1449455664610984, "learning_rate": 9.319727891156464e-06, "loss": 0.2235, "step": 959 }, { "epoch": 0.02800630141781901, "grad_norm": 1.3393978089874983, "learning_rate": 9.329446064139942e-06, "loss": 0.2308, "step": 960 }, { "epoch": 0.02803547464846257, "grad_norm": 1.056733030847754, "learning_rate": 9.339164237123421e-06, "loss": 0.2708, "step": 961 }, { "epoch": 0.028064647879106133, "grad_norm": 0.9567251442342112, "learning_rate": 9.3488824101069e-06, "loss": 0.2195, "step": 962 }, { "epoch": 0.028093821109749693, "grad_norm": 1.2786594166323981, "learning_rate": 9.35860058309038e-06, "loss": 0.2112, "step": 963 }, { "epoch": 0.028122994340393254, "grad_norm": 1.077364504649222, "learning_rate": 9.36831875607386e-06, "loss": 0.208, "step": 964 }, { "epoch": 0.028152167571036817, "grad_norm": 1.2229919345979692, "learning_rate": 9.378036929057338e-06, "loss": 0.2155, "step": 965 }, { "epoch": 0.028181340801680378, "grad_norm": 1.066008540075167, "learning_rate": 9.387755102040818e-06, "loss": 0.2116, "step": 966 }, { "epoch": 0.02821051403232394, "grad_norm": 0.9000305593331918, "learning_rate": 9.397473275024295e-06, "loss": 0.2192, "step": 967 }, { "epoch": 0.0282396872629675, "grad_norm": 1.2483103558638065, "learning_rate": 9.407191448007775e-06, "loss": 0.2446, "step": 968 }, { "epoch": 0.02826886049361106, "grad_norm": 1.1654942991154087, "learning_rate": 9.416909620991254e-06, "loss": 0.2237, "step": 969 }, { "epoch": 0.028298033724254625, "grad_norm": 1.191747132746991, "learning_rate": 9.426627793974733e-06, "loss": 0.2449, "step": 970 }, { "epoch": 0.028327206954898185, "grad_norm": 1.1163267821303762, "learning_rate": 9.436345966958213e-06, "loss": 0.2364, "step": 971 }, { "epoch": 0.028356380185541746, "grad_norm": 1.3026286877189537, "learning_rate": 9.446064139941692e-06, "loss": 0.2179, "step": 972 }, { "epoch": 0.02838555341618531, "grad_norm": 1.1532836150899277, "learning_rate": 9.455782312925171e-06, "loss": 0.2369, "step": 973 }, { "epoch": 0.02841472664682887, "grad_norm": 1.2249757364981582, "learning_rate": 9.465500485908649e-06, "loss": 0.2381, "step": 974 }, { "epoch": 0.02844389987747243, "grad_norm": 0.895540398285069, "learning_rate": 9.47521865889213e-06, "loss": 0.2228, "step": 975 }, { "epoch": 0.028473073108115993, "grad_norm": 1.0567271109109968, "learning_rate": 9.48493683187561e-06, "loss": 0.2292, "step": 976 }, { "epoch": 0.028502246338759554, "grad_norm": 1.0687373189904286, "learning_rate": 9.494655004859087e-06, "loss": 0.2548, "step": 977 }, { "epoch": 0.028531419569403117, "grad_norm": 1.305079059525117, "learning_rate": 9.504373177842566e-06, "loss": 0.273, "step": 978 }, { "epoch": 0.028560592800046677, "grad_norm": 1.1352561925162497, "learning_rate": 9.514091350826045e-06, "loss": 0.2006, "step": 979 }, { "epoch": 0.028589766030690238, "grad_norm": 0.9489404568910308, "learning_rate": 9.523809523809525e-06, "loss": 0.2206, "step": 980 }, { "epoch": 0.0286189392613338, "grad_norm": 1.5175181641343878, "learning_rate": 9.533527696793004e-06, "loss": 0.2104, "step": 981 }, { "epoch": 0.02864811249197736, "grad_norm": 1.401465604706397, "learning_rate": 9.543245869776483e-06, "loss": 0.228, "step": 982 }, { "epoch": 0.02867728572262092, "grad_norm": 0.8548407597315596, "learning_rate": 9.552964042759963e-06, "loss": 0.1962, "step": 983 }, { "epoch": 0.028706458953264485, "grad_norm": 1.2278122948693826, "learning_rate": 9.56268221574344e-06, "loss": 0.2216, "step": 984 }, { "epoch": 0.028735632183908046, "grad_norm": 1.0253338593853756, "learning_rate": 9.57240038872692e-06, "loss": 0.2103, "step": 985 }, { "epoch": 0.028764805414551606, "grad_norm": 1.0819021309696246, "learning_rate": 9.582118561710399e-06, "loss": 0.2151, "step": 986 }, { "epoch": 0.02879397864519517, "grad_norm": 1.1621048015455133, "learning_rate": 9.591836734693878e-06, "loss": 0.2278, "step": 987 }, { "epoch": 0.02882315187583873, "grad_norm": 1.148004969755955, "learning_rate": 9.601554907677358e-06, "loss": 0.2065, "step": 988 }, { "epoch": 0.028852325106482293, "grad_norm": 1.4542424991666156, "learning_rate": 9.611273080660837e-06, "loss": 0.2662, "step": 989 }, { "epoch": 0.028881498337125854, "grad_norm": 1.4312639880264453, "learning_rate": 9.620991253644316e-06, "loss": 0.2584, "step": 990 }, { "epoch": 0.028910671567769414, "grad_norm": 1.248581176882113, "learning_rate": 9.630709426627794e-06, "loss": 0.2002, "step": 991 }, { "epoch": 0.028939844798412977, "grad_norm": 1.075820748048547, "learning_rate": 9.640427599611275e-06, "loss": 0.2338, "step": 992 }, { "epoch": 0.028969018029056538, "grad_norm": 1.4514438131506477, "learning_rate": 9.650145772594754e-06, "loss": 0.2322, "step": 993 }, { "epoch": 0.028998191259700098, "grad_norm": 1.139222248383189, "learning_rate": 9.659863945578232e-06, "loss": 0.2208, "step": 994 }, { "epoch": 0.02902736449034366, "grad_norm": 1.2710835376802465, "learning_rate": 9.669582118561711e-06, "loss": 0.2531, "step": 995 }, { "epoch": 0.02905653772098722, "grad_norm": 1.3670607561351638, "learning_rate": 9.67930029154519e-06, "loss": 0.217, "step": 996 }, { "epoch": 0.029085710951630785, "grad_norm": 1.0421126991423406, "learning_rate": 9.68901846452867e-06, "loss": 0.2368, "step": 997 }, { "epoch": 0.029114884182274346, "grad_norm": 1.164074668991034, "learning_rate": 9.698736637512149e-06, "loss": 0.2484, "step": 998 }, { "epoch": 0.029144057412917906, "grad_norm": 1.1465573713760517, "learning_rate": 9.708454810495628e-06, "loss": 0.2278, "step": 999 }, { "epoch": 0.02917323064356147, "grad_norm": 1.0784643564271585, "learning_rate": 9.718172983479108e-06, "loss": 0.2462, "step": 1000 }, { "epoch": 0.02920240387420503, "grad_norm": 1.1130447279632785, "learning_rate": 9.727891156462585e-06, "loss": 0.2311, "step": 1001 }, { "epoch": 0.02923157710484859, "grad_norm": 1.2503403259198171, "learning_rate": 9.737609329446065e-06, "loss": 0.232, "step": 1002 }, { "epoch": 0.029260750335492153, "grad_norm": 1.1556947570749498, "learning_rate": 9.747327502429544e-06, "loss": 0.2339, "step": 1003 }, { "epoch": 0.029289923566135714, "grad_norm": 0.9942713910664488, "learning_rate": 9.757045675413023e-06, "loss": 0.2439, "step": 1004 }, { "epoch": 0.029319096796779274, "grad_norm": 0.979660492071142, "learning_rate": 9.766763848396502e-06, "loss": 0.2094, "step": 1005 }, { "epoch": 0.029348270027422838, "grad_norm": 0.9381926924935178, "learning_rate": 9.776482021379982e-06, "loss": 0.2114, "step": 1006 }, { "epoch": 0.029377443258066398, "grad_norm": 0.9335735269940261, "learning_rate": 9.78620019436346e-06, "loss": 0.2134, "step": 1007 }, { "epoch": 0.02940661648870996, "grad_norm": 1.0880985866604813, "learning_rate": 9.795918367346939e-06, "loss": 0.2344, "step": 1008 }, { "epoch": 0.02943578971935352, "grad_norm": 1.2107379716170956, "learning_rate": 9.805636540330418e-06, "loss": 0.2284, "step": 1009 }, { "epoch": 0.029464962949997082, "grad_norm": 1.1463554003095695, "learning_rate": 9.815354713313899e-06, "loss": 0.2275, "step": 1010 }, { "epoch": 0.029494136180640645, "grad_norm": 0.9356145745746572, "learning_rate": 9.825072886297377e-06, "loss": 0.2212, "step": 1011 }, { "epoch": 0.029523309411284206, "grad_norm": 1.1456220731005373, "learning_rate": 9.834791059280856e-06, "loss": 0.2009, "step": 1012 }, { "epoch": 0.029552482641927766, "grad_norm": 0.9493920852972088, "learning_rate": 9.844509232264335e-06, "loss": 0.2433, "step": 1013 }, { "epoch": 0.02958165587257133, "grad_norm": 1.120088355834498, "learning_rate": 9.854227405247815e-06, "loss": 0.2425, "step": 1014 }, { "epoch": 0.02961082910321489, "grad_norm": 1.0822183880901732, "learning_rate": 9.863945578231294e-06, "loss": 0.2248, "step": 1015 }, { "epoch": 0.02964000233385845, "grad_norm": 0.9589973441307148, "learning_rate": 9.873663751214773e-06, "loss": 0.2257, "step": 1016 }, { "epoch": 0.029669175564502014, "grad_norm": 1.1954242272928033, "learning_rate": 9.883381924198252e-06, "loss": 0.2179, "step": 1017 }, { "epoch": 0.029698348795145574, "grad_norm": 1.0242808476806817, "learning_rate": 9.89310009718173e-06, "loss": 0.2343, "step": 1018 }, { "epoch": 0.029727522025789137, "grad_norm": 0.9177405765805806, "learning_rate": 9.90281827016521e-06, "loss": 0.2291, "step": 1019 }, { "epoch": 0.029756695256432698, "grad_norm": 1.2918547765404111, "learning_rate": 9.912536443148689e-06, "loss": 0.2086, "step": 1020 }, { "epoch": 0.029785868487076258, "grad_norm": 0.9779153452842309, "learning_rate": 9.922254616132168e-06, "loss": 0.1967, "step": 1021 }, { "epoch": 0.02981504171771982, "grad_norm": 1.4523478461643309, "learning_rate": 9.931972789115647e-06, "loss": 0.2171, "step": 1022 }, { "epoch": 0.02984421494836338, "grad_norm": 1.2042093839394294, "learning_rate": 9.941690962099127e-06, "loss": 0.2229, "step": 1023 }, { "epoch": 0.029873388179006942, "grad_norm": 1.373374530674181, "learning_rate": 9.951409135082604e-06, "loss": 0.2259, "step": 1024 }, { "epoch": 0.029902561409650506, "grad_norm": 1.4002754178324355, "learning_rate": 9.961127308066084e-06, "loss": 0.2517, "step": 1025 }, { "epoch": 0.029931734640294066, "grad_norm": 1.1459327387497917, "learning_rate": 9.970845481049563e-06, "loss": 0.2115, "step": 1026 }, { "epoch": 0.029960907870937626, "grad_norm": 1.2113509802193894, "learning_rate": 9.980563654033044e-06, "loss": 0.2271, "step": 1027 }, { "epoch": 0.02999008110158119, "grad_norm": 1.2171569257268688, "learning_rate": 9.990281827016522e-06, "loss": 0.2373, "step": 1028 }, { "epoch": 0.03001925433222475, "grad_norm": 1.2814230734715215, "learning_rate": 1e-05, "loss": 0.2208, "step": 1029 }, { "epoch": 0.030048427562868314, "grad_norm": 1.1038896598078904, "learning_rate": 9.999999977680598e-06, "loss": 0.2418, "step": 1030 }, { "epoch": 0.030077600793511874, "grad_norm": 1.1780794329233468, "learning_rate": 9.99999991072239e-06, "loss": 0.271, "step": 1031 }, { "epoch": 0.030106774024155434, "grad_norm": 1.2161672986842347, "learning_rate": 9.999999799125373e-06, "loss": 0.2393, "step": 1032 }, { "epoch": 0.030135947254798998, "grad_norm": 1.0013279539555224, "learning_rate": 9.999999642889553e-06, "loss": 0.2331, "step": 1033 }, { "epoch": 0.030165120485442558, "grad_norm": 1.1191553516627004, "learning_rate": 9.999999442014931e-06, "loss": 0.2565, "step": 1034 }, { "epoch": 0.030194293716086118, "grad_norm": 1.0903349175568633, "learning_rate": 9.999999196501506e-06, "loss": 0.2656, "step": 1035 }, { "epoch": 0.03022346694672968, "grad_norm": 1.6161898354090392, "learning_rate": 9.999998906349283e-06, "loss": 0.2493, "step": 1036 }, { "epoch": 0.030252640177373242, "grad_norm": 1.0186301722121622, "learning_rate": 9.999998571558263e-06, "loss": 0.2247, "step": 1037 }, { "epoch": 0.030281813408016802, "grad_norm": 0.9351870649492161, "learning_rate": 9.999998192128449e-06, "loss": 0.2076, "step": 1038 }, { "epoch": 0.030310986638660366, "grad_norm": 1.352484917920168, "learning_rate": 9.999997768059845e-06, "loss": 0.248, "step": 1039 }, { "epoch": 0.030340159869303926, "grad_norm": 0.9669604788901461, "learning_rate": 9.999997299352456e-06, "loss": 0.1975, "step": 1040 }, { "epoch": 0.03036933309994749, "grad_norm": 1.2802932840500656, "learning_rate": 9.999996786006282e-06, "loss": 0.2477, "step": 1041 }, { "epoch": 0.03039850633059105, "grad_norm": 1.0765392634378834, "learning_rate": 9.999996228021332e-06, "loss": 0.1886, "step": 1042 }, { "epoch": 0.03042767956123461, "grad_norm": 1.2080753231670838, "learning_rate": 9.999995625397607e-06, "loss": 0.2101, "step": 1043 }, { "epoch": 0.030456852791878174, "grad_norm": 1.0108288252893798, "learning_rate": 9.999994978135117e-06, "loss": 0.2501, "step": 1044 }, { "epoch": 0.030486026022521734, "grad_norm": 1.0778645089294687, "learning_rate": 9.999994286233866e-06, "loss": 0.219, "step": 1045 }, { "epoch": 0.030515199253165294, "grad_norm": 1.46972921740258, "learning_rate": 9.999993549693859e-06, "loss": 0.2725, "step": 1046 }, { "epoch": 0.030544372483808858, "grad_norm": 1.0426141075524984, "learning_rate": 9.999992768515101e-06, "loss": 0.2262, "step": 1047 }, { "epoch": 0.030573545714452418, "grad_norm": 0.8282285003795017, "learning_rate": 9.999991942697602e-06, "loss": 0.2029, "step": 1048 }, { "epoch": 0.03060271894509598, "grad_norm": 1.287499827347934, "learning_rate": 9.999991072241371e-06, "loss": 0.2347, "step": 1049 }, { "epoch": 0.030631892175739542, "grad_norm": 1.0875295125811832, "learning_rate": 9.999990157146411e-06, "loss": 0.2292, "step": 1050 }, { "epoch": 0.030661065406383102, "grad_norm": 1.0651058116013083, "learning_rate": 9.999989197412733e-06, "loss": 0.2094, "step": 1051 }, { "epoch": 0.030690238637026666, "grad_norm": 1.3083532189291245, "learning_rate": 9.999988193040345e-06, "loss": 0.2448, "step": 1052 }, { "epoch": 0.030719411867670226, "grad_norm": 1.174307964114287, "learning_rate": 9.999987144029256e-06, "loss": 0.2164, "step": 1053 }, { "epoch": 0.030748585098313786, "grad_norm": 1.1792434998984536, "learning_rate": 9.999986050379476e-06, "loss": 0.2175, "step": 1054 }, { "epoch": 0.03077775832895735, "grad_norm": 1.105215045612077, "learning_rate": 9.999984912091012e-06, "loss": 0.2458, "step": 1055 }, { "epoch": 0.03080693155960091, "grad_norm": 1.2327368379074821, "learning_rate": 9.999983729163879e-06, "loss": 0.2386, "step": 1056 }, { "epoch": 0.03083610479024447, "grad_norm": 1.0071343378859123, "learning_rate": 9.999982501598085e-06, "loss": 0.2403, "step": 1057 }, { "epoch": 0.030865278020888034, "grad_norm": 0.9581753368319474, "learning_rate": 9.999981229393638e-06, "loss": 0.2333, "step": 1058 }, { "epoch": 0.030894451251531594, "grad_norm": 0.9039464725641874, "learning_rate": 9.999979912550554e-06, "loss": 0.2296, "step": 1059 }, { "epoch": 0.030923624482175158, "grad_norm": 0.9799420306191866, "learning_rate": 9.999978551068843e-06, "loss": 0.2247, "step": 1060 }, { "epoch": 0.030952797712818718, "grad_norm": 1.0631962544979339, "learning_rate": 9.999977144948516e-06, "loss": 0.2481, "step": 1061 }, { "epoch": 0.030981970943462278, "grad_norm": 0.8651539004894622, "learning_rate": 9.999975694189588e-06, "loss": 0.2337, "step": 1062 }, { "epoch": 0.031011144174105842, "grad_norm": 1.1665030369366545, "learning_rate": 9.999974198792071e-06, "loss": 0.2519, "step": 1063 }, { "epoch": 0.031040317404749402, "grad_norm": 1.114903267694991, "learning_rate": 9.999972658755976e-06, "loss": 0.2231, "step": 1064 }, { "epoch": 0.031069490635392962, "grad_norm": 0.9330234882071323, "learning_rate": 9.99997107408132e-06, "loss": 0.2148, "step": 1065 }, { "epoch": 0.031098663866036526, "grad_norm": 0.9682184930048112, "learning_rate": 9.999969444768116e-06, "loss": 0.2154, "step": 1066 }, { "epoch": 0.031127837096680086, "grad_norm": 1.10379167524357, "learning_rate": 9.999967770816376e-06, "loss": 0.2454, "step": 1067 }, { "epoch": 0.031157010327323646, "grad_norm": 0.9999280949417427, "learning_rate": 9.99996605222612e-06, "loss": 0.225, "step": 1068 }, { "epoch": 0.03118618355796721, "grad_norm": 1.0971592229248597, "learning_rate": 9.999964288997361e-06, "loss": 0.2335, "step": 1069 }, { "epoch": 0.03121535678861077, "grad_norm": 0.9988697953983956, "learning_rate": 9.999962481130112e-06, "loss": 0.2646, "step": 1070 }, { "epoch": 0.031244530019254334, "grad_norm": 1.1785567945511561, "learning_rate": 9.999960628624394e-06, "loss": 0.2036, "step": 1071 }, { "epoch": 0.031273703249897894, "grad_norm": 0.8444076295505306, "learning_rate": 9.999958731480219e-06, "loss": 0.2206, "step": 1072 }, { "epoch": 0.031302876480541454, "grad_norm": 1.1765564114291198, "learning_rate": 9.999956789697608e-06, "loss": 0.2196, "step": 1073 }, { "epoch": 0.031332049711185014, "grad_norm": 1.0027789207640034, "learning_rate": 9.999954803276575e-06, "loss": 0.2435, "step": 1074 }, { "epoch": 0.03136122294182858, "grad_norm": 1.1942686350375853, "learning_rate": 9.99995277221714e-06, "loss": 0.2146, "step": 1075 }, { "epoch": 0.03139039617247214, "grad_norm": 1.0105641916721053, "learning_rate": 9.99995069651932e-06, "loss": 0.2211, "step": 1076 }, { "epoch": 0.0314195694031157, "grad_norm": 1.035972104046321, "learning_rate": 9.999948576183133e-06, "loss": 0.2601, "step": 1077 }, { "epoch": 0.03144874263375926, "grad_norm": 1.0463994881138075, "learning_rate": 9.999946411208598e-06, "loss": 0.231, "step": 1078 }, { "epoch": 0.03147791586440282, "grad_norm": 1.0958469859592759, "learning_rate": 9.999944201595736e-06, "loss": 0.2162, "step": 1079 }, { "epoch": 0.03150708909504638, "grad_norm": 0.9170157458484897, "learning_rate": 9.999941947344567e-06, "loss": 0.2536, "step": 1080 }, { "epoch": 0.03153626232568995, "grad_norm": 1.0339277976868972, "learning_rate": 9.999939648455108e-06, "loss": 0.2283, "step": 1081 }, { "epoch": 0.03156543555633351, "grad_norm": 1.010049306504477, "learning_rate": 9.999937304927384e-06, "loss": 0.2272, "step": 1082 }, { "epoch": 0.03159460878697707, "grad_norm": 0.8365637377041215, "learning_rate": 9.999934916761411e-06, "loss": 0.2367, "step": 1083 }, { "epoch": 0.03162378201762063, "grad_norm": 1.0402870618881737, "learning_rate": 9.999932483957212e-06, "loss": 0.2162, "step": 1084 }, { "epoch": 0.03165295524826419, "grad_norm": 1.0734228405774093, "learning_rate": 9.999930006514811e-06, "loss": 0.2538, "step": 1085 }, { "epoch": 0.03168212847890776, "grad_norm": 1.064405888182433, "learning_rate": 9.999927484434229e-06, "loss": 0.2526, "step": 1086 }, { "epoch": 0.03171130170955132, "grad_norm": 1.072147764043234, "learning_rate": 9.999924917715486e-06, "loss": 0.1984, "step": 1087 }, { "epoch": 0.03174047494019488, "grad_norm": 0.9001795646488481, "learning_rate": 9.999922306358607e-06, "loss": 0.2155, "step": 1088 }, { "epoch": 0.03176964817083844, "grad_norm": 1.080438778596477, "learning_rate": 9.999919650363617e-06, "loss": 0.2185, "step": 1089 }, { "epoch": 0.031798821401482, "grad_norm": 0.9943418293671965, "learning_rate": 9.999916949730536e-06, "loss": 0.2397, "step": 1090 }, { "epoch": 0.03182799463212556, "grad_norm": 1.1837387132283383, "learning_rate": 9.999914204459393e-06, "loss": 0.2258, "step": 1091 }, { "epoch": 0.031857167862769126, "grad_norm": 1.3608341986920691, "learning_rate": 9.999911414550207e-06, "loss": 0.2383, "step": 1092 }, { "epoch": 0.031886341093412686, "grad_norm": 1.1532640252010724, "learning_rate": 9.999908580003006e-06, "loss": 0.2279, "step": 1093 }, { "epoch": 0.031915514324056246, "grad_norm": 1.2667090607333933, "learning_rate": 9.999905700817816e-06, "loss": 0.2466, "step": 1094 }, { "epoch": 0.031944687554699806, "grad_norm": 1.1186150208903016, "learning_rate": 9.99990277699466e-06, "loss": 0.2575, "step": 1095 }, { "epoch": 0.031973860785343367, "grad_norm": 1.04566986537825, "learning_rate": 9.999899808533566e-06, "loss": 0.2322, "step": 1096 }, { "epoch": 0.032003034015986934, "grad_norm": 1.1302419209531933, "learning_rate": 9.999896795434561e-06, "loss": 0.2613, "step": 1097 }, { "epoch": 0.032032207246630494, "grad_norm": 1.001348327485578, "learning_rate": 9.999893737697668e-06, "loss": 0.2061, "step": 1098 }, { "epoch": 0.032061380477274054, "grad_norm": 1.0936303751215548, "learning_rate": 9.99989063532292e-06, "loss": 0.2413, "step": 1099 }, { "epoch": 0.032090553707917614, "grad_norm": 1.2966792596343628, "learning_rate": 9.999887488310342e-06, "loss": 0.2158, "step": 1100 }, { "epoch": 0.032119726938561174, "grad_norm": 1.068266295553114, "learning_rate": 9.999884296659961e-06, "loss": 0.2343, "step": 1101 }, { "epoch": 0.032148900169204735, "grad_norm": 1.0216000271168946, "learning_rate": 9.999881060371808e-06, "loss": 0.2495, "step": 1102 }, { "epoch": 0.0321780733998483, "grad_norm": 1.4157251418123615, "learning_rate": 9.999877779445908e-06, "loss": 0.2736, "step": 1103 }, { "epoch": 0.03220724663049186, "grad_norm": 1.0096560165463653, "learning_rate": 9.999874453882294e-06, "loss": 0.2362, "step": 1104 }, { "epoch": 0.03223641986113542, "grad_norm": 0.8131015711421686, "learning_rate": 9.999871083680995e-06, "loss": 0.2317, "step": 1105 }, { "epoch": 0.03226559309177898, "grad_norm": 1.1036781682956833, "learning_rate": 9.99986766884204e-06, "loss": 0.2321, "step": 1106 }, { "epoch": 0.03229476632242254, "grad_norm": 1.1382838264022068, "learning_rate": 9.99986420936546e-06, "loss": 0.2198, "step": 1107 }, { "epoch": 0.03232393955306611, "grad_norm": 1.0162604892559457, "learning_rate": 9.999860705251288e-06, "loss": 0.2192, "step": 1108 }, { "epoch": 0.03235311278370967, "grad_norm": 0.8933296893054877, "learning_rate": 9.99985715649955e-06, "loss": 0.2077, "step": 1109 }, { "epoch": 0.03238228601435323, "grad_norm": 1.119167707859606, "learning_rate": 9.999853563110282e-06, "loss": 0.21, "step": 1110 }, { "epoch": 0.03241145924499679, "grad_norm": 1.082100130798858, "learning_rate": 9.999849925083516e-06, "loss": 0.226, "step": 1111 }, { "epoch": 0.03244063247564035, "grad_norm": 1.0214419161709158, "learning_rate": 9.999846242419282e-06, "loss": 0.1914, "step": 1112 }, { "epoch": 0.03246980570628391, "grad_norm": 0.9343600517953623, "learning_rate": 9.999842515117615e-06, "loss": 0.275, "step": 1113 }, { "epoch": 0.03249897893692748, "grad_norm": 0.9865952655303679, "learning_rate": 9.999838743178547e-06, "loss": 0.2129, "step": 1114 }, { "epoch": 0.03252815216757104, "grad_norm": 1.319896897819642, "learning_rate": 9.999834926602113e-06, "loss": 0.2174, "step": 1115 }, { "epoch": 0.0325573253982146, "grad_norm": 1.0521765205195404, "learning_rate": 9.999831065388345e-06, "loss": 0.226, "step": 1116 }, { "epoch": 0.03258649862885816, "grad_norm": 0.9654473865450861, "learning_rate": 9.999827159537281e-06, "loss": 0.2134, "step": 1117 }, { "epoch": 0.03261567185950172, "grad_norm": 1.123917566290155, "learning_rate": 9.999823209048951e-06, "loss": 0.2353, "step": 1118 }, { "epoch": 0.032644845090145286, "grad_norm": 0.9176866330298619, "learning_rate": 9.999819213923394e-06, "loss": 0.2297, "step": 1119 }, { "epoch": 0.032674018320788846, "grad_norm": 1.1257387803194792, "learning_rate": 9.999815174160646e-06, "loss": 0.2051, "step": 1120 }, { "epoch": 0.032703191551432406, "grad_norm": 1.0474059598016923, "learning_rate": 9.999811089760741e-06, "loss": 0.2263, "step": 1121 }, { "epoch": 0.032732364782075966, "grad_norm": 0.798373618759498, "learning_rate": 9.999806960723715e-06, "loss": 0.2278, "step": 1122 }, { "epoch": 0.03276153801271953, "grad_norm": 0.8280537246125049, "learning_rate": 9.999802787049609e-06, "loss": 0.2245, "step": 1123 }, { "epoch": 0.03279071124336309, "grad_norm": 0.9711641379405179, "learning_rate": 9.999798568738453e-06, "loss": 0.2524, "step": 1124 }, { "epoch": 0.032819884474006654, "grad_norm": 1.1246028188582697, "learning_rate": 9.99979430579029e-06, "loss": 0.2262, "step": 1125 }, { "epoch": 0.032849057704650214, "grad_norm": 0.953299071600111, "learning_rate": 9.99978999820516e-06, "loss": 0.1986, "step": 1126 }, { "epoch": 0.032878230935293774, "grad_norm": 0.9431805074183164, "learning_rate": 9.999785645983095e-06, "loss": 0.2109, "step": 1127 }, { "epoch": 0.032907404165937335, "grad_norm": 1.1668135926724232, "learning_rate": 9.999781249124142e-06, "loss": 0.2574, "step": 1128 }, { "epoch": 0.032936577396580895, "grad_norm": 0.9957053951434509, "learning_rate": 9.99977680762833e-06, "loss": 0.2129, "step": 1129 }, { "epoch": 0.03296575062722446, "grad_norm": 0.883050594109438, "learning_rate": 9.999772321495706e-06, "loss": 0.2155, "step": 1130 }, { "epoch": 0.03299492385786802, "grad_norm": 0.9997288408798505, "learning_rate": 9.999767790726309e-06, "loss": 0.2276, "step": 1131 }, { "epoch": 0.03302409708851158, "grad_norm": 1.3235571539055981, "learning_rate": 9.999763215320179e-06, "loss": 0.2235, "step": 1132 }, { "epoch": 0.03305327031915514, "grad_norm": 1.086190879888537, "learning_rate": 9.999758595277356e-06, "loss": 0.2637, "step": 1133 }, { "epoch": 0.0330824435497987, "grad_norm": 0.9198443741837063, "learning_rate": 9.999753930597882e-06, "loss": 0.2125, "step": 1134 }, { "epoch": 0.03311161678044226, "grad_norm": 1.046313029577075, "learning_rate": 9.999749221281798e-06, "loss": 0.2269, "step": 1135 }, { "epoch": 0.03314079001108583, "grad_norm": 1.0128006542158847, "learning_rate": 9.999744467329147e-06, "loss": 0.2413, "step": 1136 }, { "epoch": 0.03316996324172939, "grad_norm": 1.0689231153917713, "learning_rate": 9.999739668739971e-06, "loss": 0.2361, "step": 1137 }, { "epoch": 0.03319913647237295, "grad_norm": 1.036064213424324, "learning_rate": 9.999734825514312e-06, "loss": 0.2076, "step": 1138 }, { "epoch": 0.03322830970301651, "grad_norm": 0.9531214242931653, "learning_rate": 9.999729937652214e-06, "loss": 0.2032, "step": 1139 }, { "epoch": 0.03325748293366007, "grad_norm": 1.1916189472859937, "learning_rate": 9.999725005153721e-06, "loss": 0.2308, "step": 1140 }, { "epoch": 0.03328665616430364, "grad_norm": 1.093124559785437, "learning_rate": 9.999720028018877e-06, "loss": 0.2054, "step": 1141 }, { "epoch": 0.0333158293949472, "grad_norm": 1.1483154885704239, "learning_rate": 9.999715006247726e-06, "loss": 0.254, "step": 1142 }, { "epoch": 0.03334500262559076, "grad_norm": 1.2729315953713594, "learning_rate": 9.999709939840314e-06, "loss": 0.24, "step": 1143 }, { "epoch": 0.03337417585623432, "grad_norm": 1.156513619544326, "learning_rate": 9.999704828796683e-06, "loss": 0.232, "step": 1144 }, { "epoch": 0.03340334908687788, "grad_norm": 1.0072178870045805, "learning_rate": 9.999699673116882e-06, "loss": 0.225, "step": 1145 }, { "epoch": 0.03343252231752144, "grad_norm": 1.2746348783347192, "learning_rate": 9.999694472800956e-06, "loss": 0.2517, "step": 1146 }, { "epoch": 0.033461695548165006, "grad_norm": 1.034777373284874, "learning_rate": 9.99968922784895e-06, "loss": 0.2077, "step": 1147 }, { "epoch": 0.033490868778808566, "grad_norm": 0.9290033166511193, "learning_rate": 9.999683938260915e-06, "loss": 0.2146, "step": 1148 }, { "epoch": 0.033520042009452126, "grad_norm": 1.0897810951548232, "learning_rate": 9.999678604036893e-06, "loss": 0.2305, "step": 1149 }, { "epoch": 0.03354921524009569, "grad_norm": 1.0589171142505913, "learning_rate": 9.999673225176934e-06, "loss": 0.262, "step": 1150 }, { "epoch": 0.03357838847073925, "grad_norm": 1.2259342193695393, "learning_rate": 9.999667801681087e-06, "loss": 0.2618, "step": 1151 }, { "epoch": 0.033607561701382814, "grad_norm": 1.296660574782945, "learning_rate": 9.999662333549399e-06, "loss": 0.2627, "step": 1152 }, { "epoch": 0.033636734932026374, "grad_norm": 0.8510483303165537, "learning_rate": 9.999656820781917e-06, "loss": 0.2147, "step": 1153 }, { "epoch": 0.033665908162669934, "grad_norm": 1.023782054526501, "learning_rate": 9.999651263378696e-06, "loss": 0.2237, "step": 1154 }, { "epoch": 0.033695081393313495, "grad_norm": 0.8665232399303242, "learning_rate": 9.999645661339779e-06, "loss": 0.2135, "step": 1155 }, { "epoch": 0.033724254623957055, "grad_norm": 1.061822276647852, "learning_rate": 9.999640014665221e-06, "loss": 0.2472, "step": 1156 }, { "epoch": 0.033753427854600615, "grad_norm": 1.0176166037519079, "learning_rate": 9.99963432335507e-06, "loss": 0.2443, "step": 1157 }, { "epoch": 0.03378260108524418, "grad_norm": 0.8789299563084425, "learning_rate": 9.999628587409378e-06, "loss": 0.2371, "step": 1158 }, { "epoch": 0.03381177431588774, "grad_norm": 0.9076170170894672, "learning_rate": 9.999622806828193e-06, "loss": 0.2006, "step": 1159 }, { "epoch": 0.0338409475465313, "grad_norm": 1.2817894819207372, "learning_rate": 9.99961698161157e-06, "loss": 0.2241, "step": 1160 }, { "epoch": 0.03387012077717486, "grad_norm": 1.1012341450895315, "learning_rate": 9.999611111759562e-06, "loss": 0.2336, "step": 1161 }, { "epoch": 0.03389929400781842, "grad_norm": 0.9114744283139627, "learning_rate": 9.999605197272219e-06, "loss": 0.2028, "step": 1162 }, { "epoch": 0.03392846723846199, "grad_norm": 1.034475584854439, "learning_rate": 9.999599238149594e-06, "loss": 0.2502, "step": 1163 }, { "epoch": 0.03395764046910555, "grad_norm": 1.1624818383366493, "learning_rate": 9.999593234391739e-06, "loss": 0.2222, "step": 1164 }, { "epoch": 0.03398681369974911, "grad_norm": 0.956399900823704, "learning_rate": 9.99958718599871e-06, "loss": 0.2257, "step": 1165 }, { "epoch": 0.03401598693039267, "grad_norm": 0.9627877016735599, "learning_rate": 9.999581092970561e-06, "loss": 0.2126, "step": 1166 }, { "epoch": 0.03404516016103623, "grad_norm": 1.0693354601759546, "learning_rate": 9.999574955307345e-06, "loss": 0.2338, "step": 1167 }, { "epoch": 0.0340743333916798, "grad_norm": 1.0054022949745454, "learning_rate": 9.999568773009116e-06, "loss": 0.2175, "step": 1168 }, { "epoch": 0.03410350662232336, "grad_norm": 0.9618317973515895, "learning_rate": 9.999562546075932e-06, "loss": 0.2229, "step": 1169 }, { "epoch": 0.03413267985296692, "grad_norm": 1.0775137058196893, "learning_rate": 9.999556274507847e-06, "loss": 0.2223, "step": 1170 }, { "epoch": 0.03416185308361048, "grad_norm": 1.2646575418200323, "learning_rate": 9.999549958304917e-06, "loss": 0.2315, "step": 1171 }, { "epoch": 0.03419102631425404, "grad_norm": 1.286686590384167, "learning_rate": 9.999543597467199e-06, "loss": 0.2253, "step": 1172 }, { "epoch": 0.0342201995448976, "grad_norm": 0.968843466313081, "learning_rate": 9.999537191994747e-06, "loss": 0.2286, "step": 1173 }, { "epoch": 0.034249372775541166, "grad_norm": 1.2498129034205643, "learning_rate": 9.999530741887622e-06, "loss": 0.2256, "step": 1174 }, { "epoch": 0.034278546006184726, "grad_norm": 1.22475074679282, "learning_rate": 9.99952424714588e-06, "loss": 0.2319, "step": 1175 }, { "epoch": 0.03430771923682829, "grad_norm": 0.9952594767143536, "learning_rate": 9.99951770776958e-06, "loss": 0.2047, "step": 1176 }, { "epoch": 0.03433689246747185, "grad_norm": 1.0810281894508293, "learning_rate": 9.999511123758778e-06, "loss": 0.2247, "step": 1177 }, { "epoch": 0.03436606569811541, "grad_norm": 1.1338001686539567, "learning_rate": 9.999504495113533e-06, "loss": 0.2337, "step": 1178 }, { "epoch": 0.034395238928758974, "grad_norm": 0.9428062550979496, "learning_rate": 9.999497821833908e-06, "loss": 0.2058, "step": 1179 }, { "epoch": 0.034424412159402534, "grad_norm": 0.8369047360001224, "learning_rate": 9.999491103919958e-06, "loss": 0.2086, "step": 1180 }, { "epoch": 0.034453585390046094, "grad_norm": 1.2377381920897232, "learning_rate": 9.999484341371746e-06, "loss": 0.2293, "step": 1181 }, { "epoch": 0.034482758620689655, "grad_norm": 0.9937717334450213, "learning_rate": 9.99947753418933e-06, "loss": 0.2228, "step": 1182 }, { "epoch": 0.034511931851333215, "grad_norm": 0.9479845663406832, "learning_rate": 9.999470682372774e-06, "loss": 0.2136, "step": 1183 }, { "epoch": 0.034541105081976775, "grad_norm": 1.1796947730576115, "learning_rate": 9.999463785922136e-06, "loss": 0.2464, "step": 1184 }, { "epoch": 0.03457027831262034, "grad_norm": 1.1718954993744068, "learning_rate": 9.999456844837478e-06, "loss": 0.2327, "step": 1185 }, { "epoch": 0.0345994515432639, "grad_norm": 1.3750095905594888, "learning_rate": 9.999449859118864e-06, "loss": 0.2402, "step": 1186 }, { "epoch": 0.03462862477390746, "grad_norm": 1.040067389611131, "learning_rate": 9.999442828766354e-06, "loss": 0.2332, "step": 1187 }, { "epoch": 0.03465779800455102, "grad_norm": 0.9820412034818019, "learning_rate": 9.999435753780014e-06, "loss": 0.2128, "step": 1188 }, { "epoch": 0.03468697123519458, "grad_norm": 1.065750756453323, "learning_rate": 9.999428634159904e-06, "loss": 0.2201, "step": 1189 }, { "epoch": 0.03471614446583815, "grad_norm": 1.0596792182901573, "learning_rate": 9.999421469906088e-06, "loss": 0.242, "step": 1190 }, { "epoch": 0.03474531769648171, "grad_norm": 1.116131218118618, "learning_rate": 9.999414261018632e-06, "loss": 0.225, "step": 1191 }, { "epoch": 0.03477449092712527, "grad_norm": 1.2665545716134226, "learning_rate": 9.999407007497597e-06, "loss": 0.2271, "step": 1192 }, { "epoch": 0.03480366415776883, "grad_norm": 1.1856520967677182, "learning_rate": 9.999399709343051e-06, "loss": 0.228, "step": 1193 }, { "epoch": 0.03483283738841239, "grad_norm": 1.0543449316668356, "learning_rate": 9.999392366555056e-06, "loss": 0.1993, "step": 1194 }, { "epoch": 0.03486201061905595, "grad_norm": 1.09449362058283, "learning_rate": 9.999384979133682e-06, "loss": 0.2256, "step": 1195 }, { "epoch": 0.03489118384969952, "grad_norm": 1.3760256034494558, "learning_rate": 9.99937754707899e-06, "loss": 0.2213, "step": 1196 }, { "epoch": 0.03492035708034308, "grad_norm": 0.9917622178333504, "learning_rate": 9.999370070391051e-06, "loss": 0.2194, "step": 1197 }, { "epoch": 0.03494953031098664, "grad_norm": 1.080240115007742, "learning_rate": 9.999362549069928e-06, "loss": 0.2129, "step": 1198 }, { "epoch": 0.0349787035416302, "grad_norm": 1.2657859156040228, "learning_rate": 9.99935498311569e-06, "loss": 0.222, "step": 1199 }, { "epoch": 0.03500787677227376, "grad_norm": 1.1555732625979267, "learning_rate": 9.999347372528405e-06, "loss": 0.2269, "step": 1200 }, { "epoch": 0.035037050002917326, "grad_norm": 1.0211903221318566, "learning_rate": 9.999339717308138e-06, "loss": 0.2366, "step": 1201 }, { "epoch": 0.035066223233560886, "grad_norm": 1.0785203518884237, "learning_rate": 9.99933201745496e-06, "loss": 0.2094, "step": 1202 }, { "epoch": 0.03509539646420445, "grad_norm": 0.950386052115053, "learning_rate": 9.99932427296894e-06, "loss": 0.2594, "step": 1203 }, { "epoch": 0.03512456969484801, "grad_norm": 1.0639074983819572, "learning_rate": 9.999316483850147e-06, "loss": 0.2419, "step": 1204 }, { "epoch": 0.03515374292549157, "grad_norm": 0.8965983689343393, "learning_rate": 9.999308650098649e-06, "loss": 0.2275, "step": 1205 }, { "epoch": 0.03518291615613513, "grad_norm": 0.9542237939173827, "learning_rate": 9.999300771714518e-06, "loss": 0.2382, "step": 1206 }, { "epoch": 0.035212089386778694, "grad_norm": 1.1040491741166185, "learning_rate": 9.999292848697822e-06, "loss": 0.2091, "step": 1207 }, { "epoch": 0.035241262617422255, "grad_norm": 0.8636923209159175, "learning_rate": 9.999284881048632e-06, "loss": 0.1985, "step": 1208 }, { "epoch": 0.035270435848065815, "grad_norm": 0.9725826608998004, "learning_rate": 9.99927686876702e-06, "loss": 0.2266, "step": 1209 }, { "epoch": 0.035299609078709375, "grad_norm": 1.147718904673886, "learning_rate": 9.999268811853058e-06, "loss": 0.2204, "step": 1210 }, { "epoch": 0.035328782309352935, "grad_norm": 1.029354075593606, "learning_rate": 9.99926071030682e-06, "loss": 0.1992, "step": 1211 }, { "epoch": 0.0353579555399965, "grad_norm": 0.8408064466005166, "learning_rate": 9.999252564128373e-06, "loss": 0.2166, "step": 1212 }, { "epoch": 0.03538712877064006, "grad_norm": 0.9964253539333732, "learning_rate": 9.999244373317794e-06, "loss": 0.2179, "step": 1213 }, { "epoch": 0.03541630200128362, "grad_norm": 0.9281384144091911, "learning_rate": 9.999236137875152e-06, "loss": 0.2022, "step": 1214 }, { "epoch": 0.03544547523192718, "grad_norm": 1.000925384075284, "learning_rate": 9.999227857800526e-06, "loss": 0.2373, "step": 1215 }, { "epoch": 0.03547464846257074, "grad_norm": 1.1731101092695548, "learning_rate": 9.999219533093986e-06, "loss": 0.1929, "step": 1216 }, { "epoch": 0.0355038216932143, "grad_norm": 0.939288664547676, "learning_rate": 9.999211163755607e-06, "loss": 0.208, "step": 1217 }, { "epoch": 0.03553299492385787, "grad_norm": 0.9529581324176537, "learning_rate": 9.999202749785465e-06, "loss": 0.2301, "step": 1218 }, { "epoch": 0.03556216815450143, "grad_norm": 1.0034436056259843, "learning_rate": 9.999194291183633e-06, "loss": 0.2549, "step": 1219 }, { "epoch": 0.03559134138514499, "grad_norm": 0.8140498582760453, "learning_rate": 9.99918578795019e-06, "loss": 0.2059, "step": 1220 }, { "epoch": 0.03562051461578855, "grad_norm": 1.0141425705255867, "learning_rate": 9.999177240085207e-06, "loss": 0.2157, "step": 1221 }, { "epoch": 0.03564968784643211, "grad_norm": 1.1163699248491348, "learning_rate": 9.999168647588767e-06, "loss": 0.217, "step": 1222 }, { "epoch": 0.03567886107707568, "grad_norm": 0.9139091889221808, "learning_rate": 9.999160010460938e-06, "loss": 0.2209, "step": 1223 }, { "epoch": 0.03570803430771924, "grad_norm": 0.9758061128820017, "learning_rate": 9.999151328701804e-06, "loss": 0.2213, "step": 1224 }, { "epoch": 0.0357372075383628, "grad_norm": 1.142823736007986, "learning_rate": 9.99914260231144e-06, "loss": 0.2406, "step": 1225 }, { "epoch": 0.03576638076900636, "grad_norm": 1.0516074160132758, "learning_rate": 9.999133831289924e-06, "loss": 0.2119, "step": 1226 }, { "epoch": 0.03579555399964992, "grad_norm": 1.1786248717376187, "learning_rate": 9.999125015637337e-06, "loss": 0.2171, "step": 1227 }, { "epoch": 0.03582472723029348, "grad_norm": 1.0082450454305523, "learning_rate": 9.999116155353751e-06, "loss": 0.2248, "step": 1228 }, { "epoch": 0.035853900460937047, "grad_norm": 1.0995312497528595, "learning_rate": 9.999107250439253e-06, "loss": 0.2564, "step": 1229 }, { "epoch": 0.03588307369158061, "grad_norm": 0.9229222188137134, "learning_rate": 9.999098300893916e-06, "loss": 0.2179, "step": 1230 }, { "epoch": 0.03591224692222417, "grad_norm": 1.2543935634661596, "learning_rate": 9.999089306717827e-06, "loss": 0.2322, "step": 1231 }, { "epoch": 0.03594142015286773, "grad_norm": 1.0599017323782356, "learning_rate": 9.999080267911059e-06, "loss": 0.2477, "step": 1232 }, { "epoch": 0.03597059338351129, "grad_norm": 1.08198813220789, "learning_rate": 9.999071184473694e-06, "loss": 0.1955, "step": 1233 }, { "epoch": 0.035999766614154854, "grad_norm": 0.9296632008847323, "learning_rate": 9.999062056405818e-06, "loss": 0.2284, "step": 1234 }, { "epoch": 0.036028939844798415, "grad_norm": 1.124573375991063, "learning_rate": 9.999052883707508e-06, "loss": 0.2238, "step": 1235 }, { "epoch": 0.036058113075441975, "grad_norm": 1.1526743479336314, "learning_rate": 9.999043666378847e-06, "loss": 0.2219, "step": 1236 }, { "epoch": 0.036087286306085535, "grad_norm": 1.3432794585841652, "learning_rate": 9.999034404419918e-06, "loss": 0.2347, "step": 1237 }, { "epoch": 0.036116459536729095, "grad_norm": 1.2156880471894373, "learning_rate": 9.999025097830803e-06, "loss": 0.2454, "step": 1238 }, { "epoch": 0.036145632767372655, "grad_norm": 1.1655941652232784, "learning_rate": 9.999015746611587e-06, "loss": 0.2259, "step": 1239 }, { "epoch": 0.03617480599801622, "grad_norm": 0.9271587585084555, "learning_rate": 9.999006350762349e-06, "loss": 0.22, "step": 1240 }, { "epoch": 0.03620397922865978, "grad_norm": 1.335574920498898, "learning_rate": 9.998996910283177e-06, "loss": 0.2417, "step": 1241 }, { "epoch": 0.03623315245930334, "grad_norm": 1.2195165397277774, "learning_rate": 9.998987425174154e-06, "loss": 0.2118, "step": 1242 }, { "epoch": 0.0362623256899469, "grad_norm": 0.9770996619705205, "learning_rate": 9.998977895435365e-06, "loss": 0.2367, "step": 1243 }, { "epoch": 0.03629149892059046, "grad_norm": 1.1461435430787712, "learning_rate": 9.998968321066893e-06, "loss": 0.2259, "step": 1244 }, { "epoch": 0.03632067215123403, "grad_norm": 0.9637216324090665, "learning_rate": 9.998958702068825e-06, "loss": 0.2116, "step": 1245 }, { "epoch": 0.03634984538187759, "grad_norm": 1.3104402836663753, "learning_rate": 9.99894903844125e-06, "loss": 0.2229, "step": 1246 }, { "epoch": 0.03637901861252115, "grad_norm": 1.0646578161147395, "learning_rate": 9.99893933018425e-06, "loss": 0.2533, "step": 1247 }, { "epoch": 0.03640819184316471, "grad_norm": 0.9792975473419264, "learning_rate": 9.998929577297912e-06, "loss": 0.2534, "step": 1248 }, { "epoch": 0.03643736507380827, "grad_norm": 1.3632713777987333, "learning_rate": 9.998919779782326e-06, "loss": 0.2465, "step": 1249 }, { "epoch": 0.03646653830445183, "grad_norm": 1.1058489882816205, "learning_rate": 9.998909937637576e-06, "loss": 0.2462, "step": 1250 }, { "epoch": 0.0364957115350954, "grad_norm": 0.9250320891239557, "learning_rate": 9.998900050863751e-06, "loss": 0.2228, "step": 1251 }, { "epoch": 0.03652488476573896, "grad_norm": 1.0517853930639236, "learning_rate": 9.99889011946094e-06, "loss": 0.2159, "step": 1252 }, { "epoch": 0.03655405799638252, "grad_norm": 1.0641799816582995, "learning_rate": 9.998880143429233e-06, "loss": 0.2312, "step": 1253 }, { "epoch": 0.03658323122702608, "grad_norm": 1.0400544389053685, "learning_rate": 9.998870122768716e-06, "loss": 0.2234, "step": 1254 }, { "epoch": 0.03661240445766964, "grad_norm": 0.7784251761330888, "learning_rate": 9.99886005747948e-06, "loss": 0.2206, "step": 1255 }, { "epoch": 0.03664157768831321, "grad_norm": 0.9387970895902331, "learning_rate": 9.998849947561615e-06, "loss": 0.2338, "step": 1256 }, { "epoch": 0.03667075091895677, "grad_norm": 0.9265651405115524, "learning_rate": 9.99883979301521e-06, "loss": 0.2256, "step": 1257 }, { "epoch": 0.03669992414960033, "grad_norm": 1.0746778660457141, "learning_rate": 9.998829593840358e-06, "loss": 0.2585, "step": 1258 }, { "epoch": 0.03672909738024389, "grad_norm": 0.980393031070998, "learning_rate": 9.998819350037148e-06, "loss": 0.2649, "step": 1259 }, { "epoch": 0.03675827061088745, "grad_norm": 1.091527366559575, "learning_rate": 9.998809061605671e-06, "loss": 0.2196, "step": 1260 }, { "epoch": 0.036787443841531015, "grad_norm": 0.9969539814682798, "learning_rate": 9.998798728546022e-06, "loss": 0.2436, "step": 1261 }, { "epoch": 0.036816617072174575, "grad_norm": 1.0774496370979247, "learning_rate": 9.998788350858291e-06, "loss": 0.2032, "step": 1262 }, { "epoch": 0.036845790302818135, "grad_norm": 1.1692389257363829, "learning_rate": 9.99877792854257e-06, "loss": 0.2389, "step": 1263 }, { "epoch": 0.036874963533461695, "grad_norm": 1.0704672720932178, "learning_rate": 9.998767461598954e-06, "loss": 0.2021, "step": 1264 }, { "epoch": 0.036904136764105255, "grad_norm": 0.8743761740096346, "learning_rate": 9.998756950027535e-06, "loss": 0.2237, "step": 1265 }, { "epoch": 0.036933309994748816, "grad_norm": 0.8363473801995409, "learning_rate": 9.998746393828406e-06, "loss": 0.2225, "step": 1266 }, { "epoch": 0.03696248322539238, "grad_norm": 1.1059349135328747, "learning_rate": 9.998735793001663e-06, "loss": 0.2146, "step": 1267 }, { "epoch": 0.03699165645603594, "grad_norm": 1.4869298313402535, "learning_rate": 9.998725147547401e-06, "loss": 0.2393, "step": 1268 }, { "epoch": 0.0370208296866795, "grad_norm": 0.7916156050456058, "learning_rate": 9.998714457465715e-06, "loss": 0.193, "step": 1269 }, { "epoch": 0.03705000291732306, "grad_norm": 1.0332321823689818, "learning_rate": 9.998703722756698e-06, "loss": 0.2131, "step": 1270 }, { "epoch": 0.037079176147966623, "grad_norm": 1.0944133136162206, "learning_rate": 9.998692943420448e-06, "loss": 0.204, "step": 1271 }, { "epoch": 0.03710834937861019, "grad_norm": 1.0342856859310066, "learning_rate": 9.99868211945706e-06, "loss": 0.259, "step": 1272 }, { "epoch": 0.03713752260925375, "grad_norm": 0.8740729653604431, "learning_rate": 9.998671250866631e-06, "loss": 0.2106, "step": 1273 }, { "epoch": 0.03716669583989731, "grad_norm": 1.1146756165041172, "learning_rate": 9.998660337649261e-06, "loss": 0.2356, "step": 1274 }, { "epoch": 0.03719586907054087, "grad_norm": 1.0625759772365957, "learning_rate": 9.998649379805044e-06, "loss": 0.2227, "step": 1275 }, { "epoch": 0.03722504230118443, "grad_norm": 0.8965303127918206, "learning_rate": 9.998638377334076e-06, "loss": 0.21, "step": 1276 }, { "epoch": 0.03725421553182799, "grad_norm": 0.9839152452283733, "learning_rate": 9.99862733023646e-06, "loss": 0.2004, "step": 1277 }, { "epoch": 0.03728338876247156, "grad_norm": 0.9071206977228208, "learning_rate": 9.998616238512292e-06, "loss": 0.2256, "step": 1278 }, { "epoch": 0.03731256199311512, "grad_norm": 1.1487114913959353, "learning_rate": 9.998605102161672e-06, "loss": 0.198, "step": 1279 }, { "epoch": 0.03734173522375868, "grad_norm": 1.0496812943687999, "learning_rate": 9.998593921184699e-06, "loss": 0.2025, "step": 1280 }, { "epoch": 0.03737090845440224, "grad_norm": 1.0971957882567478, "learning_rate": 9.998582695581471e-06, "loss": 0.2136, "step": 1281 }, { "epoch": 0.0374000816850458, "grad_norm": 1.1588128358759724, "learning_rate": 9.99857142535209e-06, "loss": 0.2007, "step": 1282 }, { "epoch": 0.03742925491568937, "grad_norm": 0.9426336682930098, "learning_rate": 9.998560110496658e-06, "loss": 0.2297, "step": 1283 }, { "epoch": 0.03745842814633293, "grad_norm": 1.10538456148938, "learning_rate": 9.998548751015275e-06, "loss": 0.2436, "step": 1284 }, { "epoch": 0.03748760137697649, "grad_norm": 0.9765259385595629, "learning_rate": 9.998537346908041e-06, "loss": 0.2116, "step": 1285 }, { "epoch": 0.03751677460762005, "grad_norm": 1.0595574926908682, "learning_rate": 9.99852589817506e-06, "loss": 0.2166, "step": 1286 }, { "epoch": 0.03754594783826361, "grad_norm": 1.169670012152128, "learning_rate": 9.99851440481643e-06, "loss": 0.2492, "step": 1287 }, { "epoch": 0.03757512106890717, "grad_norm": 0.952212186128128, "learning_rate": 9.99850286683226e-06, "loss": 0.2302, "step": 1288 }, { "epoch": 0.037604294299550735, "grad_norm": 1.1727912344580984, "learning_rate": 9.998491284222647e-06, "loss": 0.2276, "step": 1289 }, { "epoch": 0.037633467530194295, "grad_norm": 1.0989007437200973, "learning_rate": 9.998479656987699e-06, "loss": 0.2535, "step": 1290 }, { "epoch": 0.037662640760837855, "grad_norm": 1.0529017059038746, "learning_rate": 9.998467985127518e-06, "loss": 0.2388, "step": 1291 }, { "epoch": 0.037691813991481415, "grad_norm": 1.097426098611742, "learning_rate": 9.998456268642207e-06, "loss": 0.2541, "step": 1292 }, { "epoch": 0.037720987222124976, "grad_norm": 0.9185947687485129, "learning_rate": 9.998444507531872e-06, "loss": 0.225, "step": 1293 }, { "epoch": 0.03775016045276854, "grad_norm": 0.9874842534819169, "learning_rate": 9.998432701796617e-06, "loss": 0.2301, "step": 1294 }, { "epoch": 0.0377793336834121, "grad_norm": 0.987295071667692, "learning_rate": 9.99842085143655e-06, "loss": 0.2116, "step": 1295 }, { "epoch": 0.03780850691405566, "grad_norm": 0.9891448350948405, "learning_rate": 9.998408956451773e-06, "loss": 0.22, "step": 1296 }, { "epoch": 0.03783768014469922, "grad_norm": 0.8364712310069523, "learning_rate": 9.998397016842394e-06, "loss": 0.1882, "step": 1297 }, { "epoch": 0.037866853375342784, "grad_norm": 1.0465819091625597, "learning_rate": 9.99838503260852e-06, "loss": 0.2097, "step": 1298 }, { "epoch": 0.037896026605986344, "grad_norm": 1.0831275110704366, "learning_rate": 9.998373003750259e-06, "loss": 0.2135, "step": 1299 }, { "epoch": 0.03792519983662991, "grad_norm": 1.0699745458763916, "learning_rate": 9.998360930267715e-06, "loss": 0.2468, "step": 1300 }, { "epoch": 0.03795437306727347, "grad_norm": 0.9599499742993474, "learning_rate": 9.998348812160999e-06, "loss": 0.2259, "step": 1301 }, { "epoch": 0.03798354629791703, "grad_norm": 1.0159198034611567, "learning_rate": 9.998336649430217e-06, "loss": 0.219, "step": 1302 }, { "epoch": 0.03801271952856059, "grad_norm": 0.9105327449863514, "learning_rate": 9.99832444207548e-06, "loss": 0.1939, "step": 1303 }, { "epoch": 0.03804189275920415, "grad_norm": 1.0172706973557566, "learning_rate": 9.998312190096896e-06, "loss": 0.212, "step": 1304 }, { "epoch": 0.03807106598984772, "grad_norm": 1.1388665063274868, "learning_rate": 9.998299893494572e-06, "loss": 0.1974, "step": 1305 }, { "epoch": 0.03810023922049128, "grad_norm": 0.9868309224658413, "learning_rate": 9.99828755226862e-06, "loss": 0.2216, "step": 1306 }, { "epoch": 0.03812941245113484, "grad_norm": 1.2572752732793357, "learning_rate": 9.998275166419152e-06, "loss": 0.2226, "step": 1307 }, { "epoch": 0.0381585856817784, "grad_norm": 1.1406110116712587, "learning_rate": 9.998262735946274e-06, "loss": 0.2108, "step": 1308 }, { "epoch": 0.03818775891242196, "grad_norm": 0.8878135344973613, "learning_rate": 9.9982502608501e-06, "loss": 0.2302, "step": 1309 }, { "epoch": 0.03821693214306552, "grad_norm": 1.0201696690286826, "learning_rate": 9.998237741130742e-06, "loss": 0.2345, "step": 1310 }, { "epoch": 0.03824610537370909, "grad_norm": 0.9213405910768891, "learning_rate": 9.998225176788309e-06, "loss": 0.2268, "step": 1311 }, { "epoch": 0.03827527860435265, "grad_norm": 0.9604706367061303, "learning_rate": 9.998212567822917e-06, "loss": 0.1968, "step": 1312 }, { "epoch": 0.03830445183499621, "grad_norm": 0.8607403891983637, "learning_rate": 9.998199914234674e-06, "loss": 0.2221, "step": 1313 }, { "epoch": 0.03833362506563977, "grad_norm": 0.9066436101604387, "learning_rate": 9.998187216023696e-06, "loss": 0.228, "step": 1314 }, { "epoch": 0.03836279829628333, "grad_norm": 0.9785700401350489, "learning_rate": 9.998174473190098e-06, "loss": 0.2111, "step": 1315 }, { "epoch": 0.038391971526926895, "grad_norm": 0.9121512263543413, "learning_rate": 9.99816168573399e-06, "loss": 0.2275, "step": 1316 }, { "epoch": 0.038421144757570455, "grad_norm": 1.0388189498393994, "learning_rate": 9.998148853655486e-06, "loss": 0.2348, "step": 1317 }, { "epoch": 0.038450317988214015, "grad_norm": 1.266719867745567, "learning_rate": 9.998135976954704e-06, "loss": 0.2389, "step": 1318 }, { "epoch": 0.038479491218857576, "grad_norm": 1.1566629911987631, "learning_rate": 9.998123055631756e-06, "loss": 0.2198, "step": 1319 }, { "epoch": 0.038508664449501136, "grad_norm": 1.2143406841243476, "learning_rate": 9.99811008968676e-06, "loss": 0.2266, "step": 1320 }, { "epoch": 0.038537837680144696, "grad_norm": 0.9275245921485578, "learning_rate": 9.998097079119828e-06, "loss": 0.2109, "step": 1321 }, { "epoch": 0.03856701091078826, "grad_norm": 1.2690957652371693, "learning_rate": 9.998084023931081e-06, "loss": 0.2031, "step": 1322 }, { "epoch": 0.03859618414143182, "grad_norm": 1.2652695614831517, "learning_rate": 9.998070924120631e-06, "loss": 0.2205, "step": 1323 }, { "epoch": 0.03862535737207538, "grad_norm": 0.9942675570568783, "learning_rate": 9.998057779688597e-06, "loss": 0.2145, "step": 1324 }, { "epoch": 0.038654530602718944, "grad_norm": 1.1329949453374601, "learning_rate": 9.998044590635099e-06, "loss": 0.2325, "step": 1325 }, { "epoch": 0.038683703833362504, "grad_norm": 0.8833575532196354, "learning_rate": 9.99803135696025e-06, "loss": 0.2047, "step": 1326 }, { "epoch": 0.03871287706400607, "grad_norm": 0.9744575379511667, "learning_rate": 9.998018078664169e-06, "loss": 0.2213, "step": 1327 }, { "epoch": 0.03874205029464963, "grad_norm": 0.9418066454427708, "learning_rate": 9.998004755746977e-06, "loss": 0.235, "step": 1328 }, { "epoch": 0.03877122352529319, "grad_norm": 1.0147343297476814, "learning_rate": 9.997991388208791e-06, "loss": 0.233, "step": 1329 }, { "epoch": 0.03880039675593675, "grad_norm": 1.1558484528664879, "learning_rate": 9.997977976049731e-06, "loss": 0.2271, "step": 1330 }, { "epoch": 0.03882956998658031, "grad_norm": 1.201303925343188, "learning_rate": 9.997964519269918e-06, "loss": 0.2345, "step": 1331 }, { "epoch": 0.03885874321722387, "grad_norm": 0.9019540794269065, "learning_rate": 9.99795101786947e-06, "loss": 0.2185, "step": 1332 }, { "epoch": 0.03888791644786744, "grad_norm": 1.283035114543337, "learning_rate": 9.997937471848508e-06, "loss": 0.1996, "step": 1333 }, { "epoch": 0.038917089678511, "grad_norm": 0.9430874796476467, "learning_rate": 9.997923881207155e-06, "loss": 0.2305, "step": 1334 }, { "epoch": 0.03894626290915456, "grad_norm": 0.9882398860434454, "learning_rate": 9.99791024594553e-06, "loss": 0.2537, "step": 1335 }, { "epoch": 0.03897543613979812, "grad_norm": 1.1549123435876236, "learning_rate": 9.997896566063754e-06, "loss": 0.2189, "step": 1336 }, { "epoch": 0.03900460937044168, "grad_norm": 1.0921869118312562, "learning_rate": 9.997882841561952e-06, "loss": 0.2072, "step": 1337 }, { "epoch": 0.03903378260108525, "grad_norm": 1.0278245070875547, "learning_rate": 9.997869072440245e-06, "loss": 0.2191, "step": 1338 }, { "epoch": 0.03906295583172881, "grad_norm": 1.1846947002143793, "learning_rate": 9.997855258698756e-06, "loss": 0.2164, "step": 1339 }, { "epoch": 0.03909212906237237, "grad_norm": 0.9517210256139034, "learning_rate": 9.997841400337608e-06, "loss": 0.2211, "step": 1340 }, { "epoch": 0.03912130229301593, "grad_norm": 1.1482527040068051, "learning_rate": 9.997827497356925e-06, "loss": 0.1905, "step": 1341 }, { "epoch": 0.03915047552365949, "grad_norm": 1.0407439080463416, "learning_rate": 9.997813549756831e-06, "loss": 0.2213, "step": 1342 }, { "epoch": 0.03917964875430305, "grad_norm": 1.1499738624169245, "learning_rate": 9.99779955753745e-06, "loss": 0.2798, "step": 1343 }, { "epoch": 0.039208821984946615, "grad_norm": 1.2375675402663109, "learning_rate": 9.99778552069891e-06, "loss": 0.2103, "step": 1344 }, { "epoch": 0.039237995215590175, "grad_norm": 0.8669936389379261, "learning_rate": 9.997771439241332e-06, "loss": 0.2392, "step": 1345 }, { "epoch": 0.039267168446233736, "grad_norm": 1.052290039102351, "learning_rate": 9.997757313164843e-06, "loss": 0.2091, "step": 1346 }, { "epoch": 0.039296341676877296, "grad_norm": 0.9268609287273716, "learning_rate": 9.997743142469571e-06, "loss": 0.2134, "step": 1347 }, { "epoch": 0.039325514907520856, "grad_norm": 0.9549665618673209, "learning_rate": 9.997728927155643e-06, "loss": 0.2041, "step": 1348 }, { "epoch": 0.03935468813816442, "grad_norm": 0.9331634674606833, "learning_rate": 9.997714667223181e-06, "loss": 0.2016, "step": 1349 }, { "epoch": 0.03938386136880798, "grad_norm": 0.9818661097744485, "learning_rate": 9.997700362672317e-06, "loss": 0.2192, "step": 1350 }, { "epoch": 0.039413034599451544, "grad_norm": 0.8404467533545338, "learning_rate": 9.997686013503178e-06, "loss": 0.2017, "step": 1351 }, { "epoch": 0.039442207830095104, "grad_norm": 0.9627892193536619, "learning_rate": 9.997671619715889e-06, "loss": 0.2244, "step": 1352 }, { "epoch": 0.039471381060738664, "grad_norm": 0.9226865047277064, "learning_rate": 9.997657181310584e-06, "loss": 0.2002, "step": 1353 }, { "epoch": 0.03950055429138223, "grad_norm": 1.1284562198697294, "learning_rate": 9.997642698287386e-06, "loss": 0.2183, "step": 1354 }, { "epoch": 0.03952972752202579, "grad_norm": 1.0154493713003871, "learning_rate": 9.997628170646428e-06, "loss": 0.2203, "step": 1355 }, { "epoch": 0.03955890075266935, "grad_norm": 1.028409596537489, "learning_rate": 9.997613598387838e-06, "loss": 0.2226, "step": 1356 }, { "epoch": 0.03958807398331291, "grad_norm": 1.0342809832679385, "learning_rate": 9.997598981511749e-06, "loss": 0.2271, "step": 1357 }, { "epoch": 0.03961724721395647, "grad_norm": 0.8296214691141625, "learning_rate": 9.997584320018287e-06, "loss": 0.1995, "step": 1358 }, { "epoch": 0.03964642044460003, "grad_norm": 0.9319299517444279, "learning_rate": 9.997569613907587e-06, "loss": 0.2081, "step": 1359 }, { "epoch": 0.0396755936752436, "grad_norm": 0.8792592970600249, "learning_rate": 9.997554863179777e-06, "loss": 0.2124, "step": 1360 }, { "epoch": 0.03970476690588716, "grad_norm": 0.8704137211657997, "learning_rate": 9.997540067834991e-06, "loss": 0.224, "step": 1361 }, { "epoch": 0.03973394013653072, "grad_norm": 0.9876591870234631, "learning_rate": 9.997525227873361e-06, "loss": 0.2022, "step": 1362 }, { "epoch": 0.03976311336717428, "grad_norm": 1.1291734542403797, "learning_rate": 9.997510343295018e-06, "loss": 0.2039, "step": 1363 }, { "epoch": 0.03979228659781784, "grad_norm": 1.1268418016229587, "learning_rate": 9.997495414100095e-06, "loss": 0.23, "step": 1364 }, { "epoch": 0.03982145982846141, "grad_norm": 1.148988880184851, "learning_rate": 9.997480440288726e-06, "loss": 0.2298, "step": 1365 }, { "epoch": 0.03985063305910497, "grad_norm": 1.1247163410569034, "learning_rate": 9.997465421861046e-06, "loss": 0.2555, "step": 1366 }, { "epoch": 0.03987980628974853, "grad_norm": 0.9965754859389144, "learning_rate": 9.997450358817185e-06, "loss": 0.1948, "step": 1367 }, { "epoch": 0.03990897952039209, "grad_norm": 1.0375175847296314, "learning_rate": 9.997435251157284e-06, "loss": 0.2693, "step": 1368 }, { "epoch": 0.03993815275103565, "grad_norm": 1.023653601087876, "learning_rate": 9.99742009888147e-06, "loss": 0.2025, "step": 1369 }, { "epoch": 0.03996732598167921, "grad_norm": 0.9448813207728343, "learning_rate": 9.997404901989884e-06, "loss": 0.1867, "step": 1370 }, { "epoch": 0.039996499212322775, "grad_norm": 0.938275403909158, "learning_rate": 9.997389660482662e-06, "loss": 0.2416, "step": 1371 }, { "epoch": 0.040025672442966335, "grad_norm": 0.9685428975847087, "learning_rate": 9.997374374359935e-06, "loss": 0.2569, "step": 1372 }, { "epoch": 0.040054845673609896, "grad_norm": 1.0739192210213488, "learning_rate": 9.997359043621844e-06, "loss": 0.2268, "step": 1373 }, { "epoch": 0.040084018904253456, "grad_norm": 0.9738521939663268, "learning_rate": 9.997343668268525e-06, "loss": 0.218, "step": 1374 }, { "epoch": 0.040113192134897016, "grad_norm": 0.9592428474829809, "learning_rate": 9.997328248300114e-06, "loss": 0.2071, "step": 1375 }, { "epoch": 0.04014236536554058, "grad_norm": 0.8635790952625635, "learning_rate": 9.997312783716751e-06, "loss": 0.2207, "step": 1376 }, { "epoch": 0.04017153859618414, "grad_norm": 0.9335635120510127, "learning_rate": 9.997297274518569e-06, "loss": 0.2169, "step": 1377 }, { "epoch": 0.040200711826827704, "grad_norm": 1.1756898645285696, "learning_rate": 9.997281720705713e-06, "loss": 0.228, "step": 1378 }, { "epoch": 0.040229885057471264, "grad_norm": 1.0855032514936684, "learning_rate": 9.997266122278317e-06, "loss": 0.2033, "step": 1379 }, { "epoch": 0.040259058288114824, "grad_norm": 0.8673801952091827, "learning_rate": 9.997250479236522e-06, "loss": 0.2193, "step": 1380 }, { "epoch": 0.040288231518758384, "grad_norm": 1.044271829891321, "learning_rate": 9.99723479158047e-06, "loss": 0.232, "step": 1381 }, { "epoch": 0.04031740474940195, "grad_norm": 1.1137257096053048, "learning_rate": 9.997219059310296e-06, "loss": 0.2152, "step": 1382 }, { "epoch": 0.04034657798004551, "grad_norm": 0.8227482080417956, "learning_rate": 9.997203282426144e-06, "loss": 0.2103, "step": 1383 }, { "epoch": 0.04037575121068907, "grad_norm": 0.9498515007487641, "learning_rate": 9.997187460928155e-06, "loss": 0.2167, "step": 1384 }, { "epoch": 0.04040492444133263, "grad_norm": 1.1769274227042676, "learning_rate": 9.997171594816467e-06, "loss": 0.2538, "step": 1385 }, { "epoch": 0.04043409767197619, "grad_norm": 0.9315945779231783, "learning_rate": 9.997155684091225e-06, "loss": 0.2333, "step": 1386 }, { "epoch": 0.04046327090261976, "grad_norm": 0.9803684457551111, "learning_rate": 9.997139728752571e-06, "loss": 0.1961, "step": 1387 }, { "epoch": 0.04049244413326332, "grad_norm": 0.9885088854204722, "learning_rate": 9.997123728800647e-06, "loss": 0.2194, "step": 1388 }, { "epoch": 0.04052161736390688, "grad_norm": 1.0034427516436237, "learning_rate": 9.997107684235592e-06, "loss": 0.2226, "step": 1389 }, { "epoch": 0.04055079059455044, "grad_norm": 0.9975148044713197, "learning_rate": 9.997091595057556e-06, "loss": 0.2282, "step": 1390 }, { "epoch": 0.040579963825194, "grad_norm": 1.2398854129625423, "learning_rate": 9.997075461266677e-06, "loss": 0.1854, "step": 1391 }, { "epoch": 0.04060913705583756, "grad_norm": 0.9221307377309209, "learning_rate": 9.997059282863103e-06, "loss": 0.2222, "step": 1392 }, { "epoch": 0.04063831028648113, "grad_norm": 1.4777014478559738, "learning_rate": 9.997043059846974e-06, "loss": 0.2392, "step": 1393 }, { "epoch": 0.04066748351712469, "grad_norm": 0.9999336775425626, "learning_rate": 9.997026792218439e-06, "loss": 0.2217, "step": 1394 }, { "epoch": 0.04069665674776825, "grad_norm": 1.0057735011149043, "learning_rate": 9.99701047997764e-06, "loss": 0.2011, "step": 1395 }, { "epoch": 0.04072582997841181, "grad_norm": 1.0771243992386983, "learning_rate": 9.996994123124727e-06, "loss": 0.2025, "step": 1396 }, { "epoch": 0.04075500320905537, "grad_norm": 1.1764991368919115, "learning_rate": 9.996977721659841e-06, "loss": 0.2158, "step": 1397 }, { "epoch": 0.040784176439698935, "grad_norm": 1.2576445137719265, "learning_rate": 9.996961275583133e-06, "loss": 0.2273, "step": 1398 }, { "epoch": 0.040813349670342496, "grad_norm": 0.9886776760583021, "learning_rate": 9.996944784894747e-06, "loss": 0.2327, "step": 1399 }, { "epoch": 0.040842522900986056, "grad_norm": 0.9695993546130057, "learning_rate": 9.99692824959483e-06, "loss": 0.2108, "step": 1400 }, { "epoch": 0.040871696131629616, "grad_norm": 0.9952565365059857, "learning_rate": 9.99691166968353e-06, "loss": 0.1976, "step": 1401 }, { "epoch": 0.040900869362273176, "grad_norm": 0.9257956453901235, "learning_rate": 9.996895045160997e-06, "loss": 0.2132, "step": 1402 }, { "epoch": 0.040930042592916736, "grad_norm": 0.9610088002531586, "learning_rate": 9.996878376027377e-06, "loss": 0.2241, "step": 1403 }, { "epoch": 0.040959215823560303, "grad_norm": 1.019045509669626, "learning_rate": 9.99686166228282e-06, "loss": 0.2383, "step": 1404 }, { "epoch": 0.040988389054203864, "grad_norm": 0.9650270584136241, "learning_rate": 9.996844903927475e-06, "loss": 0.198, "step": 1405 }, { "epoch": 0.041017562284847424, "grad_norm": 1.0796070829693196, "learning_rate": 9.996828100961491e-06, "loss": 0.2085, "step": 1406 }, { "epoch": 0.041046735515490984, "grad_norm": 0.9430226186570874, "learning_rate": 9.99681125338502e-06, "loss": 0.2449, "step": 1407 }, { "epoch": 0.041075908746134544, "grad_norm": 1.1829258383954429, "learning_rate": 9.99679436119821e-06, "loss": 0.2313, "step": 1408 }, { "epoch": 0.04110508197677811, "grad_norm": 0.9457366101358423, "learning_rate": 9.996777424401212e-06, "loss": 0.2551, "step": 1409 }, { "epoch": 0.04113425520742167, "grad_norm": 1.1009213065778383, "learning_rate": 9.996760442994177e-06, "loss": 0.2309, "step": 1410 }, { "epoch": 0.04116342843806523, "grad_norm": 0.9704531907078432, "learning_rate": 9.996743416977262e-06, "loss": 0.2367, "step": 1411 }, { "epoch": 0.04119260166870879, "grad_norm": 1.0429666547715157, "learning_rate": 9.99672634635061e-06, "loss": 0.2295, "step": 1412 }, { "epoch": 0.04122177489935235, "grad_norm": 1.0747472073388955, "learning_rate": 9.996709231114381e-06, "loss": 0.2381, "step": 1413 }, { "epoch": 0.04125094812999591, "grad_norm": 1.1662343939472601, "learning_rate": 9.996692071268724e-06, "loss": 0.2277, "step": 1414 }, { "epoch": 0.04128012136063948, "grad_norm": 0.9602644067615418, "learning_rate": 9.996674866813792e-06, "loss": 0.2253, "step": 1415 }, { "epoch": 0.04130929459128304, "grad_norm": 1.3243000135828542, "learning_rate": 9.99665761774974e-06, "loss": 0.2473, "step": 1416 }, { "epoch": 0.0413384678219266, "grad_norm": 0.9815509736248158, "learning_rate": 9.996640324076721e-06, "loss": 0.2018, "step": 1417 }, { "epoch": 0.04136764105257016, "grad_norm": 1.0714111788074172, "learning_rate": 9.996622985794891e-06, "loss": 0.1946, "step": 1418 }, { "epoch": 0.04139681428321372, "grad_norm": 0.9998604307350838, "learning_rate": 9.996605602904403e-06, "loss": 0.2087, "step": 1419 }, { "epoch": 0.04142598751385729, "grad_norm": 1.072961976478066, "learning_rate": 9.996588175405415e-06, "loss": 0.2381, "step": 1420 }, { "epoch": 0.04145516074450085, "grad_norm": 0.8373350153525165, "learning_rate": 9.996570703298078e-06, "loss": 0.1966, "step": 1421 }, { "epoch": 0.04148433397514441, "grad_norm": 1.089742446453986, "learning_rate": 9.996553186582552e-06, "loss": 0.2478, "step": 1422 }, { "epoch": 0.04151350720578797, "grad_norm": 0.9748766263828204, "learning_rate": 9.996535625258992e-06, "loss": 0.2123, "step": 1423 }, { "epoch": 0.04154268043643153, "grad_norm": 1.2175897141540069, "learning_rate": 9.996518019327555e-06, "loss": 0.2195, "step": 1424 }, { "epoch": 0.04157185366707509, "grad_norm": 1.0743819665833776, "learning_rate": 9.996500368788396e-06, "loss": 0.2273, "step": 1425 }, { "epoch": 0.041601026897718656, "grad_norm": 1.03651420740801, "learning_rate": 9.996482673641675e-06, "loss": 0.2063, "step": 1426 }, { "epoch": 0.041630200128362216, "grad_norm": 1.3119232498834812, "learning_rate": 9.996464933887551e-06, "loss": 0.2363, "step": 1427 }, { "epoch": 0.041659373359005776, "grad_norm": 0.9325800581765833, "learning_rate": 9.996447149526179e-06, "loss": 0.2264, "step": 1428 }, { "epoch": 0.041688546589649336, "grad_norm": 0.9127887289285253, "learning_rate": 9.99642932055772e-06, "loss": 0.2323, "step": 1429 }, { "epoch": 0.041717719820292896, "grad_norm": 1.1171056920038365, "learning_rate": 9.996411446982335e-06, "loss": 0.217, "step": 1430 }, { "epoch": 0.041746893050936464, "grad_norm": 0.9171130115726579, "learning_rate": 9.99639352880018e-06, "loss": 0.2261, "step": 1431 }, { "epoch": 0.041776066281580024, "grad_norm": 0.8525848364637603, "learning_rate": 9.996375566011415e-06, "loss": 0.2296, "step": 1432 }, { "epoch": 0.041805239512223584, "grad_norm": 0.8924284655904076, "learning_rate": 9.996357558616201e-06, "loss": 0.2172, "step": 1433 }, { "epoch": 0.041834412742867144, "grad_norm": 0.7701805957891904, "learning_rate": 9.996339506614703e-06, "loss": 0.2171, "step": 1434 }, { "epoch": 0.041863585973510704, "grad_norm": 0.9346432211708176, "learning_rate": 9.996321410007076e-06, "loss": 0.2163, "step": 1435 }, { "epoch": 0.04189275920415427, "grad_norm": 1.2609256382114076, "learning_rate": 9.996303268793484e-06, "loss": 0.2144, "step": 1436 }, { "epoch": 0.04192193243479783, "grad_norm": 0.9450348609382418, "learning_rate": 9.99628508297409e-06, "loss": 0.2139, "step": 1437 }, { "epoch": 0.04195110566544139, "grad_norm": 1.0011505709567554, "learning_rate": 9.996266852549056e-06, "loss": 0.2052, "step": 1438 }, { "epoch": 0.04198027889608495, "grad_norm": 0.9925306010267743, "learning_rate": 9.996248577518543e-06, "loss": 0.2141, "step": 1439 }, { "epoch": 0.04200945212672851, "grad_norm": 1.0884507101869654, "learning_rate": 9.996230257882716e-06, "loss": 0.2218, "step": 1440 }, { "epoch": 0.04203862535737207, "grad_norm": 1.141353351760358, "learning_rate": 9.99621189364174e-06, "loss": 0.2378, "step": 1441 }, { "epoch": 0.04206779858801564, "grad_norm": 1.0686080529219866, "learning_rate": 9.996193484795774e-06, "loss": 0.2053, "step": 1442 }, { "epoch": 0.0420969718186592, "grad_norm": 0.9193753146267514, "learning_rate": 9.996175031344985e-06, "loss": 0.2205, "step": 1443 }, { "epoch": 0.04212614504930276, "grad_norm": 1.2292913522378575, "learning_rate": 9.99615653328954e-06, "loss": 0.1881, "step": 1444 }, { "epoch": 0.04215531827994632, "grad_norm": 0.9432107283405086, "learning_rate": 9.996137990629601e-06, "loss": 0.2326, "step": 1445 }, { "epoch": 0.04218449151058988, "grad_norm": 1.0168940745265806, "learning_rate": 9.996119403365336e-06, "loss": 0.232, "step": 1446 }, { "epoch": 0.04221366474123345, "grad_norm": 0.766663241875437, "learning_rate": 9.996100771496908e-06, "loss": 0.2081, "step": 1447 }, { "epoch": 0.04224283797187701, "grad_norm": 0.9448176605055629, "learning_rate": 9.996082095024486e-06, "loss": 0.2257, "step": 1448 }, { "epoch": 0.04227201120252057, "grad_norm": 0.9189474902407835, "learning_rate": 9.996063373948236e-06, "loss": 0.2056, "step": 1449 }, { "epoch": 0.04230118443316413, "grad_norm": 0.9430389926796413, "learning_rate": 9.996044608268323e-06, "loss": 0.2285, "step": 1450 }, { "epoch": 0.04233035766380769, "grad_norm": 0.8736377488994879, "learning_rate": 9.996025797984917e-06, "loss": 0.2005, "step": 1451 }, { "epoch": 0.04235953089445125, "grad_norm": 0.9798793350912578, "learning_rate": 9.996006943098186e-06, "loss": 0.2003, "step": 1452 }, { "epoch": 0.042388704125094816, "grad_norm": 0.9939519675283078, "learning_rate": 9.995988043608298e-06, "loss": 0.2413, "step": 1453 }, { "epoch": 0.042417877355738376, "grad_norm": 1.1457999732943973, "learning_rate": 9.995969099515422e-06, "loss": 0.2392, "step": 1454 }, { "epoch": 0.042447050586381936, "grad_norm": 0.9996397635655915, "learning_rate": 9.995950110819725e-06, "loss": 0.2293, "step": 1455 }, { "epoch": 0.042476223817025496, "grad_norm": 1.0414568624002896, "learning_rate": 9.995931077521377e-06, "loss": 0.2315, "step": 1456 }, { "epoch": 0.042505397047669057, "grad_norm": 1.02445309232173, "learning_rate": 9.995911999620551e-06, "loss": 0.2548, "step": 1457 }, { "epoch": 0.042534570278312624, "grad_norm": 0.9462250634681262, "learning_rate": 9.995892877117415e-06, "loss": 0.2174, "step": 1458 }, { "epoch": 0.042563743508956184, "grad_norm": 1.3235852975520785, "learning_rate": 9.995873710012139e-06, "loss": 0.2447, "step": 1459 }, { "epoch": 0.042592916739599744, "grad_norm": 1.1324259520052409, "learning_rate": 9.995854498304896e-06, "loss": 0.2065, "step": 1460 }, { "epoch": 0.042622089970243304, "grad_norm": 0.9157819995964086, "learning_rate": 9.995835241995857e-06, "loss": 0.2081, "step": 1461 }, { "epoch": 0.042651263200886864, "grad_norm": 0.8460837172718613, "learning_rate": 9.995815941085193e-06, "loss": 0.2262, "step": 1462 }, { "epoch": 0.042680436431530425, "grad_norm": 0.8605506763271699, "learning_rate": 9.995796595573078e-06, "loss": 0.1842, "step": 1463 }, { "epoch": 0.04270960966217399, "grad_norm": 0.9943616633690506, "learning_rate": 9.995777205459682e-06, "loss": 0.2444, "step": 1464 }, { "epoch": 0.04273878289281755, "grad_norm": 0.8662157243640668, "learning_rate": 9.99575777074518e-06, "loss": 0.1969, "step": 1465 }, { "epoch": 0.04276795612346111, "grad_norm": 0.9849215744267956, "learning_rate": 9.995738291429745e-06, "loss": 0.1947, "step": 1466 }, { "epoch": 0.04279712935410467, "grad_norm": 0.9378503402257837, "learning_rate": 9.995718767513551e-06, "loss": 0.1978, "step": 1467 }, { "epoch": 0.04282630258474823, "grad_norm": 0.926220287544285, "learning_rate": 9.995699198996773e-06, "loss": 0.2013, "step": 1468 }, { "epoch": 0.0428554758153918, "grad_norm": 0.9176908859449656, "learning_rate": 9.995679585879585e-06, "loss": 0.1947, "step": 1469 }, { "epoch": 0.04288464904603536, "grad_norm": 1.0008999967219585, "learning_rate": 9.995659928162164e-06, "loss": 0.2238, "step": 1470 }, { "epoch": 0.04291382227667892, "grad_norm": 1.0602822912657908, "learning_rate": 9.995640225844682e-06, "loss": 0.2221, "step": 1471 }, { "epoch": 0.04294299550732248, "grad_norm": 0.9648436997158979, "learning_rate": 9.995620478927315e-06, "loss": 0.2049, "step": 1472 }, { "epoch": 0.04297216873796604, "grad_norm": 1.0664357527011148, "learning_rate": 9.995600687410244e-06, "loss": 0.2048, "step": 1473 }, { "epoch": 0.0430013419686096, "grad_norm": 0.9574922828372119, "learning_rate": 9.99558085129364e-06, "loss": 0.2222, "step": 1474 }, { "epoch": 0.04303051519925317, "grad_norm": 1.1493992224879264, "learning_rate": 9.995560970577685e-06, "loss": 0.2342, "step": 1475 }, { "epoch": 0.04305968842989673, "grad_norm": 1.2327635581648775, "learning_rate": 9.995541045262554e-06, "loss": 0.2257, "step": 1476 }, { "epoch": 0.04308886166054029, "grad_norm": 0.8601884200626423, "learning_rate": 9.995521075348423e-06, "loss": 0.1834, "step": 1477 }, { "epoch": 0.04311803489118385, "grad_norm": 1.0366460390906858, "learning_rate": 9.995501060835474e-06, "loss": 0.221, "step": 1478 }, { "epoch": 0.04314720812182741, "grad_norm": 1.0994996940409911, "learning_rate": 9.995481001723884e-06, "loss": 0.2226, "step": 1479 }, { "epoch": 0.043176381352470976, "grad_norm": 0.8794707319860001, "learning_rate": 9.995460898013831e-06, "loss": 0.2102, "step": 1480 }, { "epoch": 0.043205554583114536, "grad_norm": 1.088000625757929, "learning_rate": 9.995440749705496e-06, "loss": 0.2182, "step": 1481 }, { "epoch": 0.043234727813758096, "grad_norm": 0.9909439668431169, "learning_rate": 9.99542055679906e-06, "loss": 0.2199, "step": 1482 }, { "epoch": 0.043263901044401656, "grad_norm": 0.9167130629482909, "learning_rate": 9.9954003192947e-06, "loss": 0.2044, "step": 1483 }, { "epoch": 0.04329307427504522, "grad_norm": 0.8730508094905478, "learning_rate": 9.9953800371926e-06, "loss": 0.2026, "step": 1484 }, { "epoch": 0.04332224750568878, "grad_norm": 0.973396668159621, "learning_rate": 9.995359710492937e-06, "loss": 0.2085, "step": 1485 }, { "epoch": 0.043351420736332344, "grad_norm": 0.9718573235784893, "learning_rate": 9.995339339195898e-06, "loss": 0.2087, "step": 1486 }, { "epoch": 0.043380593966975904, "grad_norm": 0.9224864497323803, "learning_rate": 9.995318923301659e-06, "loss": 0.1913, "step": 1487 }, { "epoch": 0.043409767197619464, "grad_norm": 1.0016274912019036, "learning_rate": 9.995298462810407e-06, "loss": 0.2164, "step": 1488 }, { "epoch": 0.043438940428263025, "grad_norm": 1.1775308618689844, "learning_rate": 9.995277957722323e-06, "loss": 0.2464, "step": 1489 }, { "epoch": 0.043468113658906585, "grad_norm": 1.010011497366543, "learning_rate": 9.995257408037588e-06, "loss": 0.1977, "step": 1490 }, { "epoch": 0.04349728688955015, "grad_norm": 0.8892193731855427, "learning_rate": 9.995236813756388e-06, "loss": 0.1913, "step": 1491 }, { "epoch": 0.04352646012019371, "grad_norm": 1.0432195456684967, "learning_rate": 9.995216174878908e-06, "loss": 0.2269, "step": 1492 }, { "epoch": 0.04355563335083727, "grad_norm": 1.1672149910620269, "learning_rate": 9.99519549140533e-06, "loss": 0.2365, "step": 1493 }, { "epoch": 0.04358480658148083, "grad_norm": 0.9795863159696633, "learning_rate": 9.995174763335837e-06, "loss": 0.2178, "step": 1494 }, { "epoch": 0.04361397981212439, "grad_norm": 1.2355885439701726, "learning_rate": 9.995153990670618e-06, "loss": 0.2278, "step": 1495 }, { "epoch": 0.04364315304276795, "grad_norm": 1.0361904879259465, "learning_rate": 9.995133173409856e-06, "loss": 0.2164, "step": 1496 }, { "epoch": 0.04367232627341152, "grad_norm": 0.9175487337753576, "learning_rate": 9.995112311553736e-06, "loss": 0.2017, "step": 1497 }, { "epoch": 0.04370149950405508, "grad_norm": 0.9087516508905001, "learning_rate": 9.995091405102449e-06, "loss": 0.2148, "step": 1498 }, { "epoch": 0.04373067273469864, "grad_norm": 1.0065575322472309, "learning_rate": 9.995070454056175e-06, "loss": 0.2172, "step": 1499 }, { "epoch": 0.0437598459653422, "grad_norm": 1.4500252208703486, "learning_rate": 9.995049458415108e-06, "loss": 0.2393, "step": 1500 }, { "epoch": 0.04378901919598576, "grad_norm": 1.3234772355923852, "learning_rate": 9.995028418179429e-06, "loss": 0.2184, "step": 1501 }, { "epoch": 0.04381819242662933, "grad_norm": 0.9495426174390178, "learning_rate": 9.99500733334933e-06, "loss": 0.187, "step": 1502 }, { "epoch": 0.04384736565727289, "grad_norm": 1.2749208543526, "learning_rate": 9.994986203924996e-06, "loss": 0.2038, "step": 1503 }, { "epoch": 0.04387653888791645, "grad_norm": 1.2059401236051581, "learning_rate": 9.99496502990662e-06, "loss": 0.2012, "step": 1504 }, { "epoch": 0.04390571211856001, "grad_norm": 1.3535856574377956, "learning_rate": 9.994943811294387e-06, "loss": 0.2214, "step": 1505 }, { "epoch": 0.04393488534920357, "grad_norm": 1.01008938424475, "learning_rate": 9.994922548088488e-06, "loss": 0.2145, "step": 1506 }, { "epoch": 0.04396405857984713, "grad_norm": 1.106945856950492, "learning_rate": 9.994901240289114e-06, "loss": 0.2117, "step": 1507 }, { "epoch": 0.043993231810490696, "grad_norm": 1.0718073273861746, "learning_rate": 9.994879887896453e-06, "loss": 0.2093, "step": 1508 }, { "epoch": 0.044022405041134256, "grad_norm": 0.8134033627259928, "learning_rate": 9.994858490910699e-06, "loss": 0.1983, "step": 1509 }, { "epoch": 0.044051578271777816, "grad_norm": 1.1558254681157425, "learning_rate": 9.994837049332038e-06, "loss": 0.2175, "step": 1510 }, { "epoch": 0.04408075150242138, "grad_norm": 1.0413670712359624, "learning_rate": 9.994815563160665e-06, "loss": 0.2267, "step": 1511 }, { "epoch": 0.04410992473306494, "grad_norm": 0.8713546158791305, "learning_rate": 9.994794032396772e-06, "loss": 0.2053, "step": 1512 }, { "epoch": 0.044139097963708504, "grad_norm": 0.8762434296611706, "learning_rate": 9.99477245704055e-06, "loss": 0.2149, "step": 1513 }, { "epoch": 0.044168271194352064, "grad_norm": 1.140460284007135, "learning_rate": 9.99475083709219e-06, "loss": 0.2151, "step": 1514 }, { "epoch": 0.044197444424995624, "grad_norm": 0.9512975940821824, "learning_rate": 9.994729172551889e-06, "loss": 0.2065, "step": 1515 }, { "epoch": 0.044226617655639185, "grad_norm": 1.045748755414856, "learning_rate": 9.994707463419839e-06, "loss": 0.2101, "step": 1516 }, { "epoch": 0.044255790886282745, "grad_norm": 1.155441077931251, "learning_rate": 9.99468570969623e-06, "loss": 0.2313, "step": 1517 }, { "epoch": 0.044284964116926305, "grad_norm": 0.8182222302235104, "learning_rate": 9.99466391138126e-06, "loss": 0.1956, "step": 1518 }, { "epoch": 0.04431413734756987, "grad_norm": 0.985013760566948, "learning_rate": 9.994642068475127e-06, "loss": 0.1939, "step": 1519 }, { "epoch": 0.04434331057821343, "grad_norm": 1.1592479237406494, "learning_rate": 9.994620180978019e-06, "loss": 0.2333, "step": 1520 }, { "epoch": 0.04437248380885699, "grad_norm": 0.9415477013451679, "learning_rate": 9.994598248890132e-06, "loss": 0.2004, "step": 1521 }, { "epoch": 0.04440165703950055, "grad_norm": 0.887643002790456, "learning_rate": 9.994576272211666e-06, "loss": 0.2114, "step": 1522 }, { "epoch": 0.04443083027014411, "grad_norm": 1.0270416990258484, "learning_rate": 9.994554250942818e-06, "loss": 0.2181, "step": 1523 }, { "epoch": 0.04446000350078768, "grad_norm": 0.9805689129660959, "learning_rate": 9.994532185083778e-06, "loss": 0.2171, "step": 1524 }, { "epoch": 0.04448917673143124, "grad_norm": 1.0786672431143527, "learning_rate": 9.99451007463475e-06, "loss": 0.2019, "step": 1525 }, { "epoch": 0.0445183499620748, "grad_norm": 0.9372296678013002, "learning_rate": 9.994487919595925e-06, "loss": 0.2117, "step": 1526 }, { "epoch": 0.04454752319271836, "grad_norm": 1.0254043986779158, "learning_rate": 9.994465719967507e-06, "loss": 0.2313, "step": 1527 }, { "epoch": 0.04457669642336192, "grad_norm": 1.1989115949156917, "learning_rate": 9.994443475749692e-06, "loss": 0.1993, "step": 1528 }, { "epoch": 0.04460586965400549, "grad_norm": 0.9809271376858866, "learning_rate": 9.994421186942675e-06, "loss": 0.2251, "step": 1529 }, { "epoch": 0.04463504288464905, "grad_norm": 0.8140246694342183, "learning_rate": 9.99439885354666e-06, "loss": 0.1776, "step": 1530 }, { "epoch": 0.04466421611529261, "grad_norm": 0.9703925915399549, "learning_rate": 9.994376475561842e-06, "loss": 0.2158, "step": 1531 }, { "epoch": 0.04469338934593617, "grad_norm": 1.104517324078042, "learning_rate": 9.994354052988424e-06, "loss": 0.2174, "step": 1532 }, { "epoch": 0.04472256257657973, "grad_norm": 0.8316422371741246, "learning_rate": 9.994331585826606e-06, "loss": 0.2145, "step": 1533 }, { "epoch": 0.04475173580722329, "grad_norm": 0.9830344315374484, "learning_rate": 9.994309074076589e-06, "loss": 0.2276, "step": 1534 }, { "epoch": 0.044780909037866856, "grad_norm": 1.040520479452583, "learning_rate": 9.994286517738572e-06, "loss": 0.2396, "step": 1535 }, { "epoch": 0.044810082268510416, "grad_norm": 0.8665381336119805, "learning_rate": 9.994263916812757e-06, "loss": 0.2034, "step": 1536 }, { "epoch": 0.04483925549915398, "grad_norm": 0.8774297599044266, "learning_rate": 9.994241271299344e-06, "loss": 0.2083, "step": 1537 }, { "epoch": 0.04486842872979754, "grad_norm": 0.8705735421286882, "learning_rate": 9.994218581198539e-06, "loss": 0.2052, "step": 1538 }, { "epoch": 0.0448976019604411, "grad_norm": 1.0544589605513472, "learning_rate": 9.994195846510543e-06, "loss": 0.2401, "step": 1539 }, { "epoch": 0.044926775191084664, "grad_norm": 1.0616515252387178, "learning_rate": 9.994173067235557e-06, "loss": 0.2041, "step": 1540 }, { "epoch": 0.044955948421728224, "grad_norm": 0.7933865528135348, "learning_rate": 9.994150243373789e-06, "loss": 0.2138, "step": 1541 }, { "epoch": 0.044985121652371785, "grad_norm": 1.0397077363518006, "learning_rate": 9.994127374925438e-06, "loss": 0.2298, "step": 1542 }, { "epoch": 0.045014294883015345, "grad_norm": 0.887150825337438, "learning_rate": 9.99410446189071e-06, "loss": 0.2078, "step": 1543 }, { "epoch": 0.045043468113658905, "grad_norm": 0.7227271216130112, "learning_rate": 9.99408150426981e-06, "loss": 0.1886, "step": 1544 }, { "epoch": 0.045072641344302465, "grad_norm": 0.9403018061880659, "learning_rate": 9.994058502062942e-06, "loss": 0.2202, "step": 1545 }, { "epoch": 0.04510181457494603, "grad_norm": 1.16465291239193, "learning_rate": 9.994035455270313e-06, "loss": 0.2232, "step": 1546 }, { "epoch": 0.04513098780558959, "grad_norm": 1.0377796312985823, "learning_rate": 9.994012363892124e-06, "loss": 0.2132, "step": 1547 }, { "epoch": 0.04516016103623315, "grad_norm": 0.9365964797790705, "learning_rate": 9.993989227928588e-06, "loss": 0.1932, "step": 1548 }, { "epoch": 0.04518933426687671, "grad_norm": 0.9807639657732039, "learning_rate": 9.993966047379908e-06, "loss": 0.1995, "step": 1549 }, { "epoch": 0.04521850749752027, "grad_norm": 1.1793814247486445, "learning_rate": 9.993942822246292e-06, "loss": 0.2334, "step": 1550 }, { "epoch": 0.04524768072816384, "grad_norm": 0.9107580647389542, "learning_rate": 9.993919552527945e-06, "loss": 0.2024, "step": 1551 }, { "epoch": 0.0452768539588074, "grad_norm": 0.9549283293210068, "learning_rate": 9.993896238225079e-06, "loss": 0.1968, "step": 1552 }, { "epoch": 0.04530602718945096, "grad_norm": 1.1968822968012605, "learning_rate": 9.993872879337896e-06, "loss": 0.2188, "step": 1553 }, { "epoch": 0.04533520042009452, "grad_norm": 1.0057792745103897, "learning_rate": 9.993849475866611e-06, "loss": 0.2346, "step": 1554 }, { "epoch": 0.04536437365073808, "grad_norm": 1.1397853199149683, "learning_rate": 9.993826027811427e-06, "loss": 0.2131, "step": 1555 }, { "epoch": 0.04539354688138164, "grad_norm": 1.3361540815581716, "learning_rate": 9.993802535172558e-06, "loss": 0.2385, "step": 1556 }, { "epoch": 0.04542272011202521, "grad_norm": 1.2649994201282369, "learning_rate": 9.993778997950212e-06, "loss": 0.2326, "step": 1557 }, { "epoch": 0.04545189334266877, "grad_norm": 1.1605134543612885, "learning_rate": 9.9937554161446e-06, "loss": 0.2347, "step": 1558 }, { "epoch": 0.04548106657331233, "grad_norm": 1.041034454393136, "learning_rate": 9.993731789755931e-06, "loss": 0.2091, "step": 1559 }, { "epoch": 0.04551023980395589, "grad_norm": 1.0357172359899414, "learning_rate": 9.993708118784417e-06, "loss": 0.218, "step": 1560 }, { "epoch": 0.04553941303459945, "grad_norm": 1.065139304455861, "learning_rate": 9.993684403230268e-06, "loss": 0.208, "step": 1561 }, { "epoch": 0.045568586265243016, "grad_norm": 1.2222053867043257, "learning_rate": 9.993660643093698e-06, "loss": 0.2312, "step": 1562 }, { "epoch": 0.045597759495886576, "grad_norm": 0.7402429115781198, "learning_rate": 9.993636838374917e-06, "loss": 0.1861, "step": 1563 }, { "epoch": 0.04562693272653014, "grad_norm": 0.9226390491358516, "learning_rate": 9.99361298907414e-06, "loss": 0.2405, "step": 1564 }, { "epoch": 0.0456561059571737, "grad_norm": 1.1469985183875429, "learning_rate": 9.993589095191575e-06, "loss": 0.2036, "step": 1565 }, { "epoch": 0.04568527918781726, "grad_norm": 0.9693897306501376, "learning_rate": 9.993565156727443e-06, "loss": 0.1917, "step": 1566 }, { "epoch": 0.04571445241846082, "grad_norm": 1.1274672267838741, "learning_rate": 9.99354117368195e-06, "loss": 0.2022, "step": 1567 }, { "epoch": 0.045743625649104384, "grad_norm": 1.240372697669746, "learning_rate": 9.993517146055314e-06, "loss": 0.2095, "step": 1568 }, { "epoch": 0.045772798879747945, "grad_norm": 0.9120602913148715, "learning_rate": 9.99349307384775e-06, "loss": 0.1988, "step": 1569 }, { "epoch": 0.045801972110391505, "grad_norm": 1.1822521899955165, "learning_rate": 9.993468957059472e-06, "loss": 0.2203, "step": 1570 }, { "epoch": 0.045831145341035065, "grad_norm": 0.9593066799616233, "learning_rate": 9.993444795690694e-06, "loss": 0.1952, "step": 1571 }, { "epoch": 0.045860318571678625, "grad_norm": 1.0210061544811042, "learning_rate": 9.993420589741634e-06, "loss": 0.2136, "step": 1572 }, { "epoch": 0.04588949180232219, "grad_norm": 1.1139539809465127, "learning_rate": 9.993396339212505e-06, "loss": 0.2269, "step": 1573 }, { "epoch": 0.04591866503296575, "grad_norm": 1.104926442800261, "learning_rate": 9.993372044103528e-06, "loss": 0.2181, "step": 1574 }, { "epoch": 0.04594783826360931, "grad_norm": 1.0134102391549935, "learning_rate": 9.993347704414915e-06, "loss": 0.2036, "step": 1575 }, { "epoch": 0.04597701149425287, "grad_norm": 0.9529470474929717, "learning_rate": 9.993323320146888e-06, "loss": 0.1964, "step": 1576 }, { "epoch": 0.04600618472489643, "grad_norm": 0.9056768183692107, "learning_rate": 9.99329889129966e-06, "loss": 0.213, "step": 1577 }, { "epoch": 0.04603535795553999, "grad_norm": 1.0069441295048178, "learning_rate": 9.993274417873454e-06, "loss": 0.2074, "step": 1578 }, { "epoch": 0.04606453118618356, "grad_norm": 1.1463857655151746, "learning_rate": 9.993249899868484e-06, "loss": 0.2324, "step": 1579 }, { "epoch": 0.04609370441682712, "grad_norm": 1.0098795470169168, "learning_rate": 9.993225337284973e-06, "loss": 0.2036, "step": 1580 }, { "epoch": 0.04612287764747068, "grad_norm": 0.9880110224986607, "learning_rate": 9.993200730123137e-06, "loss": 0.2347, "step": 1581 }, { "epoch": 0.04615205087811424, "grad_norm": 2.314498525771706, "learning_rate": 9.993176078383198e-06, "loss": 0.2144, "step": 1582 }, { "epoch": 0.0461812241087578, "grad_norm": 0.9938216294739247, "learning_rate": 9.993151382065372e-06, "loss": 0.2137, "step": 1583 }, { "epoch": 0.04621039733940137, "grad_norm": 0.8233384269887488, "learning_rate": 9.993126641169884e-06, "loss": 0.2071, "step": 1584 }, { "epoch": 0.04623957057004493, "grad_norm": 0.9796380743525783, "learning_rate": 9.993101855696955e-06, "loss": 0.2138, "step": 1585 }, { "epoch": 0.04626874380068849, "grad_norm": 0.9481253593913958, "learning_rate": 9.993077025646802e-06, "loss": 0.2028, "step": 1586 }, { "epoch": 0.04629791703133205, "grad_norm": 0.8992629592784864, "learning_rate": 9.99305215101965e-06, "loss": 0.22, "step": 1587 }, { "epoch": 0.04632709026197561, "grad_norm": 0.9038992823317034, "learning_rate": 9.993027231815722e-06, "loss": 0.2264, "step": 1588 }, { "epoch": 0.04635626349261917, "grad_norm": 0.9287985595121647, "learning_rate": 9.993002268035237e-06, "loss": 0.2084, "step": 1589 }, { "epoch": 0.04638543672326274, "grad_norm": 0.9586303799953108, "learning_rate": 9.99297725967842e-06, "loss": 0.2085, "step": 1590 }, { "epoch": 0.0464146099539063, "grad_norm": 1.0202403101467916, "learning_rate": 9.992952206745494e-06, "loss": 0.2071, "step": 1591 }, { "epoch": 0.04644378318454986, "grad_norm": 1.0218848534619405, "learning_rate": 9.992927109236684e-06, "loss": 0.2122, "step": 1592 }, { "epoch": 0.04647295641519342, "grad_norm": 1.029016927298267, "learning_rate": 9.99290196715221e-06, "loss": 0.2096, "step": 1593 }, { "epoch": 0.04650212964583698, "grad_norm": 1.049779304993706, "learning_rate": 9.9928767804923e-06, "loss": 0.1966, "step": 1594 }, { "epoch": 0.046531302876480544, "grad_norm": 0.9050569421058766, "learning_rate": 9.99285154925718e-06, "loss": 0.2015, "step": 1595 }, { "epoch": 0.046560476107124105, "grad_norm": 0.9535046111228952, "learning_rate": 9.992826273447072e-06, "loss": 0.2117, "step": 1596 }, { "epoch": 0.046589649337767665, "grad_norm": 0.9989884600454102, "learning_rate": 9.9928009530622e-06, "loss": 0.2196, "step": 1597 }, { "epoch": 0.046618822568411225, "grad_norm": 1.0894001553216668, "learning_rate": 9.992775588102797e-06, "loss": 0.1903, "step": 1598 }, { "epoch": 0.046647995799054785, "grad_norm": 0.9933183508765261, "learning_rate": 9.992750178569084e-06, "loss": 0.2124, "step": 1599 }, { "epoch": 0.046677169029698345, "grad_norm": 0.7758056013643674, "learning_rate": 9.992724724461289e-06, "loss": 0.2065, "step": 1600 }, { "epoch": 0.04670634226034191, "grad_norm": 0.9613749623422736, "learning_rate": 9.992699225779641e-06, "loss": 0.2316, "step": 1601 }, { "epoch": 0.04673551549098547, "grad_norm": 1.1799737646147839, "learning_rate": 9.992673682524366e-06, "loss": 0.2316, "step": 1602 }, { "epoch": 0.04676468872162903, "grad_norm": 1.2648274575330423, "learning_rate": 9.99264809469569e-06, "loss": 0.2398, "step": 1603 }, { "epoch": 0.04679386195227259, "grad_norm": 1.2029527954061672, "learning_rate": 9.992622462293845e-06, "loss": 0.2238, "step": 1604 }, { "epoch": 0.04682303518291615, "grad_norm": 0.9771608790612691, "learning_rate": 9.992596785319057e-06, "loss": 0.2002, "step": 1605 }, { "epoch": 0.04685220841355972, "grad_norm": 0.9399009127460336, "learning_rate": 9.99257106377156e-06, "loss": 0.2116, "step": 1606 }, { "epoch": 0.04688138164420328, "grad_norm": 0.940691461243414, "learning_rate": 9.992545297651578e-06, "loss": 0.2136, "step": 1607 }, { "epoch": 0.04691055487484684, "grad_norm": 1.1359818303475295, "learning_rate": 9.992519486959345e-06, "loss": 0.2322, "step": 1608 }, { "epoch": 0.0469397281054904, "grad_norm": 1.0748126340976316, "learning_rate": 9.992493631695089e-06, "loss": 0.2113, "step": 1609 }, { "epoch": 0.04696890133613396, "grad_norm": 1.0834980895099093, "learning_rate": 9.99246773185904e-06, "loss": 0.2652, "step": 1610 }, { "epoch": 0.04699807456677752, "grad_norm": 1.0644096924821413, "learning_rate": 9.992441787451432e-06, "loss": 0.2122, "step": 1611 }, { "epoch": 0.04702724779742109, "grad_norm": 1.0092083097213227, "learning_rate": 9.992415798472496e-06, "loss": 0.2179, "step": 1612 }, { "epoch": 0.04705642102806465, "grad_norm": 0.8137581387339601, "learning_rate": 9.992389764922464e-06, "loss": 0.2069, "step": 1613 }, { "epoch": 0.04708559425870821, "grad_norm": 1.3476142279147683, "learning_rate": 9.992363686801568e-06, "loss": 0.2394, "step": 1614 }, { "epoch": 0.04711476748935177, "grad_norm": 1.030814079950729, "learning_rate": 9.992337564110038e-06, "loss": 0.2268, "step": 1615 }, { "epoch": 0.04714394071999533, "grad_norm": 1.0241879382155972, "learning_rate": 9.992311396848113e-06, "loss": 0.2204, "step": 1616 }, { "epoch": 0.0471731139506389, "grad_norm": 0.9256986412470775, "learning_rate": 9.992285185016022e-06, "loss": 0.2208, "step": 1617 }, { "epoch": 0.04720228718128246, "grad_norm": 1.1143884070712264, "learning_rate": 9.992258928614002e-06, "loss": 0.2413, "step": 1618 }, { "epoch": 0.04723146041192602, "grad_norm": 0.8125416427878479, "learning_rate": 9.992232627642284e-06, "loss": 0.2072, "step": 1619 }, { "epoch": 0.04726063364256958, "grad_norm": 0.8811217609983716, "learning_rate": 9.992206282101106e-06, "loss": 0.2078, "step": 1620 }, { "epoch": 0.04728980687321314, "grad_norm": 0.932856647391488, "learning_rate": 9.992179891990703e-06, "loss": 0.2095, "step": 1621 }, { "epoch": 0.047318980103856705, "grad_norm": 0.9214838875133651, "learning_rate": 9.992153457311308e-06, "loss": 0.2038, "step": 1622 }, { "epoch": 0.047348153334500265, "grad_norm": 0.9426686113551024, "learning_rate": 9.99212697806316e-06, "loss": 0.2249, "step": 1623 }, { "epoch": 0.047377326565143825, "grad_norm": 0.9968463502456327, "learning_rate": 9.992100454246494e-06, "loss": 0.2193, "step": 1624 }, { "epoch": 0.047406499795787385, "grad_norm": 0.9871110081566117, "learning_rate": 9.992073885861546e-06, "loss": 0.2156, "step": 1625 }, { "epoch": 0.047435673026430945, "grad_norm": 1.0524692523570862, "learning_rate": 9.992047272908554e-06, "loss": 0.2117, "step": 1626 }, { "epoch": 0.047464846257074506, "grad_norm": 0.893359779581283, "learning_rate": 9.992020615387756e-06, "loss": 0.1994, "step": 1627 }, { "epoch": 0.04749401948771807, "grad_norm": 1.1422946182731655, "learning_rate": 9.991993913299392e-06, "loss": 0.2082, "step": 1628 }, { "epoch": 0.04752319271836163, "grad_norm": 1.0213423646132167, "learning_rate": 9.991967166643695e-06, "loss": 0.2074, "step": 1629 }, { "epoch": 0.04755236594900519, "grad_norm": 0.8688140390904798, "learning_rate": 9.991940375420907e-06, "loss": 0.2072, "step": 1630 }, { "epoch": 0.04758153917964875, "grad_norm": 1.0926007970399714, "learning_rate": 9.991913539631267e-06, "loss": 0.2259, "step": 1631 }, { "epoch": 0.047610712410292313, "grad_norm": 1.5254049005774917, "learning_rate": 9.991886659275016e-06, "loss": 0.2821, "step": 1632 }, { "epoch": 0.04763988564093588, "grad_norm": 1.0016734470443482, "learning_rate": 9.991859734352391e-06, "loss": 0.1872, "step": 1633 }, { "epoch": 0.04766905887157944, "grad_norm": 1.1477664097592586, "learning_rate": 9.991832764863635e-06, "loss": 0.2349, "step": 1634 }, { "epoch": 0.047698232102223, "grad_norm": 1.0263970694145326, "learning_rate": 9.991805750808986e-06, "loss": 0.2047, "step": 1635 }, { "epoch": 0.04772740533286656, "grad_norm": 1.15791772232876, "learning_rate": 9.99177869218869e-06, "loss": 0.2363, "step": 1636 }, { "epoch": 0.04775657856351012, "grad_norm": 1.0025654025994395, "learning_rate": 9.991751589002985e-06, "loss": 0.2121, "step": 1637 }, { "epoch": 0.04778575179415368, "grad_norm": 0.898770828722737, "learning_rate": 9.99172444125211e-06, "loss": 0.199, "step": 1638 }, { "epoch": 0.04781492502479725, "grad_norm": 0.9471821780665727, "learning_rate": 9.991697248936313e-06, "loss": 0.2112, "step": 1639 }, { "epoch": 0.04784409825544081, "grad_norm": 0.7901277486431224, "learning_rate": 9.991670012055836e-06, "loss": 0.2362, "step": 1640 }, { "epoch": 0.04787327148608437, "grad_norm": 0.9431150578435697, "learning_rate": 9.991642730610919e-06, "loss": 0.2316, "step": 1641 }, { "epoch": 0.04790244471672793, "grad_norm": 0.872533087573785, "learning_rate": 9.991615404601808e-06, "loss": 0.2141, "step": 1642 }, { "epoch": 0.04793161794737149, "grad_norm": 0.878117108498137, "learning_rate": 9.991588034028746e-06, "loss": 0.2118, "step": 1643 }, { "epoch": 0.04796079117801506, "grad_norm": 0.8220249744036633, "learning_rate": 9.991560618891978e-06, "loss": 0.1948, "step": 1644 }, { "epoch": 0.04798996440865862, "grad_norm": 0.8555776022492638, "learning_rate": 9.991533159191748e-06, "loss": 0.1847, "step": 1645 }, { "epoch": 0.04801913763930218, "grad_norm": 0.8560024473690883, "learning_rate": 9.9915056549283e-06, "loss": 0.205, "step": 1646 }, { "epoch": 0.04804831086994574, "grad_norm": 1.0626266583178325, "learning_rate": 9.991478106101884e-06, "loss": 0.2026, "step": 1647 }, { "epoch": 0.0480774841005893, "grad_norm": 1.0366821465949032, "learning_rate": 9.991450512712742e-06, "loss": 0.2175, "step": 1648 }, { "epoch": 0.04810665733123286, "grad_norm": 0.9532431541562103, "learning_rate": 9.99142287476112e-06, "loss": 0.2355, "step": 1649 }, { "epoch": 0.048135830561876425, "grad_norm": 1.1071808286644875, "learning_rate": 9.991395192247267e-06, "loss": 0.2178, "step": 1650 }, { "epoch": 0.048165003792519985, "grad_norm": 1.0919039036106617, "learning_rate": 9.991367465171428e-06, "loss": 0.2171, "step": 1651 }, { "epoch": 0.048194177023163545, "grad_norm": 1.0521939051560518, "learning_rate": 9.991339693533855e-06, "loss": 0.2329, "step": 1652 }, { "epoch": 0.048223350253807105, "grad_norm": 1.2599307604008543, "learning_rate": 9.99131187733479e-06, "loss": 0.2113, "step": 1653 }, { "epoch": 0.048252523484450666, "grad_norm": 1.0138597731674888, "learning_rate": 9.991284016574485e-06, "loss": 0.1956, "step": 1654 }, { "epoch": 0.04828169671509423, "grad_norm": 1.0015503180634682, "learning_rate": 9.991256111253188e-06, "loss": 0.1881, "step": 1655 }, { "epoch": 0.04831086994573779, "grad_norm": 1.4548200339884936, "learning_rate": 9.991228161371147e-06, "loss": 0.2136, "step": 1656 }, { "epoch": 0.04834004317638135, "grad_norm": 0.9606732047312863, "learning_rate": 9.991200166928613e-06, "loss": 0.2435, "step": 1657 }, { "epoch": 0.04836921640702491, "grad_norm": 1.1041675199523862, "learning_rate": 9.991172127925836e-06, "loss": 0.2497, "step": 1658 }, { "epoch": 0.048398389637668474, "grad_norm": 1.0056461141323014, "learning_rate": 9.991144044363066e-06, "loss": 0.2228, "step": 1659 }, { "epoch": 0.048427562868312034, "grad_norm": 0.8770300574055199, "learning_rate": 9.991115916240553e-06, "loss": 0.2213, "step": 1660 }, { "epoch": 0.0484567360989556, "grad_norm": 0.9344253606928895, "learning_rate": 9.991087743558548e-06, "loss": 0.246, "step": 1661 }, { "epoch": 0.04848590932959916, "grad_norm": 1.1217884327800407, "learning_rate": 9.991059526317304e-06, "loss": 0.217, "step": 1662 }, { "epoch": 0.04851508256024272, "grad_norm": 0.7930454799108859, "learning_rate": 9.991031264517071e-06, "loss": 0.1797, "step": 1663 }, { "epoch": 0.04854425579088628, "grad_norm": 0.8719649707868175, "learning_rate": 9.991002958158102e-06, "loss": 0.2095, "step": 1664 }, { "epoch": 0.04857342902152984, "grad_norm": 1.0968692164460105, "learning_rate": 9.990974607240651e-06, "loss": 0.2396, "step": 1665 }, { "epoch": 0.04860260225217341, "grad_norm": 1.0112037472179267, "learning_rate": 9.990946211764971e-06, "loss": 0.2356, "step": 1666 }, { "epoch": 0.04863177548281697, "grad_norm": 1.0670663463650947, "learning_rate": 9.990917771731313e-06, "loss": 0.2219, "step": 1667 }, { "epoch": 0.04866094871346053, "grad_norm": 1.0379911288760688, "learning_rate": 9.990889287139933e-06, "loss": 0.2028, "step": 1668 }, { "epoch": 0.04869012194410409, "grad_norm": 0.7639772036524248, "learning_rate": 9.990860757991085e-06, "loss": 0.1853, "step": 1669 }, { "epoch": 0.04871929517474765, "grad_norm": 0.9674274347028614, "learning_rate": 9.990832184285025e-06, "loss": 0.2193, "step": 1670 }, { "epoch": 0.04874846840539121, "grad_norm": 0.9557308291471724, "learning_rate": 9.990803566022006e-06, "loss": 0.2198, "step": 1671 }, { "epoch": 0.04877764163603478, "grad_norm": 0.8525217282539468, "learning_rate": 9.990774903202282e-06, "loss": 0.1895, "step": 1672 }, { "epoch": 0.04880681486667834, "grad_norm": 1.1871635907429425, "learning_rate": 9.990746195826113e-06, "loss": 0.2313, "step": 1673 }, { "epoch": 0.0488359880973219, "grad_norm": 0.9281359347017197, "learning_rate": 9.99071744389375e-06, "loss": 0.194, "step": 1674 }, { "epoch": 0.04886516132796546, "grad_norm": 1.1358069207452808, "learning_rate": 9.990688647405457e-06, "loss": 0.2299, "step": 1675 }, { "epoch": 0.04889433455860902, "grad_norm": 0.9558605429837128, "learning_rate": 9.990659806361487e-06, "loss": 0.1973, "step": 1676 }, { "epoch": 0.048923507789252585, "grad_norm": 0.9014631057553211, "learning_rate": 9.990630920762096e-06, "loss": 0.2143, "step": 1677 }, { "epoch": 0.048952681019896145, "grad_norm": 1.0099362039814408, "learning_rate": 9.990601990607544e-06, "loss": 0.2007, "step": 1678 }, { "epoch": 0.048981854250539705, "grad_norm": 1.0241491990455205, "learning_rate": 9.99057301589809e-06, "loss": 0.2192, "step": 1679 }, { "epoch": 0.049011027481183266, "grad_norm": 0.9551060818761057, "learning_rate": 9.99054399663399e-06, "loss": 0.2423, "step": 1680 }, { "epoch": 0.049040200711826826, "grad_norm": 1.0235115751315123, "learning_rate": 9.990514932815505e-06, "loss": 0.2223, "step": 1681 }, { "epoch": 0.049069373942470386, "grad_norm": 1.0437110676507673, "learning_rate": 9.990485824442893e-06, "loss": 0.2292, "step": 1682 }, { "epoch": 0.04909854717311395, "grad_norm": 0.8227736075106783, "learning_rate": 9.990456671516418e-06, "loss": 0.2299, "step": 1683 }, { "epoch": 0.04912772040375751, "grad_norm": 1.1425436215855296, "learning_rate": 9.990427474036333e-06, "loss": 0.2075, "step": 1684 }, { "epoch": 0.04915689363440107, "grad_norm": 1.1496941580824722, "learning_rate": 9.990398232002907e-06, "loss": 0.2763, "step": 1685 }, { "epoch": 0.049186066865044634, "grad_norm": 0.8975550080507411, "learning_rate": 9.990368945416392e-06, "loss": 0.2092, "step": 1686 }, { "epoch": 0.049215240095688194, "grad_norm": 0.8923290018228252, "learning_rate": 9.990339614277058e-06, "loss": 0.2019, "step": 1687 }, { "epoch": 0.04924441332633176, "grad_norm": 1.0635083174019502, "learning_rate": 9.990310238585162e-06, "loss": 0.2094, "step": 1688 }, { "epoch": 0.04927358655697532, "grad_norm": 1.021382709157859, "learning_rate": 9.990280818340968e-06, "loss": 0.2252, "step": 1689 }, { "epoch": 0.04930275978761888, "grad_norm": 0.9332255670808633, "learning_rate": 9.990251353544738e-06, "loss": 0.1944, "step": 1690 }, { "epoch": 0.04933193301826244, "grad_norm": 1.0526499907531872, "learning_rate": 9.990221844196734e-06, "loss": 0.1899, "step": 1691 }, { "epoch": 0.049361106248906, "grad_norm": 0.9494404648611682, "learning_rate": 9.990192290297223e-06, "loss": 0.2116, "step": 1692 }, { "epoch": 0.04939027947954956, "grad_norm": 1.3654028412022048, "learning_rate": 9.990162691846466e-06, "loss": 0.2082, "step": 1693 }, { "epoch": 0.04941945271019313, "grad_norm": 1.1887938946549368, "learning_rate": 9.990133048844726e-06, "loss": 0.1969, "step": 1694 }, { "epoch": 0.04944862594083669, "grad_norm": 1.1183445820649272, "learning_rate": 9.99010336129227e-06, "loss": 0.2169, "step": 1695 }, { "epoch": 0.04947779917148025, "grad_norm": 1.1069014753156676, "learning_rate": 9.990073629189364e-06, "loss": 0.1969, "step": 1696 }, { "epoch": 0.04950697240212381, "grad_norm": 0.929966883144153, "learning_rate": 9.99004385253627e-06, "loss": 0.1927, "step": 1697 }, { "epoch": 0.04953614563276737, "grad_norm": 0.7625456696110425, "learning_rate": 9.990014031333256e-06, "loss": 0.1831, "step": 1698 }, { "epoch": 0.04956531886341094, "grad_norm": 0.8287507863528123, "learning_rate": 9.989984165580588e-06, "loss": 0.2018, "step": 1699 }, { "epoch": 0.0495944920940545, "grad_norm": 1.0421683718229273, "learning_rate": 9.989954255278534e-06, "loss": 0.1863, "step": 1700 }, { "epoch": 0.04962366532469806, "grad_norm": 1.030293521807953, "learning_rate": 9.989924300427356e-06, "loss": 0.1967, "step": 1701 }, { "epoch": 0.04965283855534162, "grad_norm": 1.0855062412490677, "learning_rate": 9.989894301027328e-06, "loss": 0.2197, "step": 1702 }, { "epoch": 0.04968201178598518, "grad_norm": 1.039058519086821, "learning_rate": 9.989864257078715e-06, "loss": 0.2157, "step": 1703 }, { "epoch": 0.04971118501662874, "grad_norm": 0.9145943238871364, "learning_rate": 9.989834168581784e-06, "loss": 0.1884, "step": 1704 }, { "epoch": 0.049740358247272305, "grad_norm": 0.9387708509635466, "learning_rate": 9.989804035536805e-06, "loss": 0.2036, "step": 1705 }, { "epoch": 0.049769531477915865, "grad_norm": 0.9956052149277963, "learning_rate": 9.989773857944048e-06, "loss": 0.2192, "step": 1706 }, { "epoch": 0.049798704708559426, "grad_norm": 1.0293819252919336, "learning_rate": 9.989743635803779e-06, "loss": 0.2334, "step": 1707 }, { "epoch": 0.049827877939202986, "grad_norm": 0.8222808592083097, "learning_rate": 9.989713369116271e-06, "loss": 0.2021, "step": 1708 }, { "epoch": 0.049857051169846546, "grad_norm": 0.9689052093946374, "learning_rate": 9.989683057881794e-06, "loss": 0.236, "step": 1709 }, { "epoch": 0.04988622440049011, "grad_norm": 0.8636164962595155, "learning_rate": 9.989652702100616e-06, "loss": 0.2122, "step": 1710 }, { "epoch": 0.04991539763113367, "grad_norm": 0.9883710485548339, "learning_rate": 9.989622301773011e-06, "loss": 0.2155, "step": 1711 }, { "epoch": 0.049944570861777234, "grad_norm": 0.8859232847320321, "learning_rate": 9.989591856899248e-06, "loss": 0.2236, "step": 1712 }, { "epoch": 0.049973744092420794, "grad_norm": 0.9338670941696584, "learning_rate": 9.989561367479603e-06, "loss": 0.2154, "step": 1713 }, { "epoch": 0.050002917323064354, "grad_norm": 1.0418470685744265, "learning_rate": 9.989530833514342e-06, "loss": 0.2294, "step": 1714 }, { "epoch": 0.05003209055370792, "grad_norm": 0.9223354819604049, "learning_rate": 9.989500255003743e-06, "loss": 0.2107, "step": 1715 }, { "epoch": 0.05006126378435148, "grad_norm": 0.9554485296783357, "learning_rate": 9.989469631948075e-06, "loss": 0.2039, "step": 1716 }, { "epoch": 0.05009043701499504, "grad_norm": 0.9688904025197402, "learning_rate": 9.989438964347614e-06, "loss": 0.2272, "step": 1717 }, { "epoch": 0.0501196102456386, "grad_norm": 1.0793616107432007, "learning_rate": 9.989408252202632e-06, "loss": 0.1994, "step": 1718 }, { "epoch": 0.05014878347628216, "grad_norm": 1.0896751899895707, "learning_rate": 9.989377495513407e-06, "loss": 0.2178, "step": 1719 }, { "epoch": 0.05017795670692572, "grad_norm": 1.0354239852979008, "learning_rate": 9.989346694280208e-06, "loss": 0.2494, "step": 1720 }, { "epoch": 0.05020712993756929, "grad_norm": 1.08500778617434, "learning_rate": 9.989315848503314e-06, "loss": 0.194, "step": 1721 }, { "epoch": 0.05023630316821285, "grad_norm": 0.851551946042162, "learning_rate": 9.989284958182998e-06, "loss": 0.2071, "step": 1722 }, { "epoch": 0.05026547639885641, "grad_norm": 0.8830058696664645, "learning_rate": 9.989254023319539e-06, "loss": 0.2268, "step": 1723 }, { "epoch": 0.05029464962949997, "grad_norm": 1.062087466055277, "learning_rate": 9.98922304391321e-06, "loss": 0.2171, "step": 1724 }, { "epoch": 0.05032382286014353, "grad_norm": 1.0310098785935518, "learning_rate": 9.98919201996429e-06, "loss": 0.2071, "step": 1725 }, { "epoch": 0.0503529960907871, "grad_norm": 0.9616539437889566, "learning_rate": 9.989160951473051e-06, "loss": 0.2104, "step": 1726 }, { "epoch": 0.05038216932143066, "grad_norm": 0.9808099587682962, "learning_rate": 9.989129838439778e-06, "loss": 0.1964, "step": 1727 }, { "epoch": 0.05041134255207422, "grad_norm": 0.9796749181222015, "learning_rate": 9.989098680864741e-06, "loss": 0.1986, "step": 1728 }, { "epoch": 0.05044051578271778, "grad_norm": 1.1296507753056482, "learning_rate": 9.989067478748225e-06, "loss": 0.2021, "step": 1729 }, { "epoch": 0.05046968901336134, "grad_norm": 1.0043817801041206, "learning_rate": 9.989036232090506e-06, "loss": 0.2308, "step": 1730 }, { "epoch": 0.0504988622440049, "grad_norm": 0.9883080734561521, "learning_rate": 9.98900494089186e-06, "loss": 0.2, "step": 1731 }, { "epoch": 0.050528035474648465, "grad_norm": 1.1953902816615842, "learning_rate": 9.98897360515257e-06, "loss": 0.214, "step": 1732 }, { "epoch": 0.050557208705292025, "grad_norm": 1.0499755971419535, "learning_rate": 9.988942224872916e-06, "loss": 0.2194, "step": 1733 }, { "epoch": 0.050586381935935586, "grad_norm": 1.3567395572332095, "learning_rate": 9.988910800053174e-06, "loss": 0.2297, "step": 1734 }, { "epoch": 0.050615555166579146, "grad_norm": 1.058652552738655, "learning_rate": 9.988879330693629e-06, "loss": 0.2193, "step": 1735 }, { "epoch": 0.050644728397222706, "grad_norm": 1.1152018443812863, "learning_rate": 9.98884781679456e-06, "loss": 0.2138, "step": 1736 }, { "epoch": 0.05067390162786627, "grad_norm": 1.1431676401102813, "learning_rate": 9.988816258356249e-06, "loss": 0.2326, "step": 1737 }, { "epoch": 0.05070307485850983, "grad_norm": 1.0671413599887805, "learning_rate": 9.988784655378976e-06, "loss": 0.2211, "step": 1738 }, { "epoch": 0.050732248089153394, "grad_norm": 0.9707393920933772, "learning_rate": 9.988753007863025e-06, "loss": 0.1939, "step": 1739 }, { "epoch": 0.050761421319796954, "grad_norm": 0.8133631276971836, "learning_rate": 9.98872131580868e-06, "loss": 0.2146, "step": 1740 }, { "epoch": 0.050790594550440514, "grad_norm": 0.8605833063521691, "learning_rate": 9.98868957921622e-06, "loss": 0.2003, "step": 1741 }, { "epoch": 0.050819767781084074, "grad_norm": 0.7330354260552346, "learning_rate": 9.98865779808593e-06, "loss": 0.1961, "step": 1742 }, { "epoch": 0.05084894101172764, "grad_norm": 0.8580404109212706, "learning_rate": 9.988625972418096e-06, "loss": 0.1902, "step": 1743 }, { "epoch": 0.0508781142423712, "grad_norm": 1.1967846988616961, "learning_rate": 9.988594102212999e-06, "loss": 0.1938, "step": 1744 }, { "epoch": 0.05090728747301476, "grad_norm": 1.0104081967587848, "learning_rate": 9.988562187470925e-06, "loss": 0.2121, "step": 1745 }, { "epoch": 0.05093646070365832, "grad_norm": 1.1529529643483754, "learning_rate": 9.988530228192158e-06, "loss": 0.2084, "step": 1746 }, { "epoch": 0.05096563393430188, "grad_norm": 0.9197788799969081, "learning_rate": 9.988498224376985e-06, "loss": 0.2006, "step": 1747 }, { "epoch": 0.05099480716494545, "grad_norm": 1.0174692305896282, "learning_rate": 9.988466176025689e-06, "loss": 0.1982, "step": 1748 }, { "epoch": 0.05102398039558901, "grad_norm": 0.9349769771620468, "learning_rate": 9.988434083138561e-06, "loss": 0.2276, "step": 1749 }, { "epoch": 0.05105315362623257, "grad_norm": 1.0934151233929899, "learning_rate": 9.988401945715882e-06, "loss": 0.2126, "step": 1750 }, { "epoch": 0.05108232685687613, "grad_norm": 1.36762340528711, "learning_rate": 9.98836976375794e-06, "loss": 0.2178, "step": 1751 }, { "epoch": 0.05111150008751969, "grad_norm": 1.0101928167570686, "learning_rate": 9.988337537265026e-06, "loss": 0.2071, "step": 1752 }, { "epoch": 0.05114067331816325, "grad_norm": 1.1283120636047486, "learning_rate": 9.988305266237425e-06, "loss": 0.2166, "step": 1753 }, { "epoch": 0.05116984654880682, "grad_norm": 0.9499565542893285, "learning_rate": 9.988272950675423e-06, "loss": 0.2103, "step": 1754 }, { "epoch": 0.05119901977945038, "grad_norm": 1.0255609302975421, "learning_rate": 9.988240590579314e-06, "loss": 0.2094, "step": 1755 }, { "epoch": 0.05122819301009394, "grad_norm": 1.0197668974193146, "learning_rate": 9.988208185949382e-06, "loss": 0.2069, "step": 1756 }, { "epoch": 0.0512573662407375, "grad_norm": 1.018468946312996, "learning_rate": 9.988175736785919e-06, "loss": 0.2054, "step": 1757 }, { "epoch": 0.05128653947138106, "grad_norm": 0.7720587618144342, "learning_rate": 9.988143243089214e-06, "loss": 0.1945, "step": 1758 }, { "epoch": 0.051315712702024625, "grad_norm": 0.8976148147428786, "learning_rate": 9.988110704859557e-06, "loss": 0.1977, "step": 1759 }, { "epoch": 0.051344885932668186, "grad_norm": 0.8569834394311729, "learning_rate": 9.988078122097238e-06, "loss": 0.2066, "step": 1760 }, { "epoch": 0.051374059163311746, "grad_norm": 0.8870857932805177, "learning_rate": 9.988045494802548e-06, "loss": 0.2169, "step": 1761 }, { "epoch": 0.051403232393955306, "grad_norm": 0.9764283677320921, "learning_rate": 9.988012822975778e-06, "loss": 0.2199, "step": 1762 }, { "epoch": 0.051432405624598866, "grad_norm": 0.8400425016905786, "learning_rate": 9.987980106617221e-06, "loss": 0.2174, "step": 1763 }, { "epoch": 0.051461578855242426, "grad_norm": 0.8448586166941309, "learning_rate": 9.987947345727167e-06, "loss": 0.1997, "step": 1764 }, { "epoch": 0.051490752085885994, "grad_norm": 0.9292200392314232, "learning_rate": 9.987914540305911e-06, "loss": 0.2065, "step": 1765 }, { "epoch": 0.051519925316529554, "grad_norm": 0.9746200610832763, "learning_rate": 9.987881690353744e-06, "loss": 0.2139, "step": 1766 }, { "epoch": 0.051549098547173114, "grad_norm": 1.0032499501603669, "learning_rate": 9.987848795870962e-06, "loss": 0.1997, "step": 1767 }, { "epoch": 0.051578271777816674, "grad_norm": 0.961661839373899, "learning_rate": 9.987815856857856e-06, "loss": 0.1922, "step": 1768 }, { "epoch": 0.051607445008460234, "grad_norm": 0.9737648956227785, "learning_rate": 9.98778287331472e-06, "loss": 0.1909, "step": 1769 }, { "epoch": 0.0516366182391038, "grad_norm": 1.1386668813997758, "learning_rate": 9.987749845241849e-06, "loss": 0.2159, "step": 1770 }, { "epoch": 0.05166579146974736, "grad_norm": 1.1632667233396496, "learning_rate": 9.987716772639537e-06, "loss": 0.2037, "step": 1771 }, { "epoch": 0.05169496470039092, "grad_norm": 1.1498587634493553, "learning_rate": 9.987683655508082e-06, "loss": 0.2186, "step": 1772 }, { "epoch": 0.05172413793103448, "grad_norm": 1.0121456586626543, "learning_rate": 9.987650493847778e-06, "loss": 0.2564, "step": 1773 }, { "epoch": 0.05175331116167804, "grad_norm": 1.0798339386031093, "learning_rate": 9.98761728765892e-06, "loss": 0.2089, "step": 1774 }, { "epoch": 0.0517824843923216, "grad_norm": 1.06653919711271, "learning_rate": 9.987584036941806e-06, "loss": 0.2035, "step": 1775 }, { "epoch": 0.05181165762296517, "grad_norm": 1.3176017160123297, "learning_rate": 9.987550741696734e-06, "loss": 0.2037, "step": 1776 }, { "epoch": 0.05184083085360873, "grad_norm": 1.2445537573465832, "learning_rate": 9.987517401923996e-06, "loss": 0.2313, "step": 1777 }, { "epoch": 0.05187000408425229, "grad_norm": 0.9860102086983821, "learning_rate": 9.987484017623896e-06, "loss": 0.2273, "step": 1778 }, { "epoch": 0.05189917731489585, "grad_norm": 0.9877336386360043, "learning_rate": 9.987450588796729e-06, "loss": 0.2009, "step": 1779 }, { "epoch": 0.05192835054553941, "grad_norm": 0.8539206765930014, "learning_rate": 9.987417115442793e-06, "loss": 0.2181, "step": 1780 }, { "epoch": 0.05195752377618298, "grad_norm": 0.9826417333117898, "learning_rate": 9.987383597562388e-06, "loss": 0.2471, "step": 1781 }, { "epoch": 0.05198669700682654, "grad_norm": 1.0993517355804936, "learning_rate": 9.987350035155813e-06, "loss": 0.225, "step": 1782 }, { "epoch": 0.0520158702374701, "grad_norm": 0.7973722927989587, "learning_rate": 9.987316428223367e-06, "loss": 0.182, "step": 1783 }, { "epoch": 0.05204504346811366, "grad_norm": 0.9459140369300196, "learning_rate": 9.98728277676535e-06, "loss": 0.1943, "step": 1784 }, { "epoch": 0.05207421669875722, "grad_norm": 0.9004056542612794, "learning_rate": 9.987249080782065e-06, "loss": 0.2117, "step": 1785 }, { "epoch": 0.05210338992940078, "grad_norm": 0.8568729774458818, "learning_rate": 9.987215340273809e-06, "loss": 0.2147, "step": 1786 }, { "epoch": 0.052132563160044346, "grad_norm": 0.9424697505205792, "learning_rate": 9.987181555240886e-06, "loss": 0.2059, "step": 1787 }, { "epoch": 0.052161736390687906, "grad_norm": 0.9504645368660839, "learning_rate": 9.987147725683595e-06, "loss": 0.2197, "step": 1788 }, { "epoch": 0.052190909621331466, "grad_norm": 0.8920022479488012, "learning_rate": 9.987113851602241e-06, "loss": 0.2079, "step": 1789 }, { "epoch": 0.052220082851975026, "grad_norm": 0.8335692029373526, "learning_rate": 9.987079932997124e-06, "loss": 0.2085, "step": 1790 }, { "epoch": 0.052249256082618586, "grad_norm": 0.8922263766690413, "learning_rate": 9.98704596986855e-06, "loss": 0.2016, "step": 1791 }, { "epoch": 0.052278429313262154, "grad_norm": 1.1374022594460134, "learning_rate": 9.987011962216817e-06, "loss": 0.2218, "step": 1792 }, { "epoch": 0.052307602543905714, "grad_norm": 1.0103153351072, "learning_rate": 9.986977910042236e-06, "loss": 0.2133, "step": 1793 }, { "epoch": 0.052336775774549274, "grad_norm": 0.8372195469361688, "learning_rate": 9.986943813345102e-06, "loss": 0.204, "step": 1794 }, { "epoch": 0.052365949005192834, "grad_norm": 0.9685977819820961, "learning_rate": 9.986909672125726e-06, "loss": 0.2102, "step": 1795 }, { "epoch": 0.052395122235836394, "grad_norm": 0.8709555196077586, "learning_rate": 9.98687548638441e-06, "loss": 0.1972, "step": 1796 }, { "epoch": 0.052424295466479955, "grad_norm": 0.994100812796675, "learning_rate": 9.986841256121462e-06, "loss": 0.2141, "step": 1797 }, { "epoch": 0.05245346869712352, "grad_norm": 0.8335077470829617, "learning_rate": 9.986806981337186e-06, "loss": 0.2094, "step": 1798 }, { "epoch": 0.05248264192776708, "grad_norm": 1.0429819177319595, "learning_rate": 9.986772662031886e-06, "loss": 0.2103, "step": 1799 }, { "epoch": 0.05251181515841064, "grad_norm": 1.1556976067958373, "learning_rate": 9.986738298205872e-06, "loss": 0.2137, "step": 1800 }, { "epoch": 0.0525409883890542, "grad_norm": 0.863431425020644, "learning_rate": 9.986703889859447e-06, "loss": 0.1968, "step": 1801 }, { "epoch": 0.05257016161969776, "grad_norm": 1.0146475839847757, "learning_rate": 9.98666943699292e-06, "loss": 0.2019, "step": 1802 }, { "epoch": 0.05259933485034133, "grad_norm": 1.018489384083942, "learning_rate": 9.9866349396066e-06, "loss": 0.2103, "step": 1803 }, { "epoch": 0.05262850808098489, "grad_norm": 0.9442883325392579, "learning_rate": 9.986600397700792e-06, "loss": 0.2222, "step": 1804 }, { "epoch": 0.05265768131162845, "grad_norm": 0.8475062797662067, "learning_rate": 9.986565811275808e-06, "loss": 0.1946, "step": 1805 }, { "epoch": 0.05268685454227201, "grad_norm": 0.9145959988323972, "learning_rate": 9.986531180331954e-06, "loss": 0.2294, "step": 1806 }, { "epoch": 0.05271602777291557, "grad_norm": 0.9104170814839893, "learning_rate": 9.986496504869539e-06, "loss": 0.1987, "step": 1807 }, { "epoch": 0.05274520100355914, "grad_norm": 0.9828131742766328, "learning_rate": 9.986461784888874e-06, "loss": 0.2209, "step": 1808 }, { "epoch": 0.0527743742342027, "grad_norm": 1.0438944454902002, "learning_rate": 9.98642702039027e-06, "loss": 0.2289, "step": 1809 }, { "epoch": 0.05280354746484626, "grad_norm": 0.9798356376138588, "learning_rate": 9.986392211374036e-06, "loss": 0.2171, "step": 1810 }, { "epoch": 0.05283272069548982, "grad_norm": 0.9239154507567835, "learning_rate": 9.986357357840482e-06, "loss": 0.2172, "step": 1811 }, { "epoch": 0.05286189392613338, "grad_norm": 0.9252103679074746, "learning_rate": 9.986322459789919e-06, "loss": 0.1894, "step": 1812 }, { "epoch": 0.05289106715677694, "grad_norm": 1.0655456133056938, "learning_rate": 9.986287517222659e-06, "loss": 0.1848, "step": 1813 }, { "epoch": 0.052920240387420506, "grad_norm": 1.0343671450226655, "learning_rate": 9.986252530139016e-06, "loss": 0.1938, "step": 1814 }, { "epoch": 0.052949413618064066, "grad_norm": 1.1083047523984533, "learning_rate": 9.9862174985393e-06, "loss": 0.2002, "step": 1815 }, { "epoch": 0.052978586848707626, "grad_norm": 1.0550743876531532, "learning_rate": 9.986182422423825e-06, "loss": 0.1892, "step": 1816 }, { "epoch": 0.053007760079351186, "grad_norm": 1.0801337910542261, "learning_rate": 9.986147301792904e-06, "loss": 0.2391, "step": 1817 }, { "epoch": 0.05303693330999475, "grad_norm": 1.023483843297668, "learning_rate": 9.986112136646849e-06, "loss": 0.2415, "step": 1818 }, { "epoch": 0.053066106540638314, "grad_norm": 1.0004524211652035, "learning_rate": 9.986076926985975e-06, "loss": 0.2269, "step": 1819 }, { "epoch": 0.053095279771281874, "grad_norm": 1.0348783975430784, "learning_rate": 9.986041672810595e-06, "loss": 0.2063, "step": 1820 }, { "epoch": 0.053124453001925434, "grad_norm": 1.106985517179938, "learning_rate": 9.98600637412103e-06, "loss": 0.2134, "step": 1821 }, { "epoch": 0.053153626232568994, "grad_norm": 0.976414008602237, "learning_rate": 9.985971030917586e-06, "loss": 0.2133, "step": 1822 }, { "epoch": 0.053182799463212554, "grad_norm": 1.011429960505763, "learning_rate": 9.985935643200584e-06, "loss": 0.2085, "step": 1823 }, { "epoch": 0.053211972693856115, "grad_norm": 0.8787937997521821, "learning_rate": 9.985900210970339e-06, "loss": 0.1981, "step": 1824 }, { "epoch": 0.05324114592449968, "grad_norm": 0.9403096401930826, "learning_rate": 9.985864734227168e-06, "loss": 0.2079, "step": 1825 }, { "epoch": 0.05327031915514324, "grad_norm": 0.8821673620918404, "learning_rate": 9.985829212971386e-06, "loss": 0.2124, "step": 1826 }, { "epoch": 0.0532994923857868, "grad_norm": 0.8829264311916839, "learning_rate": 9.98579364720331e-06, "loss": 0.1871, "step": 1827 }, { "epoch": 0.05332866561643036, "grad_norm": 0.9244431599258233, "learning_rate": 9.98575803692326e-06, "loss": 0.187, "step": 1828 }, { "epoch": 0.05335783884707392, "grad_norm": 1.131042655378558, "learning_rate": 9.985722382131554e-06, "loss": 0.2301, "step": 1829 }, { "epoch": 0.05338701207771749, "grad_norm": 1.147087812053146, "learning_rate": 9.985686682828506e-06, "loss": 0.2075, "step": 1830 }, { "epoch": 0.05341618530836105, "grad_norm": 1.2341563445994703, "learning_rate": 9.985650939014438e-06, "loss": 0.2015, "step": 1831 }, { "epoch": 0.05344535853900461, "grad_norm": 0.9085919110664943, "learning_rate": 9.98561515068967e-06, "loss": 0.2337, "step": 1832 }, { "epoch": 0.05347453176964817, "grad_norm": 1.0343138810120596, "learning_rate": 9.98557931785452e-06, "loss": 0.2218, "step": 1833 }, { "epoch": 0.05350370500029173, "grad_norm": 0.9640557208643279, "learning_rate": 9.985543440509305e-06, "loss": 0.1899, "step": 1834 }, { "epoch": 0.05353287823093529, "grad_norm": 0.9833535393271521, "learning_rate": 9.985507518654352e-06, "loss": 0.1991, "step": 1835 }, { "epoch": 0.05356205146157886, "grad_norm": 0.9299675607443383, "learning_rate": 9.985471552289976e-06, "loss": 0.2165, "step": 1836 }, { "epoch": 0.05359122469222242, "grad_norm": 0.9548816897494452, "learning_rate": 9.985435541416499e-06, "loss": 0.1963, "step": 1837 }, { "epoch": 0.05362039792286598, "grad_norm": 1.0268622050061784, "learning_rate": 9.985399486034246e-06, "loss": 0.2194, "step": 1838 }, { "epoch": 0.05364957115350954, "grad_norm": 1.2219135992154067, "learning_rate": 9.985363386143537e-06, "loss": 0.2047, "step": 1839 }, { "epoch": 0.0536787443841531, "grad_norm": 0.8638113387730253, "learning_rate": 9.985327241744692e-06, "loss": 0.1953, "step": 1840 }, { "epoch": 0.053707917614796666, "grad_norm": 0.8408932603755563, "learning_rate": 9.985291052838035e-06, "loss": 0.2041, "step": 1841 }, { "epoch": 0.053737090845440226, "grad_norm": 1.0482465033233304, "learning_rate": 9.985254819423891e-06, "loss": 0.208, "step": 1842 }, { "epoch": 0.053766264076083786, "grad_norm": 0.9725931765566865, "learning_rate": 9.985218541502581e-06, "loss": 0.2129, "step": 1843 }, { "epoch": 0.053795437306727346, "grad_norm": 0.8500853960485782, "learning_rate": 9.98518221907443e-06, "loss": 0.1917, "step": 1844 }, { "epoch": 0.05382461053737091, "grad_norm": 0.9499032113249442, "learning_rate": 9.985145852139763e-06, "loss": 0.1876, "step": 1845 }, { "epoch": 0.05385378376801447, "grad_norm": 1.198222177096465, "learning_rate": 9.985109440698903e-06, "loss": 0.2062, "step": 1846 }, { "epoch": 0.053882956998658034, "grad_norm": 1.1844301846482599, "learning_rate": 9.985072984752177e-06, "loss": 0.2536, "step": 1847 }, { "epoch": 0.053912130229301594, "grad_norm": 0.9866838497921299, "learning_rate": 9.985036484299909e-06, "loss": 0.21, "step": 1848 }, { "epoch": 0.053941303459945154, "grad_norm": 0.8089665687431796, "learning_rate": 9.984999939342426e-06, "loss": 0.1833, "step": 1849 }, { "epoch": 0.053970476690588715, "grad_norm": 1.0039516026030095, "learning_rate": 9.984963349880053e-06, "loss": 0.2169, "step": 1850 }, { "epoch": 0.053999649921232275, "grad_norm": 0.7919274387544472, "learning_rate": 9.984926715913115e-06, "loss": 0.2059, "step": 1851 }, { "epoch": 0.05402882315187584, "grad_norm": 0.9087935115881111, "learning_rate": 9.984890037441944e-06, "loss": 0.1813, "step": 1852 }, { "epoch": 0.0540579963825194, "grad_norm": 0.947217691867987, "learning_rate": 9.984853314466865e-06, "loss": 0.2081, "step": 1853 }, { "epoch": 0.05408716961316296, "grad_norm": 0.9038864990631458, "learning_rate": 9.984816546988202e-06, "loss": 0.2045, "step": 1854 }, { "epoch": 0.05411634284380652, "grad_norm": 1.0652095810031903, "learning_rate": 9.984779735006291e-06, "loss": 0.2084, "step": 1855 }, { "epoch": 0.05414551607445008, "grad_norm": 0.8575839585957018, "learning_rate": 9.984742878521456e-06, "loss": 0.2194, "step": 1856 }, { "epoch": 0.05417468930509364, "grad_norm": 1.177202848254806, "learning_rate": 9.984705977534024e-06, "loss": 0.233, "step": 1857 }, { "epoch": 0.05420386253573721, "grad_norm": 0.9385058659291741, "learning_rate": 9.98466903204433e-06, "loss": 0.2068, "step": 1858 }, { "epoch": 0.05423303576638077, "grad_norm": 0.9191219594294061, "learning_rate": 9.984632042052697e-06, "loss": 0.1858, "step": 1859 }, { "epoch": 0.05426220899702433, "grad_norm": 1.0403206931395317, "learning_rate": 9.984595007559463e-06, "loss": 0.2072, "step": 1860 }, { "epoch": 0.05429138222766789, "grad_norm": 1.03842369920911, "learning_rate": 9.984557928564952e-06, "loss": 0.1951, "step": 1861 }, { "epoch": 0.05432055545831145, "grad_norm": 1.058675781860113, "learning_rate": 9.984520805069499e-06, "loss": 0.2315, "step": 1862 }, { "epoch": 0.05434972868895502, "grad_norm": 0.8732767947654804, "learning_rate": 9.984483637073435e-06, "loss": 0.2074, "step": 1863 }, { "epoch": 0.05437890191959858, "grad_norm": 1.0706967745492497, "learning_rate": 9.984446424577089e-06, "loss": 0.2123, "step": 1864 }, { "epoch": 0.05440807515024214, "grad_norm": 1.030682987315947, "learning_rate": 9.984409167580795e-06, "loss": 0.2286, "step": 1865 }, { "epoch": 0.0544372483808857, "grad_norm": 1.0252946488237773, "learning_rate": 9.984371866084888e-06, "loss": 0.2053, "step": 1866 }, { "epoch": 0.05446642161152926, "grad_norm": 0.9362847529121195, "learning_rate": 9.984334520089698e-06, "loss": 0.1978, "step": 1867 }, { "epoch": 0.05449559484217282, "grad_norm": 1.3522443404635422, "learning_rate": 9.984297129595559e-06, "loss": 0.2195, "step": 1868 }, { "epoch": 0.054524768072816386, "grad_norm": 1.0720617236396515, "learning_rate": 9.984259694602805e-06, "loss": 0.2041, "step": 1869 }, { "epoch": 0.054553941303459946, "grad_norm": 1.0099605376182157, "learning_rate": 9.98422221511177e-06, "loss": 0.1878, "step": 1870 }, { "epoch": 0.054583114534103506, "grad_norm": 0.9798177316702981, "learning_rate": 9.984184691122789e-06, "loss": 0.208, "step": 1871 }, { "epoch": 0.05461228776474707, "grad_norm": 1.0716844630049964, "learning_rate": 9.984147122636197e-06, "loss": 0.2119, "step": 1872 }, { "epoch": 0.05464146099539063, "grad_norm": 0.8567982002026963, "learning_rate": 9.98410950965233e-06, "loss": 0.1929, "step": 1873 }, { "epoch": 0.054670634226034194, "grad_norm": 0.9096358723226816, "learning_rate": 9.984071852171522e-06, "loss": 0.1963, "step": 1874 }, { "epoch": 0.054699807456677754, "grad_norm": 1.0511720503575723, "learning_rate": 9.984034150194111e-06, "loss": 0.2366, "step": 1875 }, { "epoch": 0.054728980687321314, "grad_norm": 0.9256344022278301, "learning_rate": 9.983996403720433e-06, "loss": 0.1963, "step": 1876 }, { "epoch": 0.054758153917964875, "grad_norm": 0.9009449825233125, "learning_rate": 9.983958612750823e-06, "loss": 0.1908, "step": 1877 }, { "epoch": 0.054787327148608435, "grad_norm": 0.8809903046256545, "learning_rate": 9.983920777285623e-06, "loss": 0.192, "step": 1878 }, { "epoch": 0.054816500379251995, "grad_norm": 0.9111034618443862, "learning_rate": 9.983882897325168e-06, "loss": 0.1809, "step": 1879 }, { "epoch": 0.05484567360989556, "grad_norm": 0.9654746262667905, "learning_rate": 9.983844972869795e-06, "loss": 0.2326, "step": 1880 }, { "epoch": 0.05487484684053912, "grad_norm": 1.2852692308548157, "learning_rate": 9.983807003919843e-06, "loss": 0.2487, "step": 1881 }, { "epoch": 0.05490402007118268, "grad_norm": 1.0916802151149374, "learning_rate": 9.983768990475653e-06, "loss": 0.2199, "step": 1882 }, { "epoch": 0.05493319330182624, "grad_norm": 1.436610939999952, "learning_rate": 9.983730932537563e-06, "loss": 0.2137, "step": 1883 }, { "epoch": 0.0549623665324698, "grad_norm": 1.0219366994511527, "learning_rate": 9.983692830105914e-06, "loss": 0.2131, "step": 1884 }, { "epoch": 0.05499153976311337, "grad_norm": 1.1247098446347263, "learning_rate": 9.983654683181044e-06, "loss": 0.1929, "step": 1885 }, { "epoch": 0.05502071299375693, "grad_norm": 1.1384705581143768, "learning_rate": 9.983616491763295e-06, "loss": 0.2042, "step": 1886 }, { "epoch": 0.05504988622440049, "grad_norm": 1.0535108093057595, "learning_rate": 9.983578255853005e-06, "loss": 0.1956, "step": 1887 }, { "epoch": 0.05507905945504405, "grad_norm": 1.0663686940223367, "learning_rate": 9.983539975450522e-06, "loss": 0.2327, "step": 1888 }, { "epoch": 0.05510823268568761, "grad_norm": 1.0476600776117433, "learning_rate": 9.983501650556182e-06, "loss": 0.2326, "step": 1889 }, { "epoch": 0.05513740591633117, "grad_norm": 0.990293055374146, "learning_rate": 9.98346328117033e-06, "loss": 0.1957, "step": 1890 }, { "epoch": 0.05516657914697474, "grad_norm": 0.9188685309373221, "learning_rate": 9.983424867293305e-06, "loss": 0.2152, "step": 1891 }, { "epoch": 0.0551957523776183, "grad_norm": 1.0898344218818747, "learning_rate": 9.983386408925454e-06, "loss": 0.1892, "step": 1892 }, { "epoch": 0.05522492560826186, "grad_norm": 1.0735068947886786, "learning_rate": 9.983347906067119e-06, "loss": 0.1996, "step": 1893 }, { "epoch": 0.05525409883890542, "grad_norm": 1.012606888062534, "learning_rate": 9.983309358718642e-06, "loss": 0.2004, "step": 1894 }, { "epoch": 0.05528327206954898, "grad_norm": 1.1319489368529263, "learning_rate": 9.98327076688037e-06, "loss": 0.2186, "step": 1895 }, { "epoch": 0.055312445300192546, "grad_norm": 1.2018814733508303, "learning_rate": 9.983232130552646e-06, "loss": 0.1927, "step": 1896 }, { "epoch": 0.055341618530836106, "grad_norm": 1.0115475359025725, "learning_rate": 9.983193449735817e-06, "loss": 0.2108, "step": 1897 }, { "epoch": 0.05537079176147967, "grad_norm": 1.0588049836102444, "learning_rate": 9.983154724430224e-06, "loss": 0.2062, "step": 1898 }, { "epoch": 0.05539996499212323, "grad_norm": 0.8387924635726959, "learning_rate": 9.983115954636215e-06, "loss": 0.1824, "step": 1899 }, { "epoch": 0.05542913822276679, "grad_norm": 1.1120552835005346, "learning_rate": 9.983077140354138e-06, "loss": 0.2061, "step": 1900 }, { "epoch": 0.055458311453410354, "grad_norm": 0.8382455374929305, "learning_rate": 9.983038281584338e-06, "loss": 0.1873, "step": 1901 }, { "epoch": 0.055487484684053914, "grad_norm": 1.1235872104607034, "learning_rate": 9.98299937832716e-06, "loss": 0.195, "step": 1902 }, { "epoch": 0.055516657914697475, "grad_norm": 0.8194562048653667, "learning_rate": 9.982960430582954e-06, "loss": 0.1975, "step": 1903 }, { "epoch": 0.055545831145341035, "grad_norm": 0.9193206952649976, "learning_rate": 9.982921438352067e-06, "loss": 0.2097, "step": 1904 }, { "epoch": 0.055575004375984595, "grad_norm": 0.8498663432478195, "learning_rate": 9.982882401634846e-06, "loss": 0.1953, "step": 1905 }, { "epoch": 0.055604177606628155, "grad_norm": 0.861597356877587, "learning_rate": 9.98284332043164e-06, "loss": 0.1863, "step": 1906 }, { "epoch": 0.05563335083727172, "grad_norm": 0.9800120746903566, "learning_rate": 9.982804194742801e-06, "loss": 0.1998, "step": 1907 }, { "epoch": 0.05566252406791528, "grad_norm": 0.7502622720531859, "learning_rate": 9.982765024568675e-06, "loss": 0.1987, "step": 1908 }, { "epoch": 0.05569169729855884, "grad_norm": 1.000226061863958, "learning_rate": 9.982725809909611e-06, "loss": 0.2327, "step": 1909 }, { "epoch": 0.0557208705292024, "grad_norm": 1.0521598794386595, "learning_rate": 9.98268655076596e-06, "loss": 0.2027, "step": 1910 }, { "epoch": 0.05575004375984596, "grad_norm": 0.8616711043293097, "learning_rate": 9.982647247138075e-06, "loss": 0.2032, "step": 1911 }, { "epoch": 0.05577921699048953, "grad_norm": 0.9758956868989882, "learning_rate": 9.982607899026302e-06, "loss": 0.1951, "step": 1912 }, { "epoch": 0.05580839022113309, "grad_norm": 0.7968087778332504, "learning_rate": 9.982568506430998e-06, "loss": 0.1932, "step": 1913 }, { "epoch": 0.05583756345177665, "grad_norm": 1.010309091247346, "learning_rate": 9.982529069352509e-06, "loss": 0.2081, "step": 1914 }, { "epoch": 0.05586673668242021, "grad_norm": 1.0235427591374133, "learning_rate": 9.982489587791192e-06, "loss": 0.1811, "step": 1915 }, { "epoch": 0.05589590991306377, "grad_norm": 0.8834228764818954, "learning_rate": 9.982450061747397e-06, "loss": 0.1919, "step": 1916 }, { "epoch": 0.05592508314370733, "grad_norm": 1.1209481285353378, "learning_rate": 9.982410491221477e-06, "loss": 0.1993, "step": 1917 }, { "epoch": 0.0559542563743509, "grad_norm": 1.0146252281617338, "learning_rate": 9.982370876213785e-06, "loss": 0.2142, "step": 1918 }, { "epoch": 0.05598342960499446, "grad_norm": 1.0691181834273153, "learning_rate": 9.982331216724676e-06, "loss": 0.1805, "step": 1919 }, { "epoch": 0.05601260283563802, "grad_norm": 1.2740121476083013, "learning_rate": 9.982291512754503e-06, "loss": 0.2071, "step": 1920 }, { "epoch": 0.05604177606628158, "grad_norm": 0.9348187850907693, "learning_rate": 9.98225176430362e-06, "loss": 0.209, "step": 1921 }, { "epoch": 0.05607094929692514, "grad_norm": 1.1066368739997507, "learning_rate": 9.982211971372384e-06, "loss": 0.1875, "step": 1922 }, { "epoch": 0.056100122527568706, "grad_norm": 0.9388244848923358, "learning_rate": 9.982172133961148e-06, "loss": 0.1922, "step": 1923 }, { "epoch": 0.056129295758212266, "grad_norm": 1.34356212438608, "learning_rate": 9.982132252070271e-06, "loss": 0.2184, "step": 1924 }, { "epoch": 0.05615846898885583, "grad_norm": 1.015099583720631, "learning_rate": 9.982092325700103e-06, "loss": 0.2274, "step": 1925 }, { "epoch": 0.05618764221949939, "grad_norm": 0.9407140103948982, "learning_rate": 9.982052354851007e-06, "loss": 0.1928, "step": 1926 }, { "epoch": 0.05621681545014295, "grad_norm": 1.2565158444534883, "learning_rate": 9.982012339523335e-06, "loss": 0.2387, "step": 1927 }, { "epoch": 0.05624598868078651, "grad_norm": 0.9817298152237669, "learning_rate": 9.981972279717446e-06, "loss": 0.2045, "step": 1928 }, { "epoch": 0.056275161911430074, "grad_norm": 1.0505208017484677, "learning_rate": 9.981932175433697e-06, "loss": 0.1913, "step": 1929 }, { "epoch": 0.056304335142073635, "grad_norm": 1.011894686119928, "learning_rate": 9.981892026672449e-06, "loss": 0.2249, "step": 1930 }, { "epoch": 0.056333508372717195, "grad_norm": 0.9690974288239764, "learning_rate": 9.981851833434058e-06, "loss": 0.1974, "step": 1931 }, { "epoch": 0.056362681603360755, "grad_norm": 1.0278293284259004, "learning_rate": 9.981811595718882e-06, "loss": 0.2146, "step": 1932 }, { "epoch": 0.056391854834004315, "grad_norm": 0.8300068746950454, "learning_rate": 9.981771313527283e-06, "loss": 0.1974, "step": 1933 }, { "epoch": 0.05642102806464788, "grad_norm": 1.04244037260496, "learning_rate": 9.981730986859617e-06, "loss": 0.208, "step": 1934 }, { "epoch": 0.05645020129529144, "grad_norm": 0.8649873612347396, "learning_rate": 9.981690615716246e-06, "loss": 0.2176, "step": 1935 }, { "epoch": 0.056479374525935, "grad_norm": 1.0223505095805647, "learning_rate": 9.98165020009753e-06, "loss": 0.1971, "step": 1936 }, { "epoch": 0.05650854775657856, "grad_norm": 0.9045741861785711, "learning_rate": 9.981609740003833e-06, "loss": 0.1864, "step": 1937 }, { "epoch": 0.05653772098722212, "grad_norm": 1.0648583047941091, "learning_rate": 9.981569235435511e-06, "loss": 0.227, "step": 1938 }, { "epoch": 0.05656689421786568, "grad_norm": 0.9632099651165861, "learning_rate": 9.98152868639293e-06, "loss": 0.2101, "step": 1939 }, { "epoch": 0.05659606744850925, "grad_norm": 1.0210597468742788, "learning_rate": 9.981488092876448e-06, "loss": 0.2127, "step": 1940 }, { "epoch": 0.05662524067915281, "grad_norm": 1.0617786672123894, "learning_rate": 9.981447454886431e-06, "loss": 0.2293, "step": 1941 }, { "epoch": 0.05665441390979637, "grad_norm": 1.1119559740593121, "learning_rate": 9.981406772423238e-06, "loss": 0.2131, "step": 1942 }, { "epoch": 0.05668358714043993, "grad_norm": 0.9274663281988038, "learning_rate": 9.981366045487237e-06, "loss": 0.2134, "step": 1943 }, { "epoch": 0.05671276037108349, "grad_norm": 0.8160314094716041, "learning_rate": 9.981325274078788e-06, "loss": 0.1877, "step": 1944 }, { "epoch": 0.05674193360172706, "grad_norm": 0.9385816920754576, "learning_rate": 9.981284458198256e-06, "loss": 0.2127, "step": 1945 }, { "epoch": 0.05677110683237062, "grad_norm": 1.0282197362729375, "learning_rate": 9.981243597846006e-06, "loss": 0.2109, "step": 1946 }, { "epoch": 0.05680028006301418, "grad_norm": 0.8829812410880271, "learning_rate": 9.981202693022402e-06, "loss": 0.1819, "step": 1947 }, { "epoch": 0.05682945329365774, "grad_norm": 0.8247331199287998, "learning_rate": 9.98116174372781e-06, "loss": 0.1824, "step": 1948 }, { "epoch": 0.0568586265243013, "grad_norm": 1.0800299153360786, "learning_rate": 9.981120749962595e-06, "loss": 0.2071, "step": 1949 }, { "epoch": 0.05688779975494486, "grad_norm": 1.1559209610798193, "learning_rate": 9.981079711727123e-06, "loss": 0.2372, "step": 1950 }, { "epoch": 0.05691697298558843, "grad_norm": 1.0102993867748309, "learning_rate": 9.98103862902176e-06, "loss": 0.218, "step": 1951 }, { "epoch": 0.05694614621623199, "grad_norm": 1.0342717337758949, "learning_rate": 9.980997501846874e-06, "loss": 0.2478, "step": 1952 }, { "epoch": 0.05697531944687555, "grad_norm": 1.2275850937294275, "learning_rate": 9.98095633020283e-06, "loss": 0.2119, "step": 1953 }, { "epoch": 0.05700449267751911, "grad_norm": 1.0425157057129126, "learning_rate": 9.980915114089997e-06, "loss": 0.2492, "step": 1954 }, { "epoch": 0.05703366590816267, "grad_norm": 0.9505753262803261, "learning_rate": 9.980873853508744e-06, "loss": 0.1798, "step": 1955 }, { "epoch": 0.057062839138806234, "grad_norm": 0.8890408882605577, "learning_rate": 9.980832548459438e-06, "loss": 0.1912, "step": 1956 }, { "epoch": 0.057092012369449795, "grad_norm": 1.1234316354262521, "learning_rate": 9.980791198942449e-06, "loss": 0.2051, "step": 1957 }, { "epoch": 0.057121185600093355, "grad_norm": 0.8539045011047467, "learning_rate": 9.980749804958142e-06, "loss": 0.196, "step": 1958 }, { "epoch": 0.057150358830736915, "grad_norm": 1.0449744737509585, "learning_rate": 9.980708366506892e-06, "loss": 0.2136, "step": 1959 }, { "epoch": 0.057179532061380475, "grad_norm": 0.9131467350350049, "learning_rate": 9.980666883589066e-06, "loss": 0.2092, "step": 1960 }, { "epoch": 0.057208705292024035, "grad_norm": 1.0036185750520146, "learning_rate": 9.980625356205036e-06, "loss": 0.2002, "step": 1961 }, { "epoch": 0.0572378785226676, "grad_norm": 0.7412115243381714, "learning_rate": 9.980583784355171e-06, "loss": 0.1757, "step": 1962 }, { "epoch": 0.05726705175331116, "grad_norm": 1.0872784702101763, "learning_rate": 9.980542168039843e-06, "loss": 0.2324, "step": 1963 }, { "epoch": 0.05729622498395472, "grad_norm": 1.002135450712531, "learning_rate": 9.980500507259423e-06, "loss": 0.2205, "step": 1964 }, { "epoch": 0.05732539821459828, "grad_norm": 0.8774046598244668, "learning_rate": 9.980458802014285e-06, "loss": 0.1949, "step": 1965 }, { "epoch": 0.05735457144524184, "grad_norm": 0.9070400059442462, "learning_rate": 9.980417052304798e-06, "loss": 0.1959, "step": 1966 }, { "epoch": 0.05738374467588541, "grad_norm": 0.9288842557182622, "learning_rate": 9.98037525813134e-06, "loss": 0.2181, "step": 1967 }, { "epoch": 0.05741291790652897, "grad_norm": 0.9941150963868122, "learning_rate": 9.980333419494275e-06, "loss": 0.1934, "step": 1968 }, { "epoch": 0.05744209113717253, "grad_norm": 0.9205959032965471, "learning_rate": 9.980291536393985e-06, "loss": 0.1963, "step": 1969 }, { "epoch": 0.05747126436781609, "grad_norm": 0.9087053739773382, "learning_rate": 9.980249608830842e-06, "loss": 0.1938, "step": 1970 }, { "epoch": 0.05750043759845965, "grad_norm": 1.0001762011083941, "learning_rate": 9.980207636805218e-06, "loss": 0.2065, "step": 1971 }, { "epoch": 0.05752961082910321, "grad_norm": 0.9188547817314544, "learning_rate": 9.98016562031749e-06, "loss": 0.1981, "step": 1972 }, { "epoch": 0.05755878405974678, "grad_norm": 0.9671072125018545, "learning_rate": 9.980123559368032e-06, "loss": 0.2117, "step": 1973 }, { "epoch": 0.05758795729039034, "grad_norm": 0.9290917183858046, "learning_rate": 9.980081453957219e-06, "loss": 0.2168, "step": 1974 }, { "epoch": 0.0576171305210339, "grad_norm": 1.046498723225571, "learning_rate": 9.980039304085429e-06, "loss": 0.2031, "step": 1975 }, { "epoch": 0.05764630375167746, "grad_norm": 0.8340365718130629, "learning_rate": 9.979997109753035e-06, "loss": 0.1991, "step": 1976 }, { "epoch": 0.05767547698232102, "grad_norm": 0.942880536303, "learning_rate": 9.979954870960417e-06, "loss": 0.2031, "step": 1977 }, { "epoch": 0.05770465021296459, "grad_norm": 0.9021832572875198, "learning_rate": 9.97991258770795e-06, "loss": 0.1885, "step": 1978 }, { "epoch": 0.05773382344360815, "grad_norm": 1.0183555583589448, "learning_rate": 9.979870259996013e-06, "loss": 0.2325, "step": 1979 }, { "epoch": 0.05776299667425171, "grad_norm": 0.9307096496027774, "learning_rate": 9.979827887824983e-06, "loss": 0.2026, "step": 1980 }, { "epoch": 0.05779216990489527, "grad_norm": 0.891606564374665, "learning_rate": 9.979785471195238e-06, "loss": 0.1961, "step": 1981 }, { "epoch": 0.05782134313553883, "grad_norm": 0.8284938160218519, "learning_rate": 9.979743010107158e-06, "loss": 0.2038, "step": 1982 }, { "epoch": 0.05785051636618239, "grad_norm": 0.8671698559128383, "learning_rate": 9.979700504561118e-06, "loss": 0.2196, "step": 1983 }, { "epoch": 0.057879689596825955, "grad_norm": 0.795225896992179, "learning_rate": 9.979657954557504e-06, "loss": 0.2025, "step": 1984 }, { "epoch": 0.057908862827469515, "grad_norm": 0.8283274741978116, "learning_rate": 9.97961536009669e-06, "loss": 0.2174, "step": 1985 }, { "epoch": 0.057938036058113075, "grad_norm": 0.9335261797634911, "learning_rate": 9.97957272117906e-06, "loss": 0.2038, "step": 1986 }, { "epoch": 0.057967209288756635, "grad_norm": 0.7981795817685567, "learning_rate": 9.979530037804995e-06, "loss": 0.1917, "step": 1987 }, { "epoch": 0.057996382519400196, "grad_norm": 0.8962893771671759, "learning_rate": 9.979487309974874e-06, "loss": 0.2258, "step": 1988 }, { "epoch": 0.05802555575004376, "grad_norm": 0.8931335398606022, "learning_rate": 9.979444537689078e-06, "loss": 0.2107, "step": 1989 }, { "epoch": 0.05805472898068732, "grad_norm": 0.9052813707407674, "learning_rate": 9.979401720947989e-06, "loss": 0.2215, "step": 1990 }, { "epoch": 0.05808390221133088, "grad_norm": 0.91560999339626, "learning_rate": 9.979358859751994e-06, "loss": 0.2007, "step": 1991 }, { "epoch": 0.05811307544197444, "grad_norm": 0.9598330084334362, "learning_rate": 9.979315954101466e-06, "loss": 0.2168, "step": 1992 }, { "epoch": 0.058142248672618004, "grad_norm": 1.0927238732461215, "learning_rate": 9.979273003996798e-06, "loss": 0.2151, "step": 1993 }, { "epoch": 0.05817142190326157, "grad_norm": 1.4720458308251883, "learning_rate": 9.979230009438368e-06, "loss": 0.1992, "step": 1994 }, { "epoch": 0.05820059513390513, "grad_norm": 1.2242446883341342, "learning_rate": 9.979186970426562e-06, "loss": 0.2135, "step": 1995 }, { "epoch": 0.05822976836454869, "grad_norm": 1.0233646807064443, "learning_rate": 9.979143886961762e-06, "loss": 0.2088, "step": 1996 }, { "epoch": 0.05825894159519225, "grad_norm": 0.8835682418967595, "learning_rate": 9.979100759044355e-06, "loss": 0.2071, "step": 1997 }, { "epoch": 0.05828811482583581, "grad_norm": 1.2947625093001198, "learning_rate": 9.979057586674724e-06, "loss": 0.2396, "step": 1998 }, { "epoch": 0.05831728805647937, "grad_norm": 1.1049690907290772, "learning_rate": 9.979014369853257e-06, "loss": 0.2177, "step": 1999 }, { "epoch": 0.05834646128712294, "grad_norm": 0.9824809590555131, "learning_rate": 9.978971108580336e-06, "loss": 0.2488, "step": 2000 }, { "epoch": 0.0583756345177665, "grad_norm": 0.870881960356909, "learning_rate": 9.978927802856351e-06, "loss": 0.1933, "step": 2001 }, { "epoch": 0.05840480774841006, "grad_norm": 1.0664186432210303, "learning_rate": 9.978884452681688e-06, "loss": 0.216, "step": 2002 }, { "epoch": 0.05843398097905362, "grad_norm": 0.9522652056931812, "learning_rate": 9.978841058056731e-06, "loss": 0.2212, "step": 2003 }, { "epoch": 0.05846315420969718, "grad_norm": 0.9962790246780536, "learning_rate": 9.978797618981871e-06, "loss": 0.2167, "step": 2004 }, { "epoch": 0.05849232744034075, "grad_norm": 0.8910113731013437, "learning_rate": 9.978754135457495e-06, "loss": 0.2063, "step": 2005 }, { "epoch": 0.05852150067098431, "grad_norm": 1.054217756380746, "learning_rate": 9.97871060748399e-06, "loss": 0.2163, "step": 2006 }, { "epoch": 0.05855067390162787, "grad_norm": 0.9946047777949233, "learning_rate": 9.978667035061744e-06, "loss": 0.1969, "step": 2007 }, { "epoch": 0.05857984713227143, "grad_norm": 1.0504870261393238, "learning_rate": 9.97862341819115e-06, "loss": 0.2329, "step": 2008 }, { "epoch": 0.05860902036291499, "grad_norm": 1.0364764713063785, "learning_rate": 9.978579756872592e-06, "loss": 0.2247, "step": 2009 }, { "epoch": 0.05863819359355855, "grad_norm": 1.1422669472235525, "learning_rate": 9.978536051106463e-06, "loss": 0.2159, "step": 2010 }, { "epoch": 0.058667366824202115, "grad_norm": 1.0130676558291927, "learning_rate": 9.978492300893153e-06, "loss": 0.2107, "step": 2011 }, { "epoch": 0.058696540054845675, "grad_norm": 1.0082381433490109, "learning_rate": 9.978448506233051e-06, "loss": 0.1724, "step": 2012 }, { "epoch": 0.058725713285489235, "grad_norm": 1.2784693655345292, "learning_rate": 9.978404667126551e-06, "loss": 0.2165, "step": 2013 }, { "epoch": 0.058754886516132795, "grad_norm": 1.0081354924071733, "learning_rate": 9.978360783574042e-06, "loss": 0.2168, "step": 2014 }, { "epoch": 0.058784059746776356, "grad_norm": 1.1363437190587646, "learning_rate": 9.978316855575916e-06, "loss": 0.2045, "step": 2015 }, { "epoch": 0.05881323297741992, "grad_norm": 1.0779278282124554, "learning_rate": 9.978272883132566e-06, "loss": 0.1864, "step": 2016 }, { "epoch": 0.05884240620806348, "grad_norm": 1.0006301243041265, "learning_rate": 9.978228866244383e-06, "loss": 0.2105, "step": 2017 }, { "epoch": 0.05887157943870704, "grad_norm": 0.8534965953262954, "learning_rate": 9.97818480491176e-06, "loss": 0.1791, "step": 2018 }, { "epoch": 0.0589007526693506, "grad_norm": 1.1380684561062862, "learning_rate": 9.978140699135096e-06, "loss": 0.1959, "step": 2019 }, { "epoch": 0.058929925899994164, "grad_norm": 0.9092644734509622, "learning_rate": 9.978096548914778e-06, "loss": 0.1901, "step": 2020 }, { "epoch": 0.058959099130637724, "grad_norm": 1.029610767674436, "learning_rate": 9.9780523542512e-06, "loss": 0.232, "step": 2021 }, { "epoch": 0.05898827236128129, "grad_norm": 0.9509463445738663, "learning_rate": 9.978008115144761e-06, "loss": 0.2083, "step": 2022 }, { "epoch": 0.05901744559192485, "grad_norm": 1.005289032997692, "learning_rate": 9.977963831595854e-06, "loss": 0.2071, "step": 2023 }, { "epoch": 0.05904661882256841, "grad_norm": 0.8294734518263973, "learning_rate": 9.977919503604874e-06, "loss": 0.207, "step": 2024 }, { "epoch": 0.05907579205321197, "grad_norm": 0.9621392304939402, "learning_rate": 9.977875131172217e-06, "loss": 0.2089, "step": 2025 }, { "epoch": 0.05910496528385553, "grad_norm": 0.8859118314173687, "learning_rate": 9.97783071429828e-06, "loss": 0.1857, "step": 2026 }, { "epoch": 0.0591341385144991, "grad_norm": 0.8735600781913359, "learning_rate": 9.977786252983457e-06, "loss": 0.2273, "step": 2027 }, { "epoch": 0.05916331174514266, "grad_norm": 0.9573591880266328, "learning_rate": 9.977741747228148e-06, "loss": 0.2202, "step": 2028 }, { "epoch": 0.05919248497578622, "grad_norm": 0.7455728674735858, "learning_rate": 9.977697197032748e-06, "loss": 0.2016, "step": 2029 }, { "epoch": 0.05922165820642978, "grad_norm": 0.8173184985776126, "learning_rate": 9.977652602397657e-06, "loss": 0.1896, "step": 2030 }, { "epoch": 0.05925083143707334, "grad_norm": 1.0818039921066034, "learning_rate": 9.977607963323271e-06, "loss": 0.1841, "step": 2031 }, { "epoch": 0.0592800046677169, "grad_norm": 1.0360598980075821, "learning_rate": 9.977563279809988e-06, "loss": 0.199, "step": 2032 }, { "epoch": 0.05930917789836047, "grad_norm": 0.8977184522135336, "learning_rate": 9.97751855185821e-06, "loss": 0.2106, "step": 2033 }, { "epoch": 0.05933835112900403, "grad_norm": 1.03659852654072, "learning_rate": 9.977473779468334e-06, "loss": 0.2273, "step": 2034 }, { "epoch": 0.05936752435964759, "grad_norm": 1.080838841640869, "learning_rate": 9.977428962640761e-06, "loss": 0.1978, "step": 2035 }, { "epoch": 0.05939669759029115, "grad_norm": 0.9819375788267134, "learning_rate": 9.977384101375888e-06, "loss": 0.2239, "step": 2036 }, { "epoch": 0.05942587082093471, "grad_norm": 0.7784810299083599, "learning_rate": 9.97733919567412e-06, "loss": 0.1818, "step": 2037 }, { "epoch": 0.059455044051578275, "grad_norm": 1.072008696160696, "learning_rate": 9.977294245535856e-06, "loss": 0.2055, "step": 2038 }, { "epoch": 0.059484217282221835, "grad_norm": 1.0240489608288772, "learning_rate": 9.977249250961499e-06, "loss": 0.1994, "step": 2039 }, { "epoch": 0.059513390512865395, "grad_norm": 0.8617751983860222, "learning_rate": 9.977204211951446e-06, "loss": 0.2183, "step": 2040 }, { "epoch": 0.059542563743508956, "grad_norm": 0.8679195463876015, "learning_rate": 9.977159128506102e-06, "loss": 0.2032, "step": 2041 }, { "epoch": 0.059571736974152516, "grad_norm": 1.0049132936024472, "learning_rate": 9.97711400062587e-06, "loss": 0.1828, "step": 2042 }, { "epoch": 0.059600910204796076, "grad_norm": 1.000480612387385, "learning_rate": 9.977068828311153e-06, "loss": 0.1925, "step": 2043 }, { "epoch": 0.05963008343543964, "grad_norm": 1.046292525768253, "learning_rate": 9.977023611562353e-06, "loss": 0.2182, "step": 2044 }, { "epoch": 0.0596592566660832, "grad_norm": 1.143336121358048, "learning_rate": 9.976978350379874e-06, "loss": 0.2134, "step": 2045 }, { "epoch": 0.05968842989672676, "grad_norm": 1.1360663591828706, "learning_rate": 9.97693304476412e-06, "loss": 0.1778, "step": 2046 }, { "epoch": 0.059717603127370324, "grad_norm": 0.936331408856632, "learning_rate": 9.976887694715499e-06, "loss": 0.1715, "step": 2047 }, { "epoch": 0.059746776358013884, "grad_norm": 0.8936052799897413, "learning_rate": 9.976842300234408e-06, "loss": 0.2057, "step": 2048 }, { "epoch": 0.05977594958865745, "grad_norm": 1.1175970002868079, "learning_rate": 9.976796861321261e-06, "loss": 0.2327, "step": 2049 }, { "epoch": 0.05980512281930101, "grad_norm": 1.231150848434283, "learning_rate": 9.976751377976457e-06, "loss": 0.2057, "step": 2050 }, { "epoch": 0.05983429604994457, "grad_norm": 1.2699398517881084, "learning_rate": 9.976705850200406e-06, "loss": 0.2203, "step": 2051 }, { "epoch": 0.05986346928058813, "grad_norm": 1.0646876206759452, "learning_rate": 9.976660277993512e-06, "loss": 0.1984, "step": 2052 }, { "epoch": 0.05989264251123169, "grad_norm": 0.8956860862984926, "learning_rate": 9.976614661356185e-06, "loss": 0.2195, "step": 2053 }, { "epoch": 0.05992181574187525, "grad_norm": 0.8997181148171646, "learning_rate": 9.976569000288829e-06, "loss": 0.1961, "step": 2054 }, { "epoch": 0.05995098897251882, "grad_norm": 1.2379573760388698, "learning_rate": 9.976523294791853e-06, "loss": 0.2122, "step": 2055 }, { "epoch": 0.05998016220316238, "grad_norm": 0.9920211167696346, "learning_rate": 9.976477544865665e-06, "loss": 0.2087, "step": 2056 }, { "epoch": 0.06000933543380594, "grad_norm": 1.0675356660624904, "learning_rate": 9.976431750510676e-06, "loss": 0.1898, "step": 2057 }, { "epoch": 0.0600385086644495, "grad_norm": 1.0178051082291153, "learning_rate": 9.976385911727288e-06, "loss": 0.1789, "step": 2058 }, { "epoch": 0.06006768189509306, "grad_norm": 1.125079253882043, "learning_rate": 9.976340028515919e-06, "loss": 0.2228, "step": 2059 }, { "epoch": 0.06009685512573663, "grad_norm": 1.082058563289299, "learning_rate": 9.97629410087697e-06, "loss": 0.2186, "step": 2060 }, { "epoch": 0.06012602835638019, "grad_norm": 1.0186351006469425, "learning_rate": 9.976248128810857e-06, "loss": 0.2472, "step": 2061 }, { "epoch": 0.06015520158702375, "grad_norm": 0.831530272410781, "learning_rate": 9.97620211231799e-06, "loss": 0.194, "step": 2062 }, { "epoch": 0.06018437481766731, "grad_norm": 1.1083999837317409, "learning_rate": 9.976156051398777e-06, "loss": 0.184, "step": 2063 }, { "epoch": 0.06021354804831087, "grad_norm": 1.3425873591207818, "learning_rate": 9.97610994605363e-06, "loss": 0.2222, "step": 2064 }, { "epoch": 0.06024272127895443, "grad_norm": 1.0101926257248675, "learning_rate": 9.976063796282963e-06, "loss": 0.1892, "step": 2065 }, { "epoch": 0.060271894509597995, "grad_norm": 0.9631860566449668, "learning_rate": 9.976017602087184e-06, "loss": 0.1887, "step": 2066 }, { "epoch": 0.060301067740241555, "grad_norm": 1.1055800441615407, "learning_rate": 9.97597136346671e-06, "loss": 0.2246, "step": 2067 }, { "epoch": 0.060330240970885116, "grad_norm": 0.8726134047829306, "learning_rate": 9.97592508042195e-06, "loss": 0.2035, "step": 2068 }, { "epoch": 0.060359414201528676, "grad_norm": 0.934936738990123, "learning_rate": 9.97587875295332e-06, "loss": 0.2137, "step": 2069 }, { "epoch": 0.060388587432172236, "grad_norm": 0.9841407267817264, "learning_rate": 9.975832381061232e-06, "loss": 0.2112, "step": 2070 }, { "epoch": 0.0604177606628158, "grad_norm": 0.9152843445444, "learning_rate": 9.9757859647461e-06, "loss": 0.2042, "step": 2071 }, { "epoch": 0.06044693389345936, "grad_norm": 1.0854987493128536, "learning_rate": 9.975739504008338e-06, "loss": 0.2077, "step": 2072 }, { "epoch": 0.060476107124102924, "grad_norm": 0.8586813028540773, "learning_rate": 9.975692998848363e-06, "loss": 0.1957, "step": 2073 }, { "epoch": 0.060505280354746484, "grad_norm": 0.9470440215588506, "learning_rate": 9.975646449266588e-06, "loss": 0.216, "step": 2074 }, { "epoch": 0.060534453585390044, "grad_norm": 0.9768831241799613, "learning_rate": 9.97559985526343e-06, "loss": 0.2233, "step": 2075 }, { "epoch": 0.060563626816033604, "grad_norm": 0.763257404956762, "learning_rate": 9.975553216839302e-06, "loss": 0.1833, "step": 2076 }, { "epoch": 0.06059280004667717, "grad_norm": 0.8869851474454065, "learning_rate": 9.975506533994625e-06, "loss": 0.2049, "step": 2077 }, { "epoch": 0.06062197327732073, "grad_norm": 1.0869497748315717, "learning_rate": 9.975459806729813e-06, "loss": 0.2135, "step": 2078 }, { "epoch": 0.06065114650796429, "grad_norm": 1.1255167613899464, "learning_rate": 9.975413035045283e-06, "loss": 0.1918, "step": 2079 }, { "epoch": 0.06068031973860785, "grad_norm": 0.9134421328922334, "learning_rate": 9.975366218941452e-06, "loss": 0.2113, "step": 2080 }, { "epoch": 0.06070949296925141, "grad_norm": 0.8104014621563806, "learning_rate": 9.975319358418742e-06, "loss": 0.2129, "step": 2081 }, { "epoch": 0.06073866619989498, "grad_norm": 1.000356659589392, "learning_rate": 9.975272453477566e-06, "loss": 0.2514, "step": 2082 }, { "epoch": 0.06076783943053854, "grad_norm": 0.9025457848077182, "learning_rate": 9.975225504118346e-06, "loss": 0.2025, "step": 2083 }, { "epoch": 0.0607970126611821, "grad_norm": 0.9106535213473448, "learning_rate": 9.975178510341502e-06, "loss": 0.2402, "step": 2084 }, { "epoch": 0.06082618589182566, "grad_norm": 0.9690881799970482, "learning_rate": 9.97513147214745e-06, "loss": 0.222, "step": 2085 }, { "epoch": 0.06085535912246922, "grad_norm": 1.1146932688117845, "learning_rate": 9.975084389536612e-06, "loss": 0.1889, "step": 2086 }, { "epoch": 0.06088453235311279, "grad_norm": 1.213574459301282, "learning_rate": 9.975037262509408e-06, "loss": 0.1824, "step": 2087 }, { "epoch": 0.06091370558375635, "grad_norm": 1.068821129367693, "learning_rate": 9.974990091066258e-06, "loss": 0.2122, "step": 2088 }, { "epoch": 0.06094287881439991, "grad_norm": 0.9554917426061998, "learning_rate": 9.974942875207587e-06, "loss": 0.2019, "step": 2089 }, { "epoch": 0.06097205204504347, "grad_norm": 0.8726338411025051, "learning_rate": 9.974895614933814e-06, "loss": 0.2087, "step": 2090 }, { "epoch": 0.06100122527568703, "grad_norm": 1.0445440885430617, "learning_rate": 9.974848310245357e-06, "loss": 0.2152, "step": 2091 }, { "epoch": 0.06103039850633059, "grad_norm": 0.9555721092593108, "learning_rate": 9.974800961142644e-06, "loss": 0.2054, "step": 2092 }, { "epoch": 0.061059571736974155, "grad_norm": 1.143783034132988, "learning_rate": 9.974753567626095e-06, "loss": 0.2132, "step": 2093 }, { "epoch": 0.061088744967617715, "grad_norm": 0.9620146347135539, "learning_rate": 9.974706129696134e-06, "loss": 0.2086, "step": 2094 }, { "epoch": 0.061117918198261276, "grad_norm": 1.1374169372691725, "learning_rate": 9.974658647353183e-06, "loss": 0.2216, "step": 2095 }, { "epoch": 0.061147091428904836, "grad_norm": 1.1718917859515348, "learning_rate": 9.974611120597669e-06, "loss": 0.2303, "step": 2096 }, { "epoch": 0.061176264659548396, "grad_norm": 0.8625288177060566, "learning_rate": 9.974563549430015e-06, "loss": 0.2146, "step": 2097 }, { "epoch": 0.06120543789019196, "grad_norm": 0.8379967462769726, "learning_rate": 9.974515933850643e-06, "loss": 0.1687, "step": 2098 }, { "epoch": 0.06123461112083552, "grad_norm": 0.776264281700595, "learning_rate": 9.97446827385998e-06, "loss": 0.1764, "step": 2099 }, { "epoch": 0.061263784351479084, "grad_norm": 0.8726575437798988, "learning_rate": 9.974420569458453e-06, "loss": 0.2087, "step": 2100 }, { "epoch": 0.061292957582122644, "grad_norm": 0.8770632912289654, "learning_rate": 9.974372820646488e-06, "loss": 0.208, "step": 2101 }, { "epoch": 0.061322130812766204, "grad_norm": 0.8742580299006035, "learning_rate": 9.974325027424508e-06, "loss": 0.2045, "step": 2102 }, { "epoch": 0.061351304043409764, "grad_norm": 0.8363969625727758, "learning_rate": 9.974277189792942e-06, "loss": 0.1734, "step": 2103 }, { "epoch": 0.06138047727405333, "grad_norm": 0.9791199443738712, "learning_rate": 9.974229307752216e-06, "loss": 0.2064, "step": 2104 }, { "epoch": 0.06140965050469689, "grad_norm": 0.8474516665512822, "learning_rate": 9.97418138130276e-06, "loss": 0.2158, "step": 2105 }, { "epoch": 0.06143882373534045, "grad_norm": 1.1531210677607857, "learning_rate": 9.974133410444999e-06, "loss": 0.209, "step": 2106 }, { "epoch": 0.06146799696598401, "grad_norm": 1.1580693325474198, "learning_rate": 9.974085395179363e-06, "loss": 0.1918, "step": 2107 }, { "epoch": 0.06149717019662757, "grad_norm": 1.1289985282730919, "learning_rate": 9.974037335506279e-06, "loss": 0.2347, "step": 2108 }, { "epoch": 0.06152634342727114, "grad_norm": 1.0283689599134265, "learning_rate": 9.973989231426177e-06, "loss": 0.1917, "step": 2109 }, { "epoch": 0.0615555166579147, "grad_norm": 1.289355969406982, "learning_rate": 9.973941082939488e-06, "loss": 0.2044, "step": 2110 }, { "epoch": 0.06158468988855826, "grad_norm": 1.0637924130768446, "learning_rate": 9.97389289004664e-06, "loss": 0.2256, "step": 2111 }, { "epoch": 0.06161386311920182, "grad_norm": 0.8672140515563873, "learning_rate": 9.973844652748063e-06, "loss": 0.205, "step": 2112 }, { "epoch": 0.06164303634984538, "grad_norm": 1.0234528960065306, "learning_rate": 9.973796371044187e-06, "loss": 0.1795, "step": 2113 }, { "epoch": 0.06167220958048894, "grad_norm": 1.2992995590118168, "learning_rate": 9.973748044935446e-06, "loss": 0.2143, "step": 2114 }, { "epoch": 0.06170138281113251, "grad_norm": 1.0693041903325287, "learning_rate": 9.97369967442227e-06, "loss": 0.2112, "step": 2115 }, { "epoch": 0.06173055604177607, "grad_norm": 0.9951063543454214, "learning_rate": 9.973651259505091e-06, "loss": 0.2215, "step": 2116 }, { "epoch": 0.06175972927241963, "grad_norm": 1.1663975162463882, "learning_rate": 9.973602800184339e-06, "loss": 0.1981, "step": 2117 }, { "epoch": 0.06178890250306319, "grad_norm": 0.975352262670444, "learning_rate": 9.973554296460449e-06, "loss": 0.1943, "step": 2118 }, { "epoch": 0.06181807573370675, "grad_norm": 1.0415545024998738, "learning_rate": 9.973505748333853e-06, "loss": 0.1886, "step": 2119 }, { "epoch": 0.061847248964350315, "grad_norm": 0.801931958725028, "learning_rate": 9.973457155804988e-06, "loss": 0.1918, "step": 2120 }, { "epoch": 0.061876422194993876, "grad_norm": 1.215346746364988, "learning_rate": 9.973408518874281e-06, "loss": 0.2037, "step": 2121 }, { "epoch": 0.061905595425637436, "grad_norm": 1.0733016608293688, "learning_rate": 9.973359837542173e-06, "loss": 0.2045, "step": 2122 }, { "epoch": 0.061934768656280996, "grad_norm": 0.9902456673038482, "learning_rate": 9.973311111809094e-06, "loss": 0.2541, "step": 2123 }, { "epoch": 0.061963941886924556, "grad_norm": 1.0124167059750098, "learning_rate": 9.97326234167548e-06, "loss": 0.2081, "step": 2124 }, { "epoch": 0.061993115117568116, "grad_norm": 1.2004141251112712, "learning_rate": 9.97321352714177e-06, "loss": 0.2013, "step": 2125 }, { "epoch": 0.062022288348211684, "grad_norm": 0.9116694966161774, "learning_rate": 9.973164668208394e-06, "loss": 0.1998, "step": 2126 }, { "epoch": 0.062051461578855244, "grad_norm": 1.074177487403381, "learning_rate": 9.973115764875792e-06, "loss": 0.2186, "step": 2127 }, { "epoch": 0.062080634809498804, "grad_norm": 0.9602229025107512, "learning_rate": 9.973066817144398e-06, "loss": 0.2039, "step": 2128 }, { "epoch": 0.062109808040142364, "grad_norm": 0.8902293676577513, "learning_rate": 9.973017825014652e-06, "loss": 0.2004, "step": 2129 }, { "epoch": 0.062138981270785924, "grad_norm": 0.8580018720372313, "learning_rate": 9.972968788486992e-06, "loss": 0.1958, "step": 2130 }, { "epoch": 0.06216815450142949, "grad_norm": 1.041394416810979, "learning_rate": 9.972919707561852e-06, "loss": 0.2243, "step": 2131 }, { "epoch": 0.06219732773207305, "grad_norm": 1.021644935083276, "learning_rate": 9.97287058223967e-06, "loss": 0.2111, "step": 2132 }, { "epoch": 0.06222650096271661, "grad_norm": 1.0695922251783396, "learning_rate": 9.97282141252089e-06, "loss": 0.2568, "step": 2133 }, { "epoch": 0.06225567419336017, "grad_norm": 0.8600497510893946, "learning_rate": 9.972772198405945e-06, "loss": 0.1948, "step": 2134 }, { "epoch": 0.06228484742400373, "grad_norm": 0.9383435682456496, "learning_rate": 9.972722939895279e-06, "loss": 0.1983, "step": 2135 }, { "epoch": 0.06231402065464729, "grad_norm": 0.9670154811684295, "learning_rate": 9.972673636989327e-06, "loss": 0.1963, "step": 2136 }, { "epoch": 0.06234319388529086, "grad_norm": 1.0911027522815946, "learning_rate": 9.972624289688533e-06, "loss": 0.1784, "step": 2137 }, { "epoch": 0.06237236711593442, "grad_norm": 1.1053076465996292, "learning_rate": 9.972574897993338e-06, "loss": 0.201, "step": 2138 }, { "epoch": 0.06240154034657798, "grad_norm": 1.022009117982567, "learning_rate": 9.97252546190418e-06, "loss": 0.2232, "step": 2139 }, { "epoch": 0.06243071357722154, "grad_norm": 0.8704373189860868, "learning_rate": 9.972475981421502e-06, "loss": 0.202, "step": 2140 }, { "epoch": 0.0624598868078651, "grad_norm": 0.8073386618088111, "learning_rate": 9.972426456545745e-06, "loss": 0.2024, "step": 2141 }, { "epoch": 0.06248906003850867, "grad_norm": 0.9390175056440538, "learning_rate": 9.972376887277353e-06, "loss": 0.1864, "step": 2142 }, { "epoch": 0.06251823326915222, "grad_norm": 0.8741455883610038, "learning_rate": 9.972327273616765e-06, "loss": 0.1939, "step": 2143 }, { "epoch": 0.06254740649979579, "grad_norm": 0.8554803354568955, "learning_rate": 9.972277615564428e-06, "loss": 0.1739, "step": 2144 }, { "epoch": 0.06257657973043936, "grad_norm": 0.928084158501416, "learning_rate": 9.972227913120782e-06, "loss": 0.2174, "step": 2145 }, { "epoch": 0.06260575296108291, "grad_norm": 0.9267381160754137, "learning_rate": 9.972178166286273e-06, "loss": 0.1803, "step": 2146 }, { "epoch": 0.06263492619172648, "grad_norm": 0.8733856471252143, "learning_rate": 9.972128375061345e-06, "loss": 0.209, "step": 2147 }, { "epoch": 0.06266409942237003, "grad_norm": 1.0027675655267019, "learning_rate": 9.97207853944644e-06, "loss": 0.1997, "step": 2148 }, { "epoch": 0.0626932726530136, "grad_norm": 0.8467316584835951, "learning_rate": 9.972028659442006e-06, "loss": 0.2078, "step": 2149 }, { "epoch": 0.06272244588365716, "grad_norm": 0.9590674906366835, "learning_rate": 9.971978735048487e-06, "loss": 0.1902, "step": 2150 }, { "epoch": 0.06275161911430072, "grad_norm": 0.9162306588055258, "learning_rate": 9.971928766266328e-06, "loss": 0.2028, "step": 2151 }, { "epoch": 0.06278079234494428, "grad_norm": 1.0010169971801015, "learning_rate": 9.971878753095975e-06, "loss": 0.2138, "step": 2152 }, { "epoch": 0.06280996557558784, "grad_norm": 0.9096766102413545, "learning_rate": 9.971828695537877e-06, "loss": 0.2317, "step": 2153 }, { "epoch": 0.0628391388062314, "grad_norm": 0.8916366375062106, "learning_rate": 9.97177859359248e-06, "loss": 0.209, "step": 2154 }, { "epoch": 0.06286831203687496, "grad_norm": 0.8601853419671421, "learning_rate": 9.97172844726023e-06, "loss": 0.1851, "step": 2155 }, { "epoch": 0.06289748526751852, "grad_norm": 0.9660578264513769, "learning_rate": 9.971678256541573e-06, "loss": 0.199, "step": 2156 }, { "epoch": 0.06292665849816209, "grad_norm": 1.021043723589428, "learning_rate": 9.971628021436962e-06, "loss": 0.2355, "step": 2157 }, { "epoch": 0.06295583172880564, "grad_norm": 1.044111847940825, "learning_rate": 9.971577741946841e-06, "loss": 0.2054, "step": 2158 }, { "epoch": 0.06298500495944921, "grad_norm": 0.947055773303262, "learning_rate": 9.971527418071663e-06, "loss": 0.1827, "step": 2159 }, { "epoch": 0.06301417819009276, "grad_norm": 0.9863468194876462, "learning_rate": 9.971477049811873e-06, "loss": 0.24, "step": 2160 }, { "epoch": 0.06304335142073633, "grad_norm": 0.8877793611940751, "learning_rate": 9.971426637167924e-06, "loss": 0.1986, "step": 2161 }, { "epoch": 0.0630725246513799, "grad_norm": 0.9084343410271073, "learning_rate": 9.971376180140264e-06, "loss": 0.1767, "step": 2162 }, { "epoch": 0.06310169788202345, "grad_norm": 0.9435400721303311, "learning_rate": 9.971325678729344e-06, "loss": 0.2152, "step": 2163 }, { "epoch": 0.06313087111266702, "grad_norm": 0.8291741826520561, "learning_rate": 9.971275132935616e-06, "loss": 0.2022, "step": 2164 }, { "epoch": 0.06316004434331057, "grad_norm": 1.102884801299515, "learning_rate": 9.97122454275953e-06, "loss": 0.1944, "step": 2165 }, { "epoch": 0.06318921757395414, "grad_norm": 0.7718465396264464, "learning_rate": 9.971173908201536e-06, "loss": 0.1794, "step": 2166 }, { "epoch": 0.06321839080459771, "grad_norm": 0.9722053784672414, "learning_rate": 9.971123229262091e-06, "loss": 0.2162, "step": 2167 }, { "epoch": 0.06324756403524126, "grad_norm": 0.7703595361079513, "learning_rate": 9.971072505941643e-06, "loss": 0.2148, "step": 2168 }, { "epoch": 0.06327673726588483, "grad_norm": 0.8627728017973386, "learning_rate": 9.971021738240648e-06, "loss": 0.2194, "step": 2169 }, { "epoch": 0.06330591049652838, "grad_norm": 0.8168786062791672, "learning_rate": 9.970970926159556e-06, "loss": 0.1784, "step": 2170 }, { "epoch": 0.06333508372717195, "grad_norm": 0.9458401907464605, "learning_rate": 9.970920069698822e-06, "loss": 0.1989, "step": 2171 }, { "epoch": 0.06336425695781552, "grad_norm": 0.8681931135470768, "learning_rate": 9.970869168858901e-06, "loss": 0.2097, "step": 2172 }, { "epoch": 0.06339343018845907, "grad_norm": 1.0313342547076338, "learning_rate": 9.970818223640246e-06, "loss": 0.2039, "step": 2173 }, { "epoch": 0.06342260341910264, "grad_norm": 0.9147472245352678, "learning_rate": 9.970767234043315e-06, "loss": 0.1973, "step": 2174 }, { "epoch": 0.06345177664974619, "grad_norm": 1.0247069348055946, "learning_rate": 9.970716200068557e-06, "loss": 0.2036, "step": 2175 }, { "epoch": 0.06348094988038976, "grad_norm": 0.9377552652308071, "learning_rate": 9.970665121716434e-06, "loss": 0.2063, "step": 2176 }, { "epoch": 0.06351012311103331, "grad_norm": 0.8406212963866995, "learning_rate": 9.9706139989874e-06, "loss": 0.2351, "step": 2177 }, { "epoch": 0.06353929634167688, "grad_norm": 0.910002269981033, "learning_rate": 9.970562831881908e-06, "loss": 0.221, "step": 2178 }, { "epoch": 0.06356846957232044, "grad_norm": 1.0106532591392394, "learning_rate": 9.97051162040042e-06, "loss": 0.1821, "step": 2179 }, { "epoch": 0.063597642802964, "grad_norm": 0.8683149191940535, "learning_rate": 9.970460364543388e-06, "loss": 0.1846, "step": 2180 }, { "epoch": 0.06362681603360756, "grad_norm": 0.8223202382821131, "learning_rate": 9.970409064311275e-06, "loss": 0.1887, "step": 2181 }, { "epoch": 0.06365598926425112, "grad_norm": 1.1540314998941879, "learning_rate": 9.970357719704535e-06, "loss": 0.1998, "step": 2182 }, { "epoch": 0.06368516249489468, "grad_norm": 0.8113968764563977, "learning_rate": 9.97030633072363e-06, "loss": 0.1957, "step": 2183 }, { "epoch": 0.06371433572553825, "grad_norm": 1.0028786750066798, "learning_rate": 9.970254897369014e-06, "loss": 0.1942, "step": 2184 }, { "epoch": 0.0637435089561818, "grad_norm": 0.8008843570967988, "learning_rate": 9.970203419641152e-06, "loss": 0.2071, "step": 2185 }, { "epoch": 0.06377268218682537, "grad_norm": 0.9577374456325305, "learning_rate": 9.970151897540496e-06, "loss": 0.1916, "step": 2186 }, { "epoch": 0.06380185541746893, "grad_norm": 0.7721337864368466, "learning_rate": 9.970100331067515e-06, "loss": 0.1907, "step": 2187 }, { "epoch": 0.06383102864811249, "grad_norm": 0.8214557848609138, "learning_rate": 9.97004872022266e-06, "loss": 0.189, "step": 2188 }, { "epoch": 0.06386020187875606, "grad_norm": 0.8534221733409112, "learning_rate": 9.969997065006399e-06, "loss": 0.1985, "step": 2189 }, { "epoch": 0.06388937510939961, "grad_norm": 0.9731900665243962, "learning_rate": 9.96994536541919e-06, "loss": 0.2277, "step": 2190 }, { "epoch": 0.06391854834004318, "grad_norm": 0.9098656679861912, "learning_rate": 9.969893621461495e-06, "loss": 0.2049, "step": 2191 }, { "epoch": 0.06394772157068673, "grad_norm": 1.156345296203105, "learning_rate": 9.969841833133778e-06, "loss": 0.1972, "step": 2192 }, { "epoch": 0.0639768948013303, "grad_norm": 1.029714304537173, "learning_rate": 9.969790000436498e-06, "loss": 0.2061, "step": 2193 }, { "epoch": 0.06400606803197387, "grad_norm": 0.839849005411028, "learning_rate": 9.969738123370118e-06, "loss": 0.2277, "step": 2194 }, { "epoch": 0.06403524126261742, "grad_norm": 1.1541475573991666, "learning_rate": 9.969686201935105e-06, "loss": 0.2031, "step": 2195 }, { "epoch": 0.06406441449326099, "grad_norm": 1.039905017633847, "learning_rate": 9.969634236131918e-06, "loss": 0.1992, "step": 2196 }, { "epoch": 0.06409358772390454, "grad_norm": 1.0827334882608222, "learning_rate": 9.969582225961025e-06, "loss": 0.185, "step": 2197 }, { "epoch": 0.06412276095454811, "grad_norm": 0.8294662301533692, "learning_rate": 9.969530171422886e-06, "loss": 0.2021, "step": 2198 }, { "epoch": 0.06415193418519166, "grad_norm": 1.16260507700567, "learning_rate": 9.969478072517968e-06, "loss": 0.1953, "step": 2199 }, { "epoch": 0.06418110741583523, "grad_norm": 0.910075438686915, "learning_rate": 9.969425929246739e-06, "loss": 0.204, "step": 2200 }, { "epoch": 0.0642102806464788, "grad_norm": 0.7471934485024296, "learning_rate": 9.969373741609659e-06, "loss": 0.1982, "step": 2201 }, { "epoch": 0.06423945387712235, "grad_norm": 0.8166737270541704, "learning_rate": 9.969321509607197e-06, "loss": 0.1888, "step": 2202 }, { "epoch": 0.06426862710776592, "grad_norm": 1.0086772959630657, "learning_rate": 9.969269233239819e-06, "loss": 0.1879, "step": 2203 }, { "epoch": 0.06429780033840947, "grad_norm": 0.9159443963949536, "learning_rate": 9.96921691250799e-06, "loss": 0.1971, "step": 2204 }, { "epoch": 0.06432697356905304, "grad_norm": 0.884458649367082, "learning_rate": 9.969164547412182e-06, "loss": 0.1744, "step": 2205 }, { "epoch": 0.0643561467996966, "grad_norm": 0.8610354642199655, "learning_rate": 9.969112137952856e-06, "loss": 0.1923, "step": 2206 }, { "epoch": 0.06438532003034016, "grad_norm": 1.1471539639614219, "learning_rate": 9.969059684130484e-06, "loss": 0.1913, "step": 2207 }, { "epoch": 0.06441449326098372, "grad_norm": 1.0115666785152742, "learning_rate": 9.969007185945534e-06, "loss": 0.2078, "step": 2208 }, { "epoch": 0.06444366649162728, "grad_norm": 0.9262368849840441, "learning_rate": 9.968954643398474e-06, "loss": 0.2172, "step": 2209 }, { "epoch": 0.06447283972227084, "grad_norm": 1.2146042940550938, "learning_rate": 9.968902056489773e-06, "loss": 0.2038, "step": 2210 }, { "epoch": 0.06450201295291441, "grad_norm": 1.1360427085963671, "learning_rate": 9.9688494252199e-06, "loss": 0.2207, "step": 2211 }, { "epoch": 0.06453118618355796, "grad_norm": 0.9416998773571001, "learning_rate": 9.968796749589328e-06, "loss": 0.2076, "step": 2212 }, { "epoch": 0.06456035941420153, "grad_norm": 0.9456392384313109, "learning_rate": 9.96874402959852e-06, "loss": 0.1948, "step": 2213 }, { "epoch": 0.06458953264484509, "grad_norm": 1.03189788717096, "learning_rate": 9.968691265247954e-06, "loss": 0.1727, "step": 2214 }, { "epoch": 0.06461870587548865, "grad_norm": 0.8727096691708024, "learning_rate": 9.968638456538101e-06, "loss": 0.2015, "step": 2215 }, { "epoch": 0.06464787910613222, "grad_norm": 1.02655922936091, "learning_rate": 9.968585603469427e-06, "loss": 0.2137, "step": 2216 }, { "epoch": 0.06467705233677577, "grad_norm": 1.028569638988256, "learning_rate": 9.968532706042406e-06, "loss": 0.2149, "step": 2217 }, { "epoch": 0.06470622556741934, "grad_norm": 1.0048528039586941, "learning_rate": 9.968479764257513e-06, "loss": 0.2076, "step": 2218 }, { "epoch": 0.0647353987980629, "grad_norm": 0.8780669246841813, "learning_rate": 9.968426778115218e-06, "loss": 0.1826, "step": 2219 }, { "epoch": 0.06476457202870646, "grad_norm": 0.8531201656836703, "learning_rate": 9.968373747615996e-06, "loss": 0.178, "step": 2220 }, { "epoch": 0.06479374525935001, "grad_norm": 0.8613243761746758, "learning_rate": 9.968320672760318e-06, "loss": 0.1789, "step": 2221 }, { "epoch": 0.06482291848999358, "grad_norm": 0.9149738482474605, "learning_rate": 9.968267553548659e-06, "loss": 0.19, "step": 2222 }, { "epoch": 0.06485209172063715, "grad_norm": 1.0257280791333028, "learning_rate": 9.968214389981494e-06, "loss": 0.211, "step": 2223 }, { "epoch": 0.0648812649512807, "grad_norm": 0.9591929787116885, "learning_rate": 9.968161182059297e-06, "loss": 0.1982, "step": 2224 }, { "epoch": 0.06491043818192427, "grad_norm": 0.8951345868663532, "learning_rate": 9.968107929782543e-06, "loss": 0.1922, "step": 2225 }, { "epoch": 0.06493961141256782, "grad_norm": 0.7471293912834188, "learning_rate": 9.968054633151707e-06, "loss": 0.1967, "step": 2226 }, { "epoch": 0.06496878464321139, "grad_norm": 0.8574026233590367, "learning_rate": 9.968001292167264e-06, "loss": 0.2329, "step": 2227 }, { "epoch": 0.06499795787385496, "grad_norm": 0.873056545143532, "learning_rate": 9.967947906829694e-06, "loss": 0.227, "step": 2228 }, { "epoch": 0.06502713110449851, "grad_norm": 0.9092896561420567, "learning_rate": 9.967894477139468e-06, "loss": 0.2438, "step": 2229 }, { "epoch": 0.06505630433514208, "grad_norm": 0.8392856461878418, "learning_rate": 9.967841003097068e-06, "loss": 0.1972, "step": 2230 }, { "epoch": 0.06508547756578563, "grad_norm": 0.9152020310585482, "learning_rate": 9.967787484702968e-06, "loss": 0.1983, "step": 2231 }, { "epoch": 0.0651146507964292, "grad_norm": 0.7857343524542046, "learning_rate": 9.96773392195765e-06, "loss": 0.1929, "step": 2232 }, { "epoch": 0.06514382402707276, "grad_norm": 0.8205865412173147, "learning_rate": 9.967680314861587e-06, "loss": 0.211, "step": 2233 }, { "epoch": 0.06517299725771632, "grad_norm": 0.8703512415040333, "learning_rate": 9.967626663415261e-06, "loss": 0.2158, "step": 2234 }, { "epoch": 0.06520217048835988, "grad_norm": 0.9683867770588545, "learning_rate": 9.96757296761915e-06, "loss": 0.2311, "step": 2235 }, { "epoch": 0.06523134371900344, "grad_norm": 0.8237442188773494, "learning_rate": 9.967519227473733e-06, "loss": 0.1998, "step": 2236 }, { "epoch": 0.065260516949647, "grad_norm": 0.8338489173292051, "learning_rate": 9.96746544297949e-06, "loss": 0.1877, "step": 2237 }, { "epoch": 0.06528969018029057, "grad_norm": 0.9623474023766088, "learning_rate": 9.967411614136902e-06, "loss": 0.1998, "step": 2238 }, { "epoch": 0.06531886341093412, "grad_norm": 1.1544915738343882, "learning_rate": 9.967357740946448e-06, "loss": 0.1996, "step": 2239 }, { "epoch": 0.06534803664157769, "grad_norm": 0.8763839105754301, "learning_rate": 9.967303823408612e-06, "loss": 0.1962, "step": 2240 }, { "epoch": 0.06537720987222125, "grad_norm": 0.8973607034259703, "learning_rate": 9.96724986152387e-06, "loss": 0.2032, "step": 2241 }, { "epoch": 0.06540638310286481, "grad_norm": 1.0217125120852515, "learning_rate": 9.96719585529271e-06, "loss": 0.2016, "step": 2242 }, { "epoch": 0.06543555633350838, "grad_norm": 0.914861141037201, "learning_rate": 9.96714180471561e-06, "loss": 0.2156, "step": 2243 }, { "epoch": 0.06546472956415193, "grad_norm": 0.852790688174234, "learning_rate": 9.967087709793053e-06, "loss": 0.1942, "step": 2244 }, { "epoch": 0.0654939027947955, "grad_norm": 0.8901363975744998, "learning_rate": 9.967033570525525e-06, "loss": 0.188, "step": 2245 }, { "epoch": 0.06552307602543905, "grad_norm": 0.8225870555184273, "learning_rate": 9.966979386913504e-06, "loss": 0.2115, "step": 2246 }, { "epoch": 0.06555224925608262, "grad_norm": 0.863887118505021, "learning_rate": 9.966925158957479e-06, "loss": 0.1863, "step": 2247 }, { "epoch": 0.06558142248672617, "grad_norm": 0.9544258461725252, "learning_rate": 9.966870886657932e-06, "loss": 0.214, "step": 2248 }, { "epoch": 0.06561059571736974, "grad_norm": 1.417183988957697, "learning_rate": 9.966816570015345e-06, "loss": 0.2219, "step": 2249 }, { "epoch": 0.06563976894801331, "grad_norm": 1.1918637172628517, "learning_rate": 9.966762209030208e-06, "loss": 0.2249, "step": 2250 }, { "epoch": 0.06566894217865686, "grad_norm": 0.8183769491972958, "learning_rate": 9.966707803703002e-06, "loss": 0.2128, "step": 2251 }, { "epoch": 0.06569811540930043, "grad_norm": 1.0000162894697773, "learning_rate": 9.966653354034214e-06, "loss": 0.2209, "step": 2252 }, { "epoch": 0.06572728863994398, "grad_norm": 1.0300333643567683, "learning_rate": 9.966598860024332e-06, "loss": 0.1842, "step": 2253 }, { "epoch": 0.06575646187058755, "grad_norm": 0.9772418886702062, "learning_rate": 9.966544321673839e-06, "loss": 0.2076, "step": 2254 }, { "epoch": 0.06578563510123112, "grad_norm": 1.2678545558541043, "learning_rate": 9.966489738983226e-06, "loss": 0.1676, "step": 2255 }, { "epoch": 0.06581480833187467, "grad_norm": 1.107490942511155, "learning_rate": 9.966435111952977e-06, "loss": 0.2144, "step": 2256 }, { "epoch": 0.06584398156251824, "grad_norm": 0.921844265717796, "learning_rate": 9.966380440583581e-06, "loss": 0.1835, "step": 2257 }, { "epoch": 0.06587315479316179, "grad_norm": 1.3402928936873384, "learning_rate": 9.966325724875527e-06, "loss": 0.2103, "step": 2258 }, { "epoch": 0.06590232802380536, "grad_norm": 0.855362458923824, "learning_rate": 9.9662709648293e-06, "loss": 0.2007, "step": 2259 }, { "epoch": 0.06593150125444892, "grad_norm": 1.0063562289024135, "learning_rate": 9.966216160445394e-06, "loss": 0.2166, "step": 2260 }, { "epoch": 0.06596067448509248, "grad_norm": 1.0831744996916741, "learning_rate": 9.966161311724296e-06, "loss": 0.2093, "step": 2261 }, { "epoch": 0.06598984771573604, "grad_norm": 0.6840487396997437, "learning_rate": 9.966106418666494e-06, "loss": 0.1893, "step": 2262 }, { "epoch": 0.0660190209463796, "grad_norm": 0.9684448740508934, "learning_rate": 9.96605148127248e-06, "loss": 0.2043, "step": 2263 }, { "epoch": 0.06604819417702316, "grad_norm": 1.0038273974017846, "learning_rate": 9.965996499542742e-06, "loss": 0.1828, "step": 2264 }, { "epoch": 0.06607736740766673, "grad_norm": 0.8998160043586104, "learning_rate": 9.965941473477775e-06, "loss": 0.2128, "step": 2265 }, { "epoch": 0.06610654063831028, "grad_norm": 0.8605748093744029, "learning_rate": 9.965886403078067e-06, "loss": 0.209, "step": 2266 }, { "epoch": 0.06613571386895385, "grad_norm": 0.7303764695161269, "learning_rate": 9.965831288344112e-06, "loss": 0.2088, "step": 2267 }, { "epoch": 0.0661648870995974, "grad_norm": 0.88000142850267, "learning_rate": 9.9657761292764e-06, "loss": 0.2036, "step": 2268 }, { "epoch": 0.06619406033024097, "grad_norm": 0.76853823738418, "learning_rate": 9.965720925875421e-06, "loss": 0.1771, "step": 2269 }, { "epoch": 0.06622323356088453, "grad_norm": 0.916048883347126, "learning_rate": 9.965665678141673e-06, "loss": 0.2045, "step": 2270 }, { "epoch": 0.06625240679152809, "grad_norm": 1.0360752776009832, "learning_rate": 9.96561038607565e-06, "loss": 0.1836, "step": 2271 }, { "epoch": 0.06628158002217166, "grad_norm": 0.935275447511629, "learning_rate": 9.96555504967784e-06, "loss": 0.2047, "step": 2272 }, { "epoch": 0.06631075325281521, "grad_norm": 0.8368259005842033, "learning_rate": 9.965499668948741e-06, "loss": 0.1808, "step": 2273 }, { "epoch": 0.06633992648345878, "grad_norm": 1.2344291155833595, "learning_rate": 9.965444243888846e-06, "loss": 0.2463, "step": 2274 }, { "epoch": 0.06636909971410233, "grad_norm": 1.2281465329487147, "learning_rate": 9.96538877449865e-06, "loss": 0.2175, "step": 2275 }, { "epoch": 0.0663982729447459, "grad_norm": 1.0374219247249994, "learning_rate": 9.965333260778649e-06, "loss": 0.1772, "step": 2276 }, { "epoch": 0.06642744617538947, "grad_norm": 0.8921888732790692, "learning_rate": 9.965277702729338e-06, "loss": 0.2241, "step": 2277 }, { "epoch": 0.06645661940603302, "grad_norm": 1.0007032657722952, "learning_rate": 9.965222100351211e-06, "loss": 0.1979, "step": 2278 }, { "epoch": 0.06648579263667659, "grad_norm": 0.8774868372628707, "learning_rate": 9.965166453644767e-06, "loss": 0.1953, "step": 2279 }, { "epoch": 0.06651496586732014, "grad_norm": 0.7244194252569438, "learning_rate": 9.965110762610504e-06, "loss": 0.1711, "step": 2280 }, { "epoch": 0.06654413909796371, "grad_norm": 1.0813616908314334, "learning_rate": 9.965055027248915e-06, "loss": 0.2493, "step": 2281 }, { "epoch": 0.06657331232860728, "grad_norm": 0.9372654404430211, "learning_rate": 9.964999247560501e-06, "loss": 0.2176, "step": 2282 }, { "epoch": 0.06660248555925083, "grad_norm": 0.9265101736038648, "learning_rate": 9.96494342354576e-06, "loss": 0.193, "step": 2283 }, { "epoch": 0.0666316587898944, "grad_norm": 1.1503195481058759, "learning_rate": 9.964887555205189e-06, "loss": 0.2132, "step": 2284 }, { "epoch": 0.06666083202053795, "grad_norm": 0.9757278540844526, "learning_rate": 9.964831642539285e-06, "loss": 0.202, "step": 2285 }, { "epoch": 0.06669000525118152, "grad_norm": 1.2181286560018258, "learning_rate": 9.964775685548552e-06, "loss": 0.2026, "step": 2286 }, { "epoch": 0.06671917848182508, "grad_norm": 0.8442524527932412, "learning_rate": 9.964719684233486e-06, "loss": 0.2049, "step": 2287 }, { "epoch": 0.06674835171246864, "grad_norm": 0.9553116229166884, "learning_rate": 9.964663638594587e-06, "loss": 0.1791, "step": 2288 }, { "epoch": 0.0667775249431122, "grad_norm": 0.8899230068080627, "learning_rate": 9.964607548632356e-06, "loss": 0.1953, "step": 2289 }, { "epoch": 0.06680669817375576, "grad_norm": 1.0511859131465593, "learning_rate": 9.964551414347297e-06, "loss": 0.2116, "step": 2290 }, { "epoch": 0.06683587140439932, "grad_norm": 0.9932528612213981, "learning_rate": 9.964495235739907e-06, "loss": 0.1906, "step": 2291 }, { "epoch": 0.06686504463504288, "grad_norm": 1.0239559240853664, "learning_rate": 9.964439012810686e-06, "loss": 0.1783, "step": 2292 }, { "epoch": 0.06689421786568645, "grad_norm": 1.2101691483406114, "learning_rate": 9.96438274556014e-06, "loss": 0.2021, "step": 2293 }, { "epoch": 0.06692339109633001, "grad_norm": 0.8340825736135985, "learning_rate": 9.96432643398877e-06, "loss": 0.2169, "step": 2294 }, { "epoch": 0.06695256432697357, "grad_norm": 0.9372531126845941, "learning_rate": 9.96427007809708e-06, "loss": 0.2003, "step": 2295 }, { "epoch": 0.06698173755761713, "grad_norm": 0.9408517668032419, "learning_rate": 9.964213677885571e-06, "loss": 0.1893, "step": 2296 }, { "epoch": 0.06701091078826069, "grad_norm": 0.9129308162969344, "learning_rate": 9.964157233354745e-06, "loss": 0.1915, "step": 2297 }, { "epoch": 0.06704008401890425, "grad_norm": 0.8845099707748527, "learning_rate": 9.964100744505111e-06, "loss": 0.2003, "step": 2298 }, { "epoch": 0.06706925724954782, "grad_norm": 1.063371864190776, "learning_rate": 9.96404421133717e-06, "loss": 0.2201, "step": 2299 }, { "epoch": 0.06709843048019137, "grad_norm": 1.2343004147392833, "learning_rate": 9.963987633851427e-06, "loss": 0.1992, "step": 2300 }, { "epoch": 0.06712760371083494, "grad_norm": 0.8096666210595943, "learning_rate": 9.963931012048387e-06, "loss": 0.1975, "step": 2301 }, { "epoch": 0.0671567769414785, "grad_norm": 0.9578026541774669, "learning_rate": 9.963874345928557e-06, "loss": 0.2318, "step": 2302 }, { "epoch": 0.06718595017212206, "grad_norm": 0.9635990484625206, "learning_rate": 9.963817635492441e-06, "loss": 0.2109, "step": 2303 }, { "epoch": 0.06721512340276563, "grad_norm": 0.9000620424505866, "learning_rate": 9.963760880740545e-06, "loss": 0.1993, "step": 2304 }, { "epoch": 0.06724429663340918, "grad_norm": 0.8117717998021512, "learning_rate": 9.96370408167338e-06, "loss": 0.2174, "step": 2305 }, { "epoch": 0.06727346986405275, "grad_norm": 0.8858406507149971, "learning_rate": 9.963647238291446e-06, "loss": 0.1928, "step": 2306 }, { "epoch": 0.0673026430946963, "grad_norm": 0.9669617480035934, "learning_rate": 9.963590350595258e-06, "loss": 0.1973, "step": 2307 }, { "epoch": 0.06733181632533987, "grad_norm": 0.7658286769158015, "learning_rate": 9.963533418585318e-06, "loss": 0.1798, "step": 2308 }, { "epoch": 0.06736098955598344, "grad_norm": 0.9558386659030622, "learning_rate": 9.963476442262136e-06, "loss": 0.2138, "step": 2309 }, { "epoch": 0.06739016278662699, "grad_norm": 0.9344254127484777, "learning_rate": 9.963419421626224e-06, "loss": 0.1998, "step": 2310 }, { "epoch": 0.06741933601727056, "grad_norm": 0.8800064663338417, "learning_rate": 9.963362356678086e-06, "loss": 0.2017, "step": 2311 }, { "epoch": 0.06744850924791411, "grad_norm": 0.7737275162632699, "learning_rate": 9.963305247418234e-06, "loss": 0.1742, "step": 2312 }, { "epoch": 0.06747768247855768, "grad_norm": 1.1441889633269098, "learning_rate": 9.963248093847179e-06, "loss": 0.1959, "step": 2313 }, { "epoch": 0.06750685570920123, "grad_norm": 1.052257520574918, "learning_rate": 9.963190895965428e-06, "loss": 0.1829, "step": 2314 }, { "epoch": 0.0675360289398448, "grad_norm": 1.0701099476644802, "learning_rate": 9.963133653773495e-06, "loss": 0.1834, "step": 2315 }, { "epoch": 0.06756520217048836, "grad_norm": 0.8497336606664448, "learning_rate": 9.963076367271889e-06, "loss": 0.2044, "step": 2316 }, { "epoch": 0.06759437540113192, "grad_norm": 0.9013699327004027, "learning_rate": 9.96301903646112e-06, "loss": 0.2138, "step": 2317 }, { "epoch": 0.06762354863177548, "grad_norm": 0.8988222302331794, "learning_rate": 9.962961661341707e-06, "loss": 0.2128, "step": 2318 }, { "epoch": 0.06765272186241904, "grad_norm": 0.9897416661460551, "learning_rate": 9.962904241914151e-06, "loss": 0.2091, "step": 2319 }, { "epoch": 0.0676818950930626, "grad_norm": 1.0865752192965261, "learning_rate": 9.962846778178974e-06, "loss": 0.2044, "step": 2320 }, { "epoch": 0.06771106832370617, "grad_norm": 0.918354437620551, "learning_rate": 9.962789270136687e-06, "loss": 0.1885, "step": 2321 }, { "epoch": 0.06774024155434973, "grad_norm": 1.2876227223562233, "learning_rate": 9.962731717787798e-06, "loss": 0.204, "step": 2322 }, { "epoch": 0.06776941478499329, "grad_norm": 0.8520317395261258, "learning_rate": 9.962674121132827e-06, "loss": 0.1953, "step": 2323 }, { "epoch": 0.06779858801563685, "grad_norm": 1.011606949292786, "learning_rate": 9.962616480172287e-06, "loss": 0.2028, "step": 2324 }, { "epoch": 0.06782776124628041, "grad_norm": 0.9435219564123694, "learning_rate": 9.96255879490669e-06, "loss": 0.1701, "step": 2325 }, { "epoch": 0.06785693447692398, "grad_norm": 0.9355932918071468, "learning_rate": 9.962501065336553e-06, "loss": 0.2142, "step": 2326 }, { "epoch": 0.06788610770756753, "grad_norm": 1.0504961242919153, "learning_rate": 9.962443291462393e-06, "loss": 0.1902, "step": 2327 }, { "epoch": 0.0679152809382111, "grad_norm": 1.0262234020607701, "learning_rate": 9.962385473284723e-06, "loss": 0.188, "step": 2328 }, { "epoch": 0.06794445416885465, "grad_norm": 0.8601566986948037, "learning_rate": 9.962327610804059e-06, "loss": 0.1796, "step": 2329 }, { "epoch": 0.06797362739949822, "grad_norm": 0.8643775448630018, "learning_rate": 9.962269704020919e-06, "loss": 0.196, "step": 2330 }, { "epoch": 0.06800280063014179, "grad_norm": 1.0711486392766931, "learning_rate": 9.962211752935821e-06, "loss": 0.1832, "step": 2331 }, { "epoch": 0.06803197386078534, "grad_norm": 0.8854287171512436, "learning_rate": 9.96215375754928e-06, "loss": 0.2064, "step": 2332 }, { "epoch": 0.06806114709142891, "grad_norm": 1.0101785669871297, "learning_rate": 9.962095717861816e-06, "loss": 0.1963, "step": 2333 }, { "epoch": 0.06809032032207246, "grad_norm": 1.0012062687223648, "learning_rate": 9.962037633873945e-06, "loss": 0.2374, "step": 2334 }, { "epoch": 0.06811949355271603, "grad_norm": 0.8667464615236197, "learning_rate": 9.961979505586185e-06, "loss": 0.187, "step": 2335 }, { "epoch": 0.0681486667833596, "grad_norm": 0.9361547793247379, "learning_rate": 9.961921332999058e-06, "loss": 0.2079, "step": 2336 }, { "epoch": 0.06817784001400315, "grad_norm": 0.9839156956812861, "learning_rate": 9.961863116113083e-06, "loss": 0.2029, "step": 2337 }, { "epoch": 0.06820701324464672, "grad_norm": 1.0421774610372838, "learning_rate": 9.961804854928778e-06, "loss": 0.2236, "step": 2338 }, { "epoch": 0.06823618647529027, "grad_norm": 1.0811409838186092, "learning_rate": 9.961746549446662e-06, "loss": 0.2267, "step": 2339 }, { "epoch": 0.06826535970593384, "grad_norm": 1.0460234373857449, "learning_rate": 9.961688199667259e-06, "loss": 0.1794, "step": 2340 }, { "epoch": 0.06829453293657739, "grad_norm": 0.9051163139657413, "learning_rate": 9.961629805591088e-06, "loss": 0.2255, "step": 2341 }, { "epoch": 0.06832370616722096, "grad_norm": 1.1277562297268768, "learning_rate": 9.96157136721867e-06, "loss": 0.1808, "step": 2342 }, { "epoch": 0.06835287939786452, "grad_norm": 0.9348417394825488, "learning_rate": 9.961512884550529e-06, "loss": 0.2151, "step": 2343 }, { "epoch": 0.06838205262850808, "grad_norm": 1.0292995123316866, "learning_rate": 9.961454357587183e-06, "loss": 0.1855, "step": 2344 }, { "epoch": 0.06841122585915164, "grad_norm": 0.9322885776989647, "learning_rate": 9.961395786329158e-06, "loss": 0.2051, "step": 2345 }, { "epoch": 0.0684403990897952, "grad_norm": 1.0105816732637924, "learning_rate": 9.961337170776974e-06, "loss": 0.1959, "step": 2346 }, { "epoch": 0.06846957232043877, "grad_norm": 1.0499178862163263, "learning_rate": 9.961278510931159e-06, "loss": 0.2075, "step": 2347 }, { "epoch": 0.06849874555108233, "grad_norm": 0.8912862401866359, "learning_rate": 9.961219806792232e-06, "loss": 0.1925, "step": 2348 }, { "epoch": 0.06852791878172589, "grad_norm": 0.9819560623045472, "learning_rate": 9.96116105836072e-06, "loss": 0.185, "step": 2349 }, { "epoch": 0.06855709201236945, "grad_norm": 0.9962237144968342, "learning_rate": 9.961102265637144e-06, "loss": 0.2011, "step": 2350 }, { "epoch": 0.068586265243013, "grad_norm": 1.0182809788713403, "learning_rate": 9.961043428622035e-06, "loss": 0.2263, "step": 2351 }, { "epoch": 0.06861543847365657, "grad_norm": 0.9143174112065799, "learning_rate": 9.960984547315912e-06, "loss": 0.2016, "step": 2352 }, { "epoch": 0.06864461170430014, "grad_norm": 1.0485378149425884, "learning_rate": 9.960925621719303e-06, "loss": 0.1839, "step": 2353 }, { "epoch": 0.0686737849349437, "grad_norm": 1.0526971123647655, "learning_rate": 9.960866651832736e-06, "loss": 0.1631, "step": 2354 }, { "epoch": 0.06870295816558726, "grad_norm": 0.8948910382362572, "learning_rate": 9.960807637656735e-06, "loss": 0.1991, "step": 2355 }, { "epoch": 0.06873213139623081, "grad_norm": 1.1887194133978207, "learning_rate": 9.960748579191828e-06, "loss": 0.2164, "step": 2356 }, { "epoch": 0.06876130462687438, "grad_norm": 1.1574222769922864, "learning_rate": 9.960689476438541e-06, "loss": 0.1861, "step": 2357 }, { "epoch": 0.06879047785751795, "grad_norm": 0.7514501214475601, "learning_rate": 9.960630329397403e-06, "loss": 0.1801, "step": 2358 }, { "epoch": 0.0688196510881615, "grad_norm": 0.9347542331968323, "learning_rate": 9.960571138068942e-06, "loss": 0.222, "step": 2359 }, { "epoch": 0.06884882431880507, "grad_norm": 0.8621373614996176, "learning_rate": 9.960511902453685e-06, "loss": 0.2159, "step": 2360 }, { "epoch": 0.06887799754944862, "grad_norm": 0.9284847136860013, "learning_rate": 9.960452622552163e-06, "loss": 0.1902, "step": 2361 }, { "epoch": 0.06890717078009219, "grad_norm": 0.9108127425244467, "learning_rate": 9.960393298364904e-06, "loss": 0.1985, "step": 2362 }, { "epoch": 0.06893634401073574, "grad_norm": 0.8798707880760961, "learning_rate": 9.960333929892438e-06, "loss": 0.1968, "step": 2363 }, { "epoch": 0.06896551724137931, "grad_norm": 0.9092462067129579, "learning_rate": 9.960274517135294e-06, "loss": 0.2275, "step": 2364 }, { "epoch": 0.06899469047202288, "grad_norm": 1.0368817334127765, "learning_rate": 9.960215060094004e-06, "loss": 0.2058, "step": 2365 }, { "epoch": 0.06902386370266643, "grad_norm": 0.861514927853887, "learning_rate": 9.960155558769097e-06, "loss": 0.1853, "step": 2366 }, { "epoch": 0.06905303693331, "grad_norm": 0.8636982992003643, "learning_rate": 9.960096013161105e-06, "loss": 0.223, "step": 2367 }, { "epoch": 0.06908221016395355, "grad_norm": 0.860306211958087, "learning_rate": 9.960036423270561e-06, "loss": 0.2064, "step": 2368 }, { "epoch": 0.06911138339459712, "grad_norm": 0.847459846554446, "learning_rate": 9.959976789097997e-06, "loss": 0.1825, "step": 2369 }, { "epoch": 0.06914055662524068, "grad_norm": 0.9282284890303654, "learning_rate": 9.959917110643942e-06, "loss": 0.1952, "step": 2370 }, { "epoch": 0.06916972985588424, "grad_norm": 0.939952568470686, "learning_rate": 9.959857387908931e-06, "loss": 0.1776, "step": 2371 }, { "epoch": 0.0691989030865278, "grad_norm": 1.0191405770354907, "learning_rate": 9.959797620893498e-06, "loss": 0.2041, "step": 2372 }, { "epoch": 0.06922807631717136, "grad_norm": 0.7120147314681421, "learning_rate": 9.959737809598177e-06, "loss": 0.184, "step": 2373 }, { "epoch": 0.06925724954781493, "grad_norm": 0.8907912228070642, "learning_rate": 9.959677954023501e-06, "loss": 0.2127, "step": 2374 }, { "epoch": 0.06928642277845849, "grad_norm": 0.9414504551311939, "learning_rate": 9.959618054170003e-06, "loss": 0.2173, "step": 2375 }, { "epoch": 0.06931559600910205, "grad_norm": 0.7052813775278587, "learning_rate": 9.959558110038218e-06, "loss": 0.1791, "step": 2376 }, { "epoch": 0.06934476923974561, "grad_norm": 0.9652752271670552, "learning_rate": 9.959498121628683e-06, "loss": 0.2138, "step": 2377 }, { "epoch": 0.06937394247038917, "grad_norm": 1.0808645757388777, "learning_rate": 9.959438088941935e-06, "loss": 0.2052, "step": 2378 }, { "epoch": 0.06940311570103273, "grad_norm": 0.9018716780531703, "learning_rate": 9.959378011978504e-06, "loss": 0.183, "step": 2379 }, { "epoch": 0.0694322889316763, "grad_norm": 1.0419770280542815, "learning_rate": 9.959317890738932e-06, "loss": 0.203, "step": 2380 }, { "epoch": 0.06946146216231985, "grad_norm": 0.969712062246678, "learning_rate": 9.959257725223753e-06, "loss": 0.2298, "step": 2381 }, { "epoch": 0.06949063539296342, "grad_norm": 0.8279362437059269, "learning_rate": 9.959197515433505e-06, "loss": 0.1666, "step": 2382 }, { "epoch": 0.06951980862360697, "grad_norm": 0.974389601516134, "learning_rate": 9.959137261368725e-06, "loss": 0.2003, "step": 2383 }, { "epoch": 0.06954898185425054, "grad_norm": 0.9440985070740611, "learning_rate": 9.959076963029954e-06, "loss": 0.209, "step": 2384 }, { "epoch": 0.0695781550848941, "grad_norm": 1.0767766171698354, "learning_rate": 9.959016620417725e-06, "loss": 0.1983, "step": 2385 }, { "epoch": 0.06960732831553766, "grad_norm": 1.0014248062894908, "learning_rate": 9.95895623353258e-06, "loss": 0.2096, "step": 2386 }, { "epoch": 0.06963650154618123, "grad_norm": 0.7556710962392882, "learning_rate": 9.958895802375056e-06, "loss": 0.2061, "step": 2387 }, { "epoch": 0.06966567477682478, "grad_norm": 0.9292753584936179, "learning_rate": 9.958835326945698e-06, "loss": 0.1853, "step": 2388 }, { "epoch": 0.06969484800746835, "grad_norm": 1.1008176939215812, "learning_rate": 9.958774807245039e-06, "loss": 0.201, "step": 2389 }, { "epoch": 0.0697240212381119, "grad_norm": 1.0318162019529176, "learning_rate": 9.958714243273624e-06, "loss": 0.1892, "step": 2390 }, { "epoch": 0.06975319446875547, "grad_norm": 0.9803870456434441, "learning_rate": 9.95865363503199e-06, "loss": 0.2192, "step": 2391 }, { "epoch": 0.06978236769939904, "grad_norm": 1.1702309635721455, "learning_rate": 9.958592982520681e-06, "loss": 0.2317, "step": 2392 }, { "epoch": 0.06981154093004259, "grad_norm": 0.9783216659503141, "learning_rate": 9.958532285740238e-06, "loss": 0.1876, "step": 2393 }, { "epoch": 0.06984071416068616, "grad_norm": 0.7905536273298849, "learning_rate": 9.958471544691201e-06, "loss": 0.1973, "step": 2394 }, { "epoch": 0.06986988739132971, "grad_norm": 1.077268354827615, "learning_rate": 9.958410759374116e-06, "loss": 0.2185, "step": 2395 }, { "epoch": 0.06989906062197328, "grad_norm": 1.126968531852992, "learning_rate": 9.958349929789521e-06, "loss": 0.1984, "step": 2396 }, { "epoch": 0.06992823385261684, "grad_norm": 0.7881223910381542, "learning_rate": 9.958289055937963e-06, "loss": 0.1918, "step": 2397 }, { "epoch": 0.0699574070832604, "grad_norm": 0.9537166975001522, "learning_rate": 9.958228137819984e-06, "loss": 0.197, "step": 2398 }, { "epoch": 0.06998658031390396, "grad_norm": 0.9744682320428244, "learning_rate": 9.958167175436128e-06, "loss": 0.214, "step": 2399 }, { "epoch": 0.07001575354454752, "grad_norm": 1.1523567877347596, "learning_rate": 9.95810616878694e-06, "loss": 0.1957, "step": 2400 }, { "epoch": 0.07004492677519109, "grad_norm": 1.1944507329865188, "learning_rate": 9.958045117872961e-06, "loss": 0.187, "step": 2401 }, { "epoch": 0.07007410000583465, "grad_norm": 0.9685427581118649, "learning_rate": 9.95798402269474e-06, "loss": 0.1911, "step": 2402 }, { "epoch": 0.0701032732364782, "grad_norm": 1.0326345432613742, "learning_rate": 9.95792288325282e-06, "loss": 0.1912, "step": 2403 }, { "epoch": 0.07013244646712177, "grad_norm": 0.9634055727972706, "learning_rate": 9.95786169954775e-06, "loss": 0.1985, "step": 2404 }, { "epoch": 0.07016161969776533, "grad_norm": 0.9193623921254882, "learning_rate": 9.957800471580074e-06, "loss": 0.2138, "step": 2405 }, { "epoch": 0.0701907929284089, "grad_norm": 0.9821151777603562, "learning_rate": 9.957739199350339e-06, "loss": 0.2002, "step": 2406 }, { "epoch": 0.07021996615905245, "grad_norm": 0.9099336925257334, "learning_rate": 9.95767788285909e-06, "loss": 0.1921, "step": 2407 }, { "epoch": 0.07024913938969601, "grad_norm": 0.7324101817432253, "learning_rate": 9.957616522106878e-06, "loss": 0.1831, "step": 2408 }, { "epoch": 0.07027831262033958, "grad_norm": 0.8722205327852851, "learning_rate": 9.95755511709425e-06, "loss": 0.1838, "step": 2409 }, { "epoch": 0.07030748585098313, "grad_norm": 1.0160175849577036, "learning_rate": 9.957493667821752e-06, "loss": 0.1973, "step": 2410 }, { "epoch": 0.0703366590816267, "grad_norm": 1.0796794263996108, "learning_rate": 9.957432174289934e-06, "loss": 0.228, "step": 2411 }, { "epoch": 0.07036583231227025, "grad_norm": 1.1028917050992295, "learning_rate": 9.957370636499346e-06, "loss": 0.2113, "step": 2412 }, { "epoch": 0.07039500554291382, "grad_norm": 1.1035408149593489, "learning_rate": 9.957309054450534e-06, "loss": 0.2053, "step": 2413 }, { "epoch": 0.07042417877355739, "grad_norm": 1.0248996546132965, "learning_rate": 9.957247428144052e-06, "loss": 0.1859, "step": 2414 }, { "epoch": 0.07045335200420094, "grad_norm": 1.0670146838617907, "learning_rate": 9.957185757580448e-06, "loss": 0.1889, "step": 2415 }, { "epoch": 0.07048252523484451, "grad_norm": 0.9198217089388051, "learning_rate": 9.957124042760274e-06, "loss": 0.1887, "step": 2416 }, { "epoch": 0.07051169846548806, "grad_norm": 1.1104964904412704, "learning_rate": 9.957062283684078e-06, "loss": 0.1753, "step": 2417 }, { "epoch": 0.07054087169613163, "grad_norm": 1.0434878111586023, "learning_rate": 9.957000480352415e-06, "loss": 0.2157, "step": 2418 }, { "epoch": 0.0705700449267752, "grad_norm": 0.8205827054933801, "learning_rate": 9.956938632765833e-06, "loss": 0.1835, "step": 2419 }, { "epoch": 0.07059921815741875, "grad_norm": 0.9370711144255537, "learning_rate": 9.956876740924888e-06, "loss": 0.1918, "step": 2420 }, { "epoch": 0.07062839138806232, "grad_norm": 0.9241820231784976, "learning_rate": 9.956814804830131e-06, "loss": 0.1937, "step": 2421 }, { "epoch": 0.07065756461870587, "grad_norm": 0.8320079140917186, "learning_rate": 9.956752824482114e-06, "loss": 0.1989, "step": 2422 }, { "epoch": 0.07068673784934944, "grad_norm": 0.9235242375593017, "learning_rate": 9.956690799881391e-06, "loss": 0.2022, "step": 2423 }, { "epoch": 0.070715911079993, "grad_norm": 0.862395217085308, "learning_rate": 9.956628731028516e-06, "loss": 0.2068, "step": 2424 }, { "epoch": 0.07074508431063656, "grad_norm": 0.8779579436971858, "learning_rate": 9.956566617924043e-06, "loss": 0.1978, "step": 2425 }, { "epoch": 0.07077425754128013, "grad_norm": 0.8552995115610158, "learning_rate": 9.956504460568525e-06, "loss": 0.1976, "step": 2426 }, { "epoch": 0.07080343077192368, "grad_norm": 0.8616896467486487, "learning_rate": 9.95644225896252e-06, "loss": 0.2032, "step": 2427 }, { "epoch": 0.07083260400256725, "grad_norm": 1.1605532484673293, "learning_rate": 9.956380013106582e-06, "loss": 0.2089, "step": 2428 }, { "epoch": 0.07086177723321081, "grad_norm": 1.000970860784796, "learning_rate": 9.956317723001265e-06, "loss": 0.2094, "step": 2429 }, { "epoch": 0.07089095046385437, "grad_norm": 0.9507506852609013, "learning_rate": 9.956255388647127e-06, "loss": 0.2075, "step": 2430 }, { "epoch": 0.07092012369449793, "grad_norm": 1.0275419870209836, "learning_rate": 9.956193010044725e-06, "loss": 0.2001, "step": 2431 }, { "epoch": 0.07094929692514149, "grad_norm": 1.1099346786745798, "learning_rate": 9.956130587194615e-06, "loss": 0.18, "step": 2432 }, { "epoch": 0.07097847015578505, "grad_norm": 0.9167391311402326, "learning_rate": 9.956068120097353e-06, "loss": 0.2279, "step": 2433 }, { "epoch": 0.0710076433864286, "grad_norm": 0.997229453392759, "learning_rate": 9.956005608753499e-06, "loss": 0.1833, "step": 2434 }, { "epoch": 0.07103681661707217, "grad_norm": 0.9326126299386502, "learning_rate": 9.95594305316361e-06, "loss": 0.2074, "step": 2435 }, { "epoch": 0.07106598984771574, "grad_norm": 0.8114360126713511, "learning_rate": 9.955880453328243e-06, "loss": 0.1873, "step": 2436 }, { "epoch": 0.0710951630783593, "grad_norm": 1.1166856325162593, "learning_rate": 9.95581780924796e-06, "loss": 0.192, "step": 2437 }, { "epoch": 0.07112433630900286, "grad_norm": 0.9857179562554458, "learning_rate": 9.955755120923319e-06, "loss": 0.1774, "step": 2438 }, { "epoch": 0.07115350953964641, "grad_norm": 1.0638188423148094, "learning_rate": 9.955692388354876e-06, "loss": 0.2159, "step": 2439 }, { "epoch": 0.07118268277028998, "grad_norm": 1.0349445235061012, "learning_rate": 9.955629611543198e-06, "loss": 0.218, "step": 2440 }, { "epoch": 0.07121185600093355, "grad_norm": 0.8570386046990435, "learning_rate": 9.95556679048884e-06, "loss": 0.1897, "step": 2441 }, { "epoch": 0.0712410292315771, "grad_norm": 0.9321227916859394, "learning_rate": 9.955503925192365e-06, "loss": 0.222, "step": 2442 }, { "epoch": 0.07127020246222067, "grad_norm": 1.1486333156118809, "learning_rate": 9.955441015654334e-06, "loss": 0.1724, "step": 2443 }, { "epoch": 0.07129937569286422, "grad_norm": 0.8610017320801263, "learning_rate": 9.955378061875309e-06, "loss": 0.1985, "step": 2444 }, { "epoch": 0.07132854892350779, "grad_norm": 0.9887204063111942, "learning_rate": 9.955315063855851e-06, "loss": 0.1992, "step": 2445 }, { "epoch": 0.07135772215415136, "grad_norm": 1.0296967608410998, "learning_rate": 9.955252021596524e-06, "loss": 0.187, "step": 2446 }, { "epoch": 0.07138689538479491, "grad_norm": 1.042432065215246, "learning_rate": 9.955188935097888e-06, "loss": 0.2054, "step": 2447 }, { "epoch": 0.07141606861543848, "grad_norm": 1.0335384342747953, "learning_rate": 9.95512580436051e-06, "loss": 0.1951, "step": 2448 }, { "epoch": 0.07144524184608203, "grad_norm": 1.068551528570425, "learning_rate": 9.955062629384952e-06, "loss": 0.2164, "step": 2449 }, { "epoch": 0.0714744150767256, "grad_norm": 0.8822694536965376, "learning_rate": 9.954999410171775e-06, "loss": 0.1946, "step": 2450 }, { "epoch": 0.07150358830736916, "grad_norm": 0.952687521069965, "learning_rate": 9.954936146721548e-06, "loss": 0.194, "step": 2451 }, { "epoch": 0.07153276153801272, "grad_norm": 1.0031368490503632, "learning_rate": 9.954872839034836e-06, "loss": 0.2031, "step": 2452 }, { "epoch": 0.07156193476865629, "grad_norm": 0.9135427669353791, "learning_rate": 9.954809487112198e-06, "loss": 0.1763, "step": 2453 }, { "epoch": 0.07159110799929984, "grad_norm": 0.9643004903890515, "learning_rate": 9.954746090954205e-06, "loss": 0.1695, "step": 2454 }, { "epoch": 0.0716202812299434, "grad_norm": 0.9528310407040534, "learning_rate": 9.954682650561423e-06, "loss": 0.214, "step": 2455 }, { "epoch": 0.07164945446058696, "grad_norm": 0.8547133992363304, "learning_rate": 9.954619165934417e-06, "loss": 0.227, "step": 2456 }, { "epoch": 0.07167862769123053, "grad_norm": 0.9051851367771333, "learning_rate": 9.954555637073752e-06, "loss": 0.1706, "step": 2457 }, { "epoch": 0.07170780092187409, "grad_norm": 0.8861919955521864, "learning_rate": 9.95449206398e-06, "loss": 0.2051, "step": 2458 }, { "epoch": 0.07173697415251765, "grad_norm": 0.9730791963232464, "learning_rate": 9.954428446653723e-06, "loss": 0.201, "step": 2459 }, { "epoch": 0.07176614738316121, "grad_norm": 0.9933523719193613, "learning_rate": 9.954364785095493e-06, "loss": 0.1991, "step": 2460 }, { "epoch": 0.07179532061380477, "grad_norm": 0.8492267930360498, "learning_rate": 9.954301079305875e-06, "loss": 0.1834, "step": 2461 }, { "epoch": 0.07182449384444833, "grad_norm": 1.0898044476044764, "learning_rate": 9.95423732928544e-06, "loss": 0.1983, "step": 2462 }, { "epoch": 0.0718536670750919, "grad_norm": 0.9642681007485292, "learning_rate": 9.95417353503476e-06, "loss": 0.2028, "step": 2463 }, { "epoch": 0.07188284030573545, "grad_norm": 0.8881218865673711, "learning_rate": 9.9541096965544e-06, "loss": 0.1994, "step": 2464 }, { "epoch": 0.07191201353637902, "grad_norm": 0.9375368969560157, "learning_rate": 9.954045813844929e-06, "loss": 0.1858, "step": 2465 }, { "epoch": 0.07194118676702257, "grad_norm": 0.9285210062506181, "learning_rate": 9.953981886906921e-06, "loss": 0.1921, "step": 2466 }, { "epoch": 0.07197035999766614, "grad_norm": 1.004301277208077, "learning_rate": 9.953917915740944e-06, "loss": 0.2061, "step": 2467 }, { "epoch": 0.07199953322830971, "grad_norm": 0.9226092100920326, "learning_rate": 9.953853900347572e-06, "loss": 0.2316, "step": 2468 }, { "epoch": 0.07202870645895326, "grad_norm": 0.8940228359318462, "learning_rate": 9.953789840727374e-06, "loss": 0.2167, "step": 2469 }, { "epoch": 0.07205787968959683, "grad_norm": 0.7997127468207761, "learning_rate": 9.953725736880925e-06, "loss": 0.2064, "step": 2470 }, { "epoch": 0.07208705292024038, "grad_norm": 0.9171188662970678, "learning_rate": 9.953661588808795e-06, "loss": 0.1954, "step": 2471 }, { "epoch": 0.07211622615088395, "grad_norm": 0.8660430614460405, "learning_rate": 9.953597396511555e-06, "loss": 0.2022, "step": 2472 }, { "epoch": 0.07214539938152752, "grad_norm": 0.9180021940704193, "learning_rate": 9.95353315998978e-06, "loss": 0.1714, "step": 2473 }, { "epoch": 0.07217457261217107, "grad_norm": 0.8869049918164137, "learning_rate": 9.953468879244045e-06, "loss": 0.1886, "step": 2474 }, { "epoch": 0.07220374584281464, "grad_norm": 0.8894304734100424, "learning_rate": 9.95340455427492e-06, "loss": 0.1792, "step": 2475 }, { "epoch": 0.07223291907345819, "grad_norm": 0.9035187211235377, "learning_rate": 9.953340185082982e-06, "loss": 0.2004, "step": 2476 }, { "epoch": 0.07226209230410176, "grad_norm": 0.8558932996514017, "learning_rate": 9.953275771668807e-06, "loss": 0.1821, "step": 2477 }, { "epoch": 0.07229126553474531, "grad_norm": 1.0864380103500642, "learning_rate": 9.953211314032967e-06, "loss": 0.1976, "step": 2478 }, { "epoch": 0.07232043876538888, "grad_norm": 0.7988910475774128, "learning_rate": 9.95314681217604e-06, "loss": 0.1813, "step": 2479 }, { "epoch": 0.07234961199603245, "grad_norm": 0.9068000257420371, "learning_rate": 9.9530822660986e-06, "loss": 0.1949, "step": 2480 }, { "epoch": 0.072378785226676, "grad_norm": 0.9470315605128883, "learning_rate": 9.953017675801225e-06, "loss": 0.1993, "step": 2481 }, { "epoch": 0.07240795845731957, "grad_norm": 0.9435008351969455, "learning_rate": 9.952953041284488e-06, "loss": 0.1859, "step": 2482 }, { "epoch": 0.07243713168796312, "grad_norm": 0.9990786184141739, "learning_rate": 9.952888362548971e-06, "loss": 0.2186, "step": 2483 }, { "epoch": 0.07246630491860669, "grad_norm": 0.9145289797257048, "learning_rate": 9.952823639595248e-06, "loss": 0.208, "step": 2484 }, { "epoch": 0.07249547814925025, "grad_norm": 1.0575528341843174, "learning_rate": 9.952758872423897e-06, "loss": 0.2204, "step": 2485 }, { "epoch": 0.0725246513798938, "grad_norm": 0.8410179058919071, "learning_rate": 9.952694061035499e-06, "loss": 0.1962, "step": 2486 }, { "epoch": 0.07255382461053737, "grad_norm": 0.9140375553260064, "learning_rate": 9.952629205430631e-06, "loss": 0.189, "step": 2487 }, { "epoch": 0.07258299784118093, "grad_norm": 1.2227293723778352, "learning_rate": 9.95256430560987e-06, "loss": 0.2248, "step": 2488 }, { "epoch": 0.0726121710718245, "grad_norm": 0.997622801955556, "learning_rate": 9.952499361573797e-06, "loss": 0.1888, "step": 2489 }, { "epoch": 0.07264134430246806, "grad_norm": 0.8271726846289789, "learning_rate": 9.952434373322993e-06, "loss": 0.1957, "step": 2490 }, { "epoch": 0.07267051753311161, "grad_norm": 0.8662622401647231, "learning_rate": 9.952369340858037e-06, "loss": 0.1893, "step": 2491 }, { "epoch": 0.07269969076375518, "grad_norm": 1.0754646388722005, "learning_rate": 9.95230426417951e-06, "loss": 0.2122, "step": 2492 }, { "epoch": 0.07272886399439873, "grad_norm": 1.0034699914219778, "learning_rate": 9.952239143287992e-06, "loss": 0.2295, "step": 2493 }, { "epoch": 0.0727580372250423, "grad_norm": 1.037099578814681, "learning_rate": 9.952173978184065e-06, "loss": 0.2452, "step": 2494 }, { "epoch": 0.07278721045568587, "grad_norm": 0.882954788289804, "learning_rate": 9.952108768868311e-06, "loss": 0.1975, "step": 2495 }, { "epoch": 0.07281638368632942, "grad_norm": 0.8946645911590452, "learning_rate": 9.952043515341315e-06, "loss": 0.2023, "step": 2496 }, { "epoch": 0.07284555691697299, "grad_norm": 0.8918220605361443, "learning_rate": 9.951978217603652e-06, "loss": 0.1913, "step": 2497 }, { "epoch": 0.07287473014761654, "grad_norm": 0.8738879545689714, "learning_rate": 9.951912875655913e-06, "loss": 0.175, "step": 2498 }, { "epoch": 0.07290390337826011, "grad_norm": 0.9694918935764463, "learning_rate": 9.951847489498675e-06, "loss": 0.2217, "step": 2499 }, { "epoch": 0.07293307660890366, "grad_norm": 0.9743991490788483, "learning_rate": 9.951782059132528e-06, "loss": 0.1693, "step": 2500 }, { "epoch": 0.07296224983954723, "grad_norm": 0.9085291604921203, "learning_rate": 9.95171658455805e-06, "loss": 0.1954, "step": 2501 }, { "epoch": 0.0729914230701908, "grad_norm": 0.9292300982103625, "learning_rate": 9.951651065775831e-06, "loss": 0.2006, "step": 2502 }, { "epoch": 0.07302059630083435, "grad_norm": 0.9665078351867818, "learning_rate": 9.951585502786452e-06, "loss": 0.195, "step": 2503 }, { "epoch": 0.07304976953147792, "grad_norm": 0.8681058912661461, "learning_rate": 9.9515198955905e-06, "loss": 0.1977, "step": 2504 }, { "epoch": 0.07307894276212147, "grad_norm": 0.8466550917204185, "learning_rate": 9.95145424418856e-06, "loss": 0.1948, "step": 2505 }, { "epoch": 0.07310811599276504, "grad_norm": 1.2150285220508747, "learning_rate": 9.951388548581218e-06, "loss": 0.1869, "step": 2506 }, { "epoch": 0.0731372892234086, "grad_norm": 0.9931115293779862, "learning_rate": 9.951322808769062e-06, "loss": 0.189, "step": 2507 }, { "epoch": 0.07316646245405216, "grad_norm": 0.8626432422597347, "learning_rate": 9.951257024752678e-06, "loss": 0.2124, "step": 2508 }, { "epoch": 0.07319563568469573, "grad_norm": 0.9069895047602374, "learning_rate": 9.951191196532653e-06, "loss": 0.2139, "step": 2509 }, { "epoch": 0.07322480891533928, "grad_norm": 0.991581796264132, "learning_rate": 9.951125324109573e-06, "loss": 0.2091, "step": 2510 }, { "epoch": 0.07325398214598285, "grad_norm": 0.9564016164853112, "learning_rate": 9.951059407484032e-06, "loss": 0.2086, "step": 2511 }, { "epoch": 0.07328315537662641, "grad_norm": 0.824312750052677, "learning_rate": 9.950993446656612e-06, "loss": 0.2035, "step": 2512 }, { "epoch": 0.07331232860726997, "grad_norm": 1.0688203964707534, "learning_rate": 9.950927441627905e-06, "loss": 0.1888, "step": 2513 }, { "epoch": 0.07334150183791353, "grad_norm": 0.9778269925076594, "learning_rate": 9.950861392398499e-06, "loss": 0.2116, "step": 2514 }, { "epoch": 0.07337067506855709, "grad_norm": 1.1299564714170196, "learning_rate": 9.950795298968986e-06, "loss": 0.2042, "step": 2515 }, { "epoch": 0.07339984829920065, "grad_norm": 0.8718630474472272, "learning_rate": 9.950729161339951e-06, "loss": 0.196, "step": 2516 }, { "epoch": 0.07342902152984422, "grad_norm": 0.8128594163537469, "learning_rate": 9.95066297951199e-06, "loss": 0.185, "step": 2517 }, { "epoch": 0.07345819476048777, "grad_norm": 0.7886401178405563, "learning_rate": 9.950596753485693e-06, "loss": 0.1924, "step": 2518 }, { "epoch": 0.07348736799113134, "grad_norm": 0.7225279092141481, "learning_rate": 9.950530483261649e-06, "loss": 0.1983, "step": 2519 }, { "epoch": 0.0735165412217749, "grad_norm": 0.8504606643688458, "learning_rate": 9.95046416884045e-06, "loss": 0.1919, "step": 2520 }, { "epoch": 0.07354571445241846, "grad_norm": 1.0296881981133896, "learning_rate": 9.95039781022269e-06, "loss": 0.2238, "step": 2521 }, { "epoch": 0.07357488768306203, "grad_norm": 0.8873545789881933, "learning_rate": 9.950331407408958e-06, "loss": 0.1834, "step": 2522 }, { "epoch": 0.07360406091370558, "grad_norm": 0.8140051656943608, "learning_rate": 9.95026496039985e-06, "loss": 0.1756, "step": 2523 }, { "epoch": 0.07363323414434915, "grad_norm": 0.9609860973193264, "learning_rate": 9.950198469195959e-06, "loss": 0.2111, "step": 2524 }, { "epoch": 0.0736624073749927, "grad_norm": 0.9451889863966915, "learning_rate": 9.950131933797876e-06, "loss": 0.198, "step": 2525 }, { "epoch": 0.07369158060563627, "grad_norm": 0.9025713220549293, "learning_rate": 9.950065354206198e-06, "loss": 0.1906, "step": 2526 }, { "epoch": 0.07372075383627982, "grad_norm": 1.1171116402543344, "learning_rate": 9.949998730421519e-06, "loss": 0.1878, "step": 2527 }, { "epoch": 0.07374992706692339, "grad_norm": 0.8216241005939505, "learning_rate": 9.949932062444431e-06, "loss": 0.2053, "step": 2528 }, { "epoch": 0.07377910029756696, "grad_norm": 1.2913869392142565, "learning_rate": 9.949865350275532e-06, "loss": 0.1963, "step": 2529 }, { "epoch": 0.07380827352821051, "grad_norm": 0.8574517560317876, "learning_rate": 9.949798593915418e-06, "loss": 0.1922, "step": 2530 }, { "epoch": 0.07383744675885408, "grad_norm": 1.1689085454686512, "learning_rate": 9.949731793364683e-06, "loss": 0.1907, "step": 2531 }, { "epoch": 0.07386661998949763, "grad_norm": 1.2915789591712317, "learning_rate": 9.949664948623923e-06, "loss": 0.1945, "step": 2532 }, { "epoch": 0.0738957932201412, "grad_norm": 0.7767667508933432, "learning_rate": 9.949598059693737e-06, "loss": 0.1708, "step": 2533 }, { "epoch": 0.07392496645078477, "grad_norm": 1.2243554126197655, "learning_rate": 9.94953112657472e-06, "loss": 0.2032, "step": 2534 }, { "epoch": 0.07395413968142832, "grad_norm": 1.1787710785802916, "learning_rate": 9.949464149267473e-06, "loss": 0.19, "step": 2535 }, { "epoch": 0.07398331291207189, "grad_norm": 0.906760896516415, "learning_rate": 9.94939712777259e-06, "loss": 0.1989, "step": 2536 }, { "epoch": 0.07401248614271544, "grad_norm": 1.031484955743404, "learning_rate": 9.949330062090671e-06, "loss": 0.1758, "step": 2537 }, { "epoch": 0.074041659373359, "grad_norm": 0.9411214101633906, "learning_rate": 9.949262952222316e-06, "loss": 0.1878, "step": 2538 }, { "epoch": 0.07407083260400257, "grad_norm": 0.9884594827492988, "learning_rate": 9.94919579816812e-06, "loss": 0.1961, "step": 2539 }, { "epoch": 0.07410000583464613, "grad_norm": 1.0737036981799195, "learning_rate": 9.949128599928687e-06, "loss": 0.199, "step": 2540 }, { "epoch": 0.0741291790652897, "grad_norm": 0.9696232110495232, "learning_rate": 9.949061357504617e-06, "loss": 0.1732, "step": 2541 }, { "epoch": 0.07415835229593325, "grad_norm": 1.1846628887886361, "learning_rate": 9.948994070896508e-06, "loss": 0.1945, "step": 2542 }, { "epoch": 0.07418752552657681, "grad_norm": 1.4202641562292888, "learning_rate": 9.948926740104958e-06, "loss": 0.204, "step": 2543 }, { "epoch": 0.07421669875722038, "grad_norm": 0.933920277116389, "learning_rate": 9.948859365130574e-06, "loss": 0.1673, "step": 2544 }, { "epoch": 0.07424587198786393, "grad_norm": 0.99870713723646, "learning_rate": 9.948791945973955e-06, "loss": 0.2584, "step": 2545 }, { "epoch": 0.0742750452185075, "grad_norm": 0.944141519756461, "learning_rate": 9.948724482635703e-06, "loss": 0.1935, "step": 2546 }, { "epoch": 0.07430421844915105, "grad_norm": 0.9033999505149384, "learning_rate": 9.94865697511642e-06, "loss": 0.2236, "step": 2547 }, { "epoch": 0.07433339167979462, "grad_norm": 0.8211666350624257, "learning_rate": 9.94858942341671e-06, "loss": 0.1818, "step": 2548 }, { "epoch": 0.07436256491043818, "grad_norm": 0.9274903197590763, "learning_rate": 9.948521827537172e-06, "loss": 0.1812, "step": 2549 }, { "epoch": 0.07439173814108174, "grad_norm": 0.9605098584721782, "learning_rate": 9.948454187478414e-06, "loss": 0.1965, "step": 2550 }, { "epoch": 0.07442091137172531, "grad_norm": 0.9480236670737756, "learning_rate": 9.948386503241039e-06, "loss": 0.1996, "step": 2551 }, { "epoch": 0.07445008460236886, "grad_norm": 0.8485166444346322, "learning_rate": 9.94831877482565e-06, "loss": 0.1988, "step": 2552 }, { "epoch": 0.07447925783301243, "grad_norm": 0.825194544691037, "learning_rate": 9.948251002232852e-06, "loss": 0.1853, "step": 2553 }, { "epoch": 0.07450843106365598, "grad_norm": 0.9736642606664729, "learning_rate": 9.948183185463252e-06, "loss": 0.213, "step": 2554 }, { "epoch": 0.07453760429429955, "grad_norm": 0.8261115363986782, "learning_rate": 9.948115324517451e-06, "loss": 0.1845, "step": 2555 }, { "epoch": 0.07456677752494312, "grad_norm": 1.0089780084904925, "learning_rate": 9.948047419396059e-06, "loss": 0.1782, "step": 2556 }, { "epoch": 0.07459595075558667, "grad_norm": 0.7853899303405794, "learning_rate": 9.947979470099682e-06, "loss": 0.1911, "step": 2557 }, { "epoch": 0.07462512398623024, "grad_norm": 1.0173764561506085, "learning_rate": 9.947911476628923e-06, "loss": 0.2073, "step": 2558 }, { "epoch": 0.07465429721687379, "grad_norm": 0.8505969321849713, "learning_rate": 9.947843438984392e-06, "loss": 0.2028, "step": 2559 }, { "epoch": 0.07468347044751736, "grad_norm": 0.794195364019822, "learning_rate": 9.947775357166699e-06, "loss": 0.1719, "step": 2560 }, { "epoch": 0.07471264367816093, "grad_norm": 0.835272658713182, "learning_rate": 9.947707231176444e-06, "loss": 0.1784, "step": 2561 }, { "epoch": 0.07474181690880448, "grad_norm": 0.9894806609791134, "learning_rate": 9.947639061014242e-06, "loss": 0.1715, "step": 2562 }, { "epoch": 0.07477099013944805, "grad_norm": 0.9869328334441443, "learning_rate": 9.9475708466807e-06, "loss": 0.1863, "step": 2563 }, { "epoch": 0.0748001633700916, "grad_norm": 1.159662679891997, "learning_rate": 9.947502588176427e-06, "loss": 0.2023, "step": 2564 }, { "epoch": 0.07482933660073517, "grad_norm": 0.9242264876067878, "learning_rate": 9.947434285502032e-06, "loss": 0.1819, "step": 2565 }, { "epoch": 0.07485850983137873, "grad_norm": 1.026543496068096, "learning_rate": 9.947365938658124e-06, "loss": 0.1965, "step": 2566 }, { "epoch": 0.07488768306202229, "grad_norm": 0.9841963172592002, "learning_rate": 9.947297547645314e-06, "loss": 0.1957, "step": 2567 }, { "epoch": 0.07491685629266585, "grad_norm": 1.2404952203932327, "learning_rate": 9.947229112464213e-06, "loss": 0.1963, "step": 2568 }, { "epoch": 0.07494602952330941, "grad_norm": 0.9740693352878483, "learning_rate": 9.947160633115431e-06, "loss": 0.1969, "step": 2569 }, { "epoch": 0.07497520275395297, "grad_norm": 0.870458868803175, "learning_rate": 9.94709210959958e-06, "loss": 0.1857, "step": 2570 }, { "epoch": 0.07500437598459653, "grad_norm": 0.9470708798435789, "learning_rate": 9.947023541917271e-06, "loss": 0.1886, "step": 2571 }, { "epoch": 0.0750335492152401, "grad_norm": 1.1836894201983172, "learning_rate": 9.946954930069117e-06, "loss": 0.2086, "step": 2572 }, { "epoch": 0.07506272244588366, "grad_norm": 0.9832880021505518, "learning_rate": 9.946886274055731e-06, "loss": 0.202, "step": 2573 }, { "epoch": 0.07509189567652721, "grad_norm": 0.9515418412970081, "learning_rate": 9.946817573877725e-06, "loss": 0.208, "step": 2574 }, { "epoch": 0.07512106890717078, "grad_norm": 0.9548031685703873, "learning_rate": 9.946748829535714e-06, "loss": 0.1928, "step": 2575 }, { "epoch": 0.07515024213781434, "grad_norm": 0.8728612685822629, "learning_rate": 9.946680041030308e-06, "loss": 0.2178, "step": 2576 }, { "epoch": 0.0751794153684579, "grad_norm": 0.9042425278014952, "learning_rate": 9.946611208362123e-06, "loss": 0.2207, "step": 2577 }, { "epoch": 0.07520858859910147, "grad_norm": 0.8710367260702849, "learning_rate": 9.946542331531777e-06, "loss": 0.1995, "step": 2578 }, { "epoch": 0.07523776182974502, "grad_norm": 0.8886974016626152, "learning_rate": 9.946473410539878e-06, "loss": 0.2174, "step": 2579 }, { "epoch": 0.07526693506038859, "grad_norm": 0.8335754120537804, "learning_rate": 9.946404445387048e-06, "loss": 0.2076, "step": 2580 }, { "epoch": 0.07529610829103214, "grad_norm": 0.8780117152861452, "learning_rate": 9.946335436073899e-06, "loss": 0.1905, "step": 2581 }, { "epoch": 0.07532528152167571, "grad_norm": 0.9089534703220984, "learning_rate": 9.946266382601049e-06, "loss": 0.1763, "step": 2582 }, { "epoch": 0.07535445475231928, "grad_norm": 0.7917110926617236, "learning_rate": 9.946197284969112e-06, "loss": 0.2208, "step": 2583 }, { "epoch": 0.07538362798296283, "grad_norm": 1.1303322563806806, "learning_rate": 9.946128143178708e-06, "loss": 0.2117, "step": 2584 }, { "epoch": 0.0754128012136064, "grad_norm": 1.0074994278245815, "learning_rate": 9.946058957230451e-06, "loss": 0.1866, "step": 2585 }, { "epoch": 0.07544197444424995, "grad_norm": 0.918129918274096, "learning_rate": 9.945989727124963e-06, "loss": 0.204, "step": 2586 }, { "epoch": 0.07547114767489352, "grad_norm": 0.7543863200792966, "learning_rate": 9.945920452862856e-06, "loss": 0.1771, "step": 2587 }, { "epoch": 0.07550032090553709, "grad_norm": 1.3205281719710635, "learning_rate": 9.945851134444754e-06, "loss": 0.2061, "step": 2588 }, { "epoch": 0.07552949413618064, "grad_norm": 1.0235724508465391, "learning_rate": 9.945781771871274e-06, "loss": 0.1994, "step": 2589 }, { "epoch": 0.0755586673668242, "grad_norm": 0.920875422648793, "learning_rate": 9.945712365143034e-06, "loss": 0.1996, "step": 2590 }, { "epoch": 0.07558784059746776, "grad_norm": 0.9255287465514315, "learning_rate": 9.945642914260655e-06, "loss": 0.2, "step": 2591 }, { "epoch": 0.07561701382811133, "grad_norm": 1.0882154267587745, "learning_rate": 9.945573419224757e-06, "loss": 0.2171, "step": 2592 }, { "epoch": 0.07564618705875488, "grad_norm": 1.111913818943444, "learning_rate": 9.945503880035958e-06, "loss": 0.1969, "step": 2593 }, { "epoch": 0.07567536028939845, "grad_norm": 0.8757571257745991, "learning_rate": 9.945434296694883e-06, "loss": 0.2094, "step": 2594 }, { "epoch": 0.07570453352004201, "grad_norm": 0.8715141905293745, "learning_rate": 9.94536466920215e-06, "loss": 0.1984, "step": 2595 }, { "epoch": 0.07573370675068557, "grad_norm": 1.2895697791871283, "learning_rate": 9.945294997558384e-06, "loss": 0.2181, "step": 2596 }, { "epoch": 0.07576287998132913, "grad_norm": 1.0281992159004434, "learning_rate": 9.945225281764203e-06, "loss": 0.199, "step": 2597 }, { "epoch": 0.07579205321197269, "grad_norm": 1.0161962229050718, "learning_rate": 9.945155521820232e-06, "loss": 0.1731, "step": 2598 }, { "epoch": 0.07582122644261625, "grad_norm": 0.9895792606137934, "learning_rate": 9.945085717727093e-06, "loss": 0.1997, "step": 2599 }, { "epoch": 0.07585039967325982, "grad_norm": 0.8103461045109309, "learning_rate": 9.945015869485409e-06, "loss": 0.1735, "step": 2600 }, { "epoch": 0.07587957290390338, "grad_norm": 0.7954580081561768, "learning_rate": 9.944945977095803e-06, "loss": 0.1902, "step": 2601 }, { "epoch": 0.07590874613454694, "grad_norm": 0.8832405590401877, "learning_rate": 9.9448760405589e-06, "loss": 0.1729, "step": 2602 }, { "epoch": 0.0759379193651905, "grad_norm": 0.9456933396832424, "learning_rate": 9.944806059875326e-06, "loss": 0.2013, "step": 2603 }, { "epoch": 0.07596709259583406, "grad_norm": 1.0812812205345452, "learning_rate": 9.944736035045702e-06, "loss": 0.2016, "step": 2604 }, { "epoch": 0.07599626582647763, "grad_norm": 0.8352054121513081, "learning_rate": 9.944665966070654e-06, "loss": 0.168, "step": 2605 }, { "epoch": 0.07602543905712118, "grad_norm": 0.9275361524424524, "learning_rate": 9.944595852950812e-06, "loss": 0.1932, "step": 2606 }, { "epoch": 0.07605461228776475, "grad_norm": 0.9762002102562343, "learning_rate": 9.944525695686795e-06, "loss": 0.1877, "step": 2607 }, { "epoch": 0.0760837855184083, "grad_norm": 0.8302742683084885, "learning_rate": 9.944455494279235e-06, "loss": 0.1964, "step": 2608 }, { "epoch": 0.07611295874905187, "grad_norm": 0.9651412018240907, "learning_rate": 9.944385248728757e-06, "loss": 0.1935, "step": 2609 }, { "epoch": 0.07614213197969544, "grad_norm": 1.0580469462137965, "learning_rate": 9.944314959035987e-06, "loss": 0.1954, "step": 2610 }, { "epoch": 0.07617130521033899, "grad_norm": 0.8436060684882135, "learning_rate": 9.944244625201553e-06, "loss": 0.1786, "step": 2611 }, { "epoch": 0.07620047844098256, "grad_norm": 1.1668726972890329, "learning_rate": 9.944174247226084e-06, "loss": 0.201, "step": 2612 }, { "epoch": 0.07622965167162611, "grad_norm": 0.9050530434600664, "learning_rate": 9.944103825110207e-06, "loss": 0.2082, "step": 2613 }, { "epoch": 0.07625882490226968, "grad_norm": 0.9657438371151073, "learning_rate": 9.944033358854553e-06, "loss": 0.1855, "step": 2614 }, { "epoch": 0.07628799813291325, "grad_norm": 0.8982666967367076, "learning_rate": 9.943962848459747e-06, "loss": 0.1957, "step": 2615 }, { "epoch": 0.0763171713635568, "grad_norm": 0.8917214421128401, "learning_rate": 9.943892293926422e-06, "loss": 0.2097, "step": 2616 }, { "epoch": 0.07634634459420037, "grad_norm": 1.0212730113490176, "learning_rate": 9.943821695255208e-06, "loss": 0.1911, "step": 2617 }, { "epoch": 0.07637551782484392, "grad_norm": 1.2312845391322114, "learning_rate": 9.943751052446732e-06, "loss": 0.1713, "step": 2618 }, { "epoch": 0.07640469105548749, "grad_norm": 0.7339266003982186, "learning_rate": 9.943680365501628e-06, "loss": 0.1962, "step": 2619 }, { "epoch": 0.07643386428613104, "grad_norm": 0.769726512109223, "learning_rate": 9.943609634420526e-06, "loss": 0.1659, "step": 2620 }, { "epoch": 0.0764630375167746, "grad_norm": 1.1079342427199865, "learning_rate": 9.943538859204056e-06, "loss": 0.2021, "step": 2621 }, { "epoch": 0.07649221074741817, "grad_norm": 0.9860008360785368, "learning_rate": 9.943468039852852e-06, "loss": 0.1891, "step": 2622 }, { "epoch": 0.07652138397806173, "grad_norm": 0.7521624716160008, "learning_rate": 9.943397176367546e-06, "loss": 0.1832, "step": 2623 }, { "epoch": 0.0765505572087053, "grad_norm": 1.0201516196014724, "learning_rate": 9.94332626874877e-06, "loss": 0.1984, "step": 2624 }, { "epoch": 0.07657973043934885, "grad_norm": 1.221830997682814, "learning_rate": 9.943255316997156e-06, "loss": 0.21, "step": 2625 }, { "epoch": 0.07660890366999241, "grad_norm": 0.8614480246302127, "learning_rate": 9.943184321113339e-06, "loss": 0.2104, "step": 2626 }, { "epoch": 0.07663807690063598, "grad_norm": 0.8262000373867923, "learning_rate": 9.943113281097953e-06, "loss": 0.1722, "step": 2627 }, { "epoch": 0.07666725013127954, "grad_norm": 0.9084643356559261, "learning_rate": 9.943042196951631e-06, "loss": 0.1972, "step": 2628 }, { "epoch": 0.0766964233619231, "grad_norm": 0.8752944963258539, "learning_rate": 9.942971068675009e-06, "loss": 0.1875, "step": 2629 }, { "epoch": 0.07672559659256666, "grad_norm": 0.795338642633206, "learning_rate": 9.942899896268721e-06, "loss": 0.1535, "step": 2630 }, { "epoch": 0.07675476982321022, "grad_norm": 0.9344221503966518, "learning_rate": 9.942828679733402e-06, "loss": 0.2219, "step": 2631 }, { "epoch": 0.07678394305385379, "grad_norm": 1.4776393321268981, "learning_rate": 9.942757419069688e-06, "loss": 0.1929, "step": 2632 }, { "epoch": 0.07681311628449734, "grad_norm": 1.0484382039943305, "learning_rate": 9.942686114278218e-06, "loss": 0.1843, "step": 2633 }, { "epoch": 0.07684228951514091, "grad_norm": 0.7543481335208506, "learning_rate": 9.942614765359625e-06, "loss": 0.1758, "step": 2634 }, { "epoch": 0.07687146274578446, "grad_norm": 1.1031882754003925, "learning_rate": 9.942543372314548e-06, "loss": 0.1963, "step": 2635 }, { "epoch": 0.07690063597642803, "grad_norm": 1.0430234254755133, "learning_rate": 9.942471935143623e-06, "loss": 0.2032, "step": 2636 }, { "epoch": 0.0769298092070716, "grad_norm": 1.0339827850226333, "learning_rate": 9.942400453847487e-06, "loss": 0.1958, "step": 2637 }, { "epoch": 0.07695898243771515, "grad_norm": 1.1071622241519605, "learning_rate": 9.94232892842678e-06, "loss": 0.2067, "step": 2638 }, { "epoch": 0.07698815566835872, "grad_norm": 1.0958838219887979, "learning_rate": 9.942257358882144e-06, "loss": 0.1747, "step": 2639 }, { "epoch": 0.07701732889900227, "grad_norm": 1.0717453136637745, "learning_rate": 9.94218574521421e-06, "loss": 0.227, "step": 2640 }, { "epoch": 0.07704650212964584, "grad_norm": 0.9185332528220834, "learning_rate": 9.942114087423622e-06, "loss": 0.2231, "step": 2641 }, { "epoch": 0.07707567536028939, "grad_norm": 0.9051003261109557, "learning_rate": 9.942042385511022e-06, "loss": 0.1943, "step": 2642 }, { "epoch": 0.07710484859093296, "grad_norm": 1.3066658504609052, "learning_rate": 9.941970639477044e-06, "loss": 0.1824, "step": 2643 }, { "epoch": 0.07713402182157653, "grad_norm": 0.9224265104180501, "learning_rate": 9.941898849322333e-06, "loss": 0.2068, "step": 2644 }, { "epoch": 0.07716319505222008, "grad_norm": 0.8542627557072558, "learning_rate": 9.94182701504753e-06, "loss": 0.1943, "step": 2645 }, { "epoch": 0.07719236828286365, "grad_norm": 1.1004733462542646, "learning_rate": 9.941755136653273e-06, "loss": 0.2051, "step": 2646 }, { "epoch": 0.0772215415135072, "grad_norm": 0.964762488155317, "learning_rate": 9.941683214140207e-06, "loss": 0.2048, "step": 2647 }, { "epoch": 0.07725071474415077, "grad_norm": 0.8247333942088099, "learning_rate": 9.941611247508973e-06, "loss": 0.1727, "step": 2648 }, { "epoch": 0.07727988797479433, "grad_norm": 1.0219784976162776, "learning_rate": 9.941539236760213e-06, "loss": 0.2032, "step": 2649 }, { "epoch": 0.07730906120543789, "grad_norm": 1.1353265879464371, "learning_rate": 9.94146718189457e-06, "loss": 0.1955, "step": 2650 }, { "epoch": 0.07733823443608145, "grad_norm": 0.7962805265253471, "learning_rate": 9.94139508291269e-06, "loss": 0.1886, "step": 2651 }, { "epoch": 0.07736740766672501, "grad_norm": 0.7955698233236265, "learning_rate": 9.941322939815213e-06, "loss": 0.1976, "step": 2652 }, { "epoch": 0.07739658089736857, "grad_norm": 1.0542124619816051, "learning_rate": 9.941250752602783e-06, "loss": 0.1962, "step": 2653 }, { "epoch": 0.07742575412801214, "grad_norm": 0.8402126269339003, "learning_rate": 9.941178521276046e-06, "loss": 0.1893, "step": 2654 }, { "epoch": 0.0774549273586557, "grad_norm": 0.8447199467017881, "learning_rate": 9.941106245835648e-06, "loss": 0.1937, "step": 2655 }, { "epoch": 0.07748410058929926, "grad_norm": 0.912593285122189, "learning_rate": 9.941033926282233e-06, "loss": 0.1825, "step": 2656 }, { "epoch": 0.07751327381994282, "grad_norm": 0.9718797109379124, "learning_rate": 9.940961562616446e-06, "loss": 0.1984, "step": 2657 }, { "epoch": 0.07754244705058638, "grad_norm": 0.9868174289875705, "learning_rate": 9.940889154838934e-06, "loss": 0.1878, "step": 2658 }, { "epoch": 0.07757162028122995, "grad_norm": 1.0507159006122682, "learning_rate": 9.940816702950343e-06, "loss": 0.1867, "step": 2659 }, { "epoch": 0.0776007935118735, "grad_norm": 1.0525048615759034, "learning_rate": 9.940744206951318e-06, "loss": 0.1996, "step": 2660 }, { "epoch": 0.07762996674251707, "grad_norm": 1.0223398335816087, "learning_rate": 9.940671666842512e-06, "loss": 0.1994, "step": 2661 }, { "epoch": 0.07765913997316062, "grad_norm": 0.9068473842074293, "learning_rate": 9.940599082624566e-06, "loss": 0.173, "step": 2662 }, { "epoch": 0.07768831320380419, "grad_norm": 0.899211220038558, "learning_rate": 9.940526454298132e-06, "loss": 0.1894, "step": 2663 }, { "epoch": 0.07771748643444774, "grad_norm": 0.9777365591898555, "learning_rate": 9.940453781863857e-06, "loss": 0.1943, "step": 2664 }, { "epoch": 0.07774665966509131, "grad_norm": 0.7923847513019054, "learning_rate": 9.940381065322388e-06, "loss": 0.2013, "step": 2665 }, { "epoch": 0.07777583289573488, "grad_norm": 0.8198327533951112, "learning_rate": 9.94030830467438e-06, "loss": 0.1779, "step": 2666 }, { "epoch": 0.07780500612637843, "grad_norm": 1.0282763387788505, "learning_rate": 9.940235499920476e-06, "loss": 0.1892, "step": 2667 }, { "epoch": 0.077834179357022, "grad_norm": 0.955231002477153, "learning_rate": 9.940162651061329e-06, "loss": 0.1873, "step": 2668 }, { "epoch": 0.07786335258766555, "grad_norm": 0.9416850910784917, "learning_rate": 9.940089758097591e-06, "loss": 0.1858, "step": 2669 }, { "epoch": 0.07789252581830912, "grad_norm": 0.9282490361042554, "learning_rate": 9.94001682102991e-06, "loss": 0.204, "step": 2670 }, { "epoch": 0.07792169904895269, "grad_norm": 0.9752447389088829, "learning_rate": 9.939943839858936e-06, "loss": 0.2057, "step": 2671 }, { "epoch": 0.07795087227959624, "grad_norm": 0.9947648135003092, "learning_rate": 9.939870814585327e-06, "loss": 0.1849, "step": 2672 }, { "epoch": 0.0779800455102398, "grad_norm": 0.968443584009168, "learning_rate": 9.939797745209727e-06, "loss": 0.2005, "step": 2673 }, { "epoch": 0.07800921874088336, "grad_norm": 0.9606764833604585, "learning_rate": 9.939724631732793e-06, "loss": 0.2075, "step": 2674 }, { "epoch": 0.07803839197152693, "grad_norm": 1.0200740151040866, "learning_rate": 9.939651474155176e-06, "loss": 0.2015, "step": 2675 }, { "epoch": 0.0780675652021705, "grad_norm": 1.0026526550299906, "learning_rate": 9.93957827247753e-06, "loss": 0.178, "step": 2676 }, { "epoch": 0.07809673843281405, "grad_norm": 0.9075067466521576, "learning_rate": 9.93950502670051e-06, "loss": 0.1905, "step": 2677 }, { "epoch": 0.07812591166345761, "grad_norm": 1.0334341557588698, "learning_rate": 9.939431736824767e-06, "loss": 0.2049, "step": 2678 }, { "epoch": 0.07815508489410117, "grad_norm": 0.9574596778896336, "learning_rate": 9.939358402850955e-06, "loss": 0.1964, "step": 2679 }, { "epoch": 0.07818425812474473, "grad_norm": 1.0897485305686212, "learning_rate": 9.939285024779734e-06, "loss": 0.2029, "step": 2680 }, { "epoch": 0.0782134313553883, "grad_norm": 0.9828221224677702, "learning_rate": 9.939211602611754e-06, "loss": 0.1822, "step": 2681 }, { "epoch": 0.07824260458603186, "grad_norm": 1.1855913890247984, "learning_rate": 9.93913813634767e-06, "loss": 0.1943, "step": 2682 }, { "epoch": 0.07827177781667542, "grad_norm": 1.2297052617062867, "learning_rate": 9.939064625988142e-06, "loss": 0.2273, "step": 2683 }, { "epoch": 0.07830095104731898, "grad_norm": 0.8934870583091481, "learning_rate": 9.938991071533823e-06, "loss": 0.1936, "step": 2684 }, { "epoch": 0.07833012427796254, "grad_norm": 1.244846335122825, "learning_rate": 9.938917472985372e-06, "loss": 0.1949, "step": 2685 }, { "epoch": 0.0783592975086061, "grad_norm": 1.1793669347515032, "learning_rate": 9.938843830343443e-06, "loss": 0.1697, "step": 2686 }, { "epoch": 0.07838847073924966, "grad_norm": 0.8600819325514898, "learning_rate": 9.938770143608695e-06, "loss": 0.211, "step": 2687 }, { "epoch": 0.07841764396989323, "grad_norm": 0.904292666952364, "learning_rate": 9.938696412781787e-06, "loss": 0.2164, "step": 2688 }, { "epoch": 0.07844681720053678, "grad_norm": 1.0879181073662818, "learning_rate": 9.938622637863377e-06, "loss": 0.2169, "step": 2689 }, { "epoch": 0.07847599043118035, "grad_norm": 0.8801298156236977, "learning_rate": 9.938548818854124e-06, "loss": 0.1878, "step": 2690 }, { "epoch": 0.0785051636618239, "grad_norm": 0.8137726067530414, "learning_rate": 9.938474955754685e-06, "loss": 0.1956, "step": 2691 }, { "epoch": 0.07853433689246747, "grad_norm": 1.0321234197281965, "learning_rate": 9.93840104856572e-06, "loss": 0.1922, "step": 2692 }, { "epoch": 0.07856351012311104, "grad_norm": 0.9366913737022776, "learning_rate": 9.93832709728789e-06, "loss": 0.1887, "step": 2693 }, { "epoch": 0.07859268335375459, "grad_norm": 0.7059196165237965, "learning_rate": 9.938253101921852e-06, "loss": 0.1824, "step": 2694 }, { "epoch": 0.07862185658439816, "grad_norm": 1.0892404704290901, "learning_rate": 9.938179062468272e-06, "loss": 0.1808, "step": 2695 }, { "epoch": 0.07865102981504171, "grad_norm": 1.0510282012379226, "learning_rate": 9.938104978927807e-06, "loss": 0.1961, "step": 2696 }, { "epoch": 0.07868020304568528, "grad_norm": 0.9814648931511375, "learning_rate": 9.938030851301122e-06, "loss": 0.1865, "step": 2697 }, { "epoch": 0.07870937627632885, "grad_norm": 0.9219050370141942, "learning_rate": 9.937956679588874e-06, "loss": 0.1965, "step": 2698 }, { "epoch": 0.0787385495069724, "grad_norm": 1.2533633717403911, "learning_rate": 9.937882463791727e-06, "loss": 0.1815, "step": 2699 }, { "epoch": 0.07876772273761597, "grad_norm": 1.1228788934518406, "learning_rate": 9.937808203910345e-06, "loss": 0.2045, "step": 2700 }, { "epoch": 0.07879689596825952, "grad_norm": 0.9634574103341637, "learning_rate": 9.93773389994539e-06, "loss": 0.1988, "step": 2701 }, { "epoch": 0.07882606919890309, "grad_norm": 1.021651442566582, "learning_rate": 9.937659551897526e-06, "loss": 0.1836, "step": 2702 }, { "epoch": 0.07885524242954665, "grad_norm": 1.0105141364475234, "learning_rate": 9.937585159767416e-06, "loss": 0.1978, "step": 2703 }, { "epoch": 0.07888441566019021, "grad_norm": 1.021767068751826, "learning_rate": 9.937510723555723e-06, "loss": 0.1799, "step": 2704 }, { "epoch": 0.07891358889083377, "grad_norm": 0.9066280821815615, "learning_rate": 9.937436243263115e-06, "loss": 0.1985, "step": 2705 }, { "epoch": 0.07894276212147733, "grad_norm": 0.841122543552312, "learning_rate": 9.937361718890255e-06, "loss": 0.2056, "step": 2706 }, { "epoch": 0.0789719353521209, "grad_norm": 0.9077763985090129, "learning_rate": 9.937287150437807e-06, "loss": 0.2134, "step": 2707 }, { "epoch": 0.07900110858276446, "grad_norm": 0.9746503628502282, "learning_rate": 9.937212537906438e-06, "loss": 0.2175, "step": 2708 }, { "epoch": 0.07903028181340802, "grad_norm": 1.0464230380507649, "learning_rate": 9.937137881296814e-06, "loss": 0.1983, "step": 2709 }, { "epoch": 0.07905945504405158, "grad_norm": 1.200070722162346, "learning_rate": 9.937063180609602e-06, "loss": 0.2105, "step": 2710 }, { "epoch": 0.07908862827469514, "grad_norm": 0.8591917355902438, "learning_rate": 9.936988435845469e-06, "loss": 0.1901, "step": 2711 }, { "epoch": 0.0791178015053387, "grad_norm": 0.8876176216163101, "learning_rate": 9.93691364700508e-06, "loss": 0.1849, "step": 2712 }, { "epoch": 0.07914697473598226, "grad_norm": 1.1549833181924853, "learning_rate": 9.936838814089107e-06, "loss": 0.1925, "step": 2713 }, { "epoch": 0.07917614796662582, "grad_norm": 1.0203950851234787, "learning_rate": 9.936763937098213e-06, "loss": 0.2012, "step": 2714 }, { "epoch": 0.07920532119726939, "grad_norm": 1.0481209030947731, "learning_rate": 9.93668901603307e-06, "loss": 0.2047, "step": 2715 }, { "epoch": 0.07923449442791294, "grad_norm": 0.9819989793255824, "learning_rate": 9.936614050894346e-06, "loss": 0.1842, "step": 2716 }, { "epoch": 0.07926366765855651, "grad_norm": 0.9479496180338147, "learning_rate": 9.93653904168271e-06, "loss": 0.1967, "step": 2717 }, { "epoch": 0.07929284088920006, "grad_norm": 1.106088483378671, "learning_rate": 9.936463988398834e-06, "loss": 0.1801, "step": 2718 }, { "epoch": 0.07932201411984363, "grad_norm": 0.8381484977399565, "learning_rate": 9.936388891043384e-06, "loss": 0.2019, "step": 2719 }, { "epoch": 0.0793511873504872, "grad_norm": 0.9140491887527451, "learning_rate": 9.936313749617032e-06, "loss": 0.1896, "step": 2720 }, { "epoch": 0.07938036058113075, "grad_norm": 0.9834658135188353, "learning_rate": 9.93623856412045e-06, "loss": 0.1963, "step": 2721 }, { "epoch": 0.07940953381177432, "grad_norm": 0.870219693261259, "learning_rate": 9.93616333455431e-06, "loss": 0.2083, "step": 2722 }, { "epoch": 0.07943870704241787, "grad_norm": 0.8545790111060516, "learning_rate": 9.93608806091928e-06, "loss": 0.2006, "step": 2723 }, { "epoch": 0.07946788027306144, "grad_norm": 0.8398428387455098, "learning_rate": 9.936012743216034e-06, "loss": 0.1972, "step": 2724 }, { "epoch": 0.079497053503705, "grad_norm": 0.9003407969640789, "learning_rate": 9.935937381445247e-06, "loss": 0.1978, "step": 2725 }, { "epoch": 0.07952622673434856, "grad_norm": 0.918600237288775, "learning_rate": 9.935861975607586e-06, "loss": 0.2031, "step": 2726 }, { "epoch": 0.07955539996499213, "grad_norm": 1.001836761949255, "learning_rate": 9.93578652570373e-06, "loss": 0.1818, "step": 2727 }, { "epoch": 0.07958457319563568, "grad_norm": 0.7416507055637092, "learning_rate": 9.935711031734349e-06, "loss": 0.19, "step": 2728 }, { "epoch": 0.07961374642627925, "grad_norm": 1.1444359194944251, "learning_rate": 9.935635493700117e-06, "loss": 0.2049, "step": 2729 }, { "epoch": 0.07964291965692281, "grad_norm": 0.901635866880554, "learning_rate": 9.935559911601713e-06, "loss": 0.1933, "step": 2730 }, { "epoch": 0.07967209288756637, "grad_norm": 0.9359464537723414, "learning_rate": 9.935484285439806e-06, "loss": 0.2042, "step": 2731 }, { "epoch": 0.07970126611820993, "grad_norm": 0.8740846860070967, "learning_rate": 9.935408615215075e-06, "loss": 0.1722, "step": 2732 }, { "epoch": 0.07973043934885349, "grad_norm": 0.8249563066594702, "learning_rate": 9.935332900928192e-06, "loss": 0.1991, "step": 2733 }, { "epoch": 0.07975961257949706, "grad_norm": 1.0008799259776313, "learning_rate": 9.935257142579835e-06, "loss": 0.1953, "step": 2734 }, { "epoch": 0.07978878581014061, "grad_norm": 0.9306772494535623, "learning_rate": 9.93518134017068e-06, "loss": 0.2069, "step": 2735 }, { "epoch": 0.07981795904078418, "grad_norm": 0.935440707262837, "learning_rate": 9.935105493701406e-06, "loss": 0.1979, "step": 2736 }, { "epoch": 0.07984713227142774, "grad_norm": 0.8310594003377255, "learning_rate": 9.935029603172689e-06, "loss": 0.1726, "step": 2737 }, { "epoch": 0.0798763055020713, "grad_norm": 0.8204925660785971, "learning_rate": 9.934953668585205e-06, "loss": 0.1868, "step": 2738 }, { "epoch": 0.07990547873271486, "grad_norm": 1.1133498944659208, "learning_rate": 9.93487768993963e-06, "loss": 0.1967, "step": 2739 }, { "epoch": 0.07993465196335842, "grad_norm": 0.9255066100765028, "learning_rate": 9.93480166723665e-06, "loss": 0.2357, "step": 2740 }, { "epoch": 0.07996382519400198, "grad_norm": 0.9069965715365438, "learning_rate": 9.934725600476935e-06, "loss": 0.1891, "step": 2741 }, { "epoch": 0.07999299842464555, "grad_norm": 1.0889468089974361, "learning_rate": 9.934649489661168e-06, "loss": 0.2092, "step": 2742 }, { "epoch": 0.0800221716552891, "grad_norm": 1.0411829070438932, "learning_rate": 9.934573334790029e-06, "loss": 0.1998, "step": 2743 }, { "epoch": 0.08005134488593267, "grad_norm": 0.994705550420692, "learning_rate": 9.934497135864198e-06, "loss": 0.1823, "step": 2744 }, { "epoch": 0.08008051811657622, "grad_norm": 0.9356101296702157, "learning_rate": 9.934420892884352e-06, "loss": 0.182, "step": 2745 }, { "epoch": 0.08010969134721979, "grad_norm": 0.9463574646080707, "learning_rate": 9.934344605851179e-06, "loss": 0.1776, "step": 2746 }, { "epoch": 0.08013886457786336, "grad_norm": 1.064276672676239, "learning_rate": 9.93426827476535e-06, "loss": 0.1967, "step": 2747 }, { "epoch": 0.08016803780850691, "grad_norm": 0.9202510035566694, "learning_rate": 9.934191899627555e-06, "loss": 0.1857, "step": 2748 }, { "epoch": 0.08019721103915048, "grad_norm": 0.8217108165975183, "learning_rate": 9.934115480438471e-06, "loss": 0.166, "step": 2749 }, { "epoch": 0.08022638426979403, "grad_norm": 0.8994533678584419, "learning_rate": 9.934039017198784e-06, "loss": 0.1742, "step": 2750 }, { "epoch": 0.0802555575004376, "grad_norm": 0.8821589782245868, "learning_rate": 9.933962509909173e-06, "loss": 0.2001, "step": 2751 }, { "epoch": 0.08028473073108117, "grad_norm": 0.9057562916177336, "learning_rate": 9.933885958570323e-06, "loss": 0.2066, "step": 2752 }, { "epoch": 0.08031390396172472, "grad_norm": 1.01509641287476, "learning_rate": 9.933809363182916e-06, "loss": 0.1882, "step": 2753 }, { "epoch": 0.08034307719236829, "grad_norm": 0.9693622053620701, "learning_rate": 9.933732723747638e-06, "loss": 0.1919, "step": 2754 }, { "epoch": 0.08037225042301184, "grad_norm": 0.9054770900695217, "learning_rate": 9.933656040265172e-06, "loss": 0.1758, "step": 2755 }, { "epoch": 0.08040142365365541, "grad_norm": 1.1345735848261322, "learning_rate": 9.9335793127362e-06, "loss": 0.2141, "step": 2756 }, { "epoch": 0.08043059688429896, "grad_norm": 0.9157774978521533, "learning_rate": 9.933502541161413e-06, "loss": 0.181, "step": 2757 }, { "epoch": 0.08045977011494253, "grad_norm": 0.8205035273923003, "learning_rate": 9.933425725541493e-06, "loss": 0.198, "step": 2758 }, { "epoch": 0.0804889433455861, "grad_norm": 1.1078514936409194, "learning_rate": 9.933348865877125e-06, "loss": 0.1876, "step": 2759 }, { "epoch": 0.08051811657622965, "grad_norm": 0.9466618575867122, "learning_rate": 9.933271962168993e-06, "loss": 0.2011, "step": 2760 }, { "epoch": 0.08054728980687322, "grad_norm": 1.0194453198695188, "learning_rate": 9.93319501441779e-06, "loss": 0.1894, "step": 2761 }, { "epoch": 0.08057646303751677, "grad_norm": 0.8180571092945929, "learning_rate": 9.9331180226242e-06, "loss": 0.1935, "step": 2762 }, { "epoch": 0.08060563626816034, "grad_norm": 1.0405861116911927, "learning_rate": 9.933040986788909e-06, "loss": 0.2106, "step": 2763 }, { "epoch": 0.0806348094988039, "grad_norm": 0.8460896887974851, "learning_rate": 9.932963906912603e-06, "loss": 0.1856, "step": 2764 }, { "epoch": 0.08066398272944746, "grad_norm": 0.7751705907994965, "learning_rate": 9.932886782995977e-06, "loss": 0.1892, "step": 2765 }, { "epoch": 0.08069315596009102, "grad_norm": 1.0849083426806656, "learning_rate": 9.932809615039714e-06, "loss": 0.1973, "step": 2766 }, { "epoch": 0.08072232919073458, "grad_norm": 0.79395450879649, "learning_rate": 9.932732403044502e-06, "loss": 0.1811, "step": 2767 }, { "epoch": 0.08075150242137814, "grad_norm": 0.9203116392343945, "learning_rate": 9.932655147011034e-06, "loss": 0.1812, "step": 2768 }, { "epoch": 0.08078067565202171, "grad_norm": 0.7840840738602628, "learning_rate": 9.93257784694e-06, "loss": 0.1869, "step": 2769 }, { "epoch": 0.08080984888266526, "grad_norm": 0.8152327691890939, "learning_rate": 9.932500502832087e-06, "loss": 0.1934, "step": 2770 }, { "epoch": 0.08083902211330883, "grad_norm": 0.9607678098915426, "learning_rate": 9.932423114687988e-06, "loss": 0.1858, "step": 2771 }, { "epoch": 0.08086819534395238, "grad_norm": 0.7847020955786086, "learning_rate": 9.932345682508393e-06, "loss": 0.2001, "step": 2772 }, { "epoch": 0.08089736857459595, "grad_norm": 1.0975533217950417, "learning_rate": 9.93226820629399e-06, "loss": 0.1943, "step": 2773 }, { "epoch": 0.08092654180523952, "grad_norm": 1.0910647082549227, "learning_rate": 9.932190686045478e-06, "loss": 0.2256, "step": 2774 }, { "epoch": 0.08095571503588307, "grad_norm": 6.994995426750002, "learning_rate": 9.932113121763542e-06, "loss": 0.2086, "step": 2775 }, { "epoch": 0.08098488826652664, "grad_norm": 1.2525824847267382, "learning_rate": 9.93203551344888e-06, "loss": 0.1986, "step": 2776 }, { "epoch": 0.08101406149717019, "grad_norm": 1.1556186623538949, "learning_rate": 9.931957861102181e-06, "loss": 0.2066, "step": 2777 }, { "epoch": 0.08104323472781376, "grad_norm": 1.2286560015579542, "learning_rate": 9.93188016472414e-06, "loss": 0.2073, "step": 2778 }, { "epoch": 0.08107240795845731, "grad_norm": 1.1692159101193345, "learning_rate": 9.931802424315448e-06, "loss": 0.189, "step": 2779 }, { "epoch": 0.08110158118910088, "grad_norm": 1.169928906398308, "learning_rate": 9.931724639876806e-06, "loss": 0.168, "step": 2780 }, { "epoch": 0.08113075441974445, "grad_norm": 1.1798753346118862, "learning_rate": 9.931646811408899e-06, "loss": 0.2168, "step": 2781 }, { "epoch": 0.081159927650388, "grad_norm": 1.139026510056305, "learning_rate": 9.931568938912428e-06, "loss": 0.2013, "step": 2782 }, { "epoch": 0.08118910088103157, "grad_norm": 0.9073070629248077, "learning_rate": 9.931491022388087e-06, "loss": 0.1781, "step": 2783 }, { "epoch": 0.08121827411167512, "grad_norm": 1.0617980509299942, "learning_rate": 9.931413061836573e-06, "loss": 0.1888, "step": 2784 }, { "epoch": 0.08124744734231869, "grad_norm": 1.0467083373748136, "learning_rate": 9.931335057258579e-06, "loss": 0.1987, "step": 2785 }, { "epoch": 0.08127662057296225, "grad_norm": 0.9096295779546549, "learning_rate": 9.931257008654801e-06, "loss": 0.1807, "step": 2786 }, { "epoch": 0.08130579380360581, "grad_norm": 1.0581114770644902, "learning_rate": 9.931178916025941e-06, "loss": 0.1992, "step": 2787 }, { "epoch": 0.08133496703424938, "grad_norm": 0.901276263214751, "learning_rate": 9.931100779372691e-06, "loss": 0.2196, "step": 2788 }, { "epoch": 0.08136414026489293, "grad_norm": 0.8226719310667839, "learning_rate": 9.93102259869575e-06, "loss": 0.1913, "step": 2789 }, { "epoch": 0.0813933134955365, "grad_norm": 0.9334413386187128, "learning_rate": 9.930944373995816e-06, "loss": 0.1995, "step": 2790 }, { "epoch": 0.08142248672618006, "grad_norm": 0.8547959893057678, "learning_rate": 9.93086610527359e-06, "loss": 0.168, "step": 2791 }, { "epoch": 0.08145165995682362, "grad_norm": 0.8424939866021695, "learning_rate": 9.930787792529768e-06, "loss": 0.1803, "step": 2792 }, { "epoch": 0.08148083318746718, "grad_norm": 0.7925941421375563, "learning_rate": 9.930709435765049e-06, "loss": 0.1807, "step": 2793 }, { "epoch": 0.08151000641811074, "grad_norm": 0.8068038095868088, "learning_rate": 9.930631034980132e-06, "loss": 0.1995, "step": 2794 }, { "epoch": 0.0815391796487543, "grad_norm": 1.1927830385184788, "learning_rate": 9.93055259017572e-06, "loss": 0.2237, "step": 2795 }, { "epoch": 0.08156835287939787, "grad_norm": 0.87189253766637, "learning_rate": 9.93047410135251e-06, "loss": 0.1778, "step": 2796 }, { "epoch": 0.08159752611004142, "grad_norm": 0.9239119098014479, "learning_rate": 9.930395568511205e-06, "loss": 0.1726, "step": 2797 }, { "epoch": 0.08162669934068499, "grad_norm": 0.8123218897094974, "learning_rate": 9.930316991652506e-06, "loss": 0.2261, "step": 2798 }, { "epoch": 0.08165587257132854, "grad_norm": 1.0795451179377848, "learning_rate": 9.930238370777112e-06, "loss": 0.2013, "step": 2799 }, { "epoch": 0.08168504580197211, "grad_norm": 0.9989426857442159, "learning_rate": 9.93015970588573e-06, "loss": 0.1877, "step": 2800 }, { "epoch": 0.08171421903261568, "grad_norm": 0.809457448580003, "learning_rate": 9.930080996979055e-06, "loss": 0.2254, "step": 2801 }, { "epoch": 0.08174339226325923, "grad_norm": 0.8504300144113776, "learning_rate": 9.930002244057795e-06, "loss": 0.2054, "step": 2802 }, { "epoch": 0.0817725654939028, "grad_norm": 1.0160328925502884, "learning_rate": 9.929923447122654e-06, "loss": 0.2227, "step": 2803 }, { "epoch": 0.08180173872454635, "grad_norm": 0.859542823941737, "learning_rate": 9.92984460617433e-06, "loss": 0.1837, "step": 2804 }, { "epoch": 0.08183091195518992, "grad_norm": 1.3241622148617618, "learning_rate": 9.929765721213533e-06, "loss": 0.1782, "step": 2805 }, { "epoch": 0.08186008518583347, "grad_norm": 0.9965291614881215, "learning_rate": 9.929686792240965e-06, "loss": 0.1826, "step": 2806 }, { "epoch": 0.08188925841647704, "grad_norm": 1.086457509226983, "learning_rate": 9.929607819257327e-06, "loss": 0.2149, "step": 2807 }, { "epoch": 0.08191843164712061, "grad_norm": 0.909790091638877, "learning_rate": 9.929528802263331e-06, "loss": 0.214, "step": 2808 }, { "epoch": 0.08194760487776416, "grad_norm": 0.9813151341080305, "learning_rate": 9.929449741259675e-06, "loss": 0.1862, "step": 2809 }, { "epoch": 0.08197677810840773, "grad_norm": 1.0301680924957024, "learning_rate": 9.92937063624707e-06, "loss": 0.1895, "step": 2810 }, { "epoch": 0.08200595133905128, "grad_norm": 1.1282745828440568, "learning_rate": 9.929291487226221e-06, "loss": 0.1759, "step": 2811 }, { "epoch": 0.08203512456969485, "grad_norm": 1.0243361260094277, "learning_rate": 9.929212294197834e-06, "loss": 0.1858, "step": 2812 }, { "epoch": 0.08206429780033841, "grad_norm": 0.9394166173833635, "learning_rate": 9.929133057162616e-06, "loss": 0.2236, "step": 2813 }, { "epoch": 0.08209347103098197, "grad_norm": 1.0602639828605804, "learning_rate": 9.929053776121276e-06, "loss": 0.1997, "step": 2814 }, { "epoch": 0.08212264426162554, "grad_norm": 0.9907947906298216, "learning_rate": 9.92897445107452e-06, "loss": 0.1889, "step": 2815 }, { "epoch": 0.08215181749226909, "grad_norm": 1.0825967856642165, "learning_rate": 9.928895082023056e-06, "loss": 0.1892, "step": 2816 }, { "epoch": 0.08218099072291266, "grad_norm": 1.129993170967017, "learning_rate": 9.928815668967592e-06, "loss": 0.1786, "step": 2817 }, { "epoch": 0.08221016395355622, "grad_norm": 0.9319226639332902, "learning_rate": 9.928736211908841e-06, "loss": 0.1943, "step": 2818 }, { "epoch": 0.08223933718419978, "grad_norm": 0.8958238329213462, "learning_rate": 9.92865671084751e-06, "loss": 0.1892, "step": 2819 }, { "epoch": 0.08226851041484334, "grad_norm": 1.1202960962969155, "learning_rate": 9.928577165784306e-06, "loss": 0.1752, "step": 2820 }, { "epoch": 0.0822976836454869, "grad_norm": 0.9182878870187241, "learning_rate": 9.928497576719943e-06, "loss": 0.218, "step": 2821 }, { "epoch": 0.08232685687613046, "grad_norm": 1.152626927527983, "learning_rate": 9.92841794365513e-06, "loss": 0.1809, "step": 2822 }, { "epoch": 0.08235603010677403, "grad_norm": 0.8891267211475501, "learning_rate": 9.928338266590578e-06, "loss": 0.1851, "step": 2823 }, { "epoch": 0.08238520333741758, "grad_norm": 0.9388552579142749, "learning_rate": 9.928258545526999e-06, "loss": 0.2464, "step": 2824 }, { "epoch": 0.08241437656806115, "grad_norm": 1.0039462884984038, "learning_rate": 9.928178780465103e-06, "loss": 0.1702, "step": 2825 }, { "epoch": 0.0824435497987047, "grad_norm": 0.8654622861712875, "learning_rate": 9.928098971405604e-06, "loss": 0.1692, "step": 2826 }, { "epoch": 0.08247272302934827, "grad_norm": 0.9468776200338908, "learning_rate": 9.928019118349214e-06, "loss": 0.208, "step": 2827 }, { "epoch": 0.08250189625999182, "grad_norm": 0.8675639463620226, "learning_rate": 9.927939221296645e-06, "loss": 0.1893, "step": 2828 }, { "epoch": 0.08253106949063539, "grad_norm": 0.9140124809001825, "learning_rate": 9.927859280248613e-06, "loss": 0.1912, "step": 2829 }, { "epoch": 0.08256024272127896, "grad_norm": 0.8808135522217446, "learning_rate": 9.927779295205828e-06, "loss": 0.1882, "step": 2830 }, { "epoch": 0.08258941595192251, "grad_norm": 1.1263294413792957, "learning_rate": 9.927699266169006e-06, "loss": 0.2129, "step": 2831 }, { "epoch": 0.08261858918256608, "grad_norm": 1.1464325707882075, "learning_rate": 9.927619193138862e-06, "loss": 0.2159, "step": 2832 }, { "epoch": 0.08264776241320963, "grad_norm": 0.9193497134407058, "learning_rate": 9.927539076116108e-06, "loss": 0.1843, "step": 2833 }, { "epoch": 0.0826769356438532, "grad_norm": 1.0620327484528727, "learning_rate": 9.927458915101463e-06, "loss": 0.1875, "step": 2834 }, { "epoch": 0.08270610887449677, "grad_norm": 1.297023435604498, "learning_rate": 9.92737871009564e-06, "loss": 0.1988, "step": 2835 }, { "epoch": 0.08273528210514032, "grad_norm": 1.1594631116160867, "learning_rate": 9.927298461099358e-06, "loss": 0.1945, "step": 2836 }, { "epoch": 0.08276445533578389, "grad_norm": 2.71844039216031, "learning_rate": 9.92721816811333e-06, "loss": 0.2019, "step": 2837 }, { "epoch": 0.08279362856642744, "grad_norm": 1.1300278850126155, "learning_rate": 9.927137831138275e-06, "loss": 0.2028, "step": 2838 }, { "epoch": 0.08282280179707101, "grad_norm": 0.9754568136533278, "learning_rate": 9.92705745017491e-06, "loss": 0.2007, "step": 2839 }, { "epoch": 0.08285197502771458, "grad_norm": 0.8622160475156416, "learning_rate": 9.926977025223954e-06, "loss": 0.2024, "step": 2840 }, { "epoch": 0.08288114825835813, "grad_norm": 0.9267529229951084, "learning_rate": 9.92689655628612e-06, "loss": 0.1839, "step": 2841 }, { "epoch": 0.0829103214890017, "grad_norm": 0.9914073092895164, "learning_rate": 9.926816043362132e-06, "loss": 0.2059, "step": 2842 }, { "epoch": 0.08293949471964525, "grad_norm": 0.8820006093688145, "learning_rate": 9.926735486452706e-06, "loss": 0.2116, "step": 2843 }, { "epoch": 0.08296866795028882, "grad_norm": 1.128717413870026, "learning_rate": 9.92665488555856e-06, "loss": 0.1931, "step": 2844 }, { "epoch": 0.08299784118093238, "grad_norm": 1.033345513230471, "learning_rate": 9.926574240680417e-06, "loss": 0.2084, "step": 2845 }, { "epoch": 0.08302701441157594, "grad_norm": 0.7905999006884825, "learning_rate": 9.926493551818995e-06, "loss": 0.1836, "step": 2846 }, { "epoch": 0.0830561876422195, "grad_norm": 0.9106042662231119, "learning_rate": 9.926412818975015e-06, "loss": 0.1792, "step": 2847 }, { "epoch": 0.08308536087286306, "grad_norm": 0.7649859787618821, "learning_rate": 9.926332042149196e-06, "loss": 0.1835, "step": 2848 }, { "epoch": 0.08311453410350662, "grad_norm": 0.9070524418435134, "learning_rate": 9.926251221342262e-06, "loss": 0.2235, "step": 2849 }, { "epoch": 0.08314370733415018, "grad_norm": 0.8590134215437185, "learning_rate": 9.926170356554932e-06, "loss": 0.1774, "step": 2850 }, { "epoch": 0.08317288056479374, "grad_norm": 1.0204603380841695, "learning_rate": 9.92608944778793e-06, "loss": 0.214, "step": 2851 }, { "epoch": 0.08320205379543731, "grad_norm": 0.9968437364023346, "learning_rate": 9.926008495041975e-06, "loss": 0.2257, "step": 2852 }, { "epoch": 0.08323122702608086, "grad_norm": 0.9535351588974474, "learning_rate": 9.925927498317794e-06, "loss": 0.1844, "step": 2853 }, { "epoch": 0.08326040025672443, "grad_norm": 0.8854242104496456, "learning_rate": 9.925846457616109e-06, "loss": 0.2026, "step": 2854 }, { "epoch": 0.08328957348736798, "grad_norm": 1.0089888511502039, "learning_rate": 9.925765372937641e-06, "loss": 0.192, "step": 2855 }, { "epoch": 0.08331874671801155, "grad_norm": 0.917845127766735, "learning_rate": 9.925684244283116e-06, "loss": 0.2179, "step": 2856 }, { "epoch": 0.08334791994865512, "grad_norm": 0.9949885017616943, "learning_rate": 9.925603071653258e-06, "loss": 0.1836, "step": 2857 }, { "epoch": 0.08337709317929867, "grad_norm": 1.0751050383811247, "learning_rate": 9.925521855048794e-06, "loss": 0.2328, "step": 2858 }, { "epoch": 0.08340626640994224, "grad_norm": 0.906733513481403, "learning_rate": 9.925440594470444e-06, "loss": 0.1967, "step": 2859 }, { "epoch": 0.08343543964058579, "grad_norm": 1.142701110240169, "learning_rate": 9.925359289918937e-06, "loss": 0.2128, "step": 2860 }, { "epoch": 0.08346461287122936, "grad_norm": 0.9905148856143702, "learning_rate": 9.925277941394998e-06, "loss": 0.213, "step": 2861 }, { "epoch": 0.08349378610187293, "grad_norm": 1.0512130964236224, "learning_rate": 9.925196548899353e-06, "loss": 0.1755, "step": 2862 }, { "epoch": 0.08352295933251648, "grad_norm": 1.0806773648246382, "learning_rate": 9.925115112432728e-06, "loss": 0.1709, "step": 2863 }, { "epoch": 0.08355213256316005, "grad_norm": 1.1075388767413175, "learning_rate": 9.925033631995854e-06, "loss": 0.192, "step": 2864 }, { "epoch": 0.0835813057938036, "grad_norm": 0.9388043533551024, "learning_rate": 9.924952107589452e-06, "loss": 0.2066, "step": 2865 }, { "epoch": 0.08361047902444717, "grad_norm": 0.9950697119120068, "learning_rate": 9.924870539214256e-06, "loss": 0.2075, "step": 2866 }, { "epoch": 0.08363965225509074, "grad_norm": 1.1421071021035174, "learning_rate": 9.924788926870989e-06, "loss": 0.2287, "step": 2867 }, { "epoch": 0.08366882548573429, "grad_norm": 1.0471766176853352, "learning_rate": 9.924707270560383e-06, "loss": 0.1956, "step": 2868 }, { "epoch": 0.08369799871637786, "grad_norm": 0.91963288783241, "learning_rate": 9.924625570283167e-06, "loss": 0.2252, "step": 2869 }, { "epoch": 0.08372717194702141, "grad_norm": 1.0305444563482429, "learning_rate": 9.92454382604007e-06, "loss": 0.1757, "step": 2870 }, { "epoch": 0.08375634517766498, "grad_norm": 0.9452937508991428, "learning_rate": 9.92446203783182e-06, "loss": 0.1947, "step": 2871 }, { "epoch": 0.08378551840830854, "grad_norm": 0.9541093318719566, "learning_rate": 9.924380205659147e-06, "loss": 0.196, "step": 2872 }, { "epoch": 0.0838146916389521, "grad_norm": 1.112014158471839, "learning_rate": 9.924298329522786e-06, "loss": 0.1981, "step": 2873 }, { "epoch": 0.08384386486959566, "grad_norm": 0.901707974059267, "learning_rate": 9.924216409423464e-06, "loss": 0.182, "step": 2874 }, { "epoch": 0.08387303810023922, "grad_norm": 1.1860376632262863, "learning_rate": 9.924134445361913e-06, "loss": 0.2023, "step": 2875 }, { "epoch": 0.08390221133088278, "grad_norm": 1.0321403578435027, "learning_rate": 9.924052437338865e-06, "loss": 0.1765, "step": 2876 }, { "epoch": 0.08393138456152634, "grad_norm": 0.8207461696475347, "learning_rate": 9.923970385355052e-06, "loss": 0.1855, "step": 2877 }, { "epoch": 0.0839605577921699, "grad_norm": 0.9138012734625626, "learning_rate": 9.92388828941121e-06, "loss": 0.2035, "step": 2878 }, { "epoch": 0.08398973102281347, "grad_norm": 0.9461767070447371, "learning_rate": 9.923806149508066e-06, "loss": 0.1844, "step": 2879 }, { "epoch": 0.08401890425345702, "grad_norm": 0.9880288764358837, "learning_rate": 9.923723965646356e-06, "loss": 0.1901, "step": 2880 }, { "epoch": 0.08404807748410059, "grad_norm": 1.1643190997949617, "learning_rate": 9.923641737826815e-06, "loss": 0.2148, "step": 2881 }, { "epoch": 0.08407725071474415, "grad_norm": 1.1033208593677126, "learning_rate": 9.923559466050174e-06, "loss": 0.1941, "step": 2882 }, { "epoch": 0.08410642394538771, "grad_norm": 0.9737642643596599, "learning_rate": 9.923477150317172e-06, "loss": 0.1997, "step": 2883 }, { "epoch": 0.08413559717603128, "grad_norm": 0.9261757169279775, "learning_rate": 9.92339479062854e-06, "loss": 0.2009, "step": 2884 }, { "epoch": 0.08416477040667483, "grad_norm": 1.258990034617718, "learning_rate": 9.923312386985013e-06, "loss": 0.1933, "step": 2885 }, { "epoch": 0.0841939436373184, "grad_norm": 1.001298937861448, "learning_rate": 9.92322993938733e-06, "loss": 0.1971, "step": 2886 }, { "epoch": 0.08422311686796195, "grad_norm": 0.9213789051844401, "learning_rate": 9.923147447836226e-06, "loss": 0.1794, "step": 2887 }, { "epoch": 0.08425229009860552, "grad_norm": 0.7759502889329065, "learning_rate": 9.923064912332436e-06, "loss": 0.1844, "step": 2888 }, { "epoch": 0.08428146332924909, "grad_norm": 0.9828130081912211, "learning_rate": 9.922982332876698e-06, "loss": 0.1866, "step": 2889 }, { "epoch": 0.08431063655989264, "grad_norm": 0.9141924436531134, "learning_rate": 9.922899709469748e-06, "loss": 0.1957, "step": 2890 }, { "epoch": 0.08433980979053621, "grad_norm": 0.9530853995878734, "learning_rate": 9.922817042112326e-06, "loss": 0.201, "step": 2891 }, { "epoch": 0.08436898302117976, "grad_norm": 0.809485663870376, "learning_rate": 9.922734330805169e-06, "loss": 0.1921, "step": 2892 }, { "epoch": 0.08439815625182333, "grad_norm": 0.8076078712216355, "learning_rate": 9.922651575549013e-06, "loss": 0.2064, "step": 2893 }, { "epoch": 0.0844273294824669, "grad_norm": 0.9071757954275966, "learning_rate": 9.9225687763446e-06, "loss": 0.2151, "step": 2894 }, { "epoch": 0.08445650271311045, "grad_norm": 0.9745045000878537, "learning_rate": 9.922485933192667e-06, "loss": 0.177, "step": 2895 }, { "epoch": 0.08448567594375402, "grad_norm": 0.9922703437221619, "learning_rate": 9.922403046093956e-06, "loss": 0.1813, "step": 2896 }, { "epoch": 0.08451484917439757, "grad_norm": 0.9315399021583822, "learning_rate": 9.922320115049205e-06, "loss": 0.1787, "step": 2897 }, { "epoch": 0.08454402240504114, "grad_norm": 0.8776975219008598, "learning_rate": 9.922237140059157e-06, "loss": 0.1704, "step": 2898 }, { "epoch": 0.08457319563568469, "grad_norm": 1.1553772509854796, "learning_rate": 9.922154121124548e-06, "loss": 0.28, "step": 2899 }, { "epoch": 0.08460236886632826, "grad_norm": 0.8628025604026724, "learning_rate": 9.922071058246122e-06, "loss": 0.1691, "step": 2900 }, { "epoch": 0.08463154209697182, "grad_norm": 0.9123746640465588, "learning_rate": 9.921987951424624e-06, "loss": 0.1756, "step": 2901 }, { "epoch": 0.08466071532761538, "grad_norm": 0.9395113792693988, "learning_rate": 9.92190480066079e-06, "loss": 0.2124, "step": 2902 }, { "epoch": 0.08468988855825894, "grad_norm": 0.8051932965639589, "learning_rate": 9.921821605955366e-06, "loss": 0.2006, "step": 2903 }, { "epoch": 0.0847190617889025, "grad_norm": 0.8792275575625256, "learning_rate": 9.921738367309091e-06, "loss": 0.188, "step": 2904 }, { "epoch": 0.08474823501954606, "grad_norm": 0.9120241887495304, "learning_rate": 9.921655084722713e-06, "loss": 0.1948, "step": 2905 }, { "epoch": 0.08477740825018963, "grad_norm": 0.9919025555721173, "learning_rate": 9.921571758196973e-06, "loss": 0.2279, "step": 2906 }, { "epoch": 0.08480658148083318, "grad_norm": 0.8772008946481051, "learning_rate": 9.921488387732617e-06, "loss": 0.1788, "step": 2907 }, { "epoch": 0.08483575471147675, "grad_norm": 0.8265557172512299, "learning_rate": 9.921404973330385e-06, "loss": 0.1836, "step": 2908 }, { "epoch": 0.0848649279421203, "grad_norm": 0.9060913602449435, "learning_rate": 9.921321514991024e-06, "loss": 0.1791, "step": 2909 }, { "epoch": 0.08489410117276387, "grad_norm": 0.8626848225961726, "learning_rate": 9.92123801271528e-06, "loss": 0.1884, "step": 2910 }, { "epoch": 0.08492327440340744, "grad_norm": 0.8905954737845782, "learning_rate": 9.921154466503899e-06, "loss": 0.1761, "step": 2911 }, { "epoch": 0.08495244763405099, "grad_norm": 0.9143770626772574, "learning_rate": 9.921070876357625e-06, "loss": 0.1756, "step": 2912 }, { "epoch": 0.08498162086469456, "grad_norm": 0.929480628058618, "learning_rate": 9.920987242277205e-06, "loss": 0.202, "step": 2913 }, { "epoch": 0.08501079409533811, "grad_norm": 0.8884063264213005, "learning_rate": 9.920903564263385e-06, "loss": 0.1967, "step": 2914 }, { "epoch": 0.08503996732598168, "grad_norm": 0.9497256865252193, "learning_rate": 9.920819842316914e-06, "loss": 0.2079, "step": 2915 }, { "epoch": 0.08506914055662525, "grad_norm": 0.9076402904311048, "learning_rate": 9.920736076438535e-06, "loss": 0.1875, "step": 2916 }, { "epoch": 0.0850983137872688, "grad_norm": 0.8785093924513132, "learning_rate": 9.920652266629002e-06, "loss": 0.2004, "step": 2917 }, { "epoch": 0.08512748701791237, "grad_norm": 0.9594248321183944, "learning_rate": 9.92056841288906e-06, "loss": 0.1886, "step": 2918 }, { "epoch": 0.08515666024855592, "grad_norm": 0.7301481664940029, "learning_rate": 9.920484515219458e-06, "loss": 0.2005, "step": 2919 }, { "epoch": 0.08518583347919949, "grad_norm": 0.8705722867537141, "learning_rate": 9.920400573620943e-06, "loss": 0.1737, "step": 2920 }, { "epoch": 0.08521500670984304, "grad_norm": 0.9298330380751542, "learning_rate": 9.920316588094268e-06, "loss": 0.2029, "step": 2921 }, { "epoch": 0.08524417994048661, "grad_norm": 0.8401546420998232, "learning_rate": 9.92023255864018e-06, "loss": 0.1906, "step": 2922 }, { "epoch": 0.08527335317113018, "grad_norm": 0.9615083026920218, "learning_rate": 9.92014848525943e-06, "loss": 0.1863, "step": 2923 }, { "epoch": 0.08530252640177373, "grad_norm": 1.0750265050292815, "learning_rate": 9.92006436795277e-06, "loss": 0.188, "step": 2924 }, { "epoch": 0.0853316996324173, "grad_norm": 0.9646879674547842, "learning_rate": 9.919980206720949e-06, "loss": 0.179, "step": 2925 }, { "epoch": 0.08536087286306085, "grad_norm": 0.7327944202303368, "learning_rate": 9.919896001564721e-06, "loss": 0.1659, "step": 2926 }, { "epoch": 0.08539004609370442, "grad_norm": 0.9456678912751119, "learning_rate": 9.919811752484834e-06, "loss": 0.2574, "step": 2927 }, { "epoch": 0.08541921932434798, "grad_norm": 0.983425907080063, "learning_rate": 9.919727459482043e-06, "loss": 0.2018, "step": 2928 }, { "epoch": 0.08544839255499154, "grad_norm": 0.803034174790023, "learning_rate": 9.919643122557099e-06, "loss": 0.177, "step": 2929 }, { "epoch": 0.0854775657856351, "grad_norm": 0.8841768667334998, "learning_rate": 9.919558741710757e-06, "loss": 0.2047, "step": 2930 }, { "epoch": 0.08550673901627866, "grad_norm": 0.8384717905943977, "learning_rate": 9.919474316943767e-06, "loss": 0.1876, "step": 2931 }, { "epoch": 0.08553591224692222, "grad_norm": 0.9552471273658159, "learning_rate": 9.919389848256886e-06, "loss": 0.1822, "step": 2932 }, { "epoch": 0.08556508547756579, "grad_norm": 0.861439992265136, "learning_rate": 9.919305335650866e-06, "loss": 0.1677, "step": 2933 }, { "epoch": 0.08559425870820934, "grad_norm": 0.9117444486814688, "learning_rate": 9.919220779126464e-06, "loss": 0.1805, "step": 2934 }, { "epoch": 0.08562343193885291, "grad_norm": 0.9329925244000081, "learning_rate": 9.919136178684432e-06, "loss": 0.1942, "step": 2935 }, { "epoch": 0.08565260516949647, "grad_norm": 1.0201358946837327, "learning_rate": 9.919051534325526e-06, "loss": 0.1848, "step": 2936 }, { "epoch": 0.08568177840014003, "grad_norm": 1.0060928362882617, "learning_rate": 9.918966846050502e-06, "loss": 0.1728, "step": 2937 }, { "epoch": 0.0857109516307836, "grad_norm": 0.93465579707486, "learning_rate": 9.918882113860117e-06, "loss": 0.2069, "step": 2938 }, { "epoch": 0.08574012486142715, "grad_norm": 0.9294178165199828, "learning_rate": 9.918797337755125e-06, "loss": 0.2154, "step": 2939 }, { "epoch": 0.08576929809207072, "grad_norm": 1.0235219812827783, "learning_rate": 9.918712517736288e-06, "loss": 0.163, "step": 2940 }, { "epoch": 0.08579847132271427, "grad_norm": 0.9363081392345324, "learning_rate": 9.918627653804358e-06, "loss": 0.1693, "step": 2941 }, { "epoch": 0.08582764455335784, "grad_norm": 0.8096250351067642, "learning_rate": 9.918542745960094e-06, "loss": 0.1792, "step": 2942 }, { "epoch": 0.0858568177840014, "grad_norm": 1.030032549066214, "learning_rate": 9.918457794204255e-06, "loss": 0.2217, "step": 2943 }, { "epoch": 0.08588599101464496, "grad_norm": 0.8574877595755447, "learning_rate": 9.918372798537599e-06, "loss": 0.1828, "step": 2944 }, { "epoch": 0.08591516424528853, "grad_norm": 1.491100058222644, "learning_rate": 9.918287758960885e-06, "loss": 0.2054, "step": 2945 }, { "epoch": 0.08594433747593208, "grad_norm": 0.9083850752775197, "learning_rate": 9.918202675474872e-06, "loss": 0.1918, "step": 2946 }, { "epoch": 0.08597351070657565, "grad_norm": 0.9011727890489442, "learning_rate": 9.91811754808032e-06, "loss": 0.1913, "step": 2947 }, { "epoch": 0.0860026839372192, "grad_norm": 0.7906158588954184, "learning_rate": 9.918032376777987e-06, "loss": 0.1825, "step": 2948 }, { "epoch": 0.08603185716786277, "grad_norm": 0.8329374188141475, "learning_rate": 9.917947161568635e-06, "loss": 0.1932, "step": 2949 }, { "epoch": 0.08606103039850634, "grad_norm": 0.9847432416449173, "learning_rate": 9.917861902453026e-06, "loss": 0.1805, "step": 2950 }, { "epoch": 0.08609020362914989, "grad_norm": 0.77088051200454, "learning_rate": 9.91777659943192e-06, "loss": 0.198, "step": 2951 }, { "epoch": 0.08611937685979346, "grad_norm": 0.9664209900370838, "learning_rate": 9.917691252506077e-06, "loss": 0.2013, "step": 2952 }, { "epoch": 0.08614855009043701, "grad_norm": 0.8164306184048502, "learning_rate": 9.917605861676263e-06, "loss": 0.1877, "step": 2953 }, { "epoch": 0.08617772332108058, "grad_norm": 0.9170505125275688, "learning_rate": 9.917520426943234e-06, "loss": 0.1819, "step": 2954 }, { "epoch": 0.08620689655172414, "grad_norm": 0.8540539865375996, "learning_rate": 9.91743494830776e-06, "loss": 0.1946, "step": 2955 }, { "epoch": 0.0862360697823677, "grad_norm": 0.8564212394033919, "learning_rate": 9.9173494257706e-06, "loss": 0.1847, "step": 2956 }, { "epoch": 0.08626524301301126, "grad_norm": 0.9910363759857238, "learning_rate": 9.917263859332517e-06, "loss": 0.2041, "step": 2957 }, { "epoch": 0.08629441624365482, "grad_norm": 0.8074326237234796, "learning_rate": 9.917178248994276e-06, "loss": 0.1802, "step": 2958 }, { "epoch": 0.08632358947429838, "grad_norm": 0.8084312759161405, "learning_rate": 9.917092594756644e-06, "loss": 0.1797, "step": 2959 }, { "epoch": 0.08635276270494195, "grad_norm": 0.902437331471456, "learning_rate": 9.91700689662038e-06, "loss": 0.2053, "step": 2960 }, { "epoch": 0.0863819359355855, "grad_norm": 1.1399668089315442, "learning_rate": 9.916921154586255e-06, "loss": 0.2152, "step": 2961 }, { "epoch": 0.08641110916622907, "grad_norm": 1.0049544203280714, "learning_rate": 9.91683536865503e-06, "loss": 0.1975, "step": 2962 }, { "epoch": 0.08644028239687263, "grad_norm": 0.912389918373014, "learning_rate": 9.916749538827472e-06, "loss": 0.196, "step": 2963 }, { "epoch": 0.08646945562751619, "grad_norm": 0.8774603790351778, "learning_rate": 9.916663665104348e-06, "loss": 0.1969, "step": 2964 }, { "epoch": 0.08649862885815976, "grad_norm": 0.8048369930767437, "learning_rate": 9.916577747486425e-06, "loss": 0.1683, "step": 2965 }, { "epoch": 0.08652780208880331, "grad_norm": 0.9037903315671819, "learning_rate": 9.91649178597447e-06, "loss": 0.2098, "step": 2966 }, { "epoch": 0.08655697531944688, "grad_norm": 0.9839554970042445, "learning_rate": 9.91640578056925e-06, "loss": 0.1768, "step": 2967 }, { "epoch": 0.08658614855009043, "grad_norm": 0.8167726210545194, "learning_rate": 9.916319731271532e-06, "loss": 0.1728, "step": 2968 }, { "epoch": 0.086615321780734, "grad_norm": 0.8284331586776322, "learning_rate": 9.916233638082086e-06, "loss": 0.1754, "step": 2969 }, { "epoch": 0.08664449501137755, "grad_norm": 0.8480392275632824, "learning_rate": 9.916147501001679e-06, "loss": 0.1781, "step": 2970 }, { "epoch": 0.08667366824202112, "grad_norm": 0.8980497097528721, "learning_rate": 9.91606132003108e-06, "loss": 0.1839, "step": 2971 }, { "epoch": 0.08670284147266469, "grad_norm": 0.9619761370527177, "learning_rate": 9.91597509517106e-06, "loss": 0.1868, "step": 2972 }, { "epoch": 0.08673201470330824, "grad_norm": 0.7884627570569046, "learning_rate": 9.91588882642239e-06, "loss": 0.2301, "step": 2973 }, { "epoch": 0.08676118793395181, "grad_norm": 0.7750463676329331, "learning_rate": 9.915802513785835e-06, "loss": 0.1739, "step": 2974 }, { "epoch": 0.08679036116459536, "grad_norm": 0.825933172220525, "learning_rate": 9.91571615726217e-06, "loss": 0.176, "step": 2975 }, { "epoch": 0.08681953439523893, "grad_norm": 0.8739152510151846, "learning_rate": 9.915629756852163e-06, "loss": 0.209, "step": 2976 }, { "epoch": 0.0868487076258825, "grad_norm": 0.9315939547274, "learning_rate": 9.915543312556588e-06, "loss": 0.2316, "step": 2977 }, { "epoch": 0.08687788085652605, "grad_norm": 0.7991220099424584, "learning_rate": 9.915456824376217e-06, "loss": 0.2085, "step": 2978 }, { "epoch": 0.08690705408716962, "grad_norm": 0.9678247234032558, "learning_rate": 9.915370292311818e-06, "loss": 0.2058, "step": 2979 }, { "epoch": 0.08693622731781317, "grad_norm": 0.9505488319158721, "learning_rate": 9.91528371636417e-06, "loss": 0.1984, "step": 2980 }, { "epoch": 0.08696540054845674, "grad_norm": 0.8523923035068051, "learning_rate": 9.915197096534039e-06, "loss": 0.1738, "step": 2981 }, { "epoch": 0.0869945737791003, "grad_norm": 0.9849848988500524, "learning_rate": 9.915110432822203e-06, "loss": 0.2035, "step": 2982 }, { "epoch": 0.08702374700974386, "grad_norm": 0.7955156687986064, "learning_rate": 9.915023725229435e-06, "loss": 0.1719, "step": 2983 }, { "epoch": 0.08705292024038742, "grad_norm": 0.8459914807033535, "learning_rate": 9.914936973756509e-06, "loss": 0.1807, "step": 2984 }, { "epoch": 0.08708209347103098, "grad_norm": 1.01317682427066, "learning_rate": 9.914850178404199e-06, "loss": 0.1929, "step": 2985 }, { "epoch": 0.08711126670167454, "grad_norm": 0.8749675976675539, "learning_rate": 9.914763339173279e-06, "loss": 0.1938, "step": 2986 }, { "epoch": 0.08714043993231811, "grad_norm": 1.0050341942248369, "learning_rate": 9.914676456064526e-06, "loss": 0.1933, "step": 2987 }, { "epoch": 0.08716961316296166, "grad_norm": 0.8836090974873869, "learning_rate": 9.914589529078713e-06, "loss": 0.1998, "step": 2988 }, { "epoch": 0.08719878639360523, "grad_norm": 1.1617059750490621, "learning_rate": 9.914502558216618e-06, "loss": 0.1878, "step": 2989 }, { "epoch": 0.08722795962424879, "grad_norm": 0.9042435047273026, "learning_rate": 9.91441554347902e-06, "loss": 0.1999, "step": 2990 }, { "epoch": 0.08725713285489235, "grad_norm": 0.78858604502361, "learning_rate": 9.91432848486669e-06, "loss": 0.1955, "step": 2991 }, { "epoch": 0.0872863060855359, "grad_norm": 0.913998913718363, "learning_rate": 9.91424138238041e-06, "loss": 0.2084, "step": 2992 }, { "epoch": 0.08731547931617947, "grad_norm": 0.8933043256883373, "learning_rate": 9.914154236020957e-06, "loss": 0.1769, "step": 2993 }, { "epoch": 0.08734465254682304, "grad_norm": 0.8455693392256065, "learning_rate": 9.914067045789107e-06, "loss": 0.1945, "step": 2994 }, { "epoch": 0.0873738257774666, "grad_norm": 1.0031771539640966, "learning_rate": 9.913979811685638e-06, "loss": 0.1998, "step": 2995 }, { "epoch": 0.08740299900811016, "grad_norm": 0.8820102377931416, "learning_rate": 9.913892533711331e-06, "loss": 0.1791, "step": 2996 }, { "epoch": 0.08743217223875371, "grad_norm": 0.9376324370591926, "learning_rate": 9.913805211866967e-06, "loss": 0.1785, "step": 2997 }, { "epoch": 0.08746134546939728, "grad_norm": 1.0575494307685773, "learning_rate": 9.913717846153322e-06, "loss": 0.1782, "step": 2998 }, { "epoch": 0.08749051870004085, "grad_norm": 1.4428421928061463, "learning_rate": 9.913630436571176e-06, "loss": 0.195, "step": 2999 }, { "epoch": 0.0875196919306844, "grad_norm": 1.2558891724714025, "learning_rate": 9.91354298312131e-06, "loss": 0.225, "step": 3000 }, { "epoch": 0.08754886516132797, "grad_norm": 0.9906785696055594, "learning_rate": 9.913455485804506e-06, "loss": 0.1908, "step": 3001 }, { "epoch": 0.08757803839197152, "grad_norm": 0.9814461413562301, "learning_rate": 9.913367944621545e-06, "loss": 0.182, "step": 3002 }, { "epoch": 0.08760721162261509, "grad_norm": 1.3326132631853649, "learning_rate": 9.913280359573207e-06, "loss": 0.1837, "step": 3003 }, { "epoch": 0.08763638485325866, "grad_norm": 0.8927915911853632, "learning_rate": 9.913192730660275e-06, "loss": 0.1795, "step": 3004 }, { "epoch": 0.08766555808390221, "grad_norm": 1.0050729571802486, "learning_rate": 9.913105057883532e-06, "loss": 0.1945, "step": 3005 }, { "epoch": 0.08769473131454578, "grad_norm": 0.9128684143400813, "learning_rate": 9.91301734124376e-06, "loss": 0.1964, "step": 3006 }, { "epoch": 0.08772390454518933, "grad_norm": 0.8438485794169703, "learning_rate": 9.91292958074174e-06, "loss": 0.1732, "step": 3007 }, { "epoch": 0.0877530777758329, "grad_norm": 0.8477518508629068, "learning_rate": 9.91284177637826e-06, "loss": 0.2083, "step": 3008 }, { "epoch": 0.08778225100647646, "grad_norm": 0.7869919198682958, "learning_rate": 9.9127539281541e-06, "loss": 0.1763, "step": 3009 }, { "epoch": 0.08781142423712002, "grad_norm": 1.0437124670764184, "learning_rate": 9.912666036070045e-06, "loss": 0.1778, "step": 3010 }, { "epoch": 0.08784059746776358, "grad_norm": 0.874233790713194, "learning_rate": 9.912578100126883e-06, "loss": 0.1874, "step": 3011 }, { "epoch": 0.08786977069840714, "grad_norm": 0.8595788089308047, "learning_rate": 9.912490120325394e-06, "loss": 0.2016, "step": 3012 }, { "epoch": 0.0878989439290507, "grad_norm": 0.9981321545014091, "learning_rate": 9.912402096666367e-06, "loss": 0.1904, "step": 3013 }, { "epoch": 0.08792811715969426, "grad_norm": 0.9073995176985775, "learning_rate": 9.912314029150586e-06, "loss": 0.1721, "step": 3014 }, { "epoch": 0.08795729039033783, "grad_norm": 0.9292271893163728, "learning_rate": 9.912225917778838e-06, "loss": 0.1936, "step": 3015 }, { "epoch": 0.08798646362098139, "grad_norm": 0.9206644134436318, "learning_rate": 9.91213776255191e-06, "loss": 0.195, "step": 3016 }, { "epoch": 0.08801563685162495, "grad_norm": 0.8990250912326241, "learning_rate": 9.912049563470589e-06, "loss": 0.2245, "step": 3017 }, { "epoch": 0.08804481008226851, "grad_norm": 0.9241519313027056, "learning_rate": 9.91196132053566e-06, "loss": 0.1705, "step": 3018 }, { "epoch": 0.08807398331291207, "grad_norm": 0.9298534231910399, "learning_rate": 9.911873033747916e-06, "loss": 0.2189, "step": 3019 }, { "epoch": 0.08810315654355563, "grad_norm": 0.7907726777360939, "learning_rate": 9.911784703108141e-06, "loss": 0.1865, "step": 3020 }, { "epoch": 0.0881323297741992, "grad_norm": 1.0804125823083952, "learning_rate": 9.911696328617126e-06, "loss": 0.206, "step": 3021 }, { "epoch": 0.08816150300484275, "grad_norm": 0.7719793440825188, "learning_rate": 9.911607910275655e-06, "loss": 0.1666, "step": 3022 }, { "epoch": 0.08819067623548632, "grad_norm": 0.9937527633913799, "learning_rate": 9.911519448084526e-06, "loss": 0.2015, "step": 3023 }, { "epoch": 0.08821984946612987, "grad_norm": 0.9353945419009075, "learning_rate": 9.91143094204452e-06, "loss": 0.236, "step": 3024 }, { "epoch": 0.08824902269677344, "grad_norm": 1.0133951671553068, "learning_rate": 9.911342392156432e-06, "loss": 0.1743, "step": 3025 }, { "epoch": 0.08827819592741701, "grad_norm": 0.989671455261747, "learning_rate": 9.911253798421051e-06, "loss": 0.2082, "step": 3026 }, { "epoch": 0.08830736915806056, "grad_norm": 1.2663908906167805, "learning_rate": 9.91116516083917e-06, "loss": 0.1768, "step": 3027 }, { "epoch": 0.08833654238870413, "grad_norm": 0.8667957135089173, "learning_rate": 9.911076479411578e-06, "loss": 0.1738, "step": 3028 }, { "epoch": 0.08836571561934768, "grad_norm": 1.0614484471945946, "learning_rate": 9.910987754139067e-06, "loss": 0.2223, "step": 3029 }, { "epoch": 0.08839488884999125, "grad_norm": 1.0329687572312753, "learning_rate": 9.91089898502243e-06, "loss": 0.1865, "step": 3030 }, { "epoch": 0.08842406208063482, "grad_norm": 1.067140489676507, "learning_rate": 9.910810172062462e-06, "loss": 0.1996, "step": 3031 }, { "epoch": 0.08845323531127837, "grad_norm": 0.96701487872926, "learning_rate": 9.91072131525995e-06, "loss": 0.1864, "step": 3032 }, { "epoch": 0.08848240854192194, "grad_norm": 0.9739058647898851, "learning_rate": 9.910632414615691e-06, "loss": 0.2033, "step": 3033 }, { "epoch": 0.08851158177256549, "grad_norm": 0.9240948823187121, "learning_rate": 9.910543470130478e-06, "loss": 0.2031, "step": 3034 }, { "epoch": 0.08854075500320906, "grad_norm": 1.0302705675412827, "learning_rate": 9.910454481805105e-06, "loss": 0.1894, "step": 3035 }, { "epoch": 0.08856992823385261, "grad_norm": 1.0976861082356608, "learning_rate": 9.910365449640367e-06, "loss": 0.2008, "step": 3036 }, { "epoch": 0.08859910146449618, "grad_norm": 0.9165697419348509, "learning_rate": 9.910276373637058e-06, "loss": 0.192, "step": 3037 }, { "epoch": 0.08862827469513974, "grad_norm": 1.4118806773504327, "learning_rate": 9.910187253795974e-06, "loss": 0.2065, "step": 3038 }, { "epoch": 0.0886574479257833, "grad_norm": 0.8733323283579463, "learning_rate": 9.91009809011791e-06, "loss": 0.1819, "step": 3039 }, { "epoch": 0.08868662115642686, "grad_norm": 0.732815032517925, "learning_rate": 9.910008882603664e-06, "loss": 0.1768, "step": 3040 }, { "epoch": 0.08871579438707042, "grad_norm": 1.1269245047076029, "learning_rate": 9.909919631254028e-06, "loss": 0.1973, "step": 3041 }, { "epoch": 0.08874496761771399, "grad_norm": 0.9555805411047182, "learning_rate": 9.909830336069803e-06, "loss": 0.2164, "step": 3042 }, { "epoch": 0.08877414084835755, "grad_norm": 0.9834162396195962, "learning_rate": 9.909740997051786e-06, "loss": 0.1821, "step": 3043 }, { "epoch": 0.0888033140790011, "grad_norm": 0.8057651821738971, "learning_rate": 9.909651614200773e-06, "loss": 0.1889, "step": 3044 }, { "epoch": 0.08883248730964467, "grad_norm": 0.9273619873149924, "learning_rate": 9.90956218751756e-06, "loss": 0.1914, "step": 3045 }, { "epoch": 0.08886166054028823, "grad_norm": 0.7597660008902855, "learning_rate": 9.90947271700295e-06, "loss": 0.1796, "step": 3046 }, { "epoch": 0.08889083377093179, "grad_norm": 0.7956731608419013, "learning_rate": 9.909383202657739e-06, "loss": 0.1871, "step": 3047 }, { "epoch": 0.08892000700157536, "grad_norm": 0.7726968702012784, "learning_rate": 9.909293644482727e-06, "loss": 0.1549, "step": 3048 }, { "epoch": 0.08894918023221891, "grad_norm": 0.9075519329697846, "learning_rate": 9.909204042478713e-06, "loss": 0.1771, "step": 3049 }, { "epoch": 0.08897835346286248, "grad_norm": 0.9671623273112833, "learning_rate": 9.9091143966465e-06, "loss": 0.1916, "step": 3050 }, { "epoch": 0.08900752669350603, "grad_norm": 0.9015163634226375, "learning_rate": 9.909024706986881e-06, "loss": 0.2011, "step": 3051 }, { "epoch": 0.0890366999241496, "grad_norm": 0.9184962450164161, "learning_rate": 9.908934973500664e-06, "loss": 0.1803, "step": 3052 }, { "epoch": 0.08906587315479317, "grad_norm": 0.8450153788163894, "learning_rate": 9.908845196188647e-06, "loss": 0.1614, "step": 3053 }, { "epoch": 0.08909504638543672, "grad_norm": 0.9553291965529953, "learning_rate": 9.908755375051631e-06, "loss": 0.194, "step": 3054 }, { "epoch": 0.08912421961608029, "grad_norm": 0.891058644169043, "learning_rate": 9.90866551009042e-06, "loss": 0.1671, "step": 3055 }, { "epoch": 0.08915339284672384, "grad_norm": 1.2104446913879496, "learning_rate": 9.908575601305815e-06, "loss": 0.1813, "step": 3056 }, { "epoch": 0.08918256607736741, "grad_norm": 1.0127375435632076, "learning_rate": 9.908485648698618e-06, "loss": 0.2193, "step": 3057 }, { "epoch": 0.08921173930801098, "grad_norm": 1.0258627847235318, "learning_rate": 9.908395652269633e-06, "loss": 0.1938, "step": 3058 }, { "epoch": 0.08924091253865453, "grad_norm": 1.0079976664411516, "learning_rate": 9.908305612019665e-06, "loss": 0.2049, "step": 3059 }, { "epoch": 0.0892700857692981, "grad_norm": 0.9704927433343631, "learning_rate": 9.908215527949514e-06, "loss": 0.1988, "step": 3060 }, { "epoch": 0.08929925899994165, "grad_norm": 0.8499876970223335, "learning_rate": 9.908125400059988e-06, "loss": 0.1726, "step": 3061 }, { "epoch": 0.08932843223058522, "grad_norm": 0.8942257336748414, "learning_rate": 9.908035228351888e-06, "loss": 0.1965, "step": 3062 }, { "epoch": 0.08935760546122877, "grad_norm": 0.9569510087355682, "learning_rate": 9.907945012826022e-06, "loss": 0.1926, "step": 3063 }, { "epoch": 0.08938677869187234, "grad_norm": 1.0157032076843844, "learning_rate": 9.907854753483194e-06, "loss": 0.2116, "step": 3064 }, { "epoch": 0.0894159519225159, "grad_norm": 1.0428423346131277, "learning_rate": 9.907764450324213e-06, "loss": 0.192, "step": 3065 }, { "epoch": 0.08944512515315946, "grad_norm": 0.9531557202518242, "learning_rate": 9.90767410334988e-06, "loss": 0.1769, "step": 3066 }, { "epoch": 0.08947429838380302, "grad_norm": 0.9251978560430241, "learning_rate": 9.907583712561007e-06, "loss": 0.1977, "step": 3067 }, { "epoch": 0.08950347161444658, "grad_norm": 0.8586274452048444, "learning_rate": 9.907493277958395e-06, "loss": 0.1731, "step": 3068 }, { "epoch": 0.08953264484509015, "grad_norm": 0.9827735005065518, "learning_rate": 9.907402799542856e-06, "loss": 0.2121, "step": 3069 }, { "epoch": 0.08956181807573371, "grad_norm": 0.9753572676253564, "learning_rate": 9.907312277315196e-06, "loss": 0.18, "step": 3070 }, { "epoch": 0.08959099130637727, "grad_norm": 0.8210386693880609, "learning_rate": 9.907221711276224e-06, "loss": 0.1682, "step": 3071 }, { "epoch": 0.08962016453702083, "grad_norm": 0.8229563047627599, "learning_rate": 9.907131101426748e-06, "loss": 0.1851, "step": 3072 }, { "epoch": 0.08964933776766439, "grad_norm": 1.1028219063692366, "learning_rate": 9.907040447767575e-06, "loss": 0.193, "step": 3073 }, { "epoch": 0.08967851099830795, "grad_norm": 0.9994190165962931, "learning_rate": 9.906949750299519e-06, "loss": 0.1843, "step": 3074 }, { "epoch": 0.08970768422895152, "grad_norm": 0.8739944914336276, "learning_rate": 9.906859009023386e-06, "loss": 0.1813, "step": 3075 }, { "epoch": 0.08973685745959507, "grad_norm": 0.9650162467973781, "learning_rate": 9.906768223939986e-06, "loss": 0.1986, "step": 3076 }, { "epoch": 0.08976603069023864, "grad_norm": 1.203808331887953, "learning_rate": 9.906677395050132e-06, "loss": 0.2227, "step": 3077 }, { "epoch": 0.0897952039208822, "grad_norm": 0.9256040756127788, "learning_rate": 9.906586522354633e-06, "loss": 0.1764, "step": 3078 }, { "epoch": 0.08982437715152576, "grad_norm": 0.9056929166129613, "learning_rate": 9.9064956058543e-06, "loss": 0.1918, "step": 3079 }, { "epoch": 0.08985355038216933, "grad_norm": 0.925389887214733, "learning_rate": 9.906404645549947e-06, "loss": 0.1699, "step": 3080 }, { "epoch": 0.08988272361281288, "grad_norm": 1.0533964757737913, "learning_rate": 9.906313641442385e-06, "loss": 0.1714, "step": 3081 }, { "epoch": 0.08991189684345645, "grad_norm": 1.0862831572218263, "learning_rate": 9.906222593532424e-06, "loss": 0.2107, "step": 3082 }, { "epoch": 0.0899410700741, "grad_norm": 0.8008642476942929, "learning_rate": 9.906131501820881e-06, "loss": 0.1854, "step": 3083 }, { "epoch": 0.08997024330474357, "grad_norm": 1.3107928344527915, "learning_rate": 9.906040366308565e-06, "loss": 0.1836, "step": 3084 }, { "epoch": 0.08999941653538712, "grad_norm": 1.1093016703702059, "learning_rate": 9.905949186996293e-06, "loss": 0.1997, "step": 3085 }, { "epoch": 0.09002858976603069, "grad_norm": 1.219609482626167, "learning_rate": 9.905857963884878e-06, "loss": 0.1899, "step": 3086 }, { "epoch": 0.09005776299667426, "grad_norm": 0.8906354180218126, "learning_rate": 9.905766696975134e-06, "loss": 0.1925, "step": 3087 }, { "epoch": 0.09008693622731781, "grad_norm": 0.8998464826192879, "learning_rate": 9.905675386267877e-06, "loss": 0.2051, "step": 3088 }, { "epoch": 0.09011610945796138, "grad_norm": 1.0311402053101608, "learning_rate": 9.90558403176392e-06, "loss": 0.1956, "step": 3089 }, { "epoch": 0.09014528268860493, "grad_norm": 1.046837754144681, "learning_rate": 9.90549263346408e-06, "loss": 0.2266, "step": 3090 }, { "epoch": 0.0901744559192485, "grad_norm": 0.9232934703463918, "learning_rate": 9.905401191369172e-06, "loss": 0.2041, "step": 3091 }, { "epoch": 0.09020362914989206, "grad_norm": 0.929324485804581, "learning_rate": 9.905309705480014e-06, "loss": 0.2204, "step": 3092 }, { "epoch": 0.09023280238053562, "grad_norm": 0.7903364123471041, "learning_rate": 9.905218175797421e-06, "loss": 0.1999, "step": 3093 }, { "epoch": 0.09026197561117918, "grad_norm": 1.137748834396601, "learning_rate": 9.905126602322212e-06, "loss": 0.1671, "step": 3094 }, { "epoch": 0.09029114884182274, "grad_norm": 1.044897327008593, "learning_rate": 9.905034985055205e-06, "loss": 0.1846, "step": 3095 }, { "epoch": 0.0903203220724663, "grad_norm": 0.7780612022090226, "learning_rate": 9.904943323997216e-06, "loss": 0.1588, "step": 3096 }, { "epoch": 0.09034949530310987, "grad_norm": 0.8476472221427317, "learning_rate": 9.904851619149063e-06, "loss": 0.1766, "step": 3097 }, { "epoch": 0.09037866853375343, "grad_norm": 0.922397270879313, "learning_rate": 9.904759870511564e-06, "loss": 0.165, "step": 3098 }, { "epoch": 0.09040784176439699, "grad_norm": 0.9510848219792485, "learning_rate": 9.904668078085543e-06, "loss": 0.2115, "step": 3099 }, { "epoch": 0.09043701499504055, "grad_norm": 0.8949759049240087, "learning_rate": 9.904576241871814e-06, "loss": 0.1911, "step": 3100 }, { "epoch": 0.09046618822568411, "grad_norm": 1.4688418395431837, "learning_rate": 9.9044843618712e-06, "loss": 0.1785, "step": 3101 }, { "epoch": 0.09049536145632768, "grad_norm": 0.7970945369810468, "learning_rate": 9.90439243808452e-06, "loss": 0.2037, "step": 3102 }, { "epoch": 0.09052453468697123, "grad_norm": 0.8455029656333727, "learning_rate": 9.904300470512595e-06, "loss": 0.1704, "step": 3103 }, { "epoch": 0.0905537079176148, "grad_norm": 0.9547802543491765, "learning_rate": 9.904208459156247e-06, "loss": 0.1919, "step": 3104 }, { "epoch": 0.09058288114825835, "grad_norm": 1.006677853859439, "learning_rate": 9.904116404016296e-06, "loss": 0.2218, "step": 3105 }, { "epoch": 0.09061205437890192, "grad_norm": 0.8237642844111117, "learning_rate": 9.904024305093564e-06, "loss": 0.1964, "step": 3106 }, { "epoch": 0.09064122760954547, "grad_norm": 0.8582909828870113, "learning_rate": 9.903932162388875e-06, "loss": 0.2008, "step": 3107 }, { "epoch": 0.09067040084018904, "grad_norm": 0.7976472158239346, "learning_rate": 9.903839975903049e-06, "loss": 0.1964, "step": 3108 }, { "epoch": 0.09069957407083261, "grad_norm": 0.8758348784855697, "learning_rate": 9.903747745636912e-06, "loss": 0.2018, "step": 3109 }, { "epoch": 0.09072874730147616, "grad_norm": 0.9311384467244032, "learning_rate": 9.903655471591285e-06, "loss": 0.1821, "step": 3110 }, { "epoch": 0.09075792053211973, "grad_norm": 0.8383641189741342, "learning_rate": 9.903563153766992e-06, "loss": 0.1604, "step": 3111 }, { "epoch": 0.09078709376276328, "grad_norm": 0.7450909120524571, "learning_rate": 9.903470792164857e-06, "loss": 0.1886, "step": 3112 }, { "epoch": 0.09081626699340685, "grad_norm": 0.9430597561494886, "learning_rate": 9.903378386785707e-06, "loss": 0.2151, "step": 3113 }, { "epoch": 0.09084544022405042, "grad_norm": 1.0206645907134217, "learning_rate": 9.903285937630364e-06, "loss": 0.1744, "step": 3114 }, { "epoch": 0.09087461345469397, "grad_norm": 0.8434807430942434, "learning_rate": 9.903193444699656e-06, "loss": 0.1908, "step": 3115 }, { "epoch": 0.09090378668533754, "grad_norm": 0.9500140411201377, "learning_rate": 9.903100907994407e-06, "loss": 0.2084, "step": 3116 }, { "epoch": 0.09093295991598109, "grad_norm": 0.837192644453322, "learning_rate": 9.903008327515442e-06, "loss": 0.1884, "step": 3117 }, { "epoch": 0.09096213314662466, "grad_norm": 0.8378672650614899, "learning_rate": 9.902915703263591e-06, "loss": 0.1848, "step": 3118 }, { "epoch": 0.09099130637726822, "grad_norm": 0.9663374647208482, "learning_rate": 9.902823035239678e-06, "loss": 0.1759, "step": 3119 }, { "epoch": 0.09102047960791178, "grad_norm": 0.841080490878279, "learning_rate": 9.902730323444531e-06, "loss": 0.2047, "step": 3120 }, { "epoch": 0.09104965283855534, "grad_norm": 0.9822202779269245, "learning_rate": 9.902637567878979e-06, "loss": 0.1889, "step": 3121 }, { "epoch": 0.0910788260691989, "grad_norm": 0.8996420337880603, "learning_rate": 9.90254476854385e-06, "loss": 0.1784, "step": 3122 }, { "epoch": 0.09110799929984247, "grad_norm": 1.3390013111690215, "learning_rate": 9.90245192543997e-06, "loss": 0.1851, "step": 3123 }, { "epoch": 0.09113717253048603, "grad_norm": 0.8384136121624998, "learning_rate": 9.90235903856817e-06, "loss": 0.192, "step": 3124 }, { "epoch": 0.09116634576112959, "grad_norm": 0.8420761828985089, "learning_rate": 9.902266107929279e-06, "loss": 0.1807, "step": 3125 }, { "epoch": 0.09119551899177315, "grad_norm": 0.8803474977758997, "learning_rate": 9.902173133524125e-06, "loss": 0.1902, "step": 3126 }, { "epoch": 0.0912246922224167, "grad_norm": 0.9323609834892589, "learning_rate": 9.902080115353541e-06, "loss": 0.2006, "step": 3127 }, { "epoch": 0.09125386545306027, "grad_norm": 0.9239092626296833, "learning_rate": 9.901987053418355e-06, "loss": 0.1729, "step": 3128 }, { "epoch": 0.09128303868370383, "grad_norm": 0.9192162854775283, "learning_rate": 9.901893947719401e-06, "loss": 0.1734, "step": 3129 }, { "epoch": 0.0913122119143474, "grad_norm": 1.1515306150746933, "learning_rate": 9.901800798257506e-06, "loss": 0.2072, "step": 3130 }, { "epoch": 0.09134138514499096, "grad_norm": 0.9095002771703233, "learning_rate": 9.901707605033504e-06, "loss": 0.1883, "step": 3131 }, { "epoch": 0.09137055837563451, "grad_norm": 0.9494728333330158, "learning_rate": 9.901614368048226e-06, "loss": 0.1689, "step": 3132 }, { "epoch": 0.09139973160627808, "grad_norm": 1.0741251960370346, "learning_rate": 9.901521087302508e-06, "loss": 0.1848, "step": 3133 }, { "epoch": 0.09142890483692163, "grad_norm": 0.9276160324018725, "learning_rate": 9.901427762797176e-06, "loss": 0.201, "step": 3134 }, { "epoch": 0.0914580780675652, "grad_norm": 0.9020587477903261, "learning_rate": 9.901334394533069e-06, "loss": 0.1815, "step": 3135 }, { "epoch": 0.09148725129820877, "grad_norm": 0.9543040013265817, "learning_rate": 9.901240982511017e-06, "loss": 0.1853, "step": 3136 }, { "epoch": 0.09151642452885232, "grad_norm": 0.8074048966683778, "learning_rate": 9.901147526731857e-06, "loss": 0.2072, "step": 3137 }, { "epoch": 0.09154559775949589, "grad_norm": 0.8168688399715941, "learning_rate": 9.901054027196422e-06, "loss": 0.1705, "step": 3138 }, { "epoch": 0.09157477099013944, "grad_norm": 0.9696701967770042, "learning_rate": 9.900960483905546e-06, "loss": 0.1854, "step": 3139 }, { "epoch": 0.09160394422078301, "grad_norm": 0.8608991500956572, "learning_rate": 9.900866896860066e-06, "loss": 0.1764, "step": 3140 }, { "epoch": 0.09163311745142658, "grad_norm": 0.9563370412441334, "learning_rate": 9.900773266060814e-06, "loss": 0.2001, "step": 3141 }, { "epoch": 0.09166229068207013, "grad_norm": 1.0622492285855472, "learning_rate": 9.90067959150863e-06, "loss": 0.2042, "step": 3142 }, { "epoch": 0.0916914639127137, "grad_norm": 0.980733770469344, "learning_rate": 9.90058587320435e-06, "loss": 0.207, "step": 3143 }, { "epoch": 0.09172063714335725, "grad_norm": 0.9548998232870056, "learning_rate": 9.900492111148804e-06, "loss": 0.2022, "step": 3144 }, { "epoch": 0.09174981037400082, "grad_norm": 1.1523984929924487, "learning_rate": 9.900398305342838e-06, "loss": 0.1978, "step": 3145 }, { "epoch": 0.09177898360464438, "grad_norm": 0.9496215587216399, "learning_rate": 9.900304455787285e-06, "loss": 0.1852, "step": 3146 }, { "epoch": 0.09180815683528794, "grad_norm": 1.0291303492810868, "learning_rate": 9.900210562482985e-06, "loss": 0.1993, "step": 3147 }, { "epoch": 0.0918373300659315, "grad_norm": 1.1989672928651198, "learning_rate": 9.900116625430774e-06, "loss": 0.193, "step": 3148 }, { "epoch": 0.09186650329657506, "grad_norm": 0.9900630480878376, "learning_rate": 9.90002264463149e-06, "loss": 0.2284, "step": 3149 }, { "epoch": 0.09189567652721863, "grad_norm": 1.0363159213300737, "learning_rate": 9.899928620085975e-06, "loss": 0.196, "step": 3150 }, { "epoch": 0.09192484975786219, "grad_norm": 1.089681182274323, "learning_rate": 9.899834551795066e-06, "loss": 0.1909, "step": 3151 }, { "epoch": 0.09195402298850575, "grad_norm": 1.1716790411151727, "learning_rate": 9.899740439759605e-06, "loss": 0.1828, "step": 3152 }, { "epoch": 0.09198319621914931, "grad_norm": 0.8815311348464451, "learning_rate": 9.899646283980432e-06, "loss": 0.1781, "step": 3153 }, { "epoch": 0.09201236944979287, "grad_norm": 0.8823874358845932, "learning_rate": 9.899552084458383e-06, "loss": 0.2, "step": 3154 }, { "epoch": 0.09204154268043643, "grad_norm": 1.0980207624784954, "learning_rate": 9.899457841194307e-06, "loss": 0.1835, "step": 3155 }, { "epoch": 0.09207071591107999, "grad_norm": 1.09346749548337, "learning_rate": 9.899363554189038e-06, "loss": 0.1886, "step": 3156 }, { "epoch": 0.09209988914172355, "grad_norm": 0.832123474446183, "learning_rate": 9.899269223443421e-06, "loss": 0.2037, "step": 3157 }, { "epoch": 0.09212906237236712, "grad_norm": 0.9348866978298538, "learning_rate": 9.899174848958298e-06, "loss": 0.191, "step": 3158 }, { "epoch": 0.09215823560301067, "grad_norm": 0.8448691113518083, "learning_rate": 9.899080430734512e-06, "loss": 0.168, "step": 3159 }, { "epoch": 0.09218740883365424, "grad_norm": 0.9668811661491377, "learning_rate": 9.898985968772905e-06, "loss": 0.1911, "step": 3160 }, { "epoch": 0.0922165820642978, "grad_norm": 0.8822283021391198, "learning_rate": 9.898891463074321e-06, "loss": 0.2, "step": 3161 }, { "epoch": 0.09224575529494136, "grad_norm": 0.8299612372235152, "learning_rate": 9.898796913639605e-06, "loss": 0.1518, "step": 3162 }, { "epoch": 0.09227492852558493, "grad_norm": 1.1499537794875645, "learning_rate": 9.898702320469597e-06, "loss": 0.1837, "step": 3163 }, { "epoch": 0.09230410175622848, "grad_norm": 0.9581816435886256, "learning_rate": 9.898607683565146e-06, "loss": 0.1635, "step": 3164 }, { "epoch": 0.09233327498687205, "grad_norm": 0.776698703755397, "learning_rate": 9.898513002927094e-06, "loss": 0.1697, "step": 3165 }, { "epoch": 0.0923624482175156, "grad_norm": 1.1063066015427327, "learning_rate": 9.898418278556288e-06, "loss": 0.1696, "step": 3166 }, { "epoch": 0.09239162144815917, "grad_norm": 1.0075887363532354, "learning_rate": 9.898323510453571e-06, "loss": 0.1845, "step": 3167 }, { "epoch": 0.09242079467880274, "grad_norm": 0.9451382122340356, "learning_rate": 9.898228698619794e-06, "loss": 0.1791, "step": 3168 }, { "epoch": 0.09244996790944629, "grad_norm": 1.037499933351149, "learning_rate": 9.898133843055798e-06, "loss": 0.1729, "step": 3169 }, { "epoch": 0.09247914114008986, "grad_norm": 1.3896563982420864, "learning_rate": 9.898038943762434e-06, "loss": 0.1966, "step": 3170 }, { "epoch": 0.09250831437073341, "grad_norm": 1.009329571422478, "learning_rate": 9.897944000740547e-06, "loss": 0.2243, "step": 3171 }, { "epoch": 0.09253748760137698, "grad_norm": 0.8822312471226745, "learning_rate": 9.897849013990985e-06, "loss": 0.1789, "step": 3172 }, { "epoch": 0.09256666083202054, "grad_norm": 1.1137719163993303, "learning_rate": 9.897753983514595e-06, "loss": 0.222, "step": 3173 }, { "epoch": 0.0925958340626641, "grad_norm": 1.0172438228433538, "learning_rate": 9.897658909312229e-06, "loss": 0.1824, "step": 3174 }, { "epoch": 0.09262500729330767, "grad_norm": 0.9884739364426408, "learning_rate": 9.897563791384733e-06, "loss": 0.1768, "step": 3175 }, { "epoch": 0.09265418052395122, "grad_norm": 1.0481532690179143, "learning_rate": 9.897468629732956e-06, "loss": 0.2083, "step": 3176 }, { "epoch": 0.09268335375459479, "grad_norm": 0.9691419807415792, "learning_rate": 9.897373424357747e-06, "loss": 0.2084, "step": 3177 }, { "epoch": 0.09271252698523834, "grad_norm": 0.9862788336906526, "learning_rate": 9.897278175259959e-06, "loss": 0.1862, "step": 3178 }, { "epoch": 0.0927417002158819, "grad_norm": 0.9780930416857407, "learning_rate": 9.897182882440439e-06, "loss": 0.1723, "step": 3179 }, { "epoch": 0.09277087344652547, "grad_norm": 0.7931073017265083, "learning_rate": 9.897087545900039e-06, "loss": 0.1902, "step": 3180 }, { "epoch": 0.09280004667716903, "grad_norm": 0.8816903279426856, "learning_rate": 9.896992165639612e-06, "loss": 0.1863, "step": 3181 }, { "epoch": 0.0928292199078126, "grad_norm": 0.949194604791015, "learning_rate": 9.896896741660008e-06, "loss": 0.2192, "step": 3182 }, { "epoch": 0.09285839313845615, "grad_norm": 0.918235761083928, "learning_rate": 9.896801273962078e-06, "loss": 0.1992, "step": 3183 }, { "epoch": 0.09288756636909971, "grad_norm": 0.807178404781097, "learning_rate": 9.896705762546676e-06, "loss": 0.1997, "step": 3184 }, { "epoch": 0.09291673959974328, "grad_norm": 0.9321238823064868, "learning_rate": 9.896610207414654e-06, "loss": 0.2117, "step": 3185 }, { "epoch": 0.09294591283038683, "grad_norm": 0.9375778044105287, "learning_rate": 9.896514608566863e-06, "loss": 0.2062, "step": 3186 }, { "epoch": 0.0929750860610304, "grad_norm": 0.7205607205601595, "learning_rate": 9.896418966004159e-06, "loss": 0.1912, "step": 3187 }, { "epoch": 0.09300425929167395, "grad_norm": 0.8587312590346948, "learning_rate": 9.896323279727398e-06, "loss": 0.186, "step": 3188 }, { "epoch": 0.09303343252231752, "grad_norm": 1.0585477782466406, "learning_rate": 9.89622754973743e-06, "loss": 0.2327, "step": 3189 }, { "epoch": 0.09306260575296109, "grad_norm": 0.8824097003662679, "learning_rate": 9.896131776035111e-06, "loss": 0.1881, "step": 3190 }, { "epoch": 0.09309177898360464, "grad_norm": 1.0035868168095385, "learning_rate": 9.896035958621295e-06, "loss": 0.1937, "step": 3191 }, { "epoch": 0.09312095221424821, "grad_norm": 1.0261592643292214, "learning_rate": 9.89594009749684e-06, "loss": 0.2211, "step": 3192 }, { "epoch": 0.09315012544489176, "grad_norm": 0.8813262335652425, "learning_rate": 9.895844192662602e-06, "loss": 0.1801, "step": 3193 }, { "epoch": 0.09317929867553533, "grad_norm": 1.178278218707878, "learning_rate": 9.895748244119434e-06, "loss": 0.1958, "step": 3194 }, { "epoch": 0.0932084719061789, "grad_norm": 1.0313091909980319, "learning_rate": 9.895652251868196e-06, "loss": 0.2324, "step": 3195 }, { "epoch": 0.09323764513682245, "grad_norm": 0.9311020701092604, "learning_rate": 9.89555621590974e-06, "loss": 0.1788, "step": 3196 }, { "epoch": 0.09326681836746602, "grad_norm": 0.764970517720893, "learning_rate": 9.89546013624493e-06, "loss": 0.1872, "step": 3197 }, { "epoch": 0.09329599159810957, "grad_norm": 0.9470134867851716, "learning_rate": 9.89536401287462e-06, "loss": 0.1919, "step": 3198 }, { "epoch": 0.09332516482875314, "grad_norm": 0.8313124470837624, "learning_rate": 9.895267845799667e-06, "loss": 0.2011, "step": 3199 }, { "epoch": 0.09335433805939669, "grad_norm": 0.7609124467794467, "learning_rate": 9.895171635020933e-06, "loss": 0.1792, "step": 3200 }, { "epoch": 0.09338351129004026, "grad_norm": 0.8741796775430868, "learning_rate": 9.895075380539275e-06, "loss": 0.213, "step": 3201 }, { "epoch": 0.09341268452068383, "grad_norm": 0.9733134720207894, "learning_rate": 9.894979082355552e-06, "loss": 0.2125, "step": 3202 }, { "epoch": 0.09344185775132738, "grad_norm": 0.9272558824072925, "learning_rate": 9.894882740470625e-06, "loss": 0.2068, "step": 3203 }, { "epoch": 0.09347103098197095, "grad_norm": 0.8939817708889214, "learning_rate": 9.894786354885354e-06, "loss": 0.2177, "step": 3204 }, { "epoch": 0.0935002042126145, "grad_norm": 0.7836086036755353, "learning_rate": 9.894689925600596e-06, "loss": 0.1964, "step": 3205 }, { "epoch": 0.09352937744325807, "grad_norm": 0.8645969989270436, "learning_rate": 9.894593452617216e-06, "loss": 0.1979, "step": 3206 }, { "epoch": 0.09355855067390163, "grad_norm": 0.8637258641034835, "learning_rate": 9.894496935936076e-06, "loss": 0.179, "step": 3207 }, { "epoch": 0.09358772390454519, "grad_norm": 0.686666093078826, "learning_rate": 9.894400375558035e-06, "loss": 0.1797, "step": 3208 }, { "epoch": 0.09361689713518875, "grad_norm": 0.9946572338255079, "learning_rate": 9.894303771483955e-06, "loss": 0.1913, "step": 3209 }, { "epoch": 0.0936460703658323, "grad_norm": 0.7749649434714495, "learning_rate": 9.8942071237147e-06, "loss": 0.1663, "step": 3210 }, { "epoch": 0.09367524359647587, "grad_norm": 0.7833073905906534, "learning_rate": 9.894110432251131e-06, "loss": 0.1699, "step": 3211 }, { "epoch": 0.09370441682711944, "grad_norm": 0.9176131665834725, "learning_rate": 9.894013697094113e-06, "loss": 0.178, "step": 3212 }, { "epoch": 0.093733590057763, "grad_norm": 0.82519863993861, "learning_rate": 9.89391691824451e-06, "loss": 0.1909, "step": 3213 }, { "epoch": 0.09376276328840656, "grad_norm": 1.0785544956802007, "learning_rate": 9.893820095703185e-06, "loss": 0.1708, "step": 3214 }, { "epoch": 0.09379193651905011, "grad_norm": 0.8957478455051892, "learning_rate": 9.893723229471001e-06, "loss": 0.1815, "step": 3215 }, { "epoch": 0.09382110974969368, "grad_norm": 1.0309759483868939, "learning_rate": 9.893626319548823e-06, "loss": 0.2181, "step": 3216 }, { "epoch": 0.09385028298033725, "grad_norm": 1.112924509433906, "learning_rate": 9.89352936593752e-06, "loss": 0.1885, "step": 3217 }, { "epoch": 0.0938794562109808, "grad_norm": 1.0197801277631693, "learning_rate": 9.893432368637954e-06, "loss": 0.1724, "step": 3218 }, { "epoch": 0.09390862944162437, "grad_norm": 0.9052079172498365, "learning_rate": 9.893335327650992e-06, "loss": 0.1759, "step": 3219 }, { "epoch": 0.09393780267226792, "grad_norm": 1.0412474859763168, "learning_rate": 9.893238242977502e-06, "loss": 0.1901, "step": 3220 }, { "epoch": 0.09396697590291149, "grad_norm": 0.8339256080577501, "learning_rate": 9.893141114618348e-06, "loss": 0.1968, "step": 3221 }, { "epoch": 0.09399614913355504, "grad_norm": 0.8540496003410044, "learning_rate": 9.893043942574397e-06, "loss": 0.1831, "step": 3222 }, { "epoch": 0.09402532236419861, "grad_norm": 0.992501538161245, "learning_rate": 9.89294672684652e-06, "loss": 0.2003, "step": 3223 }, { "epoch": 0.09405449559484218, "grad_norm": 0.9560750673388776, "learning_rate": 9.89284946743558e-06, "loss": 0.197, "step": 3224 }, { "epoch": 0.09408366882548573, "grad_norm": 1.117693455821563, "learning_rate": 9.892752164342449e-06, "loss": 0.1927, "step": 3225 }, { "epoch": 0.0941128420561293, "grad_norm": 0.918814702440944, "learning_rate": 9.892654817567995e-06, "loss": 0.1923, "step": 3226 }, { "epoch": 0.09414201528677285, "grad_norm": 1.0096062228169131, "learning_rate": 9.892557427113087e-06, "loss": 0.1659, "step": 3227 }, { "epoch": 0.09417118851741642, "grad_norm": 0.9441484563863961, "learning_rate": 9.892459992978594e-06, "loss": 0.1872, "step": 3228 }, { "epoch": 0.09420036174805999, "grad_norm": 0.9177509239500861, "learning_rate": 9.892362515165386e-06, "loss": 0.2264, "step": 3229 }, { "epoch": 0.09422953497870354, "grad_norm": 0.898743746501821, "learning_rate": 9.892264993674334e-06, "loss": 0.189, "step": 3230 }, { "epoch": 0.0942587082093471, "grad_norm": 0.9162917180759605, "learning_rate": 9.892167428506307e-06, "loss": 0.1947, "step": 3231 }, { "epoch": 0.09428788143999066, "grad_norm": 1.1773099051391154, "learning_rate": 9.892069819662179e-06, "loss": 0.1942, "step": 3232 }, { "epoch": 0.09431705467063423, "grad_norm": 1.013402861911588, "learning_rate": 9.891972167142816e-06, "loss": 0.1939, "step": 3233 }, { "epoch": 0.0943462279012778, "grad_norm": 1.2777285577531594, "learning_rate": 9.891874470949095e-06, "loss": 0.1895, "step": 3234 }, { "epoch": 0.09437540113192135, "grad_norm": 0.9119440469852996, "learning_rate": 9.891776731081887e-06, "loss": 0.209, "step": 3235 }, { "epoch": 0.09440457436256491, "grad_norm": 0.8659368042696534, "learning_rate": 9.891678947542063e-06, "loss": 0.1967, "step": 3236 }, { "epoch": 0.09443374759320847, "grad_norm": 0.9496012057232084, "learning_rate": 9.891581120330498e-06, "loss": 0.1841, "step": 3237 }, { "epoch": 0.09446292082385203, "grad_norm": 0.8945707600040342, "learning_rate": 9.891483249448066e-06, "loss": 0.1724, "step": 3238 }, { "epoch": 0.0944920940544956, "grad_norm": 0.8099025961551439, "learning_rate": 9.891385334895637e-06, "loss": 0.1955, "step": 3239 }, { "epoch": 0.09452126728513915, "grad_norm": 0.8034245101934876, "learning_rate": 9.891287376674089e-06, "loss": 0.1944, "step": 3240 }, { "epoch": 0.09455044051578272, "grad_norm": 0.9094264512261659, "learning_rate": 9.891189374784294e-06, "loss": 0.1938, "step": 3241 }, { "epoch": 0.09457961374642627, "grad_norm": 1.0101400403658125, "learning_rate": 9.891091329227127e-06, "loss": 0.1916, "step": 3242 }, { "epoch": 0.09460878697706984, "grad_norm": 0.9529816607580996, "learning_rate": 9.890993240003465e-06, "loss": 0.1856, "step": 3243 }, { "epoch": 0.09463796020771341, "grad_norm": 0.7679335540210122, "learning_rate": 9.890895107114182e-06, "loss": 0.2018, "step": 3244 }, { "epoch": 0.09466713343835696, "grad_norm": 1.1404036593134883, "learning_rate": 9.890796930560156e-06, "loss": 0.1774, "step": 3245 }, { "epoch": 0.09469630666900053, "grad_norm": 0.8780786896423192, "learning_rate": 9.890698710342263e-06, "loss": 0.1967, "step": 3246 }, { "epoch": 0.09472547989964408, "grad_norm": 0.9576698836614522, "learning_rate": 9.89060044646138e-06, "loss": 0.1814, "step": 3247 }, { "epoch": 0.09475465313028765, "grad_norm": 0.8730570112400692, "learning_rate": 9.890502138918382e-06, "loss": 0.1873, "step": 3248 }, { "epoch": 0.0947838263609312, "grad_norm": 1.2602141128621513, "learning_rate": 9.890403787714148e-06, "loss": 0.1919, "step": 3249 }, { "epoch": 0.09481299959157477, "grad_norm": 1.0245830975735313, "learning_rate": 9.890305392849559e-06, "loss": 0.1758, "step": 3250 }, { "epoch": 0.09484217282221834, "grad_norm": 0.9221155138585105, "learning_rate": 9.89020695432549e-06, "loss": 0.2038, "step": 3251 }, { "epoch": 0.09487134605286189, "grad_norm": 1.4151546844713536, "learning_rate": 9.890108472142818e-06, "loss": 0.2032, "step": 3252 }, { "epoch": 0.09490051928350546, "grad_norm": 1.0559456413965769, "learning_rate": 9.890009946302429e-06, "loss": 0.2026, "step": 3253 }, { "epoch": 0.09492969251414901, "grad_norm": 1.0718071293408422, "learning_rate": 9.889911376805195e-06, "loss": 0.1714, "step": 3254 }, { "epoch": 0.09495886574479258, "grad_norm": 0.9089126402809143, "learning_rate": 9.889812763652002e-06, "loss": 0.2007, "step": 3255 }, { "epoch": 0.09498803897543615, "grad_norm": 1.1091010533807344, "learning_rate": 9.889714106843726e-06, "loss": 0.1913, "step": 3256 }, { "epoch": 0.0950172122060797, "grad_norm": 1.0009387069839508, "learning_rate": 9.889615406381252e-06, "loss": 0.1781, "step": 3257 }, { "epoch": 0.09504638543672327, "grad_norm": 0.9272485928702179, "learning_rate": 9.889516662265457e-06, "loss": 0.1754, "step": 3258 }, { "epoch": 0.09507555866736682, "grad_norm": 0.7614197847993052, "learning_rate": 9.889417874497225e-06, "loss": 0.1636, "step": 3259 }, { "epoch": 0.09510473189801039, "grad_norm": 0.9743955166343347, "learning_rate": 9.889319043077438e-06, "loss": 0.1972, "step": 3260 }, { "epoch": 0.09513390512865395, "grad_norm": 0.9910664775395345, "learning_rate": 9.889220168006977e-06, "loss": 0.2058, "step": 3261 }, { "epoch": 0.0951630783592975, "grad_norm": 0.7768260311895827, "learning_rate": 9.889121249286727e-06, "loss": 0.1661, "step": 3262 }, { "epoch": 0.09519225158994107, "grad_norm": 0.919702114574406, "learning_rate": 9.889022286917567e-06, "loss": 0.1687, "step": 3263 }, { "epoch": 0.09522142482058463, "grad_norm": 1.1063076369430893, "learning_rate": 9.888923280900385e-06, "loss": 0.1684, "step": 3264 }, { "epoch": 0.0952505980512282, "grad_norm": 0.9419642147128596, "learning_rate": 9.888824231236063e-06, "loss": 0.1717, "step": 3265 }, { "epoch": 0.09527977128187176, "grad_norm": 0.8241379303385615, "learning_rate": 9.888725137925484e-06, "loss": 0.1885, "step": 3266 }, { "epoch": 0.09530894451251531, "grad_norm": 1.0135462106994542, "learning_rate": 9.888626000969534e-06, "loss": 0.2109, "step": 3267 }, { "epoch": 0.09533811774315888, "grad_norm": 0.9170158099052814, "learning_rate": 9.8885268203691e-06, "loss": 0.1987, "step": 3268 }, { "epoch": 0.09536729097380243, "grad_norm": 1.0248116195809147, "learning_rate": 9.888427596125063e-06, "loss": 0.2015, "step": 3269 }, { "epoch": 0.095396464204446, "grad_norm": 0.9796239011512825, "learning_rate": 9.888328328238313e-06, "loss": 0.2114, "step": 3270 }, { "epoch": 0.09542563743508956, "grad_norm": 0.9266420074692738, "learning_rate": 9.888229016709735e-06, "loss": 0.1797, "step": 3271 }, { "epoch": 0.09545481066573312, "grad_norm": 1.0461543235954773, "learning_rate": 9.888129661540215e-06, "loss": 0.1819, "step": 3272 }, { "epoch": 0.09548398389637669, "grad_norm": 0.8013264228977466, "learning_rate": 9.88803026273064e-06, "loss": 0.171, "step": 3273 }, { "epoch": 0.09551315712702024, "grad_norm": 0.9709236879591128, "learning_rate": 9.887930820281896e-06, "loss": 0.1764, "step": 3274 }, { "epoch": 0.09554233035766381, "grad_norm": 0.9074604077154521, "learning_rate": 9.887831334194874e-06, "loss": 0.1848, "step": 3275 }, { "epoch": 0.09557150358830736, "grad_norm": 0.8539229277032876, "learning_rate": 9.887731804470462e-06, "loss": 0.1935, "step": 3276 }, { "epoch": 0.09560067681895093, "grad_norm": 0.9419424877500043, "learning_rate": 9.887632231109546e-06, "loss": 0.2011, "step": 3277 }, { "epoch": 0.0956298500495945, "grad_norm": 0.849111456865038, "learning_rate": 9.887532614113018e-06, "loss": 0.1814, "step": 3278 }, { "epoch": 0.09565902328023805, "grad_norm": 0.8042604791344427, "learning_rate": 9.887432953481762e-06, "loss": 0.1734, "step": 3279 }, { "epoch": 0.09568819651088162, "grad_norm": 0.8820753714732914, "learning_rate": 9.887333249216673e-06, "loss": 0.1808, "step": 3280 }, { "epoch": 0.09571736974152517, "grad_norm": 0.8229004316236141, "learning_rate": 9.88723350131864e-06, "loss": 0.2121, "step": 3281 }, { "epoch": 0.09574654297216874, "grad_norm": 0.8417919912196324, "learning_rate": 9.887133709788552e-06, "loss": 0.2052, "step": 3282 }, { "epoch": 0.0957757162028123, "grad_norm": 0.8615882879927675, "learning_rate": 9.887033874627303e-06, "loss": 0.2154, "step": 3283 }, { "epoch": 0.09580488943345586, "grad_norm": 1.0144257818867242, "learning_rate": 9.88693399583578e-06, "loss": 0.1894, "step": 3284 }, { "epoch": 0.09583406266409943, "grad_norm": 0.9307069346703885, "learning_rate": 9.886834073414878e-06, "loss": 0.1935, "step": 3285 }, { "epoch": 0.09586323589474298, "grad_norm": 0.9137495912927327, "learning_rate": 9.886734107365486e-06, "loss": 0.1805, "step": 3286 }, { "epoch": 0.09589240912538655, "grad_norm": 1.0047984497652105, "learning_rate": 9.8866340976885e-06, "loss": 0.2039, "step": 3287 }, { "epoch": 0.09592158235603011, "grad_norm": 0.9344310243993718, "learning_rate": 9.886534044384812e-06, "loss": 0.2172, "step": 3288 }, { "epoch": 0.09595075558667367, "grad_norm": 1.009365258653322, "learning_rate": 9.886433947455314e-06, "loss": 0.1894, "step": 3289 }, { "epoch": 0.09597992881731723, "grad_norm": 1.0553967057413909, "learning_rate": 9.886333806900901e-06, "loss": 0.1806, "step": 3290 }, { "epoch": 0.09600910204796079, "grad_norm": 1.0933924973594682, "learning_rate": 9.886233622722464e-06, "loss": 0.182, "step": 3291 }, { "epoch": 0.09603827527860435, "grad_norm": 0.8160425800459061, "learning_rate": 9.886133394920901e-06, "loss": 0.1702, "step": 3292 }, { "epoch": 0.09606744850924791, "grad_norm": 0.9554707412430091, "learning_rate": 9.886033123497106e-06, "loss": 0.1959, "step": 3293 }, { "epoch": 0.09609662173989147, "grad_norm": 0.7926178740748009, "learning_rate": 9.885932808451973e-06, "loss": 0.1772, "step": 3294 }, { "epoch": 0.09612579497053504, "grad_norm": 0.9356315460002099, "learning_rate": 9.885832449786398e-06, "loss": 0.1948, "step": 3295 }, { "epoch": 0.0961549682011786, "grad_norm": 0.8967607401124458, "learning_rate": 9.885732047501277e-06, "loss": 0.1947, "step": 3296 }, { "epoch": 0.09618414143182216, "grad_norm": 1.0259354476889229, "learning_rate": 9.885631601597508e-06, "loss": 0.173, "step": 3297 }, { "epoch": 0.09621331466246572, "grad_norm": 0.8528681011356201, "learning_rate": 9.885531112075986e-06, "loss": 0.1803, "step": 3298 }, { "epoch": 0.09624248789310928, "grad_norm": 0.7580319978031034, "learning_rate": 9.885430578937608e-06, "loss": 0.1766, "step": 3299 }, { "epoch": 0.09627166112375285, "grad_norm": 0.9766955848878668, "learning_rate": 9.88533000218327e-06, "loss": 0.2093, "step": 3300 }, { "epoch": 0.0963008343543964, "grad_norm": 0.6405171835639228, "learning_rate": 9.885229381813875e-06, "loss": 0.1709, "step": 3301 }, { "epoch": 0.09633000758503997, "grad_norm": 0.9008804603593166, "learning_rate": 9.885128717830317e-06, "loss": 0.2178, "step": 3302 }, { "epoch": 0.09635918081568352, "grad_norm": 0.8826419199815049, "learning_rate": 9.885028010233497e-06, "loss": 0.1689, "step": 3303 }, { "epoch": 0.09638835404632709, "grad_norm": 0.9761910585351266, "learning_rate": 9.884927259024311e-06, "loss": 0.1964, "step": 3304 }, { "epoch": 0.09641752727697066, "grad_norm": 0.8815602490417409, "learning_rate": 9.884826464203662e-06, "loss": 0.1975, "step": 3305 }, { "epoch": 0.09644670050761421, "grad_norm": 0.966632303420737, "learning_rate": 9.88472562577245e-06, "loss": 0.1909, "step": 3306 }, { "epoch": 0.09647587373825778, "grad_norm": 0.9525752124567378, "learning_rate": 9.88462474373157e-06, "loss": 0.1675, "step": 3307 }, { "epoch": 0.09650504696890133, "grad_norm": 0.9006113194578775, "learning_rate": 9.88452381808193e-06, "loss": 0.1781, "step": 3308 }, { "epoch": 0.0965342201995449, "grad_norm": 0.8680790723405596, "learning_rate": 9.884422848824424e-06, "loss": 0.1796, "step": 3309 }, { "epoch": 0.09656339343018847, "grad_norm": 1.6488054174957947, "learning_rate": 9.88432183595996e-06, "loss": 0.1566, "step": 3310 }, { "epoch": 0.09659256666083202, "grad_norm": 0.8059685590284299, "learning_rate": 9.884220779489435e-06, "loss": 0.1827, "step": 3311 }, { "epoch": 0.09662173989147559, "grad_norm": 0.9038782946036477, "learning_rate": 9.884119679413753e-06, "loss": 0.1832, "step": 3312 }, { "epoch": 0.09665091312211914, "grad_norm": 0.8444990222150676, "learning_rate": 9.884018535733816e-06, "loss": 0.2048, "step": 3313 }, { "epoch": 0.0966800863527627, "grad_norm": 0.9332736693317996, "learning_rate": 9.883917348450529e-06, "loss": 0.1994, "step": 3314 }, { "epoch": 0.09670925958340626, "grad_norm": 0.9741830425703921, "learning_rate": 9.883816117564792e-06, "loss": 0.184, "step": 3315 }, { "epoch": 0.09673843281404983, "grad_norm": 0.8552561903773729, "learning_rate": 9.883714843077512e-06, "loss": 0.1802, "step": 3316 }, { "epoch": 0.0967676060446934, "grad_norm": 1.017909983191464, "learning_rate": 9.883613524989591e-06, "loss": 0.2043, "step": 3317 }, { "epoch": 0.09679677927533695, "grad_norm": 0.8278959717775877, "learning_rate": 9.883512163301934e-06, "loss": 0.1761, "step": 3318 }, { "epoch": 0.09682595250598051, "grad_norm": 1.028255430484232, "learning_rate": 9.883410758015446e-06, "loss": 0.1853, "step": 3319 }, { "epoch": 0.09685512573662407, "grad_norm": 0.8815026666009607, "learning_rate": 9.883309309131032e-06, "loss": 0.168, "step": 3320 }, { "epoch": 0.09688429896726763, "grad_norm": 0.9173579267075747, "learning_rate": 9.883207816649599e-06, "loss": 0.1587, "step": 3321 }, { "epoch": 0.0969134721979112, "grad_norm": 0.8683491755852815, "learning_rate": 9.883106280572052e-06, "loss": 0.1983, "step": 3322 }, { "epoch": 0.09694264542855476, "grad_norm": 1.012331012789619, "learning_rate": 9.883004700899299e-06, "loss": 0.176, "step": 3323 }, { "epoch": 0.09697181865919832, "grad_norm": 0.8572340863566649, "learning_rate": 9.882903077632245e-06, "loss": 0.1965, "step": 3324 }, { "epoch": 0.09700099188984188, "grad_norm": 0.8623076186261965, "learning_rate": 9.882801410771798e-06, "loss": 0.166, "step": 3325 }, { "epoch": 0.09703016512048544, "grad_norm": 1.071937739099093, "learning_rate": 9.882699700318865e-06, "loss": 0.173, "step": 3326 }, { "epoch": 0.09705933835112901, "grad_norm": 1.0500033029737166, "learning_rate": 9.882597946274356e-06, "loss": 0.1951, "step": 3327 }, { "epoch": 0.09708851158177256, "grad_norm": 1.1095799706133407, "learning_rate": 9.882496148639178e-06, "loss": 0.1773, "step": 3328 }, { "epoch": 0.09711768481241613, "grad_norm": 0.8412458752082929, "learning_rate": 9.882394307414237e-06, "loss": 0.2105, "step": 3329 }, { "epoch": 0.09714685804305968, "grad_norm": 0.9602601915159229, "learning_rate": 9.88229242260045e-06, "loss": 0.1838, "step": 3330 }, { "epoch": 0.09717603127370325, "grad_norm": 1.056922883017463, "learning_rate": 9.882190494198718e-06, "loss": 0.1984, "step": 3331 }, { "epoch": 0.09720520450434682, "grad_norm": 0.9087468509235599, "learning_rate": 9.882088522209956e-06, "loss": 0.1755, "step": 3332 }, { "epoch": 0.09723437773499037, "grad_norm": 0.8542197272026406, "learning_rate": 9.881986506635073e-06, "loss": 0.1816, "step": 3333 }, { "epoch": 0.09726355096563394, "grad_norm": 0.9848855524266356, "learning_rate": 9.88188444747498e-06, "loss": 0.1975, "step": 3334 }, { "epoch": 0.09729272419627749, "grad_norm": 1.0557472764375053, "learning_rate": 9.881782344730588e-06, "loss": 0.1866, "step": 3335 }, { "epoch": 0.09732189742692106, "grad_norm": 0.9167491324108101, "learning_rate": 9.881680198402808e-06, "loss": 0.1833, "step": 3336 }, { "epoch": 0.09735107065756463, "grad_norm": 0.9669479835621975, "learning_rate": 9.881578008492554e-06, "loss": 0.2438, "step": 3337 }, { "epoch": 0.09738024388820818, "grad_norm": 1.132293288909696, "learning_rate": 9.881475775000735e-06, "loss": 0.2082, "step": 3338 }, { "epoch": 0.09740941711885175, "grad_norm": 1.0823698455225095, "learning_rate": 9.881373497928267e-06, "loss": 0.1738, "step": 3339 }, { "epoch": 0.0974385903494953, "grad_norm": 1.2544833097035346, "learning_rate": 9.881271177276061e-06, "loss": 0.2096, "step": 3340 }, { "epoch": 0.09746776358013887, "grad_norm": 1.025550403217825, "learning_rate": 9.881168813045032e-06, "loss": 0.1918, "step": 3341 }, { "epoch": 0.09749693681078242, "grad_norm": 0.7801058042312882, "learning_rate": 9.881066405236093e-06, "loss": 0.1862, "step": 3342 }, { "epoch": 0.09752611004142599, "grad_norm": 1.123647415104818, "learning_rate": 9.880963953850158e-06, "loss": 0.1944, "step": 3343 }, { "epoch": 0.09755528327206955, "grad_norm": 0.9789486274989931, "learning_rate": 9.880861458888141e-06, "loss": 0.2039, "step": 3344 }, { "epoch": 0.09758445650271311, "grad_norm": 0.8972913970938006, "learning_rate": 9.88075892035096e-06, "loss": 0.1849, "step": 3345 }, { "epoch": 0.09761362973335667, "grad_norm": 1.4062895258580415, "learning_rate": 9.880656338239527e-06, "loss": 0.1955, "step": 3346 }, { "epoch": 0.09764280296400023, "grad_norm": 0.9490073349859146, "learning_rate": 9.880553712554759e-06, "loss": 0.1881, "step": 3347 }, { "epoch": 0.0976719761946438, "grad_norm": 1.015122416593752, "learning_rate": 9.880451043297574e-06, "loss": 0.1759, "step": 3348 }, { "epoch": 0.09770114942528736, "grad_norm": 0.8943725267473936, "learning_rate": 9.880348330468885e-06, "loss": 0.1793, "step": 3349 }, { "epoch": 0.09773032265593092, "grad_norm": 0.969702487842303, "learning_rate": 9.880245574069613e-06, "loss": 0.1867, "step": 3350 }, { "epoch": 0.09775949588657448, "grad_norm": 1.2799073018645288, "learning_rate": 9.880142774100673e-06, "loss": 0.1787, "step": 3351 }, { "epoch": 0.09778866911721804, "grad_norm": 1.0941196809913207, "learning_rate": 9.880039930562983e-06, "loss": 0.1813, "step": 3352 }, { "epoch": 0.0978178423478616, "grad_norm": 1.0847507720429033, "learning_rate": 9.879937043457462e-06, "loss": 0.1697, "step": 3353 }, { "epoch": 0.09784701557850517, "grad_norm": 0.8460299768224915, "learning_rate": 9.879834112785028e-06, "loss": 0.2061, "step": 3354 }, { "epoch": 0.09787618880914872, "grad_norm": 1.1308387993487357, "learning_rate": 9.8797311385466e-06, "loss": 0.1928, "step": 3355 }, { "epoch": 0.09790536203979229, "grad_norm": 1.051652606247428, "learning_rate": 9.879628120743096e-06, "loss": 0.2231, "step": 3356 }, { "epoch": 0.09793453527043584, "grad_norm": 0.9733068669877226, "learning_rate": 9.879525059375438e-06, "loss": 0.198, "step": 3357 }, { "epoch": 0.09796370850107941, "grad_norm": 1.0361697472510567, "learning_rate": 9.879421954444546e-06, "loss": 0.1969, "step": 3358 }, { "epoch": 0.09799288173172298, "grad_norm": 0.9013273137220902, "learning_rate": 9.879318805951339e-06, "loss": 0.2174, "step": 3359 }, { "epoch": 0.09802205496236653, "grad_norm": 0.9373053268113689, "learning_rate": 9.879215613896737e-06, "loss": 0.1638, "step": 3360 }, { "epoch": 0.0980512281930101, "grad_norm": 1.0238897306994317, "learning_rate": 9.879112378281666e-06, "loss": 0.1904, "step": 3361 }, { "epoch": 0.09808040142365365, "grad_norm": 0.9165639559651162, "learning_rate": 9.879009099107042e-06, "loss": 0.1961, "step": 3362 }, { "epoch": 0.09810957465429722, "grad_norm": 1.13160027184167, "learning_rate": 9.87890577637379e-06, "loss": 0.2078, "step": 3363 }, { "epoch": 0.09813874788494077, "grad_norm": 1.0210775302186303, "learning_rate": 9.878802410082832e-06, "loss": 0.1955, "step": 3364 }, { "epoch": 0.09816792111558434, "grad_norm": 0.8217896526873605, "learning_rate": 9.87869900023509e-06, "loss": 0.1682, "step": 3365 }, { "epoch": 0.0981970943462279, "grad_norm": 1.1233133348841937, "learning_rate": 9.87859554683149e-06, "loss": 0.1885, "step": 3366 }, { "epoch": 0.09822626757687146, "grad_norm": 1.0795927439787238, "learning_rate": 9.878492049872951e-06, "loss": 0.1978, "step": 3367 }, { "epoch": 0.09825544080751503, "grad_norm": 1.2355938655652003, "learning_rate": 9.8783885093604e-06, "loss": 0.2049, "step": 3368 }, { "epoch": 0.09828461403815858, "grad_norm": 1.0884883466673847, "learning_rate": 9.878284925294763e-06, "loss": 0.1823, "step": 3369 }, { "epoch": 0.09831378726880215, "grad_norm": 1.2905904713936949, "learning_rate": 9.87818129767696e-06, "loss": 0.2198, "step": 3370 }, { "epoch": 0.09834296049944571, "grad_norm": 0.8099165654820559, "learning_rate": 9.878077626507921e-06, "loss": 0.1829, "step": 3371 }, { "epoch": 0.09837213373008927, "grad_norm": 1.206895294436484, "learning_rate": 9.877973911788569e-06, "loss": 0.2136, "step": 3372 }, { "epoch": 0.09840130696073283, "grad_norm": 1.284751688336377, "learning_rate": 9.87787015351983e-06, "loss": 0.1921, "step": 3373 }, { "epoch": 0.09843048019137639, "grad_norm": 0.7459196499352689, "learning_rate": 9.877766351702631e-06, "loss": 0.1664, "step": 3374 }, { "epoch": 0.09845965342201995, "grad_norm": 1.2156262081437241, "learning_rate": 9.877662506337898e-06, "loss": 0.1964, "step": 3375 }, { "epoch": 0.09848882665266352, "grad_norm": 0.9004909046762573, "learning_rate": 9.877558617426558e-06, "loss": 0.2037, "step": 3376 }, { "epoch": 0.09851799988330708, "grad_norm": 0.8786610661004547, "learning_rate": 9.877454684969541e-06, "loss": 0.1864, "step": 3377 }, { "epoch": 0.09854717311395064, "grad_norm": 0.8747028960856301, "learning_rate": 9.87735070896777e-06, "loss": 0.1912, "step": 3378 }, { "epoch": 0.0985763463445942, "grad_norm": 0.9535249120129909, "learning_rate": 9.87724668942218e-06, "loss": 0.1775, "step": 3379 }, { "epoch": 0.09860551957523776, "grad_norm": 0.7698045496984315, "learning_rate": 9.877142626333692e-06, "loss": 0.1693, "step": 3380 }, { "epoch": 0.09863469280588133, "grad_norm": 0.8464332139561874, "learning_rate": 9.87703851970324e-06, "loss": 0.1714, "step": 3381 }, { "epoch": 0.09866386603652488, "grad_norm": 0.9022198071684441, "learning_rate": 9.876934369531754e-06, "loss": 0.1952, "step": 3382 }, { "epoch": 0.09869303926716845, "grad_norm": 0.9859271685129993, "learning_rate": 9.87683017582016e-06, "loss": 0.1888, "step": 3383 }, { "epoch": 0.098722212497812, "grad_norm": 1.026625590481748, "learning_rate": 9.876725938569392e-06, "loss": 0.1818, "step": 3384 }, { "epoch": 0.09875138572845557, "grad_norm": 0.9036674784035109, "learning_rate": 9.876621657780378e-06, "loss": 0.1709, "step": 3385 }, { "epoch": 0.09878055895909912, "grad_norm": 1.0830903552836817, "learning_rate": 9.876517333454051e-06, "loss": 0.1864, "step": 3386 }, { "epoch": 0.09880973218974269, "grad_norm": 1.0448141036868295, "learning_rate": 9.876412965591343e-06, "loss": 0.1951, "step": 3387 }, { "epoch": 0.09883890542038626, "grad_norm": 0.9688155387079148, "learning_rate": 9.876308554193182e-06, "loss": 0.1695, "step": 3388 }, { "epoch": 0.09886807865102981, "grad_norm": 0.8334733215835574, "learning_rate": 9.876204099260501e-06, "loss": 0.1769, "step": 3389 }, { "epoch": 0.09889725188167338, "grad_norm": 0.9606360477202657, "learning_rate": 9.876099600794236e-06, "loss": 0.1955, "step": 3390 }, { "epoch": 0.09892642511231693, "grad_norm": 0.8205109808036659, "learning_rate": 9.875995058795316e-06, "loss": 0.2048, "step": 3391 }, { "epoch": 0.0989555983429605, "grad_norm": 0.9982764097769949, "learning_rate": 9.875890473264678e-06, "loss": 0.2095, "step": 3392 }, { "epoch": 0.09898477157360407, "grad_norm": 1.0626266558414459, "learning_rate": 9.875785844203251e-06, "loss": 0.2021, "step": 3393 }, { "epoch": 0.09901394480424762, "grad_norm": 0.9973487185751021, "learning_rate": 9.875681171611974e-06, "loss": 0.1683, "step": 3394 }, { "epoch": 0.09904311803489119, "grad_norm": 1.0140220788974939, "learning_rate": 9.87557645549178e-06, "loss": 0.1782, "step": 3395 }, { "epoch": 0.09907229126553474, "grad_norm": 0.9630179068400443, "learning_rate": 9.875471695843603e-06, "loss": 0.169, "step": 3396 }, { "epoch": 0.09910146449617831, "grad_norm": 0.7429839039648668, "learning_rate": 9.875366892668376e-06, "loss": 0.1653, "step": 3397 }, { "epoch": 0.09913063772682187, "grad_norm": 1.075112707108095, "learning_rate": 9.87526204596704e-06, "loss": 0.1951, "step": 3398 }, { "epoch": 0.09915981095746543, "grad_norm": 0.9306430813422164, "learning_rate": 9.875157155740528e-06, "loss": 0.1878, "step": 3399 }, { "epoch": 0.099188984188109, "grad_norm": 0.81734598225374, "learning_rate": 9.875052221989777e-06, "loss": 0.182, "step": 3400 }, { "epoch": 0.09921815741875255, "grad_norm": 0.885771193859634, "learning_rate": 9.874947244715722e-06, "loss": 0.1704, "step": 3401 }, { "epoch": 0.09924733064939611, "grad_norm": 0.8940409062009961, "learning_rate": 9.874842223919303e-06, "loss": 0.1752, "step": 3402 }, { "epoch": 0.09927650388003968, "grad_norm": 0.8175311045028216, "learning_rate": 9.874737159601455e-06, "loss": 0.1635, "step": 3403 }, { "epoch": 0.09930567711068324, "grad_norm": 0.9443309919729741, "learning_rate": 9.87463205176312e-06, "loss": 0.2026, "step": 3404 }, { "epoch": 0.0993348503413268, "grad_norm": 0.919398290601415, "learning_rate": 9.87452690040523e-06, "loss": 0.1726, "step": 3405 }, { "epoch": 0.09936402357197036, "grad_norm": 0.8357805180322369, "learning_rate": 9.87442170552873e-06, "loss": 0.1836, "step": 3406 }, { "epoch": 0.09939319680261392, "grad_norm": 0.885788982003115, "learning_rate": 9.874316467134557e-06, "loss": 0.1737, "step": 3407 }, { "epoch": 0.09942237003325748, "grad_norm": 0.9252449527546076, "learning_rate": 9.874211185223649e-06, "loss": 0.2005, "step": 3408 }, { "epoch": 0.09945154326390104, "grad_norm": 1.4076580398560004, "learning_rate": 9.874105859796947e-06, "loss": 0.2026, "step": 3409 }, { "epoch": 0.09948071649454461, "grad_norm": 0.9840688774481136, "learning_rate": 9.87400049085539e-06, "loss": 0.185, "step": 3410 }, { "epoch": 0.09950988972518816, "grad_norm": 0.8966402285038693, "learning_rate": 9.873895078399925e-06, "loss": 0.1624, "step": 3411 }, { "epoch": 0.09953906295583173, "grad_norm": 0.8245966853829566, "learning_rate": 9.873789622431484e-06, "loss": 0.1746, "step": 3412 }, { "epoch": 0.09956823618647528, "grad_norm": 0.941991276321919, "learning_rate": 9.873684122951013e-06, "loss": 0.1717, "step": 3413 }, { "epoch": 0.09959740941711885, "grad_norm": 0.9563024015671291, "learning_rate": 9.873578579959456e-06, "loss": 0.1923, "step": 3414 }, { "epoch": 0.09962658264776242, "grad_norm": 1.0495462587066298, "learning_rate": 9.87347299345775e-06, "loss": 0.2, "step": 3415 }, { "epoch": 0.09965575587840597, "grad_norm": 1.082272418818284, "learning_rate": 9.873367363446843e-06, "loss": 0.1999, "step": 3416 }, { "epoch": 0.09968492910904954, "grad_norm": 0.9170445275393765, "learning_rate": 9.873261689927674e-06, "loss": 0.1873, "step": 3417 }, { "epoch": 0.09971410233969309, "grad_norm": 0.9607984396105024, "learning_rate": 9.873155972901187e-06, "loss": 0.1884, "step": 3418 }, { "epoch": 0.09974327557033666, "grad_norm": 0.878612477667233, "learning_rate": 9.87305021236833e-06, "loss": 0.1969, "step": 3419 }, { "epoch": 0.09977244880098023, "grad_norm": 0.8412296816249966, "learning_rate": 9.87294440833004e-06, "loss": 0.1816, "step": 3420 }, { "epoch": 0.09980162203162378, "grad_norm": 1.023308551384643, "learning_rate": 9.872838560787269e-06, "loss": 0.1765, "step": 3421 }, { "epoch": 0.09983079526226735, "grad_norm": 0.7906909622655207, "learning_rate": 9.872732669740956e-06, "loss": 0.1857, "step": 3422 }, { "epoch": 0.0998599684929109, "grad_norm": 0.8820929409110352, "learning_rate": 9.87262673519205e-06, "loss": 0.2048, "step": 3423 }, { "epoch": 0.09988914172355447, "grad_norm": 0.968160720640558, "learning_rate": 9.872520757141497e-06, "loss": 0.1854, "step": 3424 }, { "epoch": 0.09991831495419803, "grad_norm": 0.70461981116057, "learning_rate": 9.87241473559024e-06, "loss": 0.183, "step": 3425 }, { "epoch": 0.09994748818484159, "grad_norm": 0.6835017711956095, "learning_rate": 9.872308670539229e-06, "loss": 0.1572, "step": 3426 }, { "epoch": 0.09997666141548515, "grad_norm": 0.8106605104603117, "learning_rate": 9.872202561989409e-06, "loss": 0.188, "step": 3427 }, { "epoch": 0.10000583464612871, "grad_norm": 1.0264724096549127, "learning_rate": 9.872096409941726e-06, "loss": 0.1971, "step": 3428 }, { "epoch": 0.10003500787677228, "grad_norm": 0.7914214116749334, "learning_rate": 9.871990214397131e-06, "loss": 0.183, "step": 3429 }, { "epoch": 0.10006418110741584, "grad_norm": 0.9188258887963189, "learning_rate": 9.871883975356568e-06, "loss": 0.1761, "step": 3430 }, { "epoch": 0.1000933543380594, "grad_norm": 0.8394724893678543, "learning_rate": 9.87177769282099e-06, "loss": 0.1804, "step": 3431 }, { "epoch": 0.10012252756870296, "grad_norm": 1.2163310379645849, "learning_rate": 9.871671366791344e-06, "loss": 0.1958, "step": 3432 }, { "epoch": 0.10015170079934652, "grad_norm": 0.8273408849172802, "learning_rate": 9.87156499726858e-06, "loss": 0.1693, "step": 3433 }, { "epoch": 0.10018087402999008, "grad_norm": 0.9024040961441147, "learning_rate": 9.871458584253644e-06, "loss": 0.1908, "step": 3434 }, { "epoch": 0.10021004726063364, "grad_norm": 0.8330899285828883, "learning_rate": 9.871352127747489e-06, "loss": 0.1711, "step": 3435 }, { "epoch": 0.1002392204912772, "grad_norm": 0.9509143420825992, "learning_rate": 9.871245627751067e-06, "loss": 0.1846, "step": 3436 }, { "epoch": 0.10026839372192077, "grad_norm": 1.0883350035256232, "learning_rate": 9.871139084265324e-06, "loss": 0.2013, "step": 3437 }, { "epoch": 0.10029756695256432, "grad_norm": 1.1309254010693355, "learning_rate": 9.871032497291217e-06, "loss": 0.2145, "step": 3438 }, { "epoch": 0.10032674018320789, "grad_norm": 0.9576681093904603, "learning_rate": 9.870925866829692e-06, "loss": 0.1901, "step": 3439 }, { "epoch": 0.10035591341385144, "grad_norm": 1.1262857177310994, "learning_rate": 9.870819192881707e-06, "loss": 0.1655, "step": 3440 }, { "epoch": 0.10038508664449501, "grad_norm": 0.9866849846226996, "learning_rate": 9.870712475448207e-06, "loss": 0.1817, "step": 3441 }, { "epoch": 0.10041425987513858, "grad_norm": 1.0529171418373293, "learning_rate": 9.870605714530152e-06, "loss": 0.1915, "step": 3442 }, { "epoch": 0.10044343310578213, "grad_norm": 1.0145462296288652, "learning_rate": 9.870498910128492e-06, "loss": 0.189, "step": 3443 }, { "epoch": 0.1004726063364257, "grad_norm": 0.8909609759811756, "learning_rate": 9.870392062244178e-06, "loss": 0.1554, "step": 3444 }, { "epoch": 0.10050177956706925, "grad_norm": 1.1927000558620366, "learning_rate": 9.870285170878167e-06, "loss": 0.177, "step": 3445 }, { "epoch": 0.10053095279771282, "grad_norm": 1.178151159469842, "learning_rate": 9.870178236031413e-06, "loss": 0.2003, "step": 3446 }, { "epoch": 0.10056012602835639, "grad_norm": 0.975324090964805, "learning_rate": 9.870071257704871e-06, "loss": 0.1837, "step": 3447 }, { "epoch": 0.10058929925899994, "grad_norm": 1.1448229783115247, "learning_rate": 9.869964235899494e-06, "loss": 0.174, "step": 3448 }, { "epoch": 0.1006184724896435, "grad_norm": 0.9514149119948482, "learning_rate": 9.86985717061624e-06, "loss": 0.1821, "step": 3449 }, { "epoch": 0.10064764572028706, "grad_norm": 0.847909054724833, "learning_rate": 9.869750061856063e-06, "loss": 0.1681, "step": 3450 }, { "epoch": 0.10067681895093063, "grad_norm": 0.9218506513046638, "learning_rate": 9.869642909619921e-06, "loss": 0.1883, "step": 3451 }, { "epoch": 0.1007059921815742, "grad_norm": 0.9041931059317265, "learning_rate": 9.869535713908768e-06, "loss": 0.1958, "step": 3452 }, { "epoch": 0.10073516541221775, "grad_norm": 0.8896465975703093, "learning_rate": 9.869428474723563e-06, "loss": 0.1794, "step": 3453 }, { "epoch": 0.10076433864286131, "grad_norm": 0.9377163592942124, "learning_rate": 9.869321192065264e-06, "loss": 0.1734, "step": 3454 }, { "epoch": 0.10079351187350487, "grad_norm": 1.0246636715401394, "learning_rate": 9.869213865934827e-06, "loss": 0.1938, "step": 3455 }, { "epoch": 0.10082268510414844, "grad_norm": 0.8475687952188002, "learning_rate": 9.869106496333213e-06, "loss": 0.2209, "step": 3456 }, { "epoch": 0.10085185833479199, "grad_norm": 0.9379196203907476, "learning_rate": 9.868999083261377e-06, "loss": 0.1892, "step": 3457 }, { "epoch": 0.10088103156543556, "grad_norm": 0.9441790698051824, "learning_rate": 9.868891626720279e-06, "loss": 0.2129, "step": 3458 }, { "epoch": 0.10091020479607912, "grad_norm": 0.7326471551649248, "learning_rate": 9.868784126710878e-06, "loss": 0.1785, "step": 3459 }, { "epoch": 0.10093937802672268, "grad_norm": 0.818500409204287, "learning_rate": 9.868676583234136e-06, "loss": 0.1778, "step": 3460 }, { "epoch": 0.10096855125736624, "grad_norm": 0.8706269163312929, "learning_rate": 9.868568996291013e-06, "loss": 0.1668, "step": 3461 }, { "epoch": 0.1009977244880098, "grad_norm": 1.3126513024180493, "learning_rate": 9.868461365882465e-06, "loss": 0.1815, "step": 3462 }, { "epoch": 0.10102689771865336, "grad_norm": 0.8610585831478037, "learning_rate": 9.868353692009458e-06, "loss": 0.177, "step": 3463 }, { "epoch": 0.10105607094929693, "grad_norm": 1.011781175607378, "learning_rate": 9.868245974672952e-06, "loss": 0.1965, "step": 3464 }, { "epoch": 0.10108524417994048, "grad_norm": 0.833371986012559, "learning_rate": 9.868138213873908e-06, "loss": 0.172, "step": 3465 }, { "epoch": 0.10111441741058405, "grad_norm": 1.022504922983816, "learning_rate": 9.868030409613286e-06, "loss": 0.1783, "step": 3466 }, { "epoch": 0.1011435906412276, "grad_norm": 1.0568751412016992, "learning_rate": 9.867922561892053e-06, "loss": 0.2108, "step": 3467 }, { "epoch": 0.10117276387187117, "grad_norm": 0.9318373715013006, "learning_rate": 9.86781467071117e-06, "loss": 0.1764, "step": 3468 }, { "epoch": 0.10120193710251474, "grad_norm": 0.9056729730442917, "learning_rate": 9.867706736071596e-06, "loss": 0.1759, "step": 3469 }, { "epoch": 0.10123111033315829, "grad_norm": 0.8599796170135051, "learning_rate": 9.867598757974302e-06, "loss": 0.1844, "step": 3470 }, { "epoch": 0.10126028356380186, "grad_norm": 0.9620661176977179, "learning_rate": 9.867490736420245e-06, "loss": 0.2018, "step": 3471 }, { "epoch": 0.10128945679444541, "grad_norm": 0.9284833683199802, "learning_rate": 9.867382671410395e-06, "loss": 0.1757, "step": 3472 }, { "epoch": 0.10131863002508898, "grad_norm": 0.8619858597108815, "learning_rate": 9.867274562945713e-06, "loss": 0.2095, "step": 3473 }, { "epoch": 0.10134780325573255, "grad_norm": 1.040972216388095, "learning_rate": 9.867166411027167e-06, "loss": 0.1842, "step": 3474 }, { "epoch": 0.1013769764863761, "grad_norm": 1.008087937414507, "learning_rate": 9.867058215655721e-06, "loss": 0.1659, "step": 3475 }, { "epoch": 0.10140614971701967, "grad_norm": 0.8469254202320418, "learning_rate": 9.86694997683234e-06, "loss": 0.1902, "step": 3476 }, { "epoch": 0.10143532294766322, "grad_norm": 0.7941411979341042, "learning_rate": 9.866841694557993e-06, "loss": 0.2063, "step": 3477 }, { "epoch": 0.10146449617830679, "grad_norm": 0.8950825419085655, "learning_rate": 9.866733368833643e-06, "loss": 0.186, "step": 3478 }, { "epoch": 0.10149366940895034, "grad_norm": 1.14569798578479, "learning_rate": 9.866624999660262e-06, "loss": 0.2022, "step": 3479 }, { "epoch": 0.10152284263959391, "grad_norm": 0.7917867480873769, "learning_rate": 9.866516587038813e-06, "loss": 0.1754, "step": 3480 }, { "epoch": 0.10155201587023747, "grad_norm": 0.8642967705541632, "learning_rate": 9.866408130970267e-06, "loss": 0.1829, "step": 3481 }, { "epoch": 0.10158118910088103, "grad_norm": 1.5467627532716057, "learning_rate": 9.86629963145559e-06, "loss": 0.2343, "step": 3482 }, { "epoch": 0.1016103623315246, "grad_norm": 0.8266053130281075, "learning_rate": 9.86619108849575e-06, "loss": 0.1737, "step": 3483 }, { "epoch": 0.10163953556216815, "grad_norm": 0.9367688798654689, "learning_rate": 9.86608250209172e-06, "loss": 0.1775, "step": 3484 }, { "epoch": 0.10166870879281172, "grad_norm": 0.9155674535316602, "learning_rate": 9.865973872244466e-06, "loss": 0.1667, "step": 3485 }, { "epoch": 0.10169788202345528, "grad_norm": 0.9847455876401546, "learning_rate": 9.865865198954959e-06, "loss": 0.2094, "step": 3486 }, { "epoch": 0.10172705525409884, "grad_norm": 0.8308552008359891, "learning_rate": 9.865756482224169e-06, "loss": 0.1995, "step": 3487 }, { "epoch": 0.1017562284847424, "grad_norm": 0.8410268218827373, "learning_rate": 9.865647722053066e-06, "loss": 0.1962, "step": 3488 }, { "epoch": 0.10178540171538596, "grad_norm": 1.0087615518486086, "learning_rate": 9.865538918442624e-06, "loss": 0.1779, "step": 3489 }, { "epoch": 0.10181457494602952, "grad_norm": 1.041963448367663, "learning_rate": 9.86543007139381e-06, "loss": 0.1771, "step": 3490 }, { "epoch": 0.10184374817667309, "grad_norm": 1.0238256726632478, "learning_rate": 9.865321180907597e-06, "loss": 0.167, "step": 3491 }, { "epoch": 0.10187292140731664, "grad_norm": 1.161048222587803, "learning_rate": 9.86521224698496e-06, "loss": 0.205, "step": 3492 }, { "epoch": 0.10190209463796021, "grad_norm": 0.9043069579942137, "learning_rate": 9.865103269626868e-06, "loss": 0.1717, "step": 3493 }, { "epoch": 0.10193126786860376, "grad_norm": 0.8923616361442843, "learning_rate": 9.864994248834297e-06, "loss": 0.1919, "step": 3494 }, { "epoch": 0.10196044109924733, "grad_norm": 0.7993846930318241, "learning_rate": 9.864885184608217e-06, "loss": 0.2093, "step": 3495 }, { "epoch": 0.1019896143298909, "grad_norm": 0.9328576530882595, "learning_rate": 9.864776076949604e-06, "loss": 0.2036, "step": 3496 }, { "epoch": 0.10201878756053445, "grad_norm": 0.9497224570949078, "learning_rate": 9.864666925859432e-06, "loss": 0.1852, "step": 3497 }, { "epoch": 0.10204796079117802, "grad_norm": 0.8879533396575509, "learning_rate": 9.864557731338675e-06, "loss": 0.1747, "step": 3498 }, { "epoch": 0.10207713402182157, "grad_norm": 0.8937655185804683, "learning_rate": 9.864448493388307e-06, "loss": 0.1946, "step": 3499 }, { "epoch": 0.10210630725246514, "grad_norm": 0.9357675307242985, "learning_rate": 9.864339212009304e-06, "loss": 0.197, "step": 3500 }, { "epoch": 0.10213548048310869, "grad_norm": 0.8261058627027291, "learning_rate": 9.864229887202643e-06, "loss": 0.1909, "step": 3501 }, { "epoch": 0.10216465371375226, "grad_norm": 1.0006517067419611, "learning_rate": 9.864120518969298e-06, "loss": 0.1902, "step": 3502 }, { "epoch": 0.10219382694439583, "grad_norm": 1.0604154204135914, "learning_rate": 9.864011107310246e-06, "loss": 0.1867, "step": 3503 }, { "epoch": 0.10222300017503938, "grad_norm": 0.8545044363306173, "learning_rate": 9.863901652226464e-06, "loss": 0.2068, "step": 3504 }, { "epoch": 0.10225217340568295, "grad_norm": 1.0859515433427762, "learning_rate": 9.86379215371893e-06, "loss": 0.1637, "step": 3505 }, { "epoch": 0.1022813466363265, "grad_norm": 1.1918579655623438, "learning_rate": 9.86368261178862e-06, "loss": 0.2019, "step": 3506 }, { "epoch": 0.10231051986697007, "grad_norm": 1.0430591125132915, "learning_rate": 9.863573026436513e-06, "loss": 0.1965, "step": 3507 }, { "epoch": 0.10233969309761363, "grad_norm": 1.221202207806947, "learning_rate": 9.863463397663587e-06, "loss": 0.1915, "step": 3508 }, { "epoch": 0.10236886632825719, "grad_norm": 1.142944732564729, "learning_rate": 9.863353725470822e-06, "loss": 0.2163, "step": 3509 }, { "epoch": 0.10239803955890076, "grad_norm": 0.9364772931896006, "learning_rate": 9.863244009859194e-06, "loss": 0.1711, "step": 3510 }, { "epoch": 0.10242721278954431, "grad_norm": 1.052710404061729, "learning_rate": 9.863134250829685e-06, "loss": 0.1801, "step": 3511 }, { "epoch": 0.10245638602018788, "grad_norm": 1.0758791904026912, "learning_rate": 9.863024448383273e-06, "loss": 0.1846, "step": 3512 }, { "epoch": 0.10248555925083144, "grad_norm": 1.038212081530209, "learning_rate": 9.86291460252094e-06, "loss": 0.1926, "step": 3513 }, { "epoch": 0.102514732481475, "grad_norm": 1.134331392771612, "learning_rate": 9.862804713243667e-06, "loss": 0.1836, "step": 3514 }, { "epoch": 0.10254390571211856, "grad_norm": 1.0703474061020763, "learning_rate": 9.862694780552435e-06, "loss": 0.1736, "step": 3515 }, { "epoch": 0.10257307894276212, "grad_norm": 1.0638348901860075, "learning_rate": 9.862584804448226e-06, "loss": 0.2009, "step": 3516 }, { "epoch": 0.10260225217340568, "grad_norm": 0.8599113957013631, "learning_rate": 9.862474784932018e-06, "loss": 0.1828, "step": 3517 }, { "epoch": 0.10263142540404925, "grad_norm": 0.9874231174873996, "learning_rate": 9.862364722004798e-06, "loss": 0.1715, "step": 3518 }, { "epoch": 0.1026605986346928, "grad_norm": 0.8885320043956563, "learning_rate": 9.862254615667546e-06, "loss": 0.1908, "step": 3519 }, { "epoch": 0.10268977186533637, "grad_norm": 1.073164181263853, "learning_rate": 9.862144465921244e-06, "loss": 0.2105, "step": 3520 }, { "epoch": 0.10271894509597992, "grad_norm": 0.8240112977118601, "learning_rate": 9.862034272766879e-06, "loss": 0.1767, "step": 3521 }, { "epoch": 0.10274811832662349, "grad_norm": 0.8359128423414961, "learning_rate": 9.86192403620543e-06, "loss": 0.1746, "step": 3522 }, { "epoch": 0.10277729155726706, "grad_norm": 0.9401539512117901, "learning_rate": 9.861813756237886e-06, "loss": 0.2089, "step": 3523 }, { "epoch": 0.10280646478791061, "grad_norm": 0.7972123494742941, "learning_rate": 9.861703432865228e-06, "loss": 0.1778, "step": 3524 }, { "epoch": 0.10283563801855418, "grad_norm": 0.8325761377041836, "learning_rate": 9.861593066088444e-06, "loss": 0.1787, "step": 3525 }, { "epoch": 0.10286481124919773, "grad_norm": 0.820495223309961, "learning_rate": 9.861482655908517e-06, "loss": 0.2027, "step": 3526 }, { "epoch": 0.1028939844798413, "grad_norm": 0.7924336240807823, "learning_rate": 9.861372202326432e-06, "loss": 0.1723, "step": 3527 }, { "epoch": 0.10292315771048485, "grad_norm": 0.7817727912241805, "learning_rate": 9.861261705343178e-06, "loss": 0.1683, "step": 3528 }, { "epoch": 0.10295233094112842, "grad_norm": 0.86152589055737, "learning_rate": 9.861151164959738e-06, "loss": 0.1749, "step": 3529 }, { "epoch": 0.10298150417177199, "grad_norm": 0.6543986493202684, "learning_rate": 9.861040581177103e-06, "loss": 0.1547, "step": 3530 }, { "epoch": 0.10301067740241554, "grad_norm": 1.0808079835968676, "learning_rate": 9.86092995399626e-06, "loss": 0.1648, "step": 3531 }, { "epoch": 0.10303985063305911, "grad_norm": 0.8424842609109987, "learning_rate": 9.860819283418192e-06, "loss": 0.1878, "step": 3532 }, { "epoch": 0.10306902386370266, "grad_norm": 0.8238721639152723, "learning_rate": 9.860708569443888e-06, "loss": 0.1892, "step": 3533 }, { "epoch": 0.10309819709434623, "grad_norm": 0.8802106856130075, "learning_rate": 9.860597812074343e-06, "loss": 0.1705, "step": 3534 }, { "epoch": 0.1031273703249898, "grad_norm": 0.7550766242911688, "learning_rate": 9.860487011310537e-06, "loss": 0.1472, "step": 3535 }, { "epoch": 0.10315654355563335, "grad_norm": 0.9375274970501286, "learning_rate": 9.860376167153466e-06, "loss": 0.1783, "step": 3536 }, { "epoch": 0.10318571678627692, "grad_norm": 0.9816305146749097, "learning_rate": 9.860265279604114e-06, "loss": 0.2091, "step": 3537 }, { "epoch": 0.10321489001692047, "grad_norm": 0.9361128396163539, "learning_rate": 9.860154348663476e-06, "loss": 0.1865, "step": 3538 }, { "epoch": 0.10324406324756404, "grad_norm": 0.9431525304148766, "learning_rate": 9.86004337433254e-06, "loss": 0.1921, "step": 3539 }, { "epoch": 0.1032732364782076, "grad_norm": 1.1496564048652087, "learning_rate": 9.859932356612297e-06, "loss": 0.1886, "step": 3540 }, { "epoch": 0.10330240970885116, "grad_norm": 0.9096912734203451, "learning_rate": 9.859821295503736e-06, "loss": 0.2, "step": 3541 }, { "epoch": 0.10333158293949472, "grad_norm": 0.7937785251353193, "learning_rate": 9.859710191007851e-06, "loss": 0.2068, "step": 3542 }, { "epoch": 0.10336075617013828, "grad_norm": 1.0477861995956441, "learning_rate": 9.859599043125636e-06, "loss": 0.1765, "step": 3543 }, { "epoch": 0.10338992940078184, "grad_norm": 0.8480879574997857, "learning_rate": 9.85948785185808e-06, "loss": 0.1709, "step": 3544 }, { "epoch": 0.10341910263142541, "grad_norm": 0.7737043039154154, "learning_rate": 9.859376617206175e-06, "loss": 0.1727, "step": 3545 }, { "epoch": 0.10344827586206896, "grad_norm": 0.8627681389248744, "learning_rate": 9.859265339170918e-06, "loss": 0.1763, "step": 3546 }, { "epoch": 0.10347744909271253, "grad_norm": 0.7419522433195506, "learning_rate": 9.859154017753299e-06, "loss": 0.1755, "step": 3547 }, { "epoch": 0.10350662232335608, "grad_norm": 0.9000903192887774, "learning_rate": 9.859042652954312e-06, "loss": 0.1811, "step": 3548 }, { "epoch": 0.10353579555399965, "grad_norm": 0.8256068494585156, "learning_rate": 9.858931244774952e-06, "loss": 0.1972, "step": 3549 }, { "epoch": 0.1035649687846432, "grad_norm": 0.8784834109295333, "learning_rate": 9.858819793216214e-06, "loss": 0.1571, "step": 3550 }, { "epoch": 0.10359414201528677, "grad_norm": 0.7339011855047681, "learning_rate": 9.858708298279094e-06, "loss": 0.1682, "step": 3551 }, { "epoch": 0.10362331524593034, "grad_norm": 0.776599733420472, "learning_rate": 9.858596759964586e-06, "loss": 0.1947, "step": 3552 }, { "epoch": 0.10365248847657389, "grad_norm": 1.0010043951440706, "learning_rate": 9.858485178273684e-06, "loss": 0.2001, "step": 3553 }, { "epoch": 0.10368166170721746, "grad_norm": 0.9808689083488781, "learning_rate": 9.858373553207387e-06, "loss": 0.1782, "step": 3554 }, { "epoch": 0.10371083493786101, "grad_norm": 0.7748226788065447, "learning_rate": 9.858261884766693e-06, "loss": 0.1694, "step": 3555 }, { "epoch": 0.10374000816850458, "grad_norm": 0.9663212703640649, "learning_rate": 9.858150172952594e-06, "loss": 0.1901, "step": 3556 }, { "epoch": 0.10376918139914815, "grad_norm": 0.9912394499988492, "learning_rate": 9.85803841776609e-06, "loss": 0.1836, "step": 3557 }, { "epoch": 0.1037983546297917, "grad_norm": 1.1206233748183603, "learning_rate": 9.857926619208181e-06, "loss": 0.1824, "step": 3558 }, { "epoch": 0.10382752786043527, "grad_norm": 0.8513351639284373, "learning_rate": 9.857814777279861e-06, "loss": 0.1862, "step": 3559 }, { "epoch": 0.10385670109107882, "grad_norm": 1.1036362299937632, "learning_rate": 9.85770289198213e-06, "loss": 0.173, "step": 3560 }, { "epoch": 0.10388587432172239, "grad_norm": 0.917666980493957, "learning_rate": 9.85759096331599e-06, "loss": 0.1692, "step": 3561 }, { "epoch": 0.10391504755236596, "grad_norm": 0.9200122465756295, "learning_rate": 9.857478991282434e-06, "loss": 0.1897, "step": 3562 }, { "epoch": 0.10394422078300951, "grad_norm": 0.9203765928255002, "learning_rate": 9.857366975882468e-06, "loss": 0.21, "step": 3563 }, { "epoch": 0.10397339401365308, "grad_norm": 0.8525297570143247, "learning_rate": 9.857254917117087e-06, "loss": 0.1877, "step": 3564 }, { "epoch": 0.10400256724429663, "grad_norm": 0.7806261111855988, "learning_rate": 9.857142814987295e-06, "loss": 0.1892, "step": 3565 }, { "epoch": 0.1040317404749402, "grad_norm": 0.7587133703653338, "learning_rate": 9.85703066949409e-06, "loss": 0.1845, "step": 3566 }, { "epoch": 0.10406091370558376, "grad_norm": 1.026226746266854, "learning_rate": 9.856918480638476e-06, "loss": 0.191, "step": 3567 }, { "epoch": 0.10409008693622732, "grad_norm": 0.7792006638213195, "learning_rate": 9.856806248421453e-06, "loss": 0.1728, "step": 3568 }, { "epoch": 0.10411926016687088, "grad_norm": 0.8558516870616105, "learning_rate": 9.856693972844022e-06, "loss": 0.1595, "step": 3569 }, { "epoch": 0.10414843339751444, "grad_norm": 0.8568316620101666, "learning_rate": 9.856581653907188e-06, "loss": 0.1845, "step": 3570 }, { "epoch": 0.104177606628158, "grad_norm": 0.9358208121428494, "learning_rate": 9.856469291611953e-06, "loss": 0.1891, "step": 3571 }, { "epoch": 0.10420677985880156, "grad_norm": 0.9295522384551842, "learning_rate": 9.856356885959318e-06, "loss": 0.1942, "step": 3572 }, { "epoch": 0.10423595308944512, "grad_norm": 0.8160735448647487, "learning_rate": 9.856244436950287e-06, "loss": 0.1781, "step": 3573 }, { "epoch": 0.10426512632008869, "grad_norm": 1.0885212326819285, "learning_rate": 9.856131944585867e-06, "loss": 0.201, "step": 3574 }, { "epoch": 0.10429429955073224, "grad_norm": 0.9730661270887382, "learning_rate": 9.85601940886706e-06, "loss": 0.1903, "step": 3575 }, { "epoch": 0.10432347278137581, "grad_norm": 0.7703858602175028, "learning_rate": 9.85590682979487e-06, "loss": 0.1936, "step": 3576 }, { "epoch": 0.10435264601201936, "grad_norm": 0.804187780846293, "learning_rate": 9.855794207370305e-06, "loss": 0.1974, "step": 3577 }, { "epoch": 0.10438181924266293, "grad_norm": 0.8588128990261085, "learning_rate": 9.855681541594367e-06, "loss": 0.226, "step": 3578 }, { "epoch": 0.1044109924733065, "grad_norm": 0.9682983627913089, "learning_rate": 9.855568832468063e-06, "loss": 0.185, "step": 3579 }, { "epoch": 0.10444016570395005, "grad_norm": 0.8972332881015471, "learning_rate": 9.8554560799924e-06, "loss": 0.1866, "step": 3580 }, { "epoch": 0.10446933893459362, "grad_norm": 0.817740618854365, "learning_rate": 9.855343284168384e-06, "loss": 0.1768, "step": 3581 }, { "epoch": 0.10449851216523717, "grad_norm": 0.8671290101837122, "learning_rate": 9.855230444997021e-06, "loss": 0.1845, "step": 3582 }, { "epoch": 0.10452768539588074, "grad_norm": 1.0124203321984675, "learning_rate": 9.855117562479321e-06, "loss": 0.1987, "step": 3583 }, { "epoch": 0.10455685862652431, "grad_norm": 1.0825051900887046, "learning_rate": 9.855004636616293e-06, "loss": 0.1724, "step": 3584 }, { "epoch": 0.10458603185716786, "grad_norm": 0.9814958449489769, "learning_rate": 9.85489166740894e-06, "loss": 0.2068, "step": 3585 }, { "epoch": 0.10461520508781143, "grad_norm": 0.9602983957181478, "learning_rate": 9.854778654858272e-06, "loss": 0.1795, "step": 3586 }, { "epoch": 0.10464437831845498, "grad_norm": 0.9920054261851653, "learning_rate": 9.854665598965301e-06, "loss": 0.176, "step": 3587 }, { "epoch": 0.10467355154909855, "grad_norm": 0.9678449489202484, "learning_rate": 9.854552499731032e-06, "loss": 0.1929, "step": 3588 }, { "epoch": 0.10470272477974212, "grad_norm": 0.9525309788802895, "learning_rate": 9.85443935715648e-06, "loss": 0.1992, "step": 3589 }, { "epoch": 0.10473189801038567, "grad_norm": 0.8838725559384846, "learning_rate": 9.854326171242651e-06, "loss": 0.1708, "step": 3590 }, { "epoch": 0.10476107124102924, "grad_norm": 0.9580701114468362, "learning_rate": 9.854212941990557e-06, "loss": 0.1706, "step": 3591 }, { "epoch": 0.10479024447167279, "grad_norm": 1.1849188690459378, "learning_rate": 9.854099669401209e-06, "loss": 0.1909, "step": 3592 }, { "epoch": 0.10481941770231636, "grad_norm": 0.8369489004436974, "learning_rate": 9.853986353475618e-06, "loss": 0.1949, "step": 3593 }, { "epoch": 0.10484859093295991, "grad_norm": 0.8283792487854817, "learning_rate": 9.853872994214794e-06, "loss": 0.1601, "step": 3594 }, { "epoch": 0.10487776416360348, "grad_norm": 0.9514420257774472, "learning_rate": 9.853759591619752e-06, "loss": 0.1886, "step": 3595 }, { "epoch": 0.10490693739424704, "grad_norm": 0.7027861315228137, "learning_rate": 9.853646145691502e-06, "loss": 0.1724, "step": 3596 }, { "epoch": 0.1049361106248906, "grad_norm": 0.821053437805854, "learning_rate": 9.85353265643106e-06, "loss": 0.2092, "step": 3597 }, { "epoch": 0.10496528385553416, "grad_norm": 0.961109184519753, "learning_rate": 9.853419123839434e-06, "loss": 0.1811, "step": 3598 }, { "epoch": 0.10499445708617772, "grad_norm": 0.8388968563304371, "learning_rate": 9.853305547917643e-06, "loss": 0.2077, "step": 3599 }, { "epoch": 0.10502363031682128, "grad_norm": 0.8995002274805877, "learning_rate": 9.853191928666699e-06, "loss": 0.2083, "step": 3600 }, { "epoch": 0.10505280354746485, "grad_norm": 0.8278156054714256, "learning_rate": 9.853078266087615e-06, "loss": 0.1739, "step": 3601 }, { "epoch": 0.1050819767781084, "grad_norm": 0.8207064823983518, "learning_rate": 9.852964560181406e-06, "loss": 0.173, "step": 3602 }, { "epoch": 0.10511115000875197, "grad_norm": 0.778077752028872, "learning_rate": 9.852850810949088e-06, "loss": 0.1978, "step": 3603 }, { "epoch": 0.10514032323939553, "grad_norm": 0.7439386492724069, "learning_rate": 9.852737018391678e-06, "loss": 0.1907, "step": 3604 }, { "epoch": 0.10516949647003909, "grad_norm": 0.8941220101624787, "learning_rate": 9.85262318251019e-06, "loss": 0.1684, "step": 3605 }, { "epoch": 0.10519866970068266, "grad_norm": 0.920803984158187, "learning_rate": 9.85250930330564e-06, "loss": 0.2096, "step": 3606 }, { "epoch": 0.10522784293132621, "grad_norm": 0.8530089595896981, "learning_rate": 9.852395380779045e-06, "loss": 0.1747, "step": 3607 }, { "epoch": 0.10525701616196978, "grad_norm": 0.7817955853277232, "learning_rate": 9.852281414931422e-06, "loss": 0.2058, "step": 3608 }, { "epoch": 0.10528618939261333, "grad_norm": 0.8349454842830898, "learning_rate": 9.852167405763791e-06, "loss": 0.182, "step": 3609 }, { "epoch": 0.1053153626232569, "grad_norm": 0.9498919429639381, "learning_rate": 9.852053353277166e-06, "loss": 0.1861, "step": 3610 }, { "epoch": 0.10534453585390047, "grad_norm": 1.010670781742482, "learning_rate": 9.851939257472567e-06, "loss": 0.1714, "step": 3611 }, { "epoch": 0.10537370908454402, "grad_norm": 0.8576945237017012, "learning_rate": 9.851825118351012e-06, "loss": 0.1748, "step": 3612 }, { "epoch": 0.10540288231518759, "grad_norm": 0.8817090284343435, "learning_rate": 9.851710935913522e-06, "loss": 0.1497, "step": 3613 }, { "epoch": 0.10543205554583114, "grad_norm": 0.9768218949533995, "learning_rate": 9.851596710161115e-06, "loss": 0.1873, "step": 3614 }, { "epoch": 0.10546122877647471, "grad_norm": 0.969484927400703, "learning_rate": 9.851482441094809e-06, "loss": 0.1937, "step": 3615 }, { "epoch": 0.10549040200711828, "grad_norm": 0.8436609685314721, "learning_rate": 9.851368128715627e-06, "loss": 0.1608, "step": 3616 }, { "epoch": 0.10551957523776183, "grad_norm": 0.8973571802412434, "learning_rate": 9.85125377302459e-06, "loss": 0.2042, "step": 3617 }, { "epoch": 0.1055487484684054, "grad_norm": 0.9805136113686406, "learning_rate": 9.851139374022715e-06, "loss": 0.1761, "step": 3618 }, { "epoch": 0.10557792169904895, "grad_norm": 0.8950355394942241, "learning_rate": 9.851024931711026e-06, "loss": 0.1988, "step": 3619 }, { "epoch": 0.10560709492969252, "grad_norm": 0.9967364634792623, "learning_rate": 9.850910446090545e-06, "loss": 0.1736, "step": 3620 }, { "epoch": 0.10563626816033607, "grad_norm": 0.8152349956595033, "learning_rate": 9.850795917162295e-06, "loss": 0.1721, "step": 3621 }, { "epoch": 0.10566544139097964, "grad_norm": 0.9117315910839804, "learning_rate": 9.850681344927295e-06, "loss": 0.2011, "step": 3622 }, { "epoch": 0.1056946146216232, "grad_norm": 0.9183673562073597, "learning_rate": 9.85056672938657e-06, "loss": 0.1913, "step": 3623 }, { "epoch": 0.10572378785226676, "grad_norm": 0.8907167001631543, "learning_rate": 9.850452070541145e-06, "loss": 0.1997, "step": 3624 }, { "epoch": 0.10575296108291032, "grad_norm": 0.7452388870196537, "learning_rate": 9.85033736839204e-06, "loss": 0.1567, "step": 3625 }, { "epoch": 0.10578213431355388, "grad_norm": 1.0293733016375348, "learning_rate": 9.850222622940282e-06, "loss": 0.1786, "step": 3626 }, { "epoch": 0.10581130754419744, "grad_norm": 0.8947462098266787, "learning_rate": 9.850107834186893e-06, "loss": 0.1807, "step": 3627 }, { "epoch": 0.10584048077484101, "grad_norm": 0.8708521042151944, "learning_rate": 9.8499930021329e-06, "loss": 0.1698, "step": 3628 }, { "epoch": 0.10586965400548456, "grad_norm": 0.9404817004403054, "learning_rate": 9.849878126779326e-06, "loss": 0.219, "step": 3629 }, { "epoch": 0.10589882723612813, "grad_norm": 0.8760941174090979, "learning_rate": 9.8497632081272e-06, "loss": 0.2157, "step": 3630 }, { "epoch": 0.10592800046677169, "grad_norm": 0.9371641097270601, "learning_rate": 9.849648246177544e-06, "loss": 0.1963, "step": 3631 }, { "epoch": 0.10595717369741525, "grad_norm": 0.8086527270226705, "learning_rate": 9.849533240931388e-06, "loss": 0.1569, "step": 3632 }, { "epoch": 0.10598634692805882, "grad_norm": 0.9406634070880344, "learning_rate": 9.849418192389755e-06, "loss": 0.1793, "step": 3633 }, { "epoch": 0.10601552015870237, "grad_norm": 1.010354893817405, "learning_rate": 9.849303100553675e-06, "loss": 0.2215, "step": 3634 }, { "epoch": 0.10604469338934594, "grad_norm": 1.1892070123249876, "learning_rate": 9.849187965424174e-06, "loss": 0.1973, "step": 3635 }, { "epoch": 0.1060738666199895, "grad_norm": 1.0311238063426773, "learning_rate": 9.849072787002281e-06, "loss": 0.1823, "step": 3636 }, { "epoch": 0.10610303985063306, "grad_norm": 0.8037583422842794, "learning_rate": 9.848957565289024e-06, "loss": 0.173, "step": 3637 }, { "epoch": 0.10613221308127663, "grad_norm": 0.9914643598253442, "learning_rate": 9.84884230028543e-06, "loss": 0.1733, "step": 3638 }, { "epoch": 0.10616138631192018, "grad_norm": 0.7477706637644134, "learning_rate": 9.84872699199253e-06, "loss": 0.1967, "step": 3639 }, { "epoch": 0.10619055954256375, "grad_norm": 0.8397578799055514, "learning_rate": 9.848611640411355e-06, "loss": 0.1821, "step": 3640 }, { "epoch": 0.1062197327732073, "grad_norm": 0.8983672869267227, "learning_rate": 9.848496245542928e-06, "loss": 0.1913, "step": 3641 }, { "epoch": 0.10624890600385087, "grad_norm": 0.8500491520526038, "learning_rate": 9.848380807388287e-06, "loss": 0.1673, "step": 3642 }, { "epoch": 0.10627807923449442, "grad_norm": 0.8896732084116462, "learning_rate": 9.84826532594846e-06, "loss": 0.1905, "step": 3643 }, { "epoch": 0.10630725246513799, "grad_norm": 0.7485932722651284, "learning_rate": 9.848149801224478e-06, "loss": 0.1849, "step": 3644 }, { "epoch": 0.10633642569578156, "grad_norm": 1.967401684554678, "learning_rate": 9.84803423321737e-06, "loss": 0.1847, "step": 3645 }, { "epoch": 0.10636559892642511, "grad_norm": 0.8288106003515783, "learning_rate": 9.84791862192817e-06, "loss": 0.1721, "step": 3646 }, { "epoch": 0.10639477215706868, "grad_norm": 1.146949706729043, "learning_rate": 9.84780296735791e-06, "loss": 0.1993, "step": 3647 }, { "epoch": 0.10642394538771223, "grad_norm": 0.8806391390682924, "learning_rate": 9.847687269507624e-06, "loss": 0.1798, "step": 3648 }, { "epoch": 0.1064531186183558, "grad_norm": 1.0032036286894994, "learning_rate": 9.847571528378342e-06, "loss": 0.1659, "step": 3649 }, { "epoch": 0.10648229184899936, "grad_norm": 0.8126100534228001, "learning_rate": 9.8474557439711e-06, "loss": 0.1796, "step": 3650 }, { "epoch": 0.10651146507964292, "grad_norm": 0.8152182091978051, "learning_rate": 9.847339916286928e-06, "loss": 0.1589, "step": 3651 }, { "epoch": 0.10654063831028648, "grad_norm": 0.7248696144602788, "learning_rate": 9.847224045326864e-06, "loss": 0.1737, "step": 3652 }, { "epoch": 0.10656981154093004, "grad_norm": 0.9821676998680899, "learning_rate": 9.84710813109194e-06, "loss": 0.1755, "step": 3653 }, { "epoch": 0.1065989847715736, "grad_norm": 0.9874156412488563, "learning_rate": 9.846992173583193e-06, "loss": 0.1744, "step": 3654 }, { "epoch": 0.10662815800221717, "grad_norm": 0.9935917984752777, "learning_rate": 9.846876172801653e-06, "loss": 0.1875, "step": 3655 }, { "epoch": 0.10665733123286072, "grad_norm": 0.8547818384954391, "learning_rate": 9.846760128748363e-06, "loss": 0.2255, "step": 3656 }, { "epoch": 0.10668650446350429, "grad_norm": 1.1420768220006505, "learning_rate": 9.846644041424357e-06, "loss": 0.1973, "step": 3657 }, { "epoch": 0.10671567769414785, "grad_norm": 0.9489048749425871, "learning_rate": 9.846527910830666e-06, "loss": 0.189, "step": 3658 }, { "epoch": 0.10674485092479141, "grad_norm": 0.7622933975836242, "learning_rate": 9.846411736968334e-06, "loss": 0.1802, "step": 3659 }, { "epoch": 0.10677402415543498, "grad_norm": 0.9875678375622912, "learning_rate": 9.846295519838393e-06, "loss": 0.2019, "step": 3660 }, { "epoch": 0.10680319738607853, "grad_norm": 0.9658618910429523, "learning_rate": 9.846179259441884e-06, "loss": 0.1736, "step": 3661 }, { "epoch": 0.1068323706167221, "grad_norm": 0.7838056532062962, "learning_rate": 9.846062955779843e-06, "loss": 0.1678, "step": 3662 }, { "epoch": 0.10686154384736565, "grad_norm": 0.9063951910550987, "learning_rate": 9.845946608853307e-06, "loss": 0.1751, "step": 3663 }, { "epoch": 0.10689071707800922, "grad_norm": 0.954163078224032, "learning_rate": 9.845830218663319e-06, "loss": 0.1974, "step": 3664 }, { "epoch": 0.10691989030865277, "grad_norm": 0.9078999281041293, "learning_rate": 9.845713785210915e-06, "loss": 0.1807, "step": 3665 }, { "epoch": 0.10694906353929634, "grad_norm": 0.9482398997349659, "learning_rate": 9.845597308497134e-06, "loss": 0.1603, "step": 3666 }, { "epoch": 0.10697823676993991, "grad_norm": 0.9527446241527082, "learning_rate": 9.845480788523018e-06, "loss": 0.1867, "step": 3667 }, { "epoch": 0.10700741000058346, "grad_norm": 0.8204347964831162, "learning_rate": 9.845364225289606e-06, "loss": 0.163, "step": 3668 }, { "epoch": 0.10703658323122703, "grad_norm": 0.7344762053758938, "learning_rate": 9.845247618797938e-06, "loss": 0.1621, "step": 3669 }, { "epoch": 0.10706575646187058, "grad_norm": 1.0051301924257035, "learning_rate": 9.845130969049057e-06, "loss": 0.2151, "step": 3670 }, { "epoch": 0.10709492969251415, "grad_norm": 0.9033577606481228, "learning_rate": 9.845014276044002e-06, "loss": 0.1864, "step": 3671 }, { "epoch": 0.10712410292315772, "grad_norm": 1.0478524524363073, "learning_rate": 9.844897539783817e-06, "loss": 0.1952, "step": 3672 }, { "epoch": 0.10715327615380127, "grad_norm": 0.9434465268313577, "learning_rate": 9.844780760269543e-06, "loss": 0.1921, "step": 3673 }, { "epoch": 0.10718244938444484, "grad_norm": 1.185492328938468, "learning_rate": 9.844663937502225e-06, "loss": 0.1987, "step": 3674 }, { "epoch": 0.10721162261508839, "grad_norm": 0.9408758168148659, "learning_rate": 9.844547071482902e-06, "loss": 0.1827, "step": 3675 }, { "epoch": 0.10724079584573196, "grad_norm": 0.9133147091973878, "learning_rate": 9.844430162212619e-06, "loss": 0.1894, "step": 3676 }, { "epoch": 0.10726996907637552, "grad_norm": 0.8748009274424542, "learning_rate": 9.84431320969242e-06, "loss": 0.2018, "step": 3677 }, { "epoch": 0.10729914230701908, "grad_norm": 1.0174652467453873, "learning_rate": 9.84419621392335e-06, "loss": 0.1778, "step": 3678 }, { "epoch": 0.10732831553766264, "grad_norm": 0.7728782072356283, "learning_rate": 9.844079174906453e-06, "loss": 0.1831, "step": 3679 }, { "epoch": 0.1073574887683062, "grad_norm": 1.108675218183585, "learning_rate": 9.843962092642772e-06, "loss": 0.1937, "step": 3680 }, { "epoch": 0.10738666199894976, "grad_norm": 0.8191061791296362, "learning_rate": 9.843844967133353e-06, "loss": 0.1666, "step": 3681 }, { "epoch": 0.10741583522959333, "grad_norm": 1.093219952602174, "learning_rate": 9.843727798379245e-06, "loss": 0.1834, "step": 3682 }, { "epoch": 0.10744500846023688, "grad_norm": 0.8429401948794337, "learning_rate": 9.843610586381491e-06, "loss": 0.1951, "step": 3683 }, { "epoch": 0.10747418169088045, "grad_norm": 1.0067529057383613, "learning_rate": 9.843493331141136e-06, "loss": 0.2011, "step": 3684 }, { "epoch": 0.107503354921524, "grad_norm": 0.7733939536939644, "learning_rate": 9.843376032659231e-06, "loss": 0.1728, "step": 3685 }, { "epoch": 0.10753252815216757, "grad_norm": 0.9664776166046107, "learning_rate": 9.84325869093682e-06, "loss": 0.1712, "step": 3686 }, { "epoch": 0.10756170138281113, "grad_norm": 0.8594469761438377, "learning_rate": 9.843141305974951e-06, "loss": 0.2086, "step": 3687 }, { "epoch": 0.10759087461345469, "grad_norm": 1.0214638134518348, "learning_rate": 9.843023877774673e-06, "loss": 0.1671, "step": 3688 }, { "epoch": 0.10762004784409826, "grad_norm": 0.9576001938898379, "learning_rate": 9.842906406337034e-06, "loss": 0.1923, "step": 3689 }, { "epoch": 0.10764922107474181, "grad_norm": 0.867883200304093, "learning_rate": 9.842788891663085e-06, "loss": 0.1962, "step": 3690 }, { "epoch": 0.10767839430538538, "grad_norm": 0.891562337545845, "learning_rate": 9.84267133375387e-06, "loss": 0.1782, "step": 3691 }, { "epoch": 0.10770756753602893, "grad_norm": 0.9513846342371838, "learning_rate": 9.842553732610442e-06, "loss": 0.1972, "step": 3692 }, { "epoch": 0.1077367407666725, "grad_norm": 0.8740119196856326, "learning_rate": 9.842436088233851e-06, "loss": 0.2128, "step": 3693 }, { "epoch": 0.10776591399731607, "grad_norm": 0.9505336722522594, "learning_rate": 9.842318400625145e-06, "loss": 0.2001, "step": 3694 }, { "epoch": 0.10779508722795962, "grad_norm": 0.9032592869121997, "learning_rate": 9.842200669785378e-06, "loss": 0.1871, "step": 3695 }, { "epoch": 0.10782426045860319, "grad_norm": 1.0805069099010487, "learning_rate": 9.842082895715598e-06, "loss": 0.1887, "step": 3696 }, { "epoch": 0.10785343368924674, "grad_norm": 0.9110385262451008, "learning_rate": 9.84196507841686e-06, "loss": 0.2106, "step": 3697 }, { "epoch": 0.10788260691989031, "grad_norm": 1.0107292562277639, "learning_rate": 9.84184721789021e-06, "loss": 0.1758, "step": 3698 }, { "epoch": 0.10791178015053388, "grad_norm": 1.1430490413573193, "learning_rate": 9.841729314136707e-06, "loss": 0.1807, "step": 3699 }, { "epoch": 0.10794095338117743, "grad_norm": 1.2589913484771265, "learning_rate": 9.8416113671574e-06, "loss": 0.1829, "step": 3700 }, { "epoch": 0.107970126611821, "grad_norm": 0.9674064446555021, "learning_rate": 9.841493376953341e-06, "loss": 0.1862, "step": 3701 }, { "epoch": 0.10799929984246455, "grad_norm": 1.00196352427155, "learning_rate": 9.841375343525586e-06, "loss": 0.2146, "step": 3702 }, { "epoch": 0.10802847307310812, "grad_norm": 0.8110697783459644, "learning_rate": 9.841257266875187e-06, "loss": 0.1696, "step": 3703 }, { "epoch": 0.10805764630375168, "grad_norm": 1.0770057636871617, "learning_rate": 9.8411391470032e-06, "loss": 0.1775, "step": 3704 }, { "epoch": 0.10808681953439524, "grad_norm": 1.0268880636789652, "learning_rate": 9.841020983910675e-06, "loss": 0.1761, "step": 3705 }, { "epoch": 0.1081159927650388, "grad_norm": 0.8155987790917397, "learning_rate": 9.840902777598675e-06, "loss": 0.1679, "step": 3706 }, { "epoch": 0.10814516599568236, "grad_norm": 0.8870319716724403, "learning_rate": 9.840784528068248e-06, "loss": 0.1854, "step": 3707 }, { "epoch": 0.10817433922632592, "grad_norm": 0.8624835876854527, "learning_rate": 9.840666235320453e-06, "loss": 0.1896, "step": 3708 }, { "epoch": 0.10820351245696949, "grad_norm": 0.9718767513383666, "learning_rate": 9.840547899356344e-06, "loss": 0.1817, "step": 3709 }, { "epoch": 0.10823268568761304, "grad_norm": 0.9234147792364222, "learning_rate": 9.840429520176981e-06, "loss": 0.2356, "step": 3710 }, { "epoch": 0.10826185891825661, "grad_norm": 0.8759859936433729, "learning_rate": 9.84031109778342e-06, "loss": 0.1804, "step": 3711 }, { "epoch": 0.10829103214890017, "grad_norm": 0.9744025451650687, "learning_rate": 9.840192632176714e-06, "loss": 0.1864, "step": 3712 }, { "epoch": 0.10832020537954373, "grad_norm": 1.1145841227908508, "learning_rate": 9.840074123357924e-06, "loss": 0.185, "step": 3713 }, { "epoch": 0.10834937861018729, "grad_norm": 0.9164931550793644, "learning_rate": 9.839955571328108e-06, "loss": 0.1762, "step": 3714 }, { "epoch": 0.10837855184083085, "grad_norm": 1.3306792105766259, "learning_rate": 9.839836976088326e-06, "loss": 0.2067, "step": 3715 }, { "epoch": 0.10840772507147442, "grad_norm": 1.0025956067578259, "learning_rate": 9.839718337639633e-06, "loss": 0.1904, "step": 3716 }, { "epoch": 0.10843689830211797, "grad_norm": 0.9590885285899383, "learning_rate": 9.83959965598309e-06, "loss": 0.1753, "step": 3717 }, { "epoch": 0.10846607153276154, "grad_norm": 0.8519876240715798, "learning_rate": 9.839480931119756e-06, "loss": 0.1655, "step": 3718 }, { "epoch": 0.1084952447634051, "grad_norm": 1.0216085372450623, "learning_rate": 9.839362163050692e-06, "loss": 0.2068, "step": 3719 }, { "epoch": 0.10852441799404866, "grad_norm": 1.015218478793388, "learning_rate": 9.839243351776959e-06, "loss": 0.2108, "step": 3720 }, { "epoch": 0.10855359122469223, "grad_norm": 0.8833693071316394, "learning_rate": 9.839124497299614e-06, "loss": 0.1814, "step": 3721 }, { "epoch": 0.10858276445533578, "grad_norm": 0.9985068447394757, "learning_rate": 9.839005599619723e-06, "loss": 0.1952, "step": 3722 }, { "epoch": 0.10861193768597935, "grad_norm": 0.9275513908128796, "learning_rate": 9.838886658738345e-06, "loss": 0.1816, "step": 3723 }, { "epoch": 0.1086411109166229, "grad_norm": 1.0624517703707523, "learning_rate": 9.838767674656541e-06, "loss": 0.2028, "step": 3724 }, { "epoch": 0.10867028414726647, "grad_norm": 1.0070792855763442, "learning_rate": 9.838648647375375e-06, "loss": 0.1519, "step": 3725 }, { "epoch": 0.10869945737791004, "grad_norm": 0.8410867942920129, "learning_rate": 9.83852957689591e-06, "loss": 0.1704, "step": 3726 }, { "epoch": 0.10872863060855359, "grad_norm": 1.1809385178616498, "learning_rate": 9.838410463219206e-06, "loss": 0.2194, "step": 3727 }, { "epoch": 0.10875780383919716, "grad_norm": 1.01928972420757, "learning_rate": 9.838291306346329e-06, "loss": 0.1862, "step": 3728 }, { "epoch": 0.10878697706984071, "grad_norm": 0.8529205792076802, "learning_rate": 9.838172106278344e-06, "loss": 0.171, "step": 3729 }, { "epoch": 0.10881615030048428, "grad_norm": 0.9092029669728684, "learning_rate": 9.83805286301631e-06, "loss": 0.1764, "step": 3730 }, { "epoch": 0.10884532353112784, "grad_norm": 0.9078365624779545, "learning_rate": 9.837933576561297e-06, "loss": 0.1843, "step": 3731 }, { "epoch": 0.1088744967617714, "grad_norm": 0.8989173614593705, "learning_rate": 9.837814246914367e-06, "loss": 0.2223, "step": 3732 }, { "epoch": 0.10890366999241496, "grad_norm": 0.8498264796992692, "learning_rate": 9.83769487407659e-06, "loss": 0.1806, "step": 3733 }, { "epoch": 0.10893284322305852, "grad_norm": 1.1470141639434395, "learning_rate": 9.837575458049023e-06, "loss": 0.1977, "step": 3734 }, { "epoch": 0.10896201645370208, "grad_norm": 1.0260437543873395, "learning_rate": 9.83745599883274e-06, "loss": 0.2063, "step": 3735 }, { "epoch": 0.10899118968434564, "grad_norm": 0.9272079305040548, "learning_rate": 9.837336496428804e-06, "loss": 0.1625, "step": 3736 }, { "epoch": 0.1090203629149892, "grad_norm": 0.7823327792556027, "learning_rate": 9.837216950838282e-06, "loss": 0.1914, "step": 3737 }, { "epoch": 0.10904953614563277, "grad_norm": 0.9446827984937581, "learning_rate": 9.83709736206224e-06, "loss": 0.1744, "step": 3738 }, { "epoch": 0.10907870937627633, "grad_norm": 1.0576705550162075, "learning_rate": 9.836977730101751e-06, "loss": 0.2053, "step": 3739 }, { "epoch": 0.10910788260691989, "grad_norm": 0.7897544097238344, "learning_rate": 9.836858054957879e-06, "loss": 0.1837, "step": 3740 }, { "epoch": 0.10913705583756345, "grad_norm": 0.7602633907190607, "learning_rate": 9.83673833663169e-06, "loss": 0.1868, "step": 3741 }, { "epoch": 0.10916622906820701, "grad_norm": 0.8498575573800358, "learning_rate": 9.836618575124259e-06, "loss": 0.1652, "step": 3742 }, { "epoch": 0.10919540229885058, "grad_norm": 0.8334433786361117, "learning_rate": 9.836498770436652e-06, "loss": 0.1701, "step": 3743 }, { "epoch": 0.10922457552949413, "grad_norm": 0.8748325644408049, "learning_rate": 9.836378922569935e-06, "loss": 0.1678, "step": 3744 }, { "epoch": 0.1092537487601377, "grad_norm": 1.1032192674675612, "learning_rate": 9.836259031525184e-06, "loss": 0.196, "step": 3745 }, { "epoch": 0.10928292199078125, "grad_norm": 0.8929200675933284, "learning_rate": 9.836139097303468e-06, "loss": 0.176, "step": 3746 }, { "epoch": 0.10931209522142482, "grad_norm": 0.9147053940989985, "learning_rate": 9.836019119905856e-06, "loss": 0.1866, "step": 3747 }, { "epoch": 0.10934126845206839, "grad_norm": 0.8795091301665815, "learning_rate": 9.835899099333418e-06, "loss": 0.1712, "step": 3748 }, { "epoch": 0.10937044168271194, "grad_norm": 0.8008882443825363, "learning_rate": 9.835779035587228e-06, "loss": 0.1879, "step": 3749 }, { "epoch": 0.10939961491335551, "grad_norm": 0.9958444752671687, "learning_rate": 9.835658928668356e-06, "loss": 0.2044, "step": 3750 }, { "epoch": 0.10942878814399906, "grad_norm": 0.778564098885269, "learning_rate": 9.835538778577877e-06, "loss": 0.1832, "step": 3751 }, { "epoch": 0.10945796137464263, "grad_norm": 0.9398409858222706, "learning_rate": 9.835418585316863e-06, "loss": 0.1748, "step": 3752 }, { "epoch": 0.1094871346052862, "grad_norm": 0.8758560899904604, "learning_rate": 9.835298348886386e-06, "loss": 0.16, "step": 3753 }, { "epoch": 0.10951630783592975, "grad_norm": 0.8911949385467063, "learning_rate": 9.835178069287519e-06, "loss": 0.1743, "step": 3754 }, { "epoch": 0.10954548106657332, "grad_norm": 0.8900291916817913, "learning_rate": 9.835057746521335e-06, "loss": 0.1839, "step": 3755 }, { "epoch": 0.10957465429721687, "grad_norm": 0.9417581588384263, "learning_rate": 9.83493738058891e-06, "loss": 0.1781, "step": 3756 }, { "epoch": 0.10960382752786044, "grad_norm": 0.9872104931867929, "learning_rate": 9.834816971491322e-06, "loss": 0.1767, "step": 3757 }, { "epoch": 0.10963300075850399, "grad_norm": 0.8309452719909529, "learning_rate": 9.834696519229638e-06, "loss": 0.1591, "step": 3758 }, { "epoch": 0.10966217398914756, "grad_norm": 0.8356111273091072, "learning_rate": 9.83457602380494e-06, "loss": 0.188, "step": 3759 }, { "epoch": 0.10969134721979112, "grad_norm": 1.2821605838808534, "learning_rate": 9.8344554852183e-06, "loss": 0.1767, "step": 3760 }, { "epoch": 0.10972052045043468, "grad_norm": 0.7899581477687933, "learning_rate": 9.834334903470796e-06, "loss": 0.1861, "step": 3761 }, { "epoch": 0.10974969368107824, "grad_norm": 0.8060093967417578, "learning_rate": 9.834214278563503e-06, "loss": 0.1719, "step": 3762 }, { "epoch": 0.1097788669117218, "grad_norm": 0.9629195746359068, "learning_rate": 9.834093610497501e-06, "loss": 0.1846, "step": 3763 }, { "epoch": 0.10980804014236537, "grad_norm": 0.8498193523692971, "learning_rate": 9.833972899273863e-06, "loss": 0.1865, "step": 3764 }, { "epoch": 0.10983721337300893, "grad_norm": 0.9510094692083803, "learning_rate": 9.83385214489367e-06, "loss": 0.2186, "step": 3765 }, { "epoch": 0.10986638660365249, "grad_norm": 0.9199366240798985, "learning_rate": 9.833731347358e-06, "loss": 0.1844, "step": 3766 }, { "epoch": 0.10989555983429605, "grad_norm": 0.9649402924819324, "learning_rate": 9.83361050666793e-06, "loss": 0.2012, "step": 3767 }, { "epoch": 0.1099247330649396, "grad_norm": 0.8969514360865897, "learning_rate": 9.833489622824537e-06, "loss": 0.1543, "step": 3768 }, { "epoch": 0.10995390629558317, "grad_norm": 0.9774128949200379, "learning_rate": 9.833368695828905e-06, "loss": 0.1777, "step": 3769 }, { "epoch": 0.10998307952622674, "grad_norm": 0.8418971392788187, "learning_rate": 9.833247725682111e-06, "loss": 0.1741, "step": 3770 }, { "epoch": 0.1100122527568703, "grad_norm": 0.8379352874568623, "learning_rate": 9.833126712385234e-06, "loss": 0.1655, "step": 3771 }, { "epoch": 0.11004142598751386, "grad_norm": 0.8231065469113594, "learning_rate": 9.833005655939356e-06, "loss": 0.1836, "step": 3772 }, { "epoch": 0.11007059921815741, "grad_norm": 0.8191344525780554, "learning_rate": 9.832884556345556e-06, "loss": 0.1689, "step": 3773 }, { "epoch": 0.11009977244880098, "grad_norm": 0.9611187222121934, "learning_rate": 9.832763413604918e-06, "loss": 0.1933, "step": 3774 }, { "epoch": 0.11012894567944455, "grad_norm": 0.832741343395637, "learning_rate": 9.832642227718522e-06, "loss": 0.1874, "step": 3775 }, { "epoch": 0.1101581189100881, "grad_norm": 0.897739888953288, "learning_rate": 9.83252099868745e-06, "loss": 0.1721, "step": 3776 }, { "epoch": 0.11018729214073167, "grad_norm": 0.9764722624378016, "learning_rate": 9.832399726512783e-06, "loss": 0.1987, "step": 3777 }, { "epoch": 0.11021646537137522, "grad_norm": 0.8605112724821691, "learning_rate": 9.832278411195606e-06, "loss": 0.1985, "step": 3778 }, { "epoch": 0.11024563860201879, "grad_norm": 0.9433795788950949, "learning_rate": 9.832157052737e-06, "loss": 0.1808, "step": 3779 }, { "epoch": 0.11027481183266234, "grad_norm": 1.1215045418125138, "learning_rate": 9.83203565113805e-06, "loss": 0.1788, "step": 3780 }, { "epoch": 0.11030398506330591, "grad_norm": 0.9726536245052037, "learning_rate": 9.831914206399837e-06, "loss": 0.1605, "step": 3781 }, { "epoch": 0.11033315829394948, "grad_norm": 0.9226634528504349, "learning_rate": 9.831792718523449e-06, "loss": 0.1787, "step": 3782 }, { "epoch": 0.11036233152459303, "grad_norm": 0.81507376458163, "learning_rate": 9.83167118750997e-06, "loss": 0.2006, "step": 3783 }, { "epoch": 0.1103915047552366, "grad_norm": 0.9089075165787985, "learning_rate": 9.831549613360482e-06, "loss": 0.198, "step": 3784 }, { "epoch": 0.11042067798588015, "grad_norm": 0.9859611143876775, "learning_rate": 9.831427996076074e-06, "loss": 0.1777, "step": 3785 }, { "epoch": 0.11044985121652372, "grad_norm": 0.7837485554712099, "learning_rate": 9.83130633565783e-06, "loss": 0.1565, "step": 3786 }, { "epoch": 0.11047902444716728, "grad_norm": 0.9260998754003568, "learning_rate": 9.831184632106837e-06, "loss": 0.2108, "step": 3787 }, { "epoch": 0.11050819767781084, "grad_norm": 0.8565242221498323, "learning_rate": 9.831062885424181e-06, "loss": 0.1886, "step": 3788 }, { "epoch": 0.1105373709084544, "grad_norm": 0.8681149920558899, "learning_rate": 9.830941095610948e-06, "loss": 0.1839, "step": 3789 }, { "epoch": 0.11056654413909796, "grad_norm": 0.857683535434253, "learning_rate": 9.830819262668225e-06, "loss": 0.1857, "step": 3790 }, { "epoch": 0.11059571736974153, "grad_norm": 1.810542375779002, "learning_rate": 9.830697386597102e-06, "loss": 0.2115, "step": 3791 }, { "epoch": 0.11062489060038509, "grad_norm": 0.9987769624881818, "learning_rate": 9.830575467398666e-06, "loss": 0.1878, "step": 3792 }, { "epoch": 0.11065406383102865, "grad_norm": 0.8863324438702518, "learning_rate": 9.830453505074005e-06, "loss": 0.1875, "step": 3793 }, { "epoch": 0.11068323706167221, "grad_norm": 0.9043276789551575, "learning_rate": 9.830331499624208e-06, "loss": 0.1657, "step": 3794 }, { "epoch": 0.11071241029231577, "grad_norm": 1.0448868343342503, "learning_rate": 9.830209451050365e-06, "loss": 0.1816, "step": 3795 }, { "epoch": 0.11074158352295933, "grad_norm": 1.1265263659396727, "learning_rate": 9.830087359353566e-06, "loss": 0.1772, "step": 3796 }, { "epoch": 0.1107707567536029, "grad_norm": 0.9817894511712082, "learning_rate": 9.829965224534899e-06, "loss": 0.194, "step": 3797 }, { "epoch": 0.11079992998424645, "grad_norm": 0.8576839815110606, "learning_rate": 9.829843046595455e-06, "loss": 0.1879, "step": 3798 }, { "epoch": 0.11082910321489002, "grad_norm": 0.9716186224016058, "learning_rate": 9.829720825536327e-06, "loss": 0.1745, "step": 3799 }, { "epoch": 0.11085827644553357, "grad_norm": 1.000444412402128, "learning_rate": 9.829598561358602e-06, "loss": 0.1658, "step": 3800 }, { "epoch": 0.11088744967617714, "grad_norm": 0.9179903393874113, "learning_rate": 9.829476254063376e-06, "loss": 0.1732, "step": 3801 }, { "epoch": 0.11091662290682071, "grad_norm": 1.0004830831060219, "learning_rate": 9.829353903651739e-06, "loss": 0.1831, "step": 3802 }, { "epoch": 0.11094579613746426, "grad_norm": 1.0181127662768221, "learning_rate": 9.829231510124782e-06, "loss": 0.1746, "step": 3803 }, { "epoch": 0.11097496936810783, "grad_norm": 1.0400139495645497, "learning_rate": 9.829109073483598e-06, "loss": 0.2311, "step": 3804 }, { "epoch": 0.11100414259875138, "grad_norm": 0.8718607152673257, "learning_rate": 9.828986593729283e-06, "loss": 0.2045, "step": 3805 }, { "epoch": 0.11103331582939495, "grad_norm": 0.92743147372795, "learning_rate": 9.828864070862927e-06, "loss": 0.2088, "step": 3806 }, { "epoch": 0.1110624890600385, "grad_norm": 0.9482139382413433, "learning_rate": 9.828741504885627e-06, "loss": 0.1533, "step": 3807 }, { "epoch": 0.11109166229068207, "grad_norm": 0.8801976295218055, "learning_rate": 9.828618895798474e-06, "loss": 0.1647, "step": 3808 }, { "epoch": 0.11112083552132564, "grad_norm": 0.8325714634226374, "learning_rate": 9.828496243602566e-06, "loss": 0.1695, "step": 3809 }, { "epoch": 0.11115000875196919, "grad_norm": 0.9163576438397082, "learning_rate": 9.828373548298994e-06, "loss": 0.1638, "step": 3810 }, { "epoch": 0.11117918198261276, "grad_norm": 1.1032582438441247, "learning_rate": 9.828250809888857e-06, "loss": 0.1789, "step": 3811 }, { "epoch": 0.11120835521325631, "grad_norm": 0.9469312949880322, "learning_rate": 9.828128028373249e-06, "loss": 0.1896, "step": 3812 }, { "epoch": 0.11123752844389988, "grad_norm": 0.9198554637930745, "learning_rate": 9.828005203753266e-06, "loss": 0.1759, "step": 3813 }, { "epoch": 0.11126670167454344, "grad_norm": 0.9820440764944599, "learning_rate": 9.827882336030005e-06, "loss": 0.1659, "step": 3814 }, { "epoch": 0.111295874905187, "grad_norm": 0.8816234477053786, "learning_rate": 9.827759425204563e-06, "loss": 0.1752, "step": 3815 }, { "epoch": 0.11132504813583056, "grad_norm": 0.8578308886705731, "learning_rate": 9.82763647127804e-06, "loss": 0.1835, "step": 3816 }, { "epoch": 0.11135422136647412, "grad_norm": 1.1081784354192596, "learning_rate": 9.827513474251527e-06, "loss": 0.1889, "step": 3817 }, { "epoch": 0.11138339459711769, "grad_norm": 1.2342896315640153, "learning_rate": 9.827390434126128e-06, "loss": 0.1766, "step": 3818 }, { "epoch": 0.11141256782776125, "grad_norm": 0.9570697493862371, "learning_rate": 9.82726735090294e-06, "loss": 0.1937, "step": 3819 }, { "epoch": 0.1114417410584048, "grad_norm": 1.0175854183995052, "learning_rate": 9.827144224583061e-06, "loss": 0.181, "step": 3820 }, { "epoch": 0.11147091428904837, "grad_norm": 1.0069047586440978, "learning_rate": 9.827021055167591e-06, "loss": 0.1795, "step": 3821 }, { "epoch": 0.11150008751969193, "grad_norm": 0.8170535531556331, "learning_rate": 9.82689784265763e-06, "loss": 0.1958, "step": 3822 }, { "epoch": 0.1115292607503355, "grad_norm": 0.9762549144270604, "learning_rate": 9.826774587054274e-06, "loss": 0.1887, "step": 3823 }, { "epoch": 0.11155843398097906, "grad_norm": 0.9765942416669872, "learning_rate": 9.826651288358631e-06, "loss": 0.1836, "step": 3824 }, { "epoch": 0.11158760721162261, "grad_norm": 0.8275904962695941, "learning_rate": 9.826527946571796e-06, "loss": 0.1638, "step": 3825 }, { "epoch": 0.11161678044226618, "grad_norm": 0.7480591770664048, "learning_rate": 9.826404561694872e-06, "loss": 0.1949, "step": 3826 }, { "epoch": 0.11164595367290973, "grad_norm": 0.8127582086929548, "learning_rate": 9.82628113372896e-06, "loss": 0.1724, "step": 3827 }, { "epoch": 0.1116751269035533, "grad_norm": 0.9590706890411163, "learning_rate": 9.82615766267516e-06, "loss": 0.1797, "step": 3828 }, { "epoch": 0.11170430013419685, "grad_norm": 0.9691927534950275, "learning_rate": 9.826034148534578e-06, "loss": 0.1907, "step": 3829 }, { "epoch": 0.11173347336484042, "grad_norm": 0.9479541205797285, "learning_rate": 9.825910591308316e-06, "loss": 0.1862, "step": 3830 }, { "epoch": 0.11176264659548399, "grad_norm": 0.8363688236797676, "learning_rate": 9.825786990997474e-06, "loss": 0.1813, "step": 3831 }, { "epoch": 0.11179181982612754, "grad_norm": 1.0214010560599478, "learning_rate": 9.82566334760316e-06, "loss": 0.1632, "step": 3832 }, { "epoch": 0.11182099305677111, "grad_norm": 0.9408923245091138, "learning_rate": 9.825539661126476e-06, "loss": 0.1951, "step": 3833 }, { "epoch": 0.11185016628741466, "grad_norm": 0.8505465960882488, "learning_rate": 9.825415931568525e-06, "loss": 0.1886, "step": 3834 }, { "epoch": 0.11187933951805823, "grad_norm": 0.9581726090741551, "learning_rate": 9.825292158930414e-06, "loss": 0.2082, "step": 3835 }, { "epoch": 0.1119085127487018, "grad_norm": 1.0289200179538978, "learning_rate": 9.825168343213244e-06, "loss": 0.1919, "step": 3836 }, { "epoch": 0.11193768597934535, "grad_norm": 0.8372072909384086, "learning_rate": 9.825044484418123e-06, "loss": 0.1879, "step": 3837 }, { "epoch": 0.11196685920998892, "grad_norm": 0.9287659409067983, "learning_rate": 9.824920582546157e-06, "loss": 0.1985, "step": 3838 }, { "epoch": 0.11199603244063247, "grad_norm": 1.1754128764959069, "learning_rate": 9.824796637598452e-06, "loss": 0.1935, "step": 3839 }, { "epoch": 0.11202520567127604, "grad_norm": 0.7114469733950892, "learning_rate": 9.824672649576114e-06, "loss": 0.1542, "step": 3840 }, { "epoch": 0.1120543789019196, "grad_norm": 0.9652819565295031, "learning_rate": 9.824548618480251e-06, "loss": 0.1697, "step": 3841 }, { "epoch": 0.11208355213256316, "grad_norm": 0.8632809183926381, "learning_rate": 9.82442454431197e-06, "loss": 0.1877, "step": 3842 }, { "epoch": 0.11211272536320672, "grad_norm": 0.8925510327530022, "learning_rate": 9.824300427072379e-06, "loss": 0.1937, "step": 3843 }, { "epoch": 0.11214189859385028, "grad_norm": 1.0306300289579706, "learning_rate": 9.824176266762584e-06, "loss": 0.1879, "step": 3844 }, { "epoch": 0.11217107182449385, "grad_norm": 0.7272399866168803, "learning_rate": 9.824052063383696e-06, "loss": 0.174, "step": 3845 }, { "epoch": 0.11220024505513741, "grad_norm": 0.7953291749523506, "learning_rate": 9.823927816936823e-06, "loss": 0.1594, "step": 3846 }, { "epoch": 0.11222941828578097, "grad_norm": 0.9242746273139414, "learning_rate": 9.823803527423073e-06, "loss": 0.1844, "step": 3847 }, { "epoch": 0.11225859151642453, "grad_norm": 0.7378429952935994, "learning_rate": 9.823679194843556e-06, "loss": 0.1838, "step": 3848 }, { "epoch": 0.11228776474706809, "grad_norm": 0.8491326506415591, "learning_rate": 9.823554819199383e-06, "loss": 0.1739, "step": 3849 }, { "epoch": 0.11231693797771165, "grad_norm": 0.8026960017082256, "learning_rate": 9.823430400491665e-06, "loss": 0.1633, "step": 3850 }, { "epoch": 0.1123461112083552, "grad_norm": 0.6599642453313245, "learning_rate": 9.823305938721511e-06, "loss": 0.1662, "step": 3851 }, { "epoch": 0.11237528443899877, "grad_norm": 0.8515874900467115, "learning_rate": 9.823181433890033e-06, "loss": 0.1814, "step": 3852 }, { "epoch": 0.11240445766964234, "grad_norm": 0.8802819705576921, "learning_rate": 9.823056885998344e-06, "loss": 0.1618, "step": 3853 }, { "epoch": 0.1124336309002859, "grad_norm": 0.9681653255674283, "learning_rate": 9.822932295047552e-06, "loss": 0.1708, "step": 3854 }, { "epoch": 0.11246280413092946, "grad_norm": 0.8629397185868605, "learning_rate": 9.822807661038774e-06, "loss": 0.2135, "step": 3855 }, { "epoch": 0.11249197736157301, "grad_norm": 0.9201597321229085, "learning_rate": 9.82268298397312e-06, "loss": 0.1723, "step": 3856 }, { "epoch": 0.11252115059221658, "grad_norm": 1.0763162141258271, "learning_rate": 9.822558263851703e-06, "loss": 0.1741, "step": 3857 }, { "epoch": 0.11255032382286015, "grad_norm": 0.8981623376821546, "learning_rate": 9.822433500675637e-06, "loss": 0.1767, "step": 3858 }, { "epoch": 0.1125794970535037, "grad_norm": 0.9258733393847015, "learning_rate": 9.822308694446036e-06, "loss": 0.1978, "step": 3859 }, { "epoch": 0.11260867028414727, "grad_norm": 1.4218574899201408, "learning_rate": 9.822183845164016e-06, "loss": 0.1732, "step": 3860 }, { "epoch": 0.11263784351479082, "grad_norm": 1.0251844248860928, "learning_rate": 9.822058952830687e-06, "loss": 0.1897, "step": 3861 }, { "epoch": 0.11266701674543439, "grad_norm": 0.9119394006279787, "learning_rate": 9.821934017447167e-06, "loss": 0.1806, "step": 3862 }, { "epoch": 0.11269618997607796, "grad_norm": 0.9380165588659504, "learning_rate": 9.82180903901457e-06, "loss": 0.1742, "step": 3863 }, { "epoch": 0.11272536320672151, "grad_norm": 1.03819331935346, "learning_rate": 9.821684017534016e-06, "loss": 0.193, "step": 3864 }, { "epoch": 0.11275453643736508, "grad_norm": 1.5310012087194778, "learning_rate": 9.821558953006618e-06, "loss": 0.1743, "step": 3865 }, { "epoch": 0.11278370966800863, "grad_norm": 0.9863703381047424, "learning_rate": 9.821433845433492e-06, "loss": 0.1805, "step": 3866 }, { "epoch": 0.1128128828986522, "grad_norm": 0.9499991563289474, "learning_rate": 9.821308694815757e-06, "loss": 0.2043, "step": 3867 }, { "epoch": 0.11284205612929576, "grad_norm": 0.9947653672530437, "learning_rate": 9.821183501154526e-06, "loss": 0.1884, "step": 3868 }, { "epoch": 0.11287122935993932, "grad_norm": 1.0925399041803332, "learning_rate": 9.821058264450921e-06, "loss": 0.1881, "step": 3869 }, { "epoch": 0.11290040259058289, "grad_norm": 0.7692636279572528, "learning_rate": 9.82093298470606e-06, "loss": 0.1623, "step": 3870 }, { "epoch": 0.11292957582122644, "grad_norm": 0.9293704198720617, "learning_rate": 9.820807661921057e-06, "loss": 0.1785, "step": 3871 }, { "epoch": 0.11295874905187, "grad_norm": 1.1286801579823407, "learning_rate": 9.820682296097038e-06, "loss": 0.1883, "step": 3872 }, { "epoch": 0.11298792228251356, "grad_norm": 0.99191725635705, "learning_rate": 9.820556887235115e-06, "loss": 0.1793, "step": 3873 }, { "epoch": 0.11301709551315713, "grad_norm": 1.1514342529400776, "learning_rate": 9.820431435336412e-06, "loss": 0.1794, "step": 3874 }, { "epoch": 0.11304626874380069, "grad_norm": 0.8792916962165257, "learning_rate": 9.820305940402046e-06, "loss": 0.1748, "step": 3875 }, { "epoch": 0.11307544197444425, "grad_norm": 0.9190598137431197, "learning_rate": 9.82018040243314e-06, "loss": 0.2084, "step": 3876 }, { "epoch": 0.11310461520508781, "grad_norm": 1.036820131371785, "learning_rate": 9.820054821430818e-06, "loss": 0.1797, "step": 3877 }, { "epoch": 0.11313378843573137, "grad_norm": 1.0797855853546763, "learning_rate": 9.819929197396193e-06, "loss": 0.1821, "step": 3878 }, { "epoch": 0.11316296166637493, "grad_norm": 0.9713902491663778, "learning_rate": 9.819803530330393e-06, "loss": 0.2093, "step": 3879 }, { "epoch": 0.1131921348970185, "grad_norm": 0.9679103759968202, "learning_rate": 9.819677820234536e-06, "loss": 0.1933, "step": 3880 }, { "epoch": 0.11322130812766205, "grad_norm": 0.9074006853552428, "learning_rate": 9.819552067109748e-06, "loss": 0.1826, "step": 3881 }, { "epoch": 0.11325048135830562, "grad_norm": 0.7600454088624697, "learning_rate": 9.819426270957148e-06, "loss": 0.1755, "step": 3882 }, { "epoch": 0.11327965458894917, "grad_norm": 0.8660801771507908, "learning_rate": 9.819300431777861e-06, "loss": 0.1847, "step": 3883 }, { "epoch": 0.11330882781959274, "grad_norm": 0.8873507212821025, "learning_rate": 9.81917454957301e-06, "loss": 0.1881, "step": 3884 }, { "epoch": 0.11333800105023631, "grad_norm": 0.9727903192838452, "learning_rate": 9.819048624343718e-06, "loss": 0.172, "step": 3885 }, { "epoch": 0.11336717428087986, "grad_norm": 0.9129466865242515, "learning_rate": 9.818922656091113e-06, "loss": 0.1991, "step": 3886 }, { "epoch": 0.11339634751152343, "grad_norm": 0.7584331411062796, "learning_rate": 9.818796644816315e-06, "loss": 0.1902, "step": 3887 }, { "epoch": 0.11342552074216698, "grad_norm": 0.9276645585519963, "learning_rate": 9.818670590520452e-06, "loss": 0.1941, "step": 3888 }, { "epoch": 0.11345469397281055, "grad_norm": 0.7732179536792737, "learning_rate": 9.818544493204647e-06, "loss": 0.1913, "step": 3889 }, { "epoch": 0.11348386720345412, "grad_norm": 0.7812702008459518, "learning_rate": 9.818418352870028e-06, "loss": 0.1911, "step": 3890 }, { "epoch": 0.11351304043409767, "grad_norm": 0.8628150701202014, "learning_rate": 9.81829216951772e-06, "loss": 0.179, "step": 3891 }, { "epoch": 0.11354221366474124, "grad_norm": 0.6979059627359, "learning_rate": 9.81816594314885e-06, "loss": 0.1722, "step": 3892 }, { "epoch": 0.11357138689538479, "grad_norm": 0.9831420334341316, "learning_rate": 9.818039673764543e-06, "loss": 0.1881, "step": 3893 }, { "epoch": 0.11360056012602836, "grad_norm": 0.7137533847401493, "learning_rate": 9.817913361365931e-06, "loss": 0.1912, "step": 3894 }, { "epoch": 0.11362973335667192, "grad_norm": 0.7777777033964453, "learning_rate": 9.817787005954136e-06, "loss": 0.1655, "step": 3895 }, { "epoch": 0.11365890658731548, "grad_norm": 0.8134838142745925, "learning_rate": 9.81766060753029e-06, "loss": 0.1768, "step": 3896 }, { "epoch": 0.11368807981795905, "grad_norm": 0.8617382337355313, "learning_rate": 9.817534166095519e-06, "loss": 0.1702, "step": 3897 }, { "epoch": 0.1137172530486026, "grad_norm": 0.7574232328325251, "learning_rate": 9.817407681650955e-06, "loss": 0.178, "step": 3898 }, { "epoch": 0.11374642627924617, "grad_norm": 0.967889985141796, "learning_rate": 9.817281154197725e-06, "loss": 0.1813, "step": 3899 }, { "epoch": 0.11377559950988972, "grad_norm": 0.8882074160499374, "learning_rate": 9.817154583736956e-06, "loss": 0.2091, "step": 3900 }, { "epoch": 0.11380477274053329, "grad_norm": 0.8488250995262521, "learning_rate": 9.817027970269783e-06, "loss": 0.1617, "step": 3901 }, { "epoch": 0.11383394597117685, "grad_norm": 0.9773972648043582, "learning_rate": 9.816901313797333e-06, "loss": 0.1658, "step": 3902 }, { "epoch": 0.1138631192018204, "grad_norm": 0.8364426964259626, "learning_rate": 9.81677461432074e-06, "loss": 0.1725, "step": 3903 }, { "epoch": 0.11389229243246397, "grad_norm": 0.9219903731469782, "learning_rate": 9.816647871841132e-06, "loss": 0.1689, "step": 3904 }, { "epoch": 0.11392146566310753, "grad_norm": 0.8414022118558264, "learning_rate": 9.816521086359641e-06, "loss": 0.1664, "step": 3905 }, { "epoch": 0.1139506388937511, "grad_norm": 1.1454968057716195, "learning_rate": 9.8163942578774e-06, "loss": 0.2033, "step": 3906 }, { "epoch": 0.11397981212439466, "grad_norm": 1.0569777052292275, "learning_rate": 9.816267386395542e-06, "loss": 0.182, "step": 3907 }, { "epoch": 0.11400898535503821, "grad_norm": 0.8936548661951315, "learning_rate": 9.816140471915196e-06, "loss": 0.1924, "step": 3908 }, { "epoch": 0.11403815858568178, "grad_norm": 1.0896106836331243, "learning_rate": 9.8160135144375e-06, "loss": 0.2064, "step": 3909 }, { "epoch": 0.11406733181632533, "grad_norm": 0.8439333244411857, "learning_rate": 9.815886513963584e-06, "loss": 0.1767, "step": 3910 }, { "epoch": 0.1140965050469689, "grad_norm": 1.012792895535858, "learning_rate": 9.815759470494582e-06, "loss": 0.1896, "step": 3911 }, { "epoch": 0.11412567827761247, "grad_norm": 1.1158822214582353, "learning_rate": 9.81563238403163e-06, "loss": 0.2073, "step": 3912 }, { "epoch": 0.11415485150825602, "grad_norm": 0.8168770507793907, "learning_rate": 9.815505254575862e-06, "loss": 0.1929, "step": 3913 }, { "epoch": 0.11418402473889959, "grad_norm": 1.0465396613520146, "learning_rate": 9.815378082128414e-06, "loss": 0.1842, "step": 3914 }, { "epoch": 0.11421319796954314, "grad_norm": 1.03863955639095, "learning_rate": 9.815250866690418e-06, "loss": 0.1921, "step": 3915 }, { "epoch": 0.11424237120018671, "grad_norm": 0.8759339958631058, "learning_rate": 9.815123608263011e-06, "loss": 0.1971, "step": 3916 }, { "epoch": 0.11427154443083028, "grad_norm": 0.9576286845939498, "learning_rate": 9.81499630684733e-06, "loss": 0.1914, "step": 3917 }, { "epoch": 0.11430071766147383, "grad_norm": 0.8924321060739058, "learning_rate": 9.814868962444512e-06, "loss": 0.1733, "step": 3918 }, { "epoch": 0.1143298908921174, "grad_norm": 0.9533798172660076, "learning_rate": 9.814741575055694e-06, "loss": 0.201, "step": 3919 }, { "epoch": 0.11435906412276095, "grad_norm": 0.8569490934644632, "learning_rate": 9.814614144682014e-06, "loss": 0.1709, "step": 3920 }, { "epoch": 0.11438823735340452, "grad_norm": 0.9917094580774355, "learning_rate": 9.814486671324604e-06, "loss": 0.2006, "step": 3921 }, { "epoch": 0.11441741058404807, "grad_norm": 0.9987004050751437, "learning_rate": 9.81435915498461e-06, "loss": 0.1792, "step": 3922 }, { "epoch": 0.11444658381469164, "grad_norm": 0.9412265153598919, "learning_rate": 9.814231595663165e-06, "loss": 0.1784, "step": 3923 }, { "epoch": 0.1144757570453352, "grad_norm": 0.9343015684978182, "learning_rate": 9.81410399336141e-06, "loss": 0.1866, "step": 3924 }, { "epoch": 0.11450493027597876, "grad_norm": 1.0180321162969372, "learning_rate": 9.813976348080484e-06, "loss": 0.2045, "step": 3925 }, { "epoch": 0.11453410350662233, "grad_norm": 0.7786413912112332, "learning_rate": 9.813848659821526e-06, "loss": 0.1665, "step": 3926 }, { "epoch": 0.11456327673726588, "grad_norm": 0.9569943966338017, "learning_rate": 9.813720928585678e-06, "loss": 0.1968, "step": 3927 }, { "epoch": 0.11459244996790945, "grad_norm": 0.8815021786382026, "learning_rate": 9.813593154374075e-06, "loss": 0.1662, "step": 3928 }, { "epoch": 0.11462162319855301, "grad_norm": 0.8900614397711096, "learning_rate": 9.813465337187864e-06, "loss": 0.1853, "step": 3929 }, { "epoch": 0.11465079642919657, "grad_norm": 0.8984565850328089, "learning_rate": 9.813337477028184e-06, "loss": 0.1685, "step": 3930 }, { "epoch": 0.11467996965984013, "grad_norm": 1.4587227900082587, "learning_rate": 9.813209573896175e-06, "loss": 0.2095, "step": 3931 }, { "epoch": 0.11470914289048369, "grad_norm": 1.0989382820231854, "learning_rate": 9.81308162779298e-06, "loss": 0.1766, "step": 3932 }, { "epoch": 0.11473831612112725, "grad_norm": 0.7076470715977727, "learning_rate": 9.812953638719741e-06, "loss": 0.1852, "step": 3933 }, { "epoch": 0.11476748935177082, "grad_norm": 0.8461530718765761, "learning_rate": 9.812825606677601e-06, "loss": 0.2041, "step": 3934 }, { "epoch": 0.11479666258241437, "grad_norm": 0.807544292357872, "learning_rate": 9.812697531667704e-06, "loss": 0.1848, "step": 3935 }, { "epoch": 0.11482583581305794, "grad_norm": 0.934358375278067, "learning_rate": 9.812569413691191e-06, "loss": 0.1939, "step": 3936 }, { "epoch": 0.1148550090437015, "grad_norm": 0.6569794659233321, "learning_rate": 9.812441252749207e-06, "loss": 0.176, "step": 3937 }, { "epoch": 0.11488418227434506, "grad_norm": 0.865850056802861, "learning_rate": 9.812313048842896e-06, "loss": 0.19, "step": 3938 }, { "epoch": 0.11491335550498863, "grad_norm": 1.4734153159013528, "learning_rate": 9.812184801973405e-06, "loss": 0.1671, "step": 3939 }, { "epoch": 0.11494252873563218, "grad_norm": 0.7067043431495279, "learning_rate": 9.812056512141875e-06, "loss": 0.1888, "step": 3940 }, { "epoch": 0.11497170196627575, "grad_norm": 0.7949118566718023, "learning_rate": 9.811928179349455e-06, "loss": 0.1906, "step": 3941 }, { "epoch": 0.1150008751969193, "grad_norm": 0.8324770315797557, "learning_rate": 9.811799803597286e-06, "loss": 0.1859, "step": 3942 }, { "epoch": 0.11503004842756287, "grad_norm": 0.7840093644842944, "learning_rate": 9.811671384886518e-06, "loss": 0.1884, "step": 3943 }, { "epoch": 0.11505922165820642, "grad_norm": 1.0144230590449386, "learning_rate": 9.811542923218298e-06, "loss": 0.1879, "step": 3944 }, { "epoch": 0.11508839488884999, "grad_norm": 0.9499532149031065, "learning_rate": 9.811414418593771e-06, "loss": 0.2091, "step": 3945 }, { "epoch": 0.11511756811949356, "grad_norm": 1.845968864637907, "learning_rate": 9.811285871014084e-06, "loss": 0.1768, "step": 3946 }, { "epoch": 0.11514674135013711, "grad_norm": 0.915737179055813, "learning_rate": 9.811157280480386e-06, "loss": 0.1724, "step": 3947 }, { "epoch": 0.11517591458078068, "grad_norm": 0.8715096334382696, "learning_rate": 9.811028646993823e-06, "loss": 0.1861, "step": 3948 }, { "epoch": 0.11520508781142423, "grad_norm": 1.1633558319535617, "learning_rate": 9.810899970555547e-06, "loss": 0.2188, "step": 3949 }, { "epoch": 0.1152342610420678, "grad_norm": 1.0185924503742674, "learning_rate": 9.810771251166702e-06, "loss": 0.1959, "step": 3950 }, { "epoch": 0.11526343427271137, "grad_norm": 0.9597127919256301, "learning_rate": 9.810642488828442e-06, "loss": 0.1864, "step": 3951 }, { "epoch": 0.11529260750335492, "grad_norm": 0.934634222614499, "learning_rate": 9.810513683541913e-06, "loss": 0.1681, "step": 3952 }, { "epoch": 0.11532178073399849, "grad_norm": 0.9237840016353603, "learning_rate": 9.810384835308266e-06, "loss": 0.1939, "step": 3953 }, { "epoch": 0.11535095396464204, "grad_norm": 0.849446720945563, "learning_rate": 9.810255944128651e-06, "loss": 0.1653, "step": 3954 }, { "epoch": 0.1153801271952856, "grad_norm": 1.084779195091929, "learning_rate": 9.81012701000422e-06, "loss": 0.1699, "step": 3955 }, { "epoch": 0.11540930042592917, "grad_norm": 0.944132426603004, "learning_rate": 9.809998032936123e-06, "loss": 0.1733, "step": 3956 }, { "epoch": 0.11543847365657273, "grad_norm": 0.961987504303707, "learning_rate": 9.809869012925512e-06, "loss": 0.1782, "step": 3957 }, { "epoch": 0.1154676468872163, "grad_norm": 1.0617644175619754, "learning_rate": 9.80973994997354e-06, "loss": 0.1813, "step": 3958 }, { "epoch": 0.11549682011785985, "grad_norm": 0.9413351711979291, "learning_rate": 9.809610844081357e-06, "loss": 0.167, "step": 3959 }, { "epoch": 0.11552599334850341, "grad_norm": 0.8095112512951378, "learning_rate": 9.809481695250116e-06, "loss": 0.1549, "step": 3960 }, { "epoch": 0.11555516657914698, "grad_norm": 1.0105866694575891, "learning_rate": 9.80935250348097e-06, "loss": 0.1953, "step": 3961 }, { "epoch": 0.11558433980979053, "grad_norm": 1.177664778950724, "learning_rate": 9.809223268775074e-06, "loss": 0.1852, "step": 3962 }, { "epoch": 0.1156135130404341, "grad_norm": 0.7748362871271275, "learning_rate": 9.80909399113358e-06, "loss": 0.1861, "step": 3963 }, { "epoch": 0.11564268627107765, "grad_norm": 0.8218146313453756, "learning_rate": 9.808964670557643e-06, "loss": 0.1603, "step": 3964 }, { "epoch": 0.11567185950172122, "grad_norm": 1.1794113164782682, "learning_rate": 9.80883530704842e-06, "loss": 0.1827, "step": 3965 }, { "epoch": 0.11570103273236478, "grad_norm": 0.7130535387940719, "learning_rate": 9.808705900607058e-06, "loss": 0.1541, "step": 3966 }, { "epoch": 0.11573020596300834, "grad_norm": 0.9235360139481987, "learning_rate": 9.808576451234721e-06, "loss": 0.179, "step": 3967 }, { "epoch": 0.11575937919365191, "grad_norm": 1.1301699140384471, "learning_rate": 9.80844695893256e-06, "loss": 0.1914, "step": 3968 }, { "epoch": 0.11578855242429546, "grad_norm": 0.8260365779849368, "learning_rate": 9.808317423701735e-06, "loss": 0.1531, "step": 3969 }, { "epoch": 0.11581772565493903, "grad_norm": 0.8304113067468187, "learning_rate": 9.808187845543397e-06, "loss": 0.1969, "step": 3970 }, { "epoch": 0.11584689888558258, "grad_norm": 0.9679207163452974, "learning_rate": 9.808058224458708e-06, "loss": 0.1897, "step": 3971 }, { "epoch": 0.11587607211622615, "grad_norm": 0.8470787888221096, "learning_rate": 9.807928560448822e-06, "loss": 0.1729, "step": 3972 }, { "epoch": 0.11590524534686972, "grad_norm": 0.9483774764824003, "learning_rate": 9.807798853514898e-06, "loss": 0.1717, "step": 3973 }, { "epoch": 0.11593441857751327, "grad_norm": 0.8434783474399654, "learning_rate": 9.807669103658092e-06, "loss": 0.187, "step": 3974 }, { "epoch": 0.11596359180815684, "grad_norm": 1.138839474688897, "learning_rate": 9.807539310879566e-06, "loss": 0.2028, "step": 3975 }, { "epoch": 0.11599276503880039, "grad_norm": 0.8098813997017877, "learning_rate": 9.807409475180476e-06, "loss": 0.1715, "step": 3976 }, { "epoch": 0.11602193826944396, "grad_norm": 0.8476295399786722, "learning_rate": 9.80727959656198e-06, "loss": 0.1742, "step": 3977 }, { "epoch": 0.11605111150008753, "grad_norm": 1.0189550480133291, "learning_rate": 9.807149675025242e-06, "loss": 0.1936, "step": 3978 }, { "epoch": 0.11608028473073108, "grad_norm": 0.9583739475972958, "learning_rate": 9.807019710571418e-06, "loss": 0.1925, "step": 3979 }, { "epoch": 0.11610945796137465, "grad_norm": 0.947373128348233, "learning_rate": 9.80688970320167e-06, "loss": 0.184, "step": 3980 }, { "epoch": 0.1161386311920182, "grad_norm": 1.0217865886958784, "learning_rate": 9.806759652917157e-06, "loss": 0.194, "step": 3981 }, { "epoch": 0.11616780442266177, "grad_norm": 0.9091592305269642, "learning_rate": 9.806629559719042e-06, "loss": 0.1809, "step": 3982 }, { "epoch": 0.11619697765330533, "grad_norm": 0.927284884626031, "learning_rate": 9.806499423608486e-06, "loss": 0.1788, "step": 3983 }, { "epoch": 0.11622615088394889, "grad_norm": 1.0614083593915533, "learning_rate": 9.80636924458665e-06, "loss": 0.1802, "step": 3984 }, { "epoch": 0.11625532411459245, "grad_norm": 0.8477671183172, "learning_rate": 9.806239022654699e-06, "loss": 0.2047, "step": 3985 }, { "epoch": 0.11628449734523601, "grad_norm": 1.0228600825612724, "learning_rate": 9.80610875781379e-06, "loss": 0.1742, "step": 3986 }, { "epoch": 0.11631367057587957, "grad_norm": 0.992616764441537, "learning_rate": 9.805978450065092e-06, "loss": 0.1681, "step": 3987 }, { "epoch": 0.11634284380652314, "grad_norm": 0.9306633160170079, "learning_rate": 9.805848099409765e-06, "loss": 0.1861, "step": 3988 }, { "epoch": 0.1163720170371667, "grad_norm": 1.0939088043077234, "learning_rate": 9.805717705848972e-06, "loss": 0.1747, "step": 3989 }, { "epoch": 0.11640119026781026, "grad_norm": 0.8550016996193792, "learning_rate": 9.805587269383881e-06, "loss": 0.1892, "step": 3990 }, { "epoch": 0.11643036349845381, "grad_norm": 1.0985057177862256, "learning_rate": 9.805456790015652e-06, "loss": 0.1952, "step": 3991 }, { "epoch": 0.11645953672909738, "grad_norm": 1.2032884200598375, "learning_rate": 9.805326267745451e-06, "loss": 0.1862, "step": 3992 }, { "epoch": 0.11648870995974094, "grad_norm": 0.8801044670755954, "learning_rate": 9.805195702574446e-06, "loss": 0.1677, "step": 3993 }, { "epoch": 0.1165178831903845, "grad_norm": 0.9838842619636844, "learning_rate": 9.805065094503801e-06, "loss": 0.2165, "step": 3994 }, { "epoch": 0.11654705642102807, "grad_norm": 1.036174236277911, "learning_rate": 9.804934443534682e-06, "loss": 0.1717, "step": 3995 }, { "epoch": 0.11657622965167162, "grad_norm": 1.0365517572217218, "learning_rate": 9.804803749668254e-06, "loss": 0.182, "step": 3996 }, { "epoch": 0.11660540288231519, "grad_norm": 1.2389885107002385, "learning_rate": 9.804673012905686e-06, "loss": 0.2106, "step": 3997 }, { "epoch": 0.11663457611295874, "grad_norm": 0.908669887314025, "learning_rate": 9.804542233248144e-06, "loss": 0.1792, "step": 3998 }, { "epoch": 0.11666374934360231, "grad_norm": 0.7735821706347971, "learning_rate": 9.804411410696797e-06, "loss": 0.1682, "step": 3999 }, { "epoch": 0.11669292257424588, "grad_norm": 0.9761508165663874, "learning_rate": 9.804280545252812e-06, "loss": 0.1963, "step": 4000 }, { "epoch": 0.11672209580488943, "grad_norm": 0.8864981232126092, "learning_rate": 9.804149636917355e-06, "loss": 0.1899, "step": 4001 }, { "epoch": 0.116751269035533, "grad_norm": 0.8001310128164034, "learning_rate": 9.8040186856916e-06, "loss": 0.1836, "step": 4002 }, { "epoch": 0.11678044226617655, "grad_norm": 0.8814383640876319, "learning_rate": 9.80388769157671e-06, "loss": 0.1788, "step": 4003 }, { "epoch": 0.11680961549682012, "grad_norm": 0.8520770186053725, "learning_rate": 9.803756654573857e-06, "loss": 0.1771, "step": 4004 }, { "epoch": 0.11683878872746369, "grad_norm": 0.880540957617837, "learning_rate": 9.803625574684213e-06, "loss": 0.1825, "step": 4005 }, { "epoch": 0.11686796195810724, "grad_norm": 0.9309486508322999, "learning_rate": 9.803494451908946e-06, "loss": 0.1881, "step": 4006 }, { "epoch": 0.1168971351887508, "grad_norm": 0.8613043448542246, "learning_rate": 9.803363286249228e-06, "loss": 0.1665, "step": 4007 }, { "epoch": 0.11692630841939436, "grad_norm": 0.96365789331625, "learning_rate": 9.803232077706229e-06, "loss": 0.1835, "step": 4008 }, { "epoch": 0.11695548165003793, "grad_norm": 0.8145166726519757, "learning_rate": 9.80310082628112e-06, "loss": 0.1949, "step": 4009 }, { "epoch": 0.1169846548806815, "grad_norm": 0.8066981054531861, "learning_rate": 9.802969531975074e-06, "loss": 0.1774, "step": 4010 }, { "epoch": 0.11701382811132505, "grad_norm": 0.8025106983913187, "learning_rate": 9.802838194789264e-06, "loss": 0.1775, "step": 4011 }, { "epoch": 0.11704300134196861, "grad_norm": 0.7545697971668403, "learning_rate": 9.802706814724857e-06, "loss": 0.1826, "step": 4012 }, { "epoch": 0.11707217457261217, "grad_norm": 0.8854062913148097, "learning_rate": 9.802575391783033e-06, "loss": 0.1763, "step": 4013 }, { "epoch": 0.11710134780325573, "grad_norm": 0.886431520686121, "learning_rate": 9.802443925964963e-06, "loss": 0.1914, "step": 4014 }, { "epoch": 0.11713052103389929, "grad_norm": 0.819842833339432, "learning_rate": 9.80231241727182e-06, "loss": 0.1723, "step": 4015 }, { "epoch": 0.11715969426454285, "grad_norm": 0.777312991272999, "learning_rate": 9.802180865704775e-06, "loss": 0.1766, "step": 4016 }, { "epoch": 0.11718886749518642, "grad_norm": 0.9257122523737836, "learning_rate": 9.80204927126501e-06, "loss": 0.1825, "step": 4017 }, { "epoch": 0.11721804072582998, "grad_norm": 0.8258775242878535, "learning_rate": 9.801917633953693e-06, "loss": 0.1899, "step": 4018 }, { "epoch": 0.11724721395647354, "grad_norm": 0.7616173750474217, "learning_rate": 9.801785953772001e-06, "loss": 0.1886, "step": 4019 }, { "epoch": 0.1172763871871171, "grad_norm": 0.9280170484458173, "learning_rate": 9.801654230721111e-06, "loss": 0.2046, "step": 4020 }, { "epoch": 0.11730556041776066, "grad_norm": 0.9799221946806403, "learning_rate": 9.801522464802199e-06, "loss": 0.1842, "step": 4021 }, { "epoch": 0.11733473364840423, "grad_norm": 0.8179915557194807, "learning_rate": 9.80139065601644e-06, "loss": 0.1696, "step": 4022 }, { "epoch": 0.11736390687904778, "grad_norm": 0.8595657911189106, "learning_rate": 9.801258804365013e-06, "loss": 0.1735, "step": 4023 }, { "epoch": 0.11739308010969135, "grad_norm": 0.9495398536792797, "learning_rate": 9.80112690984909e-06, "loss": 0.1671, "step": 4024 }, { "epoch": 0.1174222533403349, "grad_norm": 1.096707425460663, "learning_rate": 9.800994972469855e-06, "loss": 0.1657, "step": 4025 }, { "epoch": 0.11745142657097847, "grad_norm": 0.8307152374186523, "learning_rate": 9.800862992228481e-06, "loss": 0.1806, "step": 4026 }, { "epoch": 0.11748059980162204, "grad_norm": 0.6971424405215413, "learning_rate": 9.800730969126151e-06, "loss": 0.1627, "step": 4027 }, { "epoch": 0.11750977303226559, "grad_norm": 1.1698168537208269, "learning_rate": 9.800598903164039e-06, "loss": 0.1918, "step": 4028 }, { "epoch": 0.11753894626290916, "grad_norm": 0.8671636053545344, "learning_rate": 9.800466794343326e-06, "loss": 0.2005, "step": 4029 }, { "epoch": 0.11756811949355271, "grad_norm": 0.7690461245297601, "learning_rate": 9.800334642665193e-06, "loss": 0.1822, "step": 4030 }, { "epoch": 0.11759729272419628, "grad_norm": 0.8673928340155402, "learning_rate": 9.800202448130816e-06, "loss": 0.1686, "step": 4031 }, { "epoch": 0.11762646595483985, "grad_norm": 1.0113940236773769, "learning_rate": 9.80007021074138e-06, "loss": 0.1915, "step": 4032 }, { "epoch": 0.1176556391854834, "grad_norm": 0.835804990942079, "learning_rate": 9.79993793049806e-06, "loss": 0.1878, "step": 4033 }, { "epoch": 0.11768481241612697, "grad_norm": 1.055479587939155, "learning_rate": 9.799805607402042e-06, "loss": 0.1573, "step": 4034 }, { "epoch": 0.11771398564677052, "grad_norm": 0.9922774040757671, "learning_rate": 9.799673241454504e-06, "loss": 0.1709, "step": 4035 }, { "epoch": 0.11774315887741409, "grad_norm": 0.8400929161992129, "learning_rate": 9.79954083265663e-06, "loss": 0.1608, "step": 4036 }, { "epoch": 0.11777233210805764, "grad_norm": 1.0264751256358728, "learning_rate": 9.7994083810096e-06, "loss": 0.184, "step": 4037 }, { "epoch": 0.1178015053387012, "grad_norm": 0.9068214960259822, "learning_rate": 9.799275886514599e-06, "loss": 0.1754, "step": 4038 }, { "epoch": 0.11783067856934477, "grad_norm": 1.096105259722257, "learning_rate": 9.799143349172809e-06, "loss": 0.1583, "step": 4039 }, { "epoch": 0.11785985179998833, "grad_norm": 0.7592187551813588, "learning_rate": 9.799010768985413e-06, "loss": 0.1569, "step": 4040 }, { "epoch": 0.1178890250306319, "grad_norm": 0.9843277352889542, "learning_rate": 9.798878145953592e-06, "loss": 0.2049, "step": 4041 }, { "epoch": 0.11791819826127545, "grad_norm": 1.2825858480708008, "learning_rate": 9.798745480078535e-06, "loss": 0.1993, "step": 4042 }, { "epoch": 0.11794737149191901, "grad_norm": 0.8415361714071162, "learning_rate": 9.798612771361423e-06, "loss": 0.1884, "step": 4043 }, { "epoch": 0.11797654472256258, "grad_norm": 0.68541036268047, "learning_rate": 9.798480019803442e-06, "loss": 0.1677, "step": 4044 }, { "epoch": 0.11800571795320614, "grad_norm": 0.8833576539239999, "learning_rate": 9.798347225405777e-06, "loss": 0.1775, "step": 4045 }, { "epoch": 0.1180348911838497, "grad_norm": 0.7764367365558691, "learning_rate": 9.798214388169613e-06, "loss": 0.1793, "step": 4046 }, { "epoch": 0.11806406441449326, "grad_norm": 0.7572082043070054, "learning_rate": 9.798081508096135e-06, "loss": 0.1672, "step": 4047 }, { "epoch": 0.11809323764513682, "grad_norm": 0.7607281881986289, "learning_rate": 9.797948585186533e-06, "loss": 0.178, "step": 4048 }, { "epoch": 0.11812241087578039, "grad_norm": 1.0620435903387817, "learning_rate": 9.79781561944199e-06, "loss": 0.2079, "step": 4049 }, { "epoch": 0.11815158410642394, "grad_norm": 0.7446112541942896, "learning_rate": 9.797682610863695e-06, "loss": 0.171, "step": 4050 }, { "epoch": 0.11818075733706751, "grad_norm": 0.7523181824036657, "learning_rate": 9.797549559452835e-06, "loss": 0.1609, "step": 4051 }, { "epoch": 0.11820993056771106, "grad_norm": 0.7258478653299077, "learning_rate": 9.797416465210599e-06, "loss": 0.1728, "step": 4052 }, { "epoch": 0.11823910379835463, "grad_norm": 0.788248929247024, "learning_rate": 9.797283328138172e-06, "loss": 0.1783, "step": 4053 }, { "epoch": 0.1182682770289982, "grad_norm": 1.0221899508346437, "learning_rate": 9.797150148236744e-06, "loss": 0.1922, "step": 4054 }, { "epoch": 0.11829745025964175, "grad_norm": 0.8504084637091053, "learning_rate": 9.797016925507507e-06, "loss": 0.195, "step": 4055 }, { "epoch": 0.11832662349028532, "grad_norm": 0.8017278695647965, "learning_rate": 9.796883659951648e-06, "loss": 0.1966, "step": 4056 }, { "epoch": 0.11835579672092887, "grad_norm": 1.1323548778598158, "learning_rate": 9.796750351570355e-06, "loss": 0.1683, "step": 4057 }, { "epoch": 0.11838496995157244, "grad_norm": 1.1258246439497628, "learning_rate": 9.79661700036482e-06, "loss": 0.1936, "step": 4058 }, { "epoch": 0.11841414318221599, "grad_norm": 0.8904428235802615, "learning_rate": 9.796483606336235e-06, "loss": 0.1841, "step": 4059 }, { "epoch": 0.11844331641285956, "grad_norm": 1.1806870222346555, "learning_rate": 9.796350169485789e-06, "loss": 0.1864, "step": 4060 }, { "epoch": 0.11847248964350313, "grad_norm": 1.0329696619723054, "learning_rate": 9.796216689814672e-06, "loss": 0.1982, "step": 4061 }, { "epoch": 0.11850166287414668, "grad_norm": 0.9676735357582843, "learning_rate": 9.79608316732408e-06, "loss": 0.1914, "step": 4062 }, { "epoch": 0.11853083610479025, "grad_norm": 0.8864614157739367, "learning_rate": 9.7959496020152e-06, "loss": 0.1786, "step": 4063 }, { "epoch": 0.1185600093354338, "grad_norm": 1.0882041126738569, "learning_rate": 9.795815993889229e-06, "loss": 0.1964, "step": 4064 }, { "epoch": 0.11858918256607737, "grad_norm": 0.8471911828856352, "learning_rate": 9.795682342947356e-06, "loss": 0.1833, "step": 4065 }, { "epoch": 0.11861835579672093, "grad_norm": 0.7909408846675637, "learning_rate": 9.795548649190777e-06, "loss": 0.1761, "step": 4066 }, { "epoch": 0.11864752902736449, "grad_norm": 0.9649875727522704, "learning_rate": 9.795414912620685e-06, "loss": 0.1947, "step": 4067 }, { "epoch": 0.11867670225800805, "grad_norm": 0.8506455621724606, "learning_rate": 9.79528113323827e-06, "loss": 0.1786, "step": 4068 }, { "epoch": 0.11870587548865161, "grad_norm": 0.7179979463357128, "learning_rate": 9.795147311044732e-06, "loss": 0.1874, "step": 4069 }, { "epoch": 0.11873504871929517, "grad_norm": 0.8867828001423357, "learning_rate": 9.795013446041264e-06, "loss": 0.177, "step": 4070 }, { "epoch": 0.11876422194993874, "grad_norm": 0.7854471935045214, "learning_rate": 9.79487953822906e-06, "loss": 0.1785, "step": 4071 }, { "epoch": 0.1187933951805823, "grad_norm": 0.8486496225340521, "learning_rate": 9.794745587609318e-06, "loss": 0.1735, "step": 4072 }, { "epoch": 0.11882256841122586, "grad_norm": 0.8685026532048004, "learning_rate": 9.794611594183229e-06, "loss": 0.1722, "step": 4073 }, { "epoch": 0.11885174164186942, "grad_norm": 0.9528898964468909, "learning_rate": 9.794477557951993e-06, "loss": 0.1803, "step": 4074 }, { "epoch": 0.11888091487251298, "grad_norm": 0.8342324505777576, "learning_rate": 9.794343478916807e-06, "loss": 0.1779, "step": 4075 }, { "epoch": 0.11891008810315655, "grad_norm": 0.7742586727436434, "learning_rate": 9.794209357078867e-06, "loss": 0.2007, "step": 4076 }, { "epoch": 0.1189392613338001, "grad_norm": 0.9091465661381676, "learning_rate": 9.79407519243937e-06, "loss": 0.1842, "step": 4077 }, { "epoch": 0.11896843456444367, "grad_norm": 0.7469480493965509, "learning_rate": 9.793940984999512e-06, "loss": 0.1462, "step": 4078 }, { "epoch": 0.11899760779508722, "grad_norm": 0.8886994889203368, "learning_rate": 9.793806734760496e-06, "loss": 0.1947, "step": 4079 }, { "epoch": 0.11902678102573079, "grad_norm": 0.965534648887828, "learning_rate": 9.793672441723515e-06, "loss": 0.1827, "step": 4080 }, { "epoch": 0.11905595425637436, "grad_norm": 0.9257063083089873, "learning_rate": 9.793538105889775e-06, "loss": 0.1761, "step": 4081 }, { "epoch": 0.11908512748701791, "grad_norm": 1.2208151541428913, "learning_rate": 9.79340372726047e-06, "loss": 0.1908, "step": 4082 }, { "epoch": 0.11911430071766148, "grad_norm": 0.9993951765731436, "learning_rate": 9.793269305836799e-06, "loss": 0.191, "step": 4083 }, { "epoch": 0.11914347394830503, "grad_norm": 0.8280227146034735, "learning_rate": 9.793134841619964e-06, "loss": 0.1623, "step": 4084 }, { "epoch": 0.1191726471789486, "grad_norm": 1.3120390396897854, "learning_rate": 9.793000334611166e-06, "loss": 0.1563, "step": 4085 }, { "epoch": 0.11920182040959215, "grad_norm": 0.8928568609837696, "learning_rate": 9.792865784811604e-06, "loss": 0.1685, "step": 4086 }, { "epoch": 0.11923099364023572, "grad_norm": 1.0876694985665638, "learning_rate": 9.792731192222482e-06, "loss": 0.1704, "step": 4087 }, { "epoch": 0.11926016687087929, "grad_norm": 0.8772309262531918, "learning_rate": 9.792596556845e-06, "loss": 0.1743, "step": 4088 }, { "epoch": 0.11928934010152284, "grad_norm": 0.8882982565596025, "learning_rate": 9.79246187868036e-06, "loss": 0.1548, "step": 4089 }, { "epoch": 0.1193185133321664, "grad_norm": 0.7598220770302359, "learning_rate": 9.792327157729762e-06, "loss": 0.1719, "step": 4090 }, { "epoch": 0.11934768656280996, "grad_norm": 0.8979567040089305, "learning_rate": 9.792192393994415e-06, "loss": 0.1703, "step": 4091 }, { "epoch": 0.11937685979345353, "grad_norm": 1.0627609739083699, "learning_rate": 9.792057587475516e-06, "loss": 0.1821, "step": 4092 }, { "epoch": 0.1194060330240971, "grad_norm": 0.9182593943569555, "learning_rate": 9.79192273817427e-06, "loss": 0.1948, "step": 4093 }, { "epoch": 0.11943520625474065, "grad_norm": 0.8349549529579332, "learning_rate": 9.791787846091883e-06, "loss": 0.199, "step": 4094 }, { "epoch": 0.11946437948538421, "grad_norm": 0.9436604013645415, "learning_rate": 9.79165291122956e-06, "loss": 0.192, "step": 4095 }, { "epoch": 0.11949355271602777, "grad_norm": 0.8038645009853541, "learning_rate": 9.7915179335885e-06, "loss": 0.1638, "step": 4096 }, { "epoch": 0.11952272594667133, "grad_norm": 0.9705876176402898, "learning_rate": 9.791382913169913e-06, "loss": 0.1903, "step": 4097 }, { "epoch": 0.1195518991773149, "grad_norm": 0.8509804031952122, "learning_rate": 9.791247849975003e-06, "loss": 0.2078, "step": 4098 }, { "epoch": 0.11958107240795846, "grad_norm": 1.141298404017863, "learning_rate": 9.791112744004979e-06, "loss": 0.1825, "step": 4099 }, { "epoch": 0.11961024563860202, "grad_norm": 0.8444499592388437, "learning_rate": 9.79097759526104e-06, "loss": 0.1968, "step": 4100 }, { "epoch": 0.11963941886924558, "grad_norm": 0.8467549660289233, "learning_rate": 9.790842403744398e-06, "loss": 0.1741, "step": 4101 }, { "epoch": 0.11966859209988914, "grad_norm": 0.8479821220200611, "learning_rate": 9.79070716945626e-06, "loss": 0.1772, "step": 4102 }, { "epoch": 0.11969776533053271, "grad_norm": 0.8502502276293883, "learning_rate": 9.79057189239783e-06, "loss": 0.1899, "step": 4103 }, { "epoch": 0.11972693856117626, "grad_norm": 0.7865772133425387, "learning_rate": 9.790436572570319e-06, "loss": 0.1626, "step": 4104 }, { "epoch": 0.11975611179181983, "grad_norm": 0.962058000668101, "learning_rate": 9.790301209974932e-06, "loss": 0.1824, "step": 4105 }, { "epoch": 0.11978528502246338, "grad_norm": 0.7708385284061801, "learning_rate": 9.790165804612882e-06, "loss": 0.1852, "step": 4106 }, { "epoch": 0.11981445825310695, "grad_norm": 0.9232131298020444, "learning_rate": 9.790030356485374e-06, "loss": 0.1766, "step": 4107 }, { "epoch": 0.1198436314837505, "grad_norm": 0.8298980115415907, "learning_rate": 9.789894865593619e-06, "loss": 0.1662, "step": 4108 }, { "epoch": 0.11987280471439407, "grad_norm": 0.7722703020740381, "learning_rate": 9.789759331938826e-06, "loss": 0.165, "step": 4109 }, { "epoch": 0.11990197794503764, "grad_norm": 1.0514972642459577, "learning_rate": 9.789623755522204e-06, "loss": 0.1822, "step": 4110 }, { "epoch": 0.11993115117568119, "grad_norm": 0.6943035354131065, "learning_rate": 9.789488136344966e-06, "loss": 0.1652, "step": 4111 }, { "epoch": 0.11996032440632476, "grad_norm": 0.8219632672017334, "learning_rate": 9.78935247440832e-06, "loss": 0.1632, "step": 4112 }, { "epoch": 0.11998949763696831, "grad_norm": 0.9181735980006912, "learning_rate": 9.789216769713479e-06, "loss": 0.1691, "step": 4113 }, { "epoch": 0.12001867086761188, "grad_norm": 1.042590637169085, "learning_rate": 9.789081022261654e-06, "loss": 0.2045, "step": 4114 }, { "epoch": 0.12004784409825545, "grad_norm": 0.7952694115501756, "learning_rate": 9.788945232054056e-06, "loss": 0.1585, "step": 4115 }, { "epoch": 0.120077017328899, "grad_norm": 0.6729379380871152, "learning_rate": 9.788809399091899e-06, "loss": 0.1787, "step": 4116 }, { "epoch": 0.12010619055954257, "grad_norm": 1.5782806905729891, "learning_rate": 9.788673523376396e-06, "loss": 0.1917, "step": 4117 }, { "epoch": 0.12013536379018612, "grad_norm": 0.9286855269409086, "learning_rate": 9.788537604908756e-06, "loss": 0.1906, "step": 4118 }, { "epoch": 0.12016453702082969, "grad_norm": 1.1332457540301297, "learning_rate": 9.788401643690197e-06, "loss": 0.1729, "step": 4119 }, { "epoch": 0.12019371025147325, "grad_norm": 0.9568786266958319, "learning_rate": 9.788265639721932e-06, "loss": 0.1785, "step": 4120 }, { "epoch": 0.12022288348211681, "grad_norm": 0.8098561999190378, "learning_rate": 9.788129593005174e-06, "loss": 0.19, "step": 4121 }, { "epoch": 0.12025205671276037, "grad_norm": 1.0386085787577433, "learning_rate": 9.787993503541137e-06, "loss": 0.1847, "step": 4122 }, { "epoch": 0.12028122994340393, "grad_norm": 0.792221227936919, "learning_rate": 9.787857371331039e-06, "loss": 0.2145, "step": 4123 }, { "epoch": 0.1203104031740475, "grad_norm": 1.0271046503439851, "learning_rate": 9.787721196376092e-06, "loss": 0.1812, "step": 4124 }, { "epoch": 0.12033957640469106, "grad_norm": 0.8288151132963502, "learning_rate": 9.787584978677514e-06, "loss": 0.192, "step": 4125 }, { "epoch": 0.12036874963533462, "grad_norm": 0.8788552241854136, "learning_rate": 9.787448718236519e-06, "loss": 0.1728, "step": 4126 }, { "epoch": 0.12039792286597818, "grad_norm": 0.9221041884990281, "learning_rate": 9.787312415054325e-06, "loss": 0.184, "step": 4127 }, { "epoch": 0.12042709609662174, "grad_norm": 0.8464018185918841, "learning_rate": 9.787176069132149e-06, "loss": 0.1595, "step": 4128 }, { "epoch": 0.1204562693272653, "grad_norm": 0.9061550624397733, "learning_rate": 9.787039680471206e-06, "loss": 0.1811, "step": 4129 }, { "epoch": 0.12048544255790886, "grad_norm": 0.9436511449768542, "learning_rate": 9.786903249072717e-06, "loss": 0.1601, "step": 4130 }, { "epoch": 0.12051461578855242, "grad_norm": 0.8521825288934504, "learning_rate": 9.786766774937898e-06, "loss": 0.206, "step": 4131 }, { "epoch": 0.12054378901919599, "grad_norm": 1.3319908623471917, "learning_rate": 9.78663025806797e-06, "loss": 0.166, "step": 4132 }, { "epoch": 0.12057296224983954, "grad_norm": 1.3676056687058058, "learning_rate": 9.786493698464149e-06, "loss": 0.1735, "step": 4133 }, { "epoch": 0.12060213548048311, "grad_norm": 0.9383909096438793, "learning_rate": 9.786357096127652e-06, "loss": 0.1817, "step": 4134 }, { "epoch": 0.12063130871112666, "grad_norm": 1.2318892210635184, "learning_rate": 9.786220451059704e-06, "loss": 0.1979, "step": 4135 }, { "epoch": 0.12066048194177023, "grad_norm": 0.9014131174008452, "learning_rate": 9.786083763261522e-06, "loss": 0.1712, "step": 4136 }, { "epoch": 0.1206896551724138, "grad_norm": 0.8132711903801089, "learning_rate": 9.785947032734326e-06, "loss": 0.1873, "step": 4137 }, { "epoch": 0.12071882840305735, "grad_norm": 0.8642170649113723, "learning_rate": 9.785810259479337e-06, "loss": 0.1724, "step": 4138 }, { "epoch": 0.12074800163370092, "grad_norm": 0.8133536176102205, "learning_rate": 9.785673443497779e-06, "loss": 0.1635, "step": 4139 }, { "epoch": 0.12077717486434447, "grad_norm": 0.8421766063801384, "learning_rate": 9.785536584790869e-06, "loss": 0.1992, "step": 4140 }, { "epoch": 0.12080634809498804, "grad_norm": 0.8301407227670122, "learning_rate": 9.78539968335983e-06, "loss": 0.1728, "step": 4141 }, { "epoch": 0.1208355213256316, "grad_norm": 0.8329694079632481, "learning_rate": 9.785262739205887e-06, "loss": 0.172, "step": 4142 }, { "epoch": 0.12086469455627516, "grad_norm": 0.8015332815545945, "learning_rate": 9.78512575233026e-06, "loss": 0.1815, "step": 4143 }, { "epoch": 0.12089386778691873, "grad_norm": 0.7640230894037681, "learning_rate": 9.784988722734172e-06, "loss": 0.1662, "step": 4144 }, { "epoch": 0.12092304101756228, "grad_norm": 0.7414765506748662, "learning_rate": 9.784851650418847e-06, "loss": 0.1756, "step": 4145 }, { "epoch": 0.12095221424820585, "grad_norm": 0.8478198725277386, "learning_rate": 9.784714535385509e-06, "loss": 0.1594, "step": 4146 }, { "epoch": 0.12098138747884941, "grad_norm": 1.2695178645519447, "learning_rate": 9.784577377635382e-06, "loss": 0.1631, "step": 4147 }, { "epoch": 0.12101056070949297, "grad_norm": 0.7650605253914328, "learning_rate": 9.784440177169689e-06, "loss": 0.1609, "step": 4148 }, { "epoch": 0.12103973394013653, "grad_norm": 1.0276395784864185, "learning_rate": 9.784302933989657e-06, "loss": 0.2238, "step": 4149 }, { "epoch": 0.12106890717078009, "grad_norm": 0.8494424714848874, "learning_rate": 9.784165648096514e-06, "loss": 0.1882, "step": 4150 }, { "epoch": 0.12109808040142366, "grad_norm": 0.8593437139387499, "learning_rate": 9.784028319491478e-06, "loss": 0.1743, "step": 4151 }, { "epoch": 0.12112725363206721, "grad_norm": 0.8446351444485749, "learning_rate": 9.78389094817578e-06, "loss": 0.1742, "step": 4152 }, { "epoch": 0.12115642686271078, "grad_norm": 0.906404467522756, "learning_rate": 9.783753534150646e-06, "loss": 0.1892, "step": 4153 }, { "epoch": 0.12118560009335434, "grad_norm": 0.7928934533824235, "learning_rate": 9.783616077417301e-06, "loss": 0.1764, "step": 4154 }, { "epoch": 0.1212147733239979, "grad_norm": 1.011787013239762, "learning_rate": 9.783478577976976e-06, "loss": 0.1746, "step": 4155 }, { "epoch": 0.12124394655464146, "grad_norm": 0.8092316016257747, "learning_rate": 9.783341035830895e-06, "loss": 0.1744, "step": 4156 }, { "epoch": 0.12127311978528502, "grad_norm": 0.7880658319412299, "learning_rate": 9.783203450980287e-06, "loss": 0.1754, "step": 4157 }, { "epoch": 0.12130229301592858, "grad_norm": 0.9193699472202896, "learning_rate": 9.78306582342638e-06, "loss": 0.1914, "step": 4158 }, { "epoch": 0.12133146624657215, "grad_norm": 0.782109094769802, "learning_rate": 9.782928153170403e-06, "loss": 0.1599, "step": 4159 }, { "epoch": 0.1213606394772157, "grad_norm": 1.131680214875881, "learning_rate": 9.782790440213587e-06, "loss": 0.197, "step": 4160 }, { "epoch": 0.12138981270785927, "grad_norm": 0.8343074545424928, "learning_rate": 9.782652684557158e-06, "loss": 0.1773, "step": 4161 }, { "epoch": 0.12141898593850282, "grad_norm": 0.9572818164206963, "learning_rate": 9.78251488620235e-06, "loss": 0.174, "step": 4162 }, { "epoch": 0.12144815916914639, "grad_norm": 0.7420543684606417, "learning_rate": 9.782377045150387e-06, "loss": 0.168, "step": 4163 }, { "epoch": 0.12147733239978996, "grad_norm": 0.8974813647430494, "learning_rate": 9.782239161402505e-06, "loss": 0.1609, "step": 4164 }, { "epoch": 0.12150650563043351, "grad_norm": 0.8375675889457979, "learning_rate": 9.782101234959935e-06, "loss": 0.1808, "step": 4165 }, { "epoch": 0.12153567886107708, "grad_norm": 1.038493570210264, "learning_rate": 9.781963265823905e-06, "loss": 0.1869, "step": 4166 }, { "epoch": 0.12156485209172063, "grad_norm": 0.9711615425737522, "learning_rate": 9.78182525399565e-06, "loss": 0.178, "step": 4167 }, { "epoch": 0.1215940253223642, "grad_norm": 0.7962882984557913, "learning_rate": 9.781687199476399e-06, "loss": 0.1514, "step": 4168 }, { "epoch": 0.12162319855300777, "grad_norm": 0.7542737816784294, "learning_rate": 9.781549102267387e-06, "loss": 0.1632, "step": 4169 }, { "epoch": 0.12165237178365132, "grad_norm": 0.9970523601200407, "learning_rate": 9.781410962369846e-06, "loss": 0.1702, "step": 4170 }, { "epoch": 0.12168154501429489, "grad_norm": 1.074944699922814, "learning_rate": 9.78127277978501e-06, "loss": 0.1836, "step": 4171 }, { "epoch": 0.12171071824493844, "grad_norm": 1.052545248224118, "learning_rate": 9.781134554514108e-06, "loss": 0.1778, "step": 4172 }, { "epoch": 0.12173989147558201, "grad_norm": 0.8528179908215335, "learning_rate": 9.780996286558382e-06, "loss": 0.1685, "step": 4173 }, { "epoch": 0.12176906470622557, "grad_norm": 1.047169045200767, "learning_rate": 9.780857975919063e-06, "loss": 0.1955, "step": 4174 }, { "epoch": 0.12179823793686913, "grad_norm": 0.9103774911103129, "learning_rate": 9.780719622597383e-06, "loss": 0.1881, "step": 4175 }, { "epoch": 0.1218274111675127, "grad_norm": 0.6978801978572893, "learning_rate": 9.78058122659458e-06, "loss": 0.1521, "step": 4176 }, { "epoch": 0.12185658439815625, "grad_norm": 0.8262896383805954, "learning_rate": 9.780442787911891e-06, "loss": 0.1668, "step": 4177 }, { "epoch": 0.12188575762879982, "grad_norm": 0.8353675530022704, "learning_rate": 9.780304306550547e-06, "loss": 0.1743, "step": 4178 }, { "epoch": 0.12191493085944337, "grad_norm": 1.269227532562418, "learning_rate": 9.78016578251179e-06, "loss": 0.2063, "step": 4179 }, { "epoch": 0.12194410409008694, "grad_norm": 0.800200258529141, "learning_rate": 9.780027215796853e-06, "loss": 0.1675, "step": 4180 }, { "epoch": 0.1219732773207305, "grad_norm": 0.862996763044887, "learning_rate": 9.779888606406974e-06, "loss": 0.1682, "step": 4181 }, { "epoch": 0.12200245055137406, "grad_norm": 1.0524814671615295, "learning_rate": 9.77974995434339e-06, "loss": 0.1711, "step": 4182 }, { "epoch": 0.12203162378201762, "grad_norm": 0.7834384166130912, "learning_rate": 9.77961125960734e-06, "loss": 0.1766, "step": 4183 }, { "epoch": 0.12206079701266118, "grad_norm": 1.0000136967483966, "learning_rate": 9.779472522200063e-06, "loss": 0.1928, "step": 4184 }, { "epoch": 0.12208997024330474, "grad_norm": 0.8285754849884852, "learning_rate": 9.779333742122792e-06, "loss": 0.207, "step": 4185 }, { "epoch": 0.12211914347394831, "grad_norm": 0.9061836895942934, "learning_rate": 9.779194919376774e-06, "loss": 0.1918, "step": 4186 }, { "epoch": 0.12214831670459186, "grad_norm": 0.9974874319325114, "learning_rate": 9.779056053963243e-06, "loss": 0.1832, "step": 4187 }, { "epoch": 0.12217748993523543, "grad_norm": 0.79496352702967, "learning_rate": 9.778917145883441e-06, "loss": 0.1974, "step": 4188 }, { "epoch": 0.12220666316587898, "grad_norm": 1.1376640314406414, "learning_rate": 9.778778195138609e-06, "loss": 0.1795, "step": 4189 }, { "epoch": 0.12223583639652255, "grad_norm": 0.8717399481860646, "learning_rate": 9.778639201729985e-06, "loss": 0.1883, "step": 4190 }, { "epoch": 0.12226500962716612, "grad_norm": 0.8262123753517054, "learning_rate": 9.77850016565881e-06, "loss": 0.1858, "step": 4191 }, { "epoch": 0.12229418285780967, "grad_norm": 1.0161112536908263, "learning_rate": 9.778361086926327e-06, "loss": 0.2038, "step": 4192 }, { "epoch": 0.12232335608845324, "grad_norm": 0.9989675131884603, "learning_rate": 9.778221965533776e-06, "loss": 0.1889, "step": 4193 }, { "epoch": 0.12235252931909679, "grad_norm": 0.886147768004657, "learning_rate": 9.778082801482402e-06, "loss": 0.1553, "step": 4194 }, { "epoch": 0.12238170254974036, "grad_norm": 0.9443788967680901, "learning_rate": 9.777943594773443e-06, "loss": 0.1862, "step": 4195 }, { "epoch": 0.12241087578038393, "grad_norm": 0.7090528132935483, "learning_rate": 9.777804345408146e-06, "loss": 0.1859, "step": 4196 }, { "epoch": 0.12244004901102748, "grad_norm": 0.9017168604179734, "learning_rate": 9.77766505338775e-06, "loss": 0.1881, "step": 4197 }, { "epoch": 0.12246922224167105, "grad_norm": 0.9176129179618445, "learning_rate": 9.777525718713503e-06, "loss": 0.2047, "step": 4198 }, { "epoch": 0.1224983954723146, "grad_norm": 0.8592543265106798, "learning_rate": 9.777386341386647e-06, "loss": 0.1971, "step": 4199 }, { "epoch": 0.12252756870295817, "grad_norm": 0.9434533936174614, "learning_rate": 9.777246921408426e-06, "loss": 0.1667, "step": 4200 }, { "epoch": 0.12255674193360172, "grad_norm": 0.927354638086785, "learning_rate": 9.777107458780084e-06, "loss": 0.1919, "step": 4201 }, { "epoch": 0.12258591516424529, "grad_norm": 0.9226859426751418, "learning_rate": 9.776967953502869e-06, "loss": 0.2093, "step": 4202 }, { "epoch": 0.12261508839488885, "grad_norm": 0.9161039871550587, "learning_rate": 9.776828405578023e-06, "loss": 0.1823, "step": 4203 }, { "epoch": 0.12264426162553241, "grad_norm": 0.9020688434530008, "learning_rate": 9.776688815006792e-06, "loss": 0.1651, "step": 4204 }, { "epoch": 0.12267343485617598, "grad_norm": 1.037436081549395, "learning_rate": 9.776549181790424e-06, "loss": 0.1836, "step": 4205 }, { "epoch": 0.12270260808681953, "grad_norm": 0.8719546752081956, "learning_rate": 9.776409505930167e-06, "loss": 0.1694, "step": 4206 }, { "epoch": 0.1227317813174631, "grad_norm": 0.8810485573848786, "learning_rate": 9.776269787427266e-06, "loss": 0.1796, "step": 4207 }, { "epoch": 0.12276095454810666, "grad_norm": 0.9978537568358294, "learning_rate": 9.776130026282968e-06, "loss": 0.1915, "step": 4208 }, { "epoch": 0.12279012777875022, "grad_norm": 0.7555369809030872, "learning_rate": 9.77599022249852e-06, "loss": 0.1708, "step": 4209 }, { "epoch": 0.12281930100939378, "grad_norm": 0.9692423805565872, "learning_rate": 9.775850376075174e-06, "loss": 0.1572, "step": 4210 }, { "epoch": 0.12284847424003734, "grad_norm": 1.0618323249689408, "learning_rate": 9.775710487014172e-06, "loss": 0.2008, "step": 4211 }, { "epoch": 0.1228776474706809, "grad_norm": 1.040945500262105, "learning_rate": 9.77557055531677e-06, "loss": 0.1839, "step": 4212 }, { "epoch": 0.12290682070132447, "grad_norm": 1.037133168332038, "learning_rate": 9.775430580984213e-06, "loss": 0.2114, "step": 4213 }, { "epoch": 0.12293599393196802, "grad_norm": 0.8061472566653828, "learning_rate": 9.775290564017752e-06, "loss": 0.1874, "step": 4214 }, { "epoch": 0.12296516716261159, "grad_norm": 0.8963591424698387, "learning_rate": 9.775150504418638e-06, "loss": 0.1713, "step": 4215 }, { "epoch": 0.12299434039325514, "grad_norm": 0.8630866206082455, "learning_rate": 9.775010402188119e-06, "loss": 0.189, "step": 4216 }, { "epoch": 0.12302351362389871, "grad_norm": 0.9263597454549111, "learning_rate": 9.774870257327447e-06, "loss": 0.2046, "step": 4217 }, { "epoch": 0.12305268685454228, "grad_norm": 1.071914955078211, "learning_rate": 9.774730069837872e-06, "loss": 0.1956, "step": 4218 }, { "epoch": 0.12308186008518583, "grad_norm": 0.8329010322263537, "learning_rate": 9.774589839720649e-06, "loss": 0.1758, "step": 4219 }, { "epoch": 0.1231110333158294, "grad_norm": 0.7920640763112636, "learning_rate": 9.774449566977027e-06, "loss": 0.1977, "step": 4220 }, { "epoch": 0.12314020654647295, "grad_norm": 1.008236458276534, "learning_rate": 9.774309251608259e-06, "loss": 0.2016, "step": 4221 }, { "epoch": 0.12316937977711652, "grad_norm": 0.9347482520652315, "learning_rate": 9.774168893615597e-06, "loss": 0.1833, "step": 4222 }, { "epoch": 0.12319855300776007, "grad_norm": 0.8640234539806687, "learning_rate": 9.774028493000295e-06, "loss": 0.1681, "step": 4223 }, { "epoch": 0.12322772623840364, "grad_norm": 1.1631087202336545, "learning_rate": 9.773888049763606e-06, "loss": 0.2026, "step": 4224 }, { "epoch": 0.12325689946904721, "grad_norm": 1.0334941698045452, "learning_rate": 9.773747563906785e-06, "loss": 0.1759, "step": 4225 }, { "epoch": 0.12328607269969076, "grad_norm": 0.9064466530876436, "learning_rate": 9.773607035431085e-06, "loss": 0.1854, "step": 4226 }, { "epoch": 0.12331524593033433, "grad_norm": 1.819916819047121, "learning_rate": 9.77346646433776e-06, "loss": 0.1936, "step": 4227 }, { "epoch": 0.12334441916097788, "grad_norm": 0.9761879810370655, "learning_rate": 9.773325850628069e-06, "loss": 0.1885, "step": 4228 }, { "epoch": 0.12337359239162145, "grad_norm": 0.7388657787543375, "learning_rate": 9.77318519430326e-06, "loss": 0.1658, "step": 4229 }, { "epoch": 0.12340276562226501, "grad_norm": 0.991855344225325, "learning_rate": 9.773044495364596e-06, "loss": 0.1685, "step": 4230 }, { "epoch": 0.12343193885290857, "grad_norm": 0.8834022854048016, "learning_rate": 9.77290375381333e-06, "loss": 0.2044, "step": 4231 }, { "epoch": 0.12346111208355214, "grad_norm": 0.8811956457462701, "learning_rate": 9.772762969650717e-06, "loss": 0.2013, "step": 4232 }, { "epoch": 0.12349028531419569, "grad_norm": 0.9784935254653434, "learning_rate": 9.772622142878016e-06, "loss": 0.1691, "step": 4233 }, { "epoch": 0.12351945854483926, "grad_norm": 0.8224157593540263, "learning_rate": 9.772481273496486e-06, "loss": 0.1745, "step": 4234 }, { "epoch": 0.12354863177548282, "grad_norm": 0.9826511369228231, "learning_rate": 9.77234036150738e-06, "loss": 0.1704, "step": 4235 }, { "epoch": 0.12357780500612638, "grad_norm": 0.8707132641162341, "learning_rate": 9.77219940691196e-06, "loss": 0.1594, "step": 4236 }, { "epoch": 0.12360697823676994, "grad_norm": 0.8486161938388265, "learning_rate": 9.77205840971148e-06, "loss": 0.2133, "step": 4237 }, { "epoch": 0.1236361514674135, "grad_norm": 0.9949223452883215, "learning_rate": 9.771917369907206e-06, "loss": 0.18, "step": 4238 }, { "epoch": 0.12366532469805706, "grad_norm": 1.0058494158528097, "learning_rate": 9.77177628750039e-06, "loss": 0.2059, "step": 4239 }, { "epoch": 0.12369449792870063, "grad_norm": 0.9213454019566658, "learning_rate": 9.771635162492296e-06, "loss": 0.1639, "step": 4240 }, { "epoch": 0.12372367115934418, "grad_norm": 1.0291911621537848, "learning_rate": 9.771493994884182e-06, "loss": 0.1898, "step": 4241 }, { "epoch": 0.12375284438998775, "grad_norm": 0.8649075291704439, "learning_rate": 9.771352784677309e-06, "loss": 0.1783, "step": 4242 }, { "epoch": 0.1237820176206313, "grad_norm": 0.8785762286609688, "learning_rate": 9.771211531872935e-06, "loss": 0.1514, "step": 4243 }, { "epoch": 0.12381119085127487, "grad_norm": 0.823081519999211, "learning_rate": 9.771070236472324e-06, "loss": 0.1826, "step": 4244 }, { "epoch": 0.12384036408191842, "grad_norm": 0.7215599451247365, "learning_rate": 9.77092889847674e-06, "loss": 0.1731, "step": 4245 }, { "epoch": 0.12386953731256199, "grad_norm": 0.9713610710868089, "learning_rate": 9.770787517887439e-06, "loss": 0.1993, "step": 4246 }, { "epoch": 0.12389871054320556, "grad_norm": 0.8715957915372682, "learning_rate": 9.770646094705687e-06, "loss": 0.1883, "step": 4247 }, { "epoch": 0.12392788377384911, "grad_norm": 0.8420458576843002, "learning_rate": 9.770504628932744e-06, "loss": 0.1868, "step": 4248 }, { "epoch": 0.12395705700449268, "grad_norm": 0.9451975101715953, "learning_rate": 9.770363120569876e-06, "loss": 0.191, "step": 4249 }, { "epoch": 0.12398623023513623, "grad_norm": 0.9607044740168983, "learning_rate": 9.770221569618343e-06, "loss": 0.1633, "step": 4250 }, { "epoch": 0.1240154034657798, "grad_norm": 0.7286356854872065, "learning_rate": 9.770079976079414e-06, "loss": 0.1686, "step": 4251 }, { "epoch": 0.12404457669642337, "grad_norm": 0.8874022088049638, "learning_rate": 9.769938339954347e-06, "loss": 0.1866, "step": 4252 }, { "epoch": 0.12407374992706692, "grad_norm": 1.0625998134826966, "learning_rate": 9.76979666124441e-06, "loss": 0.1889, "step": 4253 }, { "epoch": 0.12410292315771049, "grad_norm": 0.8184826542103033, "learning_rate": 9.769654939950866e-06, "loss": 0.1633, "step": 4254 }, { "epoch": 0.12413209638835404, "grad_norm": 1.2131584622111504, "learning_rate": 9.769513176074982e-06, "loss": 0.1826, "step": 4255 }, { "epoch": 0.12416126961899761, "grad_norm": 0.9988584186668532, "learning_rate": 9.769371369618023e-06, "loss": 0.1853, "step": 4256 }, { "epoch": 0.12419044284964117, "grad_norm": 1.180325587882098, "learning_rate": 9.769229520581256e-06, "loss": 0.1509, "step": 4257 }, { "epoch": 0.12421961608028473, "grad_norm": 0.7234165745053014, "learning_rate": 9.769087628965945e-06, "loss": 0.1889, "step": 4258 }, { "epoch": 0.1242487893109283, "grad_norm": 0.8663462398173085, "learning_rate": 9.768945694773358e-06, "loss": 0.173, "step": 4259 }, { "epoch": 0.12427796254157185, "grad_norm": 0.7336133954693259, "learning_rate": 9.768803718004764e-06, "loss": 0.182, "step": 4260 }, { "epoch": 0.12430713577221542, "grad_norm": 0.7633816702741599, "learning_rate": 9.768661698661427e-06, "loss": 0.1595, "step": 4261 }, { "epoch": 0.12433630900285898, "grad_norm": 0.89792804587649, "learning_rate": 9.768519636744618e-06, "loss": 0.1715, "step": 4262 }, { "epoch": 0.12436548223350254, "grad_norm": 0.8208558511054609, "learning_rate": 9.768377532255602e-06, "loss": 0.2037, "step": 4263 }, { "epoch": 0.1243946554641461, "grad_norm": 1.213932174256665, "learning_rate": 9.768235385195653e-06, "loss": 0.1795, "step": 4264 }, { "epoch": 0.12442382869478966, "grad_norm": 0.7904329914917131, "learning_rate": 9.768093195566033e-06, "loss": 0.1611, "step": 4265 }, { "epoch": 0.12445300192543322, "grad_norm": 0.7345958432260812, "learning_rate": 9.767950963368018e-06, "loss": 0.1863, "step": 4266 }, { "epoch": 0.12448217515607679, "grad_norm": 0.872169523460647, "learning_rate": 9.767808688602873e-06, "loss": 0.159, "step": 4267 }, { "epoch": 0.12451134838672034, "grad_norm": 0.8695611591130454, "learning_rate": 9.767666371271871e-06, "loss": 0.1784, "step": 4268 }, { "epoch": 0.12454052161736391, "grad_norm": 0.6730778413023714, "learning_rate": 9.767524011376283e-06, "loss": 0.174, "step": 4269 }, { "epoch": 0.12456969484800746, "grad_norm": 0.9220896534051399, "learning_rate": 9.767381608917377e-06, "loss": 0.1973, "step": 4270 }, { "epoch": 0.12459886807865103, "grad_norm": 0.871915000359137, "learning_rate": 9.767239163896427e-06, "loss": 0.1691, "step": 4271 }, { "epoch": 0.12462804130929458, "grad_norm": 0.8734391780543423, "learning_rate": 9.767096676314703e-06, "loss": 0.194, "step": 4272 }, { "epoch": 0.12465721453993815, "grad_norm": 0.8891960491574288, "learning_rate": 9.76695414617348e-06, "loss": 0.1925, "step": 4273 }, { "epoch": 0.12468638777058172, "grad_norm": 0.8286255539685276, "learning_rate": 9.766811573474026e-06, "loss": 0.1743, "step": 4274 }, { "epoch": 0.12471556100122527, "grad_norm": 1.1132476951309755, "learning_rate": 9.766668958217617e-06, "loss": 0.1531, "step": 4275 }, { "epoch": 0.12474473423186884, "grad_norm": 0.9154750259971727, "learning_rate": 9.766526300405525e-06, "loss": 0.1677, "step": 4276 }, { "epoch": 0.12477390746251239, "grad_norm": 0.9013459160814709, "learning_rate": 9.766383600039025e-06, "loss": 0.1823, "step": 4277 }, { "epoch": 0.12480308069315596, "grad_norm": 0.9060290447522402, "learning_rate": 9.76624085711939e-06, "loss": 0.1581, "step": 4278 }, { "epoch": 0.12483225392379953, "grad_norm": 0.9803903375051782, "learning_rate": 9.766098071647892e-06, "loss": 0.1683, "step": 4279 }, { "epoch": 0.12486142715444308, "grad_norm": 0.8788098055543385, "learning_rate": 9.765955243625811e-06, "loss": 0.2036, "step": 4280 }, { "epoch": 0.12489060038508665, "grad_norm": 0.9587287730384955, "learning_rate": 9.765812373054418e-06, "loss": 0.1957, "step": 4281 }, { "epoch": 0.1249197736157302, "grad_norm": 0.9823218003703186, "learning_rate": 9.76566945993499e-06, "loss": 0.1852, "step": 4282 }, { "epoch": 0.12494894684637377, "grad_norm": 1.0552717781488987, "learning_rate": 9.765526504268803e-06, "loss": 0.1767, "step": 4283 }, { "epoch": 0.12497812007701734, "grad_norm": 1.0440471798962738, "learning_rate": 9.765383506057134e-06, "loss": 0.1985, "step": 4284 }, { "epoch": 0.1250072933076609, "grad_norm": 0.8833607271954884, "learning_rate": 9.765240465301256e-06, "loss": 0.1695, "step": 4285 }, { "epoch": 0.12503646653830444, "grad_norm": 1.0199050563499885, "learning_rate": 9.765097382002451e-06, "loss": 0.202, "step": 4286 }, { "epoch": 0.12506563976894802, "grad_norm": 1.00389863995398, "learning_rate": 9.764954256161994e-06, "loss": 0.1864, "step": 4287 }, { "epoch": 0.12509481299959158, "grad_norm": 1.0983481360195875, "learning_rate": 9.76481108778116e-06, "loss": 0.1737, "step": 4288 }, { "epoch": 0.12512398623023513, "grad_norm": 0.9268256651823165, "learning_rate": 9.764667876861234e-06, "loss": 0.1745, "step": 4289 }, { "epoch": 0.1251531594608787, "grad_norm": 0.7470529526993203, "learning_rate": 9.764524623403488e-06, "loss": 0.1788, "step": 4290 }, { "epoch": 0.12518233269152226, "grad_norm": 1.1216784539669975, "learning_rate": 9.764381327409204e-06, "loss": 0.176, "step": 4291 }, { "epoch": 0.12521150592216582, "grad_norm": 0.9260657882814729, "learning_rate": 9.764237988879663e-06, "loss": 0.1994, "step": 4292 }, { "epoch": 0.12524067915280937, "grad_norm": 0.8801649593751968, "learning_rate": 9.76409460781614e-06, "loss": 0.1698, "step": 4293 }, { "epoch": 0.12526985238345295, "grad_norm": 0.7524840407802879, "learning_rate": 9.76395118421992e-06, "loss": 0.1665, "step": 4294 }, { "epoch": 0.1252990256140965, "grad_norm": 0.9182771897339053, "learning_rate": 9.763807718092278e-06, "loss": 0.203, "step": 4295 }, { "epoch": 0.12532819884474006, "grad_norm": 0.9437971667408273, "learning_rate": 9.7636642094345e-06, "loss": 0.1744, "step": 4296 }, { "epoch": 0.12535737207538364, "grad_norm": 0.8554139785337814, "learning_rate": 9.763520658247866e-06, "loss": 0.1656, "step": 4297 }, { "epoch": 0.1253865453060272, "grad_norm": 1.0251410191592603, "learning_rate": 9.763377064533654e-06, "loss": 0.1814, "step": 4298 }, { "epoch": 0.12541571853667074, "grad_norm": 0.8584107009287739, "learning_rate": 9.76323342829315e-06, "loss": 0.1878, "step": 4299 }, { "epoch": 0.12544489176731433, "grad_norm": 1.298075008577799, "learning_rate": 9.763089749527635e-06, "loss": 0.1914, "step": 4300 }, { "epoch": 0.12547406499795788, "grad_norm": 0.8654377573473938, "learning_rate": 9.762946028238391e-06, "loss": 0.1741, "step": 4301 }, { "epoch": 0.12550323822860143, "grad_norm": 0.7753543465818352, "learning_rate": 9.762802264426703e-06, "loss": 0.1893, "step": 4302 }, { "epoch": 0.12553241145924499, "grad_norm": 1.3344574668330917, "learning_rate": 9.762658458093852e-06, "loss": 0.1782, "step": 4303 }, { "epoch": 0.12556158468988857, "grad_norm": 0.9834481709401277, "learning_rate": 9.762514609241124e-06, "loss": 0.1892, "step": 4304 }, { "epoch": 0.12559075792053212, "grad_norm": 0.8555403034672496, "learning_rate": 9.762370717869804e-06, "loss": 0.1835, "step": 4305 }, { "epoch": 0.12561993115117567, "grad_norm": 1.0901077663869283, "learning_rate": 9.762226783981174e-06, "loss": 0.1929, "step": 4306 }, { "epoch": 0.12564910438181925, "grad_norm": 0.9769178606454748, "learning_rate": 9.762082807576518e-06, "loss": 0.1909, "step": 4307 }, { "epoch": 0.1256782776124628, "grad_norm": 0.7929454063432803, "learning_rate": 9.761938788657127e-06, "loss": 0.1983, "step": 4308 }, { "epoch": 0.12570745084310636, "grad_norm": 0.8921832339132862, "learning_rate": 9.761794727224281e-06, "loss": 0.2156, "step": 4309 }, { "epoch": 0.12573662407374991, "grad_norm": 0.8545715424189586, "learning_rate": 9.761650623279269e-06, "loss": 0.1535, "step": 4310 }, { "epoch": 0.1257657973043935, "grad_norm": 0.8778341189523261, "learning_rate": 9.761506476823377e-06, "loss": 0.1788, "step": 4311 }, { "epoch": 0.12579497053503705, "grad_norm": 0.7884318379167016, "learning_rate": 9.761362287857891e-06, "loss": 0.1773, "step": 4312 }, { "epoch": 0.1258241437656806, "grad_norm": 0.8426840395974396, "learning_rate": 9.761218056384102e-06, "loss": 0.1728, "step": 4313 }, { "epoch": 0.12585331699632418, "grad_norm": 0.9073579849848015, "learning_rate": 9.761073782403291e-06, "loss": 0.1814, "step": 4314 }, { "epoch": 0.12588249022696774, "grad_norm": 0.9179197291915331, "learning_rate": 9.760929465916752e-06, "loss": 0.1588, "step": 4315 }, { "epoch": 0.1259116634576113, "grad_norm": 0.8223070944303513, "learning_rate": 9.76078510692577e-06, "loss": 0.1725, "step": 4316 }, { "epoch": 0.12594083668825487, "grad_norm": 0.9131029632198148, "learning_rate": 9.760640705431636e-06, "loss": 0.1931, "step": 4317 }, { "epoch": 0.12597000991889842, "grad_norm": 0.8772096245229108, "learning_rate": 9.76049626143564e-06, "loss": 0.1472, "step": 4318 }, { "epoch": 0.12599918314954198, "grad_norm": 0.7007596844463523, "learning_rate": 9.760351774939068e-06, "loss": 0.1809, "step": 4319 }, { "epoch": 0.12602835638018553, "grad_norm": 0.8300390578037848, "learning_rate": 9.76020724594321e-06, "loss": 0.1905, "step": 4320 }, { "epoch": 0.1260575296108291, "grad_norm": 1.0328419104468722, "learning_rate": 9.76006267444936e-06, "loss": 0.1781, "step": 4321 }, { "epoch": 0.12608670284147266, "grad_norm": 0.7480386916897522, "learning_rate": 9.759918060458807e-06, "loss": 0.1619, "step": 4322 }, { "epoch": 0.12611587607211622, "grad_norm": 0.8698351781302592, "learning_rate": 9.75977340397284e-06, "loss": 0.1793, "step": 4323 }, { "epoch": 0.1261450493027598, "grad_norm": 0.8172360464952455, "learning_rate": 9.759628704992754e-06, "loss": 0.1735, "step": 4324 }, { "epoch": 0.12617422253340335, "grad_norm": 0.8593913726772656, "learning_rate": 9.75948396351984e-06, "loss": 0.1738, "step": 4325 }, { "epoch": 0.1262033957640469, "grad_norm": 0.8322858175517441, "learning_rate": 9.759339179555387e-06, "loss": 0.1669, "step": 4326 }, { "epoch": 0.12623256899469046, "grad_norm": 0.8524071023669001, "learning_rate": 9.759194353100692e-06, "loss": 0.1765, "step": 4327 }, { "epoch": 0.12626174222533404, "grad_norm": 0.8115798583704614, "learning_rate": 9.759049484157045e-06, "loss": 0.1711, "step": 4328 }, { "epoch": 0.1262909154559776, "grad_norm": 0.830874898071875, "learning_rate": 9.758904572725739e-06, "loss": 0.1997, "step": 4329 }, { "epoch": 0.12632008868662115, "grad_norm": 0.8784167964965695, "learning_rate": 9.758759618808071e-06, "loss": 0.1765, "step": 4330 }, { "epoch": 0.12634926191726473, "grad_norm": 0.7548423758563226, "learning_rate": 9.75861462240533e-06, "loss": 0.1848, "step": 4331 }, { "epoch": 0.12637843514790828, "grad_norm": 0.844675211204546, "learning_rate": 9.758469583518819e-06, "loss": 0.1617, "step": 4332 }, { "epoch": 0.12640760837855183, "grad_norm": 0.8814391936112137, "learning_rate": 9.758324502149824e-06, "loss": 0.1688, "step": 4333 }, { "epoch": 0.12643678160919541, "grad_norm": 0.9049808953261231, "learning_rate": 9.758179378299644e-06, "loss": 0.1648, "step": 4334 }, { "epoch": 0.12646595483983897, "grad_norm": 0.941452688339845, "learning_rate": 9.758034211969573e-06, "loss": 0.1932, "step": 4335 }, { "epoch": 0.12649512807048252, "grad_norm": 0.7424095216186802, "learning_rate": 9.757889003160912e-06, "loss": 0.1624, "step": 4336 }, { "epoch": 0.12652430130112607, "grad_norm": 0.853858928136336, "learning_rate": 9.757743751874951e-06, "loss": 0.1705, "step": 4337 }, { "epoch": 0.12655347453176966, "grad_norm": 0.83593321941497, "learning_rate": 9.757598458112991e-06, "loss": 0.1698, "step": 4338 }, { "epoch": 0.1265826477624132, "grad_norm": 0.7729304014754222, "learning_rate": 9.757453121876327e-06, "loss": 0.1504, "step": 4339 }, { "epoch": 0.12661182099305676, "grad_norm": 1.2136177458516852, "learning_rate": 9.757307743166259e-06, "loss": 0.1568, "step": 4340 }, { "epoch": 0.12664099422370034, "grad_norm": 0.8488242378339931, "learning_rate": 9.757162321984079e-06, "loss": 0.1737, "step": 4341 }, { "epoch": 0.1266701674543439, "grad_norm": 0.8264301337138541, "learning_rate": 9.757016858331092e-06, "loss": 0.1948, "step": 4342 }, { "epoch": 0.12669934068498745, "grad_norm": 1.1684518053757948, "learning_rate": 9.756871352208594e-06, "loss": 0.1746, "step": 4343 }, { "epoch": 0.12672851391563103, "grad_norm": 0.9713931040968758, "learning_rate": 9.756725803617883e-06, "loss": 0.2024, "step": 4344 }, { "epoch": 0.12675768714627458, "grad_norm": 0.903833122560911, "learning_rate": 9.756580212560261e-06, "loss": 0.1805, "step": 4345 }, { "epoch": 0.12678686037691814, "grad_norm": 1.0932615376906702, "learning_rate": 9.756434579037027e-06, "loss": 0.1978, "step": 4346 }, { "epoch": 0.1268160336075617, "grad_norm": 1.019024004897646, "learning_rate": 9.75628890304948e-06, "loss": 0.1823, "step": 4347 }, { "epoch": 0.12684520683820527, "grad_norm": 0.7929642521823096, "learning_rate": 9.756143184598919e-06, "loss": 0.182, "step": 4348 }, { "epoch": 0.12687438006884882, "grad_norm": 0.839591692305866, "learning_rate": 9.755997423686649e-06, "loss": 0.1728, "step": 4349 }, { "epoch": 0.12690355329949238, "grad_norm": 0.9423603267135233, "learning_rate": 9.75585162031397e-06, "loss": 0.1765, "step": 4350 }, { "epoch": 0.12693272653013596, "grad_norm": 0.8079607281592949, "learning_rate": 9.75570577448218e-06, "loss": 0.1857, "step": 4351 }, { "epoch": 0.1269618997607795, "grad_norm": 0.8822615224554138, "learning_rate": 9.755559886192586e-06, "loss": 0.1741, "step": 4352 }, { "epoch": 0.12699107299142307, "grad_norm": 0.6578728844839694, "learning_rate": 9.75541395544649e-06, "loss": 0.1711, "step": 4353 }, { "epoch": 0.12702024622206662, "grad_norm": 0.7544640852412733, "learning_rate": 9.755267982245192e-06, "loss": 0.1746, "step": 4354 }, { "epoch": 0.1270494194527102, "grad_norm": 0.8261389581100625, "learning_rate": 9.755121966589996e-06, "loss": 0.1552, "step": 4355 }, { "epoch": 0.12707859268335375, "grad_norm": 0.7913161637905148, "learning_rate": 9.754975908482207e-06, "loss": 0.1747, "step": 4356 }, { "epoch": 0.1271077659139973, "grad_norm": 0.8225646514876421, "learning_rate": 9.75482980792313e-06, "loss": 0.1879, "step": 4357 }, { "epoch": 0.1271369391446409, "grad_norm": 0.876059043782913, "learning_rate": 9.754683664914064e-06, "loss": 0.2059, "step": 4358 }, { "epoch": 0.12716611237528444, "grad_norm": 0.8546291935416853, "learning_rate": 9.75453747945632e-06, "loss": 0.169, "step": 4359 }, { "epoch": 0.127195285605928, "grad_norm": 0.9040342126127462, "learning_rate": 9.754391251551199e-06, "loss": 0.1804, "step": 4360 }, { "epoch": 0.12722445883657157, "grad_norm": 0.7900388267349989, "learning_rate": 9.754244981200007e-06, "loss": 0.1708, "step": 4361 }, { "epoch": 0.12725363206721513, "grad_norm": 0.7686265863281533, "learning_rate": 9.754098668404053e-06, "loss": 0.1904, "step": 4362 }, { "epoch": 0.12728280529785868, "grad_norm": 0.8614124048181928, "learning_rate": 9.753952313164639e-06, "loss": 0.1962, "step": 4363 }, { "epoch": 0.12731197852850223, "grad_norm": 0.8364488152172986, "learning_rate": 9.753805915483076e-06, "loss": 0.1648, "step": 4364 }, { "epoch": 0.12734115175914582, "grad_norm": 1.1712213109837195, "learning_rate": 9.753659475360666e-06, "loss": 0.1745, "step": 4365 }, { "epoch": 0.12737032498978937, "grad_norm": 0.823577484205447, "learning_rate": 9.75351299279872e-06, "loss": 0.2046, "step": 4366 }, { "epoch": 0.12739949822043292, "grad_norm": 0.8274937620034111, "learning_rate": 9.753366467798545e-06, "loss": 0.1854, "step": 4367 }, { "epoch": 0.1274286714510765, "grad_norm": 0.7816174048127913, "learning_rate": 9.753219900361449e-06, "loss": 0.1667, "step": 4368 }, { "epoch": 0.12745784468172006, "grad_norm": 0.9266467808636973, "learning_rate": 9.75307329048874e-06, "loss": 0.1933, "step": 4369 }, { "epoch": 0.1274870179123636, "grad_norm": 0.8210069172112148, "learning_rate": 9.752926638181728e-06, "loss": 0.1567, "step": 4370 }, { "epoch": 0.1275161911430072, "grad_norm": 0.8234231612323969, "learning_rate": 9.75277994344172e-06, "loss": 0.1563, "step": 4371 }, { "epoch": 0.12754536437365074, "grad_norm": 0.9439437784972723, "learning_rate": 9.75263320627003e-06, "loss": 0.1654, "step": 4372 }, { "epoch": 0.1275745376042943, "grad_norm": 1.0884616616947242, "learning_rate": 9.752486426667963e-06, "loss": 0.187, "step": 4373 }, { "epoch": 0.12760371083493785, "grad_norm": 1.0232977420602125, "learning_rate": 9.752339604636832e-06, "loss": 0.1978, "step": 4374 }, { "epoch": 0.12763288406558143, "grad_norm": 0.8515421355954126, "learning_rate": 9.752192740177948e-06, "loss": 0.2103, "step": 4375 }, { "epoch": 0.12766205729622498, "grad_norm": 0.8976042839791545, "learning_rate": 9.752045833292622e-06, "loss": 0.167, "step": 4376 }, { "epoch": 0.12769123052686854, "grad_norm": 1.000535780969401, "learning_rate": 9.751898883982164e-06, "loss": 0.1835, "step": 4377 }, { "epoch": 0.12772040375751212, "grad_norm": 0.8509745941678003, "learning_rate": 9.751751892247888e-06, "loss": 0.2075, "step": 4378 }, { "epoch": 0.12774957698815567, "grad_norm": 1.074620987838019, "learning_rate": 9.751604858091106e-06, "loss": 0.2284, "step": 4379 }, { "epoch": 0.12777875021879923, "grad_norm": 0.8236903064117794, "learning_rate": 9.75145778151313e-06, "loss": 0.1752, "step": 4380 }, { "epoch": 0.12780792344944278, "grad_norm": 0.9561425146550531, "learning_rate": 9.751310662515271e-06, "loss": 0.1694, "step": 4381 }, { "epoch": 0.12783709668008636, "grad_norm": 1.0155410375377456, "learning_rate": 9.751163501098847e-06, "loss": 0.1628, "step": 4382 }, { "epoch": 0.1278662699107299, "grad_norm": 0.8281766624317595, "learning_rate": 9.751016297265168e-06, "loss": 0.1641, "step": 4383 }, { "epoch": 0.12789544314137347, "grad_norm": 1.0099441175090207, "learning_rate": 9.75086905101555e-06, "loss": 0.1824, "step": 4384 }, { "epoch": 0.12792461637201705, "grad_norm": 0.7999313130963264, "learning_rate": 9.750721762351308e-06, "loss": 0.1763, "step": 4385 }, { "epoch": 0.1279537896026606, "grad_norm": 1.054702610948299, "learning_rate": 9.750574431273756e-06, "loss": 0.1679, "step": 4386 }, { "epoch": 0.12798296283330415, "grad_norm": 0.9946926871128318, "learning_rate": 9.75042705778421e-06, "loss": 0.1783, "step": 4387 }, { "epoch": 0.12801213606394773, "grad_norm": 0.7788516142655981, "learning_rate": 9.750279641883985e-06, "loss": 0.1932, "step": 4388 }, { "epoch": 0.1280413092945913, "grad_norm": 1.0465756204115466, "learning_rate": 9.750132183574395e-06, "loss": 0.1718, "step": 4389 }, { "epoch": 0.12807048252523484, "grad_norm": 0.9532329728436199, "learning_rate": 9.749984682856762e-06, "loss": 0.1865, "step": 4390 }, { "epoch": 0.1280996557558784, "grad_norm": 0.8816784919587766, "learning_rate": 9.749837139732398e-06, "loss": 0.1822, "step": 4391 }, { "epoch": 0.12812882898652198, "grad_norm": 1.217591959923788, "learning_rate": 9.749689554202621e-06, "loss": 0.1874, "step": 4392 }, { "epoch": 0.12815800221716553, "grad_norm": 0.9820674644439095, "learning_rate": 9.74954192626875e-06, "loss": 0.1637, "step": 4393 }, { "epoch": 0.12818717544780908, "grad_norm": 0.7625453602481914, "learning_rate": 9.749394255932105e-06, "loss": 0.1606, "step": 4394 }, { "epoch": 0.12821634867845266, "grad_norm": 1.0725278108650127, "learning_rate": 9.749246543194e-06, "loss": 0.208, "step": 4395 }, { "epoch": 0.12824552190909622, "grad_norm": 0.731477959555322, "learning_rate": 9.749098788055755e-06, "loss": 0.1826, "step": 4396 }, { "epoch": 0.12827469513973977, "grad_norm": 0.881191244196159, "learning_rate": 9.748950990518691e-06, "loss": 0.2128, "step": 4397 }, { "epoch": 0.12830386837038332, "grad_norm": 1.0291776455260686, "learning_rate": 9.748803150584125e-06, "loss": 0.1821, "step": 4398 }, { "epoch": 0.1283330416010269, "grad_norm": 0.935685919356452, "learning_rate": 9.74865526825338e-06, "loss": 0.1605, "step": 4399 }, { "epoch": 0.12836221483167046, "grad_norm": 0.8613374160537525, "learning_rate": 9.748507343527772e-06, "loss": 0.1797, "step": 4400 }, { "epoch": 0.128391388062314, "grad_norm": 0.9866279611483154, "learning_rate": 9.748359376408625e-06, "loss": 0.1987, "step": 4401 }, { "epoch": 0.1284205612929576, "grad_norm": 0.8524349648703374, "learning_rate": 9.74821136689726e-06, "loss": 0.1823, "step": 4402 }, { "epoch": 0.12844973452360114, "grad_norm": 0.8114088040798733, "learning_rate": 9.748063314994995e-06, "loss": 0.1784, "step": 4403 }, { "epoch": 0.1284789077542447, "grad_norm": 1.0120858097085856, "learning_rate": 9.747915220703157e-06, "loss": 0.1882, "step": 4404 }, { "epoch": 0.12850808098488828, "grad_norm": 0.906713513376724, "learning_rate": 9.747767084023063e-06, "loss": 0.1698, "step": 4405 }, { "epoch": 0.12853725421553183, "grad_norm": 0.808284806868891, "learning_rate": 9.74761890495604e-06, "loss": 0.1788, "step": 4406 }, { "epoch": 0.12856642744617539, "grad_norm": 0.9157060587186935, "learning_rate": 9.747470683503407e-06, "loss": 0.1552, "step": 4407 }, { "epoch": 0.12859560067681894, "grad_norm": 0.7299765371223875, "learning_rate": 9.74732241966649e-06, "loss": 0.1783, "step": 4408 }, { "epoch": 0.12862477390746252, "grad_norm": 0.794202062630329, "learning_rate": 9.747174113446612e-06, "loss": 0.1805, "step": 4409 }, { "epoch": 0.12865394713810607, "grad_norm": 0.9529590201063353, "learning_rate": 9.747025764845095e-06, "loss": 0.1579, "step": 4410 }, { "epoch": 0.12868312036874963, "grad_norm": 0.815325678211468, "learning_rate": 9.746877373863265e-06, "loss": 0.173, "step": 4411 }, { "epoch": 0.1287122935993932, "grad_norm": 0.7731034448423537, "learning_rate": 9.74672894050245e-06, "loss": 0.1658, "step": 4412 }, { "epoch": 0.12874146683003676, "grad_norm": 0.9692188961092176, "learning_rate": 9.74658046476397e-06, "loss": 0.1806, "step": 4413 }, { "epoch": 0.1287706400606803, "grad_norm": 0.8441353019429177, "learning_rate": 9.746431946649153e-06, "loss": 0.1703, "step": 4414 }, { "epoch": 0.1287998132913239, "grad_norm": 1.4495784388670117, "learning_rate": 9.746283386159326e-06, "loss": 0.1687, "step": 4415 }, { "epoch": 0.12882898652196745, "grad_norm": 0.844591601552637, "learning_rate": 9.746134783295813e-06, "loss": 0.1833, "step": 4416 }, { "epoch": 0.128858159752611, "grad_norm": 0.8216297326240757, "learning_rate": 9.745986138059941e-06, "loss": 0.183, "step": 4417 }, { "epoch": 0.12888733298325455, "grad_norm": 0.7837085449321946, "learning_rate": 9.745837450453036e-06, "loss": 0.1558, "step": 4418 }, { "epoch": 0.12891650621389814, "grad_norm": 0.9203670229578482, "learning_rate": 9.745688720476431e-06, "loss": 0.171, "step": 4419 }, { "epoch": 0.1289456794445417, "grad_norm": 0.9020978895465857, "learning_rate": 9.745539948131449e-06, "loss": 0.1813, "step": 4420 }, { "epoch": 0.12897485267518524, "grad_norm": 0.8931646113379528, "learning_rate": 9.745391133419417e-06, "loss": 0.2127, "step": 4421 }, { "epoch": 0.12900402590582882, "grad_norm": 1.1000981484583716, "learning_rate": 9.74524227634167e-06, "loss": 0.16, "step": 4422 }, { "epoch": 0.12903319913647238, "grad_norm": 1.0698500351691078, "learning_rate": 9.745093376899528e-06, "loss": 0.1701, "step": 4423 }, { "epoch": 0.12906237236711593, "grad_norm": 0.9110995597732715, "learning_rate": 9.744944435094327e-06, "loss": 0.2185, "step": 4424 }, { "epoch": 0.12909154559775948, "grad_norm": 0.9159848910732172, "learning_rate": 9.744795450927395e-06, "loss": 0.2024, "step": 4425 }, { "epoch": 0.12912071882840306, "grad_norm": 0.8734090453282745, "learning_rate": 9.744646424400062e-06, "loss": 0.1625, "step": 4426 }, { "epoch": 0.12914989205904662, "grad_norm": 0.950467843389007, "learning_rate": 9.744497355513658e-06, "loss": 0.1709, "step": 4427 }, { "epoch": 0.12917906528969017, "grad_norm": 0.823200969360653, "learning_rate": 9.744348244269515e-06, "loss": 0.1609, "step": 4428 }, { "epoch": 0.12920823852033375, "grad_norm": 0.8237340072156272, "learning_rate": 9.744199090668963e-06, "loss": 0.1603, "step": 4429 }, { "epoch": 0.1292374117509773, "grad_norm": 0.8998333669220802, "learning_rate": 9.744049894713334e-06, "loss": 0.1796, "step": 4430 }, { "epoch": 0.12926658498162086, "grad_norm": 1.099354717803711, "learning_rate": 9.74390065640396e-06, "loss": 0.1642, "step": 4431 }, { "epoch": 0.12929575821226444, "grad_norm": 0.8097038713295855, "learning_rate": 9.743751375742171e-06, "loss": 0.1733, "step": 4432 }, { "epoch": 0.129324931442908, "grad_norm": 0.8592724449828005, "learning_rate": 9.743602052729307e-06, "loss": 0.1872, "step": 4433 }, { "epoch": 0.12935410467355155, "grad_norm": 1.0984868393219305, "learning_rate": 9.743452687366692e-06, "loss": 0.1914, "step": 4434 }, { "epoch": 0.1293832779041951, "grad_norm": 0.8896499347836703, "learning_rate": 9.743303279655666e-06, "loss": 0.1964, "step": 4435 }, { "epoch": 0.12941245113483868, "grad_norm": 1.0362742045145739, "learning_rate": 9.74315382959756e-06, "loss": 0.1828, "step": 4436 }, { "epoch": 0.12944162436548223, "grad_norm": 0.7021469494672792, "learning_rate": 9.743004337193708e-06, "loss": 0.1617, "step": 4437 }, { "epoch": 0.1294707975961258, "grad_norm": 0.8919651586134387, "learning_rate": 9.742854802445447e-06, "loss": 0.2237, "step": 4438 }, { "epoch": 0.12949997082676937, "grad_norm": 0.9760275214910902, "learning_rate": 9.74270522535411e-06, "loss": 0.1749, "step": 4439 }, { "epoch": 0.12952914405741292, "grad_norm": 0.856505135887569, "learning_rate": 9.742555605921033e-06, "loss": 0.177, "step": 4440 }, { "epoch": 0.12955831728805647, "grad_norm": 0.8350006804059159, "learning_rate": 9.742405944147552e-06, "loss": 0.1582, "step": 4441 }, { "epoch": 0.12958749051870003, "grad_norm": 0.7715643717784217, "learning_rate": 9.742256240035001e-06, "loss": 0.1942, "step": 4442 }, { "epoch": 0.1296166637493436, "grad_norm": 0.9894542110207976, "learning_rate": 9.74210649358472e-06, "loss": 0.2079, "step": 4443 }, { "epoch": 0.12964583697998716, "grad_norm": 0.9973062047101183, "learning_rate": 9.741956704798045e-06, "loss": 0.1907, "step": 4444 }, { "epoch": 0.12967501021063071, "grad_norm": 0.8975359268533455, "learning_rate": 9.741806873676311e-06, "loss": 0.1728, "step": 4445 }, { "epoch": 0.1297041834412743, "grad_norm": 0.9650392929427682, "learning_rate": 9.741657000220858e-06, "loss": 0.1688, "step": 4446 }, { "epoch": 0.12973335667191785, "grad_norm": 0.8087828182320042, "learning_rate": 9.741507084433024e-06, "loss": 0.1742, "step": 4447 }, { "epoch": 0.1297625299025614, "grad_norm": 0.8646356277152982, "learning_rate": 9.741357126314146e-06, "loss": 0.1692, "step": 4448 }, { "epoch": 0.12979170313320498, "grad_norm": 1.2893248191042816, "learning_rate": 9.741207125865562e-06, "loss": 0.199, "step": 4449 }, { "epoch": 0.12982087636384854, "grad_norm": 0.8472904394910414, "learning_rate": 9.741057083088614e-06, "loss": 0.1643, "step": 4450 }, { "epoch": 0.1298500495944921, "grad_norm": 0.6933867492193225, "learning_rate": 9.74090699798464e-06, "loss": 0.1597, "step": 4451 }, { "epoch": 0.12987922282513564, "grad_norm": 0.8737035311152308, "learning_rate": 9.740756870554979e-06, "loss": 0.1838, "step": 4452 }, { "epoch": 0.12990839605577922, "grad_norm": 0.7910643982888277, "learning_rate": 9.740606700800974e-06, "loss": 0.1497, "step": 4453 }, { "epoch": 0.12993756928642278, "grad_norm": 0.8199743996277233, "learning_rate": 9.740456488723964e-06, "loss": 0.1917, "step": 4454 }, { "epoch": 0.12996674251706633, "grad_norm": 0.7722978469757387, "learning_rate": 9.74030623432529e-06, "loss": 0.1695, "step": 4455 }, { "epoch": 0.1299959157477099, "grad_norm": 1.308533333466574, "learning_rate": 9.740155937606291e-06, "loss": 0.1709, "step": 4456 }, { "epoch": 0.13002508897835346, "grad_norm": 0.7990116703827624, "learning_rate": 9.740005598568314e-06, "loss": 0.1745, "step": 4457 }, { "epoch": 0.13005426220899702, "grad_norm": 1.0282780284630753, "learning_rate": 9.739855217212699e-06, "loss": 0.2187, "step": 4458 }, { "epoch": 0.1300834354396406, "grad_norm": 0.8502975395774706, "learning_rate": 9.739704793540786e-06, "loss": 0.1778, "step": 4459 }, { "epoch": 0.13011260867028415, "grad_norm": 0.9208482895620984, "learning_rate": 9.739554327553922e-06, "loss": 0.178, "step": 4460 }, { "epoch": 0.1301417819009277, "grad_norm": 0.9264866785624601, "learning_rate": 9.739403819253447e-06, "loss": 0.1888, "step": 4461 }, { "epoch": 0.13017095513157126, "grad_norm": 0.8794463549541848, "learning_rate": 9.739253268640705e-06, "loss": 0.1728, "step": 4462 }, { "epoch": 0.13020012836221484, "grad_norm": 0.9188176098897562, "learning_rate": 9.739102675717044e-06, "loss": 0.1627, "step": 4463 }, { "epoch": 0.1302293015928584, "grad_norm": 0.9793199473961062, "learning_rate": 9.738952040483804e-06, "loss": 0.178, "step": 4464 }, { "epoch": 0.13025847482350195, "grad_norm": 0.9646800594319975, "learning_rate": 9.738801362942332e-06, "loss": 0.1757, "step": 4465 }, { "epoch": 0.13028764805414553, "grad_norm": 1.1586724336458754, "learning_rate": 9.738650643093972e-06, "loss": 0.1992, "step": 4466 }, { "epoch": 0.13031682128478908, "grad_norm": 1.1308472705818482, "learning_rate": 9.738499880940071e-06, "loss": 0.167, "step": 4467 }, { "epoch": 0.13034599451543263, "grad_norm": 0.901168776397259, "learning_rate": 9.738349076481975e-06, "loss": 0.193, "step": 4468 }, { "epoch": 0.1303751677460762, "grad_norm": 0.9144738545975689, "learning_rate": 9.738198229721028e-06, "loss": 0.1824, "step": 4469 }, { "epoch": 0.13040434097671977, "grad_norm": 0.8880825342168955, "learning_rate": 9.738047340658578e-06, "loss": 0.2157, "step": 4470 }, { "epoch": 0.13043351420736332, "grad_norm": 0.7563688378989818, "learning_rate": 9.737896409295974e-06, "loss": 0.1791, "step": 4471 }, { "epoch": 0.13046268743800687, "grad_norm": 0.9776378723900304, "learning_rate": 9.73774543563456e-06, "loss": 0.1844, "step": 4472 }, { "epoch": 0.13049186066865046, "grad_norm": 0.7359968535887115, "learning_rate": 9.737594419675687e-06, "loss": 0.1737, "step": 4473 }, { "epoch": 0.130521033899294, "grad_norm": 0.7716440186722187, "learning_rate": 9.737443361420702e-06, "loss": 0.1718, "step": 4474 }, { "epoch": 0.13055020712993756, "grad_norm": 0.9520845386723726, "learning_rate": 9.737292260870954e-06, "loss": 0.1585, "step": 4475 }, { "epoch": 0.13057938036058114, "grad_norm": 0.721596082019839, "learning_rate": 9.737141118027791e-06, "loss": 0.1939, "step": 4476 }, { "epoch": 0.1306085535912247, "grad_norm": 0.8360220237465885, "learning_rate": 9.736989932892564e-06, "loss": 0.2028, "step": 4477 }, { "epoch": 0.13063772682186825, "grad_norm": 1.0491231446619385, "learning_rate": 9.73683870546662e-06, "loss": 0.1842, "step": 4478 }, { "epoch": 0.1306669000525118, "grad_norm": 0.8288794621008557, "learning_rate": 9.736687435751311e-06, "loss": 0.1732, "step": 4479 }, { "epoch": 0.13069607328315538, "grad_norm": 0.9218598572374194, "learning_rate": 9.736536123747989e-06, "loss": 0.1645, "step": 4480 }, { "epoch": 0.13072524651379894, "grad_norm": 0.8530519074164498, "learning_rate": 9.736384769458e-06, "loss": 0.1692, "step": 4481 }, { "epoch": 0.1307544197444425, "grad_norm": 0.8975258785313561, "learning_rate": 9.736233372882701e-06, "loss": 0.1797, "step": 4482 }, { "epoch": 0.13078359297508607, "grad_norm": 0.7132316423037498, "learning_rate": 9.73608193402344e-06, "loss": 0.184, "step": 4483 }, { "epoch": 0.13081276620572962, "grad_norm": 0.6603410548513219, "learning_rate": 9.735930452881571e-06, "loss": 0.1705, "step": 4484 }, { "epoch": 0.13084193943637318, "grad_norm": 0.9550476682958892, "learning_rate": 9.735778929458446e-06, "loss": 0.1967, "step": 4485 }, { "epoch": 0.13087111266701676, "grad_norm": 0.9924511364030971, "learning_rate": 9.735627363755415e-06, "loss": 0.1805, "step": 4486 }, { "epoch": 0.1309002858976603, "grad_norm": 0.887661578594931, "learning_rate": 9.735475755773836e-06, "loss": 0.1785, "step": 4487 }, { "epoch": 0.13092945912830387, "grad_norm": 0.7651320938224379, "learning_rate": 9.735324105515059e-06, "loss": 0.1823, "step": 4488 }, { "epoch": 0.13095863235894742, "grad_norm": 0.7714400056966344, "learning_rate": 9.735172412980439e-06, "loss": 0.1636, "step": 4489 }, { "epoch": 0.130987805589591, "grad_norm": 0.8132703483276422, "learning_rate": 9.735020678171327e-06, "loss": 0.1729, "step": 4490 }, { "epoch": 0.13101697882023455, "grad_norm": 0.7972538544908403, "learning_rate": 9.734868901089084e-06, "loss": 0.1672, "step": 4491 }, { "epoch": 0.1310461520508781, "grad_norm": 0.8735972701058112, "learning_rate": 9.734717081735061e-06, "loss": 0.1592, "step": 4492 }, { "epoch": 0.1310753252815217, "grad_norm": 0.9802666760505124, "learning_rate": 9.734565220110614e-06, "loss": 0.1874, "step": 4493 }, { "epoch": 0.13110449851216524, "grad_norm": 0.9545295838676167, "learning_rate": 9.7344133162171e-06, "loss": 0.1914, "step": 4494 }, { "epoch": 0.1311336717428088, "grad_norm": 0.7092996313604574, "learning_rate": 9.734261370055873e-06, "loss": 0.1794, "step": 4495 }, { "epoch": 0.13116284497345235, "grad_norm": 0.8287097252576615, "learning_rate": 9.734109381628289e-06, "loss": 0.1775, "step": 4496 }, { "epoch": 0.13119201820409593, "grad_norm": 0.9295568582263029, "learning_rate": 9.73395735093571e-06, "loss": 0.1621, "step": 4497 }, { "epoch": 0.13122119143473948, "grad_norm": 0.7091365935560728, "learning_rate": 9.733805277979488e-06, "loss": 0.1803, "step": 4498 }, { "epoch": 0.13125036466538303, "grad_norm": 0.8390669649611711, "learning_rate": 9.733653162760984e-06, "loss": 0.1791, "step": 4499 }, { "epoch": 0.13127953789602662, "grad_norm": 0.9664208370062155, "learning_rate": 9.733501005281552e-06, "loss": 0.1748, "step": 4500 }, { "epoch": 0.13130871112667017, "grad_norm": 0.8713257616137842, "learning_rate": 9.733348805542555e-06, "loss": 0.198, "step": 4501 }, { "epoch": 0.13133788435731372, "grad_norm": 1.0050067027036291, "learning_rate": 9.73319656354535e-06, "loss": 0.166, "step": 4502 }, { "epoch": 0.1313670575879573, "grad_norm": 0.8355747823438511, "learning_rate": 9.733044279291293e-06, "loss": 0.1673, "step": 4503 }, { "epoch": 0.13139623081860086, "grad_norm": 0.7444021287462947, "learning_rate": 9.73289195278175e-06, "loss": 0.1845, "step": 4504 }, { "epoch": 0.1314254040492444, "grad_norm": 0.9151521681337017, "learning_rate": 9.732739584018074e-06, "loss": 0.181, "step": 4505 }, { "epoch": 0.13145457727988796, "grad_norm": 0.7972987826597281, "learning_rate": 9.732587173001631e-06, "loss": 0.1592, "step": 4506 }, { "epoch": 0.13148375051053154, "grad_norm": 0.8646593079278234, "learning_rate": 9.732434719733782e-06, "loss": 0.1704, "step": 4507 }, { "epoch": 0.1315129237411751, "grad_norm": 0.8241084619506681, "learning_rate": 9.732282224215881e-06, "loss": 0.1901, "step": 4508 }, { "epoch": 0.13154209697181865, "grad_norm": 0.9892414621713331, "learning_rate": 9.732129686449296e-06, "loss": 0.1651, "step": 4509 }, { "epoch": 0.13157127020246223, "grad_norm": 0.9819990085102668, "learning_rate": 9.731977106435387e-06, "loss": 0.188, "step": 4510 }, { "epoch": 0.13160044343310578, "grad_norm": 0.9171421502044307, "learning_rate": 9.731824484175516e-06, "loss": 0.1876, "step": 4511 }, { "epoch": 0.13162961666374934, "grad_norm": 0.8218702361406134, "learning_rate": 9.731671819671045e-06, "loss": 0.179, "step": 4512 }, { "epoch": 0.1316587898943929, "grad_norm": 1.2322820415795661, "learning_rate": 9.731519112923338e-06, "loss": 0.2002, "step": 4513 }, { "epoch": 0.13168796312503647, "grad_norm": 0.9227080892573732, "learning_rate": 9.731366363933759e-06, "loss": 0.1608, "step": 4514 }, { "epoch": 0.13171713635568003, "grad_norm": 0.9087240830849821, "learning_rate": 9.731213572703668e-06, "loss": 0.1818, "step": 4515 }, { "epoch": 0.13174630958632358, "grad_norm": 0.8235324858729057, "learning_rate": 9.731060739234433e-06, "loss": 0.1703, "step": 4516 }, { "epoch": 0.13177548281696716, "grad_norm": 0.9362396933008768, "learning_rate": 9.730907863527417e-06, "loss": 0.1648, "step": 4517 }, { "epoch": 0.1318046560476107, "grad_norm": 1.1912283627279456, "learning_rate": 9.730754945583985e-06, "loss": 0.1865, "step": 4518 }, { "epoch": 0.13183382927825427, "grad_norm": 0.8935139894178387, "learning_rate": 9.730601985405502e-06, "loss": 0.1914, "step": 4519 }, { "epoch": 0.13186300250889785, "grad_norm": 0.973250479275411, "learning_rate": 9.730448982993335e-06, "loss": 0.184, "step": 4520 }, { "epoch": 0.1318921757395414, "grad_norm": 0.9207900746444148, "learning_rate": 9.730295938348847e-06, "loss": 0.1975, "step": 4521 }, { "epoch": 0.13192134897018495, "grad_norm": 0.783714243575158, "learning_rate": 9.730142851473407e-06, "loss": 0.1571, "step": 4522 }, { "epoch": 0.1319505222008285, "grad_norm": 0.9324602659366575, "learning_rate": 9.729989722368381e-06, "loss": 0.1912, "step": 4523 }, { "epoch": 0.1319796954314721, "grad_norm": 0.808382609467356, "learning_rate": 9.729836551035134e-06, "loss": 0.1559, "step": 4524 }, { "epoch": 0.13200886866211564, "grad_norm": 0.7921957445975134, "learning_rate": 9.729683337475037e-06, "loss": 0.1716, "step": 4525 }, { "epoch": 0.1320380418927592, "grad_norm": 0.8852867994342315, "learning_rate": 9.729530081689456e-06, "loss": 0.1857, "step": 4526 }, { "epoch": 0.13206721512340278, "grad_norm": 0.8749446727151079, "learning_rate": 9.72937678367976e-06, "loss": 0.1931, "step": 4527 }, { "epoch": 0.13209638835404633, "grad_norm": 0.7229377908847272, "learning_rate": 9.729223443447318e-06, "loss": 0.1641, "step": 4528 }, { "epoch": 0.13212556158468988, "grad_norm": 0.9158433909304731, "learning_rate": 9.729070060993495e-06, "loss": 0.1791, "step": 4529 }, { "epoch": 0.13215473481533346, "grad_norm": 0.8377665759811016, "learning_rate": 9.728916636319666e-06, "loss": 0.1791, "step": 4530 }, { "epoch": 0.13218390804597702, "grad_norm": 0.6941753010427415, "learning_rate": 9.728763169427197e-06, "loss": 0.1618, "step": 4531 }, { "epoch": 0.13221308127662057, "grad_norm": 0.8049438878497239, "learning_rate": 9.72860966031746e-06, "loss": 0.1604, "step": 4532 }, { "epoch": 0.13224225450726412, "grad_norm": 0.8760066339377347, "learning_rate": 9.728456108991824e-06, "loss": 0.1704, "step": 4533 }, { "epoch": 0.1322714277379077, "grad_norm": 0.7424387588543098, "learning_rate": 9.728302515451661e-06, "loss": 0.1493, "step": 4534 }, { "epoch": 0.13230060096855126, "grad_norm": 0.9813900623889721, "learning_rate": 9.728148879698341e-06, "loss": 0.1796, "step": 4535 }, { "epoch": 0.1323297741991948, "grad_norm": 0.8126444801411503, "learning_rate": 9.727995201733238e-06, "loss": 0.1783, "step": 4536 }, { "epoch": 0.1323589474298384, "grad_norm": 0.8546638488854917, "learning_rate": 9.727841481557722e-06, "loss": 0.2128, "step": 4537 }, { "epoch": 0.13238812066048194, "grad_norm": 0.6899971193858305, "learning_rate": 9.727687719173164e-06, "loss": 0.1565, "step": 4538 }, { "epoch": 0.1324172938911255, "grad_norm": 1.0858397714441617, "learning_rate": 9.727533914580941e-06, "loss": 0.1578, "step": 4539 }, { "epoch": 0.13244646712176905, "grad_norm": 0.9089000914262344, "learning_rate": 9.727380067782424e-06, "loss": 0.1679, "step": 4540 }, { "epoch": 0.13247564035241263, "grad_norm": 0.9199188357334842, "learning_rate": 9.727226178778985e-06, "loss": 0.1622, "step": 4541 }, { "epoch": 0.13250481358305619, "grad_norm": 1.0201509071058543, "learning_rate": 9.727072247572e-06, "loss": 0.1826, "step": 4542 }, { "epoch": 0.13253398681369974, "grad_norm": 0.8936142567745634, "learning_rate": 9.726918274162841e-06, "loss": 0.1627, "step": 4543 }, { "epoch": 0.13256316004434332, "grad_norm": 0.8517450630339168, "learning_rate": 9.726764258552885e-06, "loss": 0.1519, "step": 4544 }, { "epoch": 0.13259233327498687, "grad_norm": 0.9178315720289905, "learning_rate": 9.726610200743505e-06, "loss": 0.1757, "step": 4545 }, { "epoch": 0.13262150650563043, "grad_norm": 1.0190663322135842, "learning_rate": 9.726456100736079e-06, "loss": 0.1758, "step": 4546 }, { "epoch": 0.132650679736274, "grad_norm": 1.0820984922683805, "learning_rate": 9.72630195853198e-06, "loss": 0.2015, "step": 4547 }, { "epoch": 0.13267985296691756, "grad_norm": 1.0041173467043631, "learning_rate": 9.726147774132588e-06, "loss": 0.1603, "step": 4548 }, { "epoch": 0.13270902619756111, "grad_norm": 0.8375708058908469, "learning_rate": 9.725993547539274e-06, "loss": 0.1822, "step": 4549 }, { "epoch": 0.13273819942820467, "grad_norm": 1.068467284790401, "learning_rate": 9.72583927875342e-06, "loss": 0.1669, "step": 4550 }, { "epoch": 0.13276737265884825, "grad_norm": 0.6462988019926467, "learning_rate": 9.725684967776398e-06, "loss": 0.1908, "step": 4551 }, { "epoch": 0.1327965458894918, "grad_norm": 0.9865048607505865, "learning_rate": 9.725530614609592e-06, "loss": 0.187, "step": 4552 }, { "epoch": 0.13282571912013535, "grad_norm": 0.9761082259552106, "learning_rate": 9.725376219254374e-06, "loss": 0.1827, "step": 4553 }, { "epoch": 0.13285489235077894, "grad_norm": 0.7043053307124566, "learning_rate": 9.725221781712128e-06, "loss": 0.1714, "step": 4554 }, { "epoch": 0.1328840655814225, "grad_norm": 0.7991076445944068, "learning_rate": 9.725067301984228e-06, "loss": 0.1495, "step": 4555 }, { "epoch": 0.13291323881206604, "grad_norm": 0.8925024714989009, "learning_rate": 9.724912780072055e-06, "loss": 0.186, "step": 4556 }, { "epoch": 0.13294241204270962, "grad_norm": 0.9022041240709835, "learning_rate": 9.72475821597699e-06, "loss": 0.1866, "step": 4557 }, { "epoch": 0.13297158527335318, "grad_norm": 0.8633724589768932, "learning_rate": 9.724603609700409e-06, "loss": 0.1978, "step": 4558 }, { "epoch": 0.13300075850399673, "grad_norm": 0.9573618809816482, "learning_rate": 9.724448961243698e-06, "loss": 0.1618, "step": 4559 }, { "epoch": 0.13302993173464028, "grad_norm": 0.9401676823193406, "learning_rate": 9.724294270608232e-06, "loss": 0.1531, "step": 4560 }, { "epoch": 0.13305910496528386, "grad_norm": 0.7827004137553876, "learning_rate": 9.724139537795396e-06, "loss": 0.1691, "step": 4561 }, { "epoch": 0.13308827819592742, "grad_norm": 0.8200385903586284, "learning_rate": 9.72398476280657e-06, "loss": 0.1893, "step": 4562 }, { "epoch": 0.13311745142657097, "grad_norm": 0.9458018411150592, "learning_rate": 9.723829945643135e-06, "loss": 0.1903, "step": 4563 }, { "epoch": 0.13314662465721455, "grad_norm": 0.9489788729222979, "learning_rate": 9.723675086306474e-06, "loss": 0.1685, "step": 4564 }, { "epoch": 0.1331757978878581, "grad_norm": 0.935321272037622, "learning_rate": 9.72352018479797e-06, "loss": 0.1712, "step": 4565 }, { "epoch": 0.13320497111850166, "grad_norm": 1.0357709357010814, "learning_rate": 9.723365241119004e-06, "loss": 0.1975, "step": 4566 }, { "epoch": 0.1332341443491452, "grad_norm": 1.2735266730368744, "learning_rate": 9.723210255270962e-06, "loss": 0.1677, "step": 4567 }, { "epoch": 0.1332633175797888, "grad_norm": 0.9795401224102253, "learning_rate": 9.723055227255227e-06, "loss": 0.1783, "step": 4568 }, { "epoch": 0.13329249081043235, "grad_norm": 1.169361910063188, "learning_rate": 9.722900157073181e-06, "loss": 0.1731, "step": 4569 }, { "epoch": 0.1333216640410759, "grad_norm": 0.7712404266060171, "learning_rate": 9.72274504472621e-06, "loss": 0.1732, "step": 4570 }, { "epoch": 0.13335083727171948, "grad_norm": 0.9771102635200523, "learning_rate": 9.722589890215699e-06, "loss": 0.201, "step": 4571 }, { "epoch": 0.13338001050236303, "grad_norm": 0.9443789669268178, "learning_rate": 9.722434693543032e-06, "loss": 0.1674, "step": 4572 }, { "epoch": 0.1334091837330066, "grad_norm": 1.069382026402891, "learning_rate": 9.722279454709596e-06, "loss": 0.1968, "step": 4573 }, { "epoch": 0.13343835696365017, "grad_norm": 1.0769538937358414, "learning_rate": 9.722124173716776e-06, "loss": 0.1645, "step": 4574 }, { "epoch": 0.13346753019429372, "grad_norm": 0.923235470376459, "learning_rate": 9.72196885056596e-06, "loss": 0.1918, "step": 4575 }, { "epoch": 0.13349670342493727, "grad_norm": 0.7941395869754817, "learning_rate": 9.721813485258533e-06, "loss": 0.1563, "step": 4576 }, { "epoch": 0.13352587665558083, "grad_norm": 1.0700898790958382, "learning_rate": 9.72165807779588e-06, "loss": 0.1865, "step": 4577 }, { "epoch": 0.1335550498862244, "grad_norm": 0.9833498497939681, "learning_rate": 9.721502628179394e-06, "loss": 0.1802, "step": 4578 }, { "epoch": 0.13358422311686796, "grad_norm": 0.944078838232898, "learning_rate": 9.721347136410458e-06, "loss": 0.1633, "step": 4579 }, { "epoch": 0.13361339634751151, "grad_norm": 1.1012213330837473, "learning_rate": 9.721191602490463e-06, "loss": 0.1904, "step": 4580 }, { "epoch": 0.1336425695781551, "grad_norm": 1.1939507249414758, "learning_rate": 9.721036026420795e-06, "loss": 0.1858, "step": 4581 }, { "epoch": 0.13367174280879865, "grad_norm": 1.0069483261326841, "learning_rate": 9.720880408202844e-06, "loss": 0.1618, "step": 4582 }, { "epoch": 0.1337009160394422, "grad_norm": 1.124695251461858, "learning_rate": 9.720724747838002e-06, "loss": 0.1894, "step": 4583 }, { "epoch": 0.13373008927008576, "grad_norm": 1.2071403039976019, "learning_rate": 9.720569045327655e-06, "loss": 0.15, "step": 4584 }, { "epoch": 0.13375926250072934, "grad_norm": 0.8810175876828813, "learning_rate": 9.720413300673194e-06, "loss": 0.1794, "step": 4585 }, { "epoch": 0.1337884357313729, "grad_norm": 1.0453791704302176, "learning_rate": 9.72025751387601e-06, "loss": 0.213, "step": 4586 }, { "epoch": 0.13381760896201644, "grad_norm": 0.927080338413851, "learning_rate": 9.720101684937494e-06, "loss": 0.2021, "step": 4587 }, { "epoch": 0.13384678219266002, "grad_norm": 0.779984080753791, "learning_rate": 9.719945813859037e-06, "loss": 0.1872, "step": 4588 }, { "epoch": 0.13387595542330358, "grad_norm": 0.7996838449590193, "learning_rate": 9.719789900642031e-06, "loss": 0.1697, "step": 4589 }, { "epoch": 0.13390512865394713, "grad_norm": 1.0471543096218647, "learning_rate": 9.719633945287867e-06, "loss": 0.19, "step": 4590 }, { "epoch": 0.1339343018845907, "grad_norm": 0.8589292071998451, "learning_rate": 9.719477947797938e-06, "loss": 0.1916, "step": 4591 }, { "epoch": 0.13396347511523427, "grad_norm": 0.9015449264921895, "learning_rate": 9.719321908173636e-06, "loss": 0.1639, "step": 4592 }, { "epoch": 0.13399264834587782, "grad_norm": 0.7964675837696863, "learning_rate": 9.719165826416354e-06, "loss": 0.17, "step": 4593 }, { "epoch": 0.13402182157652137, "grad_norm": 0.7635652503837955, "learning_rate": 9.719009702527488e-06, "loss": 0.1807, "step": 4594 }, { "epoch": 0.13405099480716495, "grad_norm": 0.8480596163432714, "learning_rate": 9.718853536508428e-06, "loss": 0.1802, "step": 4595 }, { "epoch": 0.1340801680378085, "grad_norm": 0.9808208161559423, "learning_rate": 9.718697328360571e-06, "loss": 0.1766, "step": 4596 }, { "epoch": 0.13410934126845206, "grad_norm": 0.7814399820174374, "learning_rate": 9.71854107808531e-06, "loss": 0.1716, "step": 4597 }, { "epoch": 0.13413851449909564, "grad_norm": 1.0404926267332295, "learning_rate": 9.718384785684043e-06, "loss": 0.19, "step": 4598 }, { "epoch": 0.1341676877297392, "grad_norm": 0.8586317864673153, "learning_rate": 9.71822845115816e-06, "loss": 0.193, "step": 4599 }, { "epoch": 0.13419686096038275, "grad_norm": 0.8304034883877234, "learning_rate": 9.718072074509061e-06, "loss": 0.1874, "step": 4600 }, { "epoch": 0.13422603419102633, "grad_norm": 0.9164153625205795, "learning_rate": 9.717915655738142e-06, "loss": 0.2338, "step": 4601 }, { "epoch": 0.13425520742166988, "grad_norm": 1.1430108183267886, "learning_rate": 9.717759194846797e-06, "loss": 0.1782, "step": 4602 }, { "epoch": 0.13428438065231343, "grad_norm": 0.9137431537210429, "learning_rate": 9.717602691836423e-06, "loss": 0.1561, "step": 4603 }, { "epoch": 0.134313553882957, "grad_norm": 0.7274491086036875, "learning_rate": 9.717446146708421e-06, "loss": 0.1889, "step": 4604 }, { "epoch": 0.13434272711360057, "grad_norm": 0.9989608522592939, "learning_rate": 9.717289559464185e-06, "loss": 0.1808, "step": 4605 }, { "epoch": 0.13437190034424412, "grad_norm": 0.8142491245683972, "learning_rate": 9.717132930105114e-06, "loss": 0.1676, "step": 4606 }, { "epoch": 0.13440107357488768, "grad_norm": 0.9805855579751301, "learning_rate": 9.716976258632604e-06, "loss": 0.1949, "step": 4607 }, { "epoch": 0.13443024680553126, "grad_norm": 0.8075337425541218, "learning_rate": 9.716819545048058e-06, "loss": 0.168, "step": 4608 }, { "epoch": 0.1344594200361748, "grad_norm": 1.0539063299461362, "learning_rate": 9.716662789352872e-06, "loss": 0.1948, "step": 4609 }, { "epoch": 0.13448859326681836, "grad_norm": 0.8805078573360297, "learning_rate": 9.716505991548448e-06, "loss": 0.1681, "step": 4610 }, { "epoch": 0.13451776649746192, "grad_norm": 0.9125942018569365, "learning_rate": 9.716349151636183e-06, "loss": 0.1731, "step": 4611 }, { "epoch": 0.1345469397281055, "grad_norm": 0.9474453748762206, "learning_rate": 9.716192269617482e-06, "loss": 0.1584, "step": 4612 }, { "epoch": 0.13457611295874905, "grad_norm": 0.8860912041637936, "learning_rate": 9.71603534549374e-06, "loss": 0.1658, "step": 4613 }, { "epoch": 0.1346052861893926, "grad_norm": 0.8443991560150967, "learning_rate": 9.715878379266359e-06, "loss": 0.1585, "step": 4614 }, { "epoch": 0.13463445942003618, "grad_norm": 1.0155062221408329, "learning_rate": 9.715721370936742e-06, "loss": 0.1795, "step": 4615 }, { "epoch": 0.13466363265067974, "grad_norm": 0.8834955589918384, "learning_rate": 9.715564320506292e-06, "loss": 0.1727, "step": 4616 }, { "epoch": 0.1346928058813233, "grad_norm": 0.9188399044549099, "learning_rate": 9.715407227976408e-06, "loss": 0.1717, "step": 4617 }, { "epoch": 0.13472197911196687, "grad_norm": 1.028223764093895, "learning_rate": 9.715250093348494e-06, "loss": 0.1784, "step": 4618 }, { "epoch": 0.13475115234261043, "grad_norm": 0.844104848499323, "learning_rate": 9.715092916623954e-06, "loss": 0.1813, "step": 4619 }, { "epoch": 0.13478032557325398, "grad_norm": 0.9158693434032174, "learning_rate": 9.714935697804188e-06, "loss": 0.1856, "step": 4620 }, { "epoch": 0.13480949880389753, "grad_norm": 0.9538583470221261, "learning_rate": 9.714778436890604e-06, "loss": 0.1723, "step": 4621 }, { "epoch": 0.1348386720345411, "grad_norm": 0.9928564515885795, "learning_rate": 9.7146211338846e-06, "loss": 0.1954, "step": 4622 }, { "epoch": 0.13486784526518467, "grad_norm": 0.9387120450307491, "learning_rate": 9.714463788787588e-06, "loss": 0.1698, "step": 4623 }, { "epoch": 0.13489701849582822, "grad_norm": 1.0527771502712409, "learning_rate": 9.714306401600967e-06, "loss": 0.1907, "step": 4624 }, { "epoch": 0.1349261917264718, "grad_norm": 0.8933751834117948, "learning_rate": 9.714148972326144e-06, "loss": 0.1804, "step": 4625 }, { "epoch": 0.13495536495711535, "grad_norm": 0.6514000135480227, "learning_rate": 9.713991500964524e-06, "loss": 0.1694, "step": 4626 }, { "epoch": 0.1349845381877589, "grad_norm": 1.0321308868196355, "learning_rate": 9.713833987517514e-06, "loss": 0.1553, "step": 4627 }, { "epoch": 0.13501371141840246, "grad_norm": 0.8747041239954297, "learning_rate": 9.713676431986518e-06, "loss": 0.1678, "step": 4628 }, { "epoch": 0.13504288464904604, "grad_norm": 0.7833899639069093, "learning_rate": 9.713518834372946e-06, "loss": 0.198, "step": 4629 }, { "epoch": 0.1350720578796896, "grad_norm": 0.76908563457908, "learning_rate": 9.713361194678201e-06, "loss": 0.1672, "step": 4630 }, { "epoch": 0.13510123111033315, "grad_norm": 0.9875628885189603, "learning_rate": 9.713203512903695e-06, "loss": 0.1817, "step": 4631 }, { "epoch": 0.13513040434097673, "grad_norm": 0.9629519042401639, "learning_rate": 9.71304578905083e-06, "loss": 0.1672, "step": 4632 }, { "epoch": 0.13515957757162028, "grad_norm": 0.9857470706999932, "learning_rate": 9.71288802312102e-06, "loss": 0.186, "step": 4633 }, { "epoch": 0.13518875080226384, "grad_norm": 0.730565997339259, "learning_rate": 9.712730215115668e-06, "loss": 0.1552, "step": 4634 }, { "epoch": 0.13521792403290742, "grad_norm": 1.1446007527994253, "learning_rate": 9.71257236503619e-06, "loss": 0.1915, "step": 4635 }, { "epoch": 0.13524709726355097, "grad_norm": 0.9749549218703435, "learning_rate": 9.712414472883987e-06, "loss": 0.1812, "step": 4636 }, { "epoch": 0.13527627049419452, "grad_norm": 0.7799690275589842, "learning_rate": 9.712256538660474e-06, "loss": 0.1578, "step": 4637 }, { "epoch": 0.13530544372483808, "grad_norm": 0.7807747971278297, "learning_rate": 9.712098562367059e-06, "loss": 0.1758, "step": 4638 }, { "epoch": 0.13533461695548166, "grad_norm": 0.8710834872583403, "learning_rate": 9.711940544005154e-06, "loss": 0.1832, "step": 4639 }, { "epoch": 0.1353637901861252, "grad_norm": 0.7614578950485028, "learning_rate": 9.711782483576168e-06, "loss": 0.1732, "step": 4640 }, { "epoch": 0.13539296341676876, "grad_norm": 0.8424303678910273, "learning_rate": 9.711624381081513e-06, "loss": 0.1992, "step": 4641 }, { "epoch": 0.13542213664741234, "grad_norm": 0.865337928104798, "learning_rate": 9.711466236522599e-06, "loss": 0.1697, "step": 4642 }, { "epoch": 0.1354513098780559, "grad_norm": 0.9408710848620837, "learning_rate": 9.71130804990084e-06, "loss": 0.187, "step": 4643 }, { "epoch": 0.13548048310869945, "grad_norm": 0.8181683362796547, "learning_rate": 9.711149821217648e-06, "loss": 0.1815, "step": 4644 }, { "epoch": 0.13550965633934303, "grad_norm": 0.8425298921105472, "learning_rate": 9.710991550474435e-06, "loss": 0.1558, "step": 4645 }, { "epoch": 0.13553882956998659, "grad_norm": 0.8238656748496306, "learning_rate": 9.710833237672612e-06, "loss": 0.1592, "step": 4646 }, { "epoch": 0.13556800280063014, "grad_norm": 1.0303863480310418, "learning_rate": 9.710674882813598e-06, "loss": 0.1952, "step": 4647 }, { "epoch": 0.1355971760312737, "grad_norm": 0.9194943781256945, "learning_rate": 9.7105164858988e-06, "loss": 0.198, "step": 4648 }, { "epoch": 0.13562634926191727, "grad_norm": 0.8206790543242434, "learning_rate": 9.710358046929636e-06, "loss": 0.1733, "step": 4649 }, { "epoch": 0.13565552249256083, "grad_norm": 1.107013105208648, "learning_rate": 9.710199565907521e-06, "loss": 0.1918, "step": 4650 }, { "epoch": 0.13568469572320438, "grad_norm": 0.9489735749586852, "learning_rate": 9.710041042833869e-06, "loss": 0.1512, "step": 4651 }, { "epoch": 0.13571386895384796, "grad_norm": 0.7685001734325031, "learning_rate": 9.709882477710093e-06, "loss": 0.1822, "step": 4652 }, { "epoch": 0.1357430421844915, "grad_norm": 0.9211035392852889, "learning_rate": 9.709723870537613e-06, "loss": 0.1782, "step": 4653 }, { "epoch": 0.13577221541513507, "grad_norm": 0.9523188363240555, "learning_rate": 9.70956522131784e-06, "loss": 0.1606, "step": 4654 }, { "epoch": 0.13580138864577862, "grad_norm": 0.9060661785416497, "learning_rate": 9.709406530052194e-06, "loss": 0.1775, "step": 4655 }, { "epoch": 0.1358305618764222, "grad_norm": 0.9669192154217833, "learning_rate": 9.709247796742091e-06, "loss": 0.187, "step": 4656 }, { "epoch": 0.13585973510706575, "grad_norm": 0.9399764473212696, "learning_rate": 9.709089021388947e-06, "loss": 0.195, "step": 4657 }, { "epoch": 0.1358889083377093, "grad_norm": 0.9661483210094688, "learning_rate": 9.708930203994182e-06, "loss": 0.1798, "step": 4658 }, { "epoch": 0.1359180815683529, "grad_norm": 0.9124454167609475, "learning_rate": 9.708771344559212e-06, "loss": 0.1684, "step": 4659 }, { "epoch": 0.13594725479899644, "grad_norm": 1.0215319227945203, "learning_rate": 9.708612443085454e-06, "loss": 0.1919, "step": 4660 }, { "epoch": 0.13597642802964, "grad_norm": 1.0114839591203058, "learning_rate": 9.708453499574328e-06, "loss": 0.1734, "step": 4661 }, { "epoch": 0.13600560126028358, "grad_norm": 0.9298959485935899, "learning_rate": 9.708294514027255e-06, "loss": 0.177, "step": 4662 }, { "epoch": 0.13603477449092713, "grad_norm": 0.9542062169030524, "learning_rate": 9.708135486445652e-06, "loss": 0.1771, "step": 4663 }, { "epoch": 0.13606394772157068, "grad_norm": 0.858838796975156, "learning_rate": 9.707976416830938e-06, "loss": 0.1553, "step": 4664 }, { "epoch": 0.13609312095221424, "grad_norm": 0.7526944223238617, "learning_rate": 9.707817305184535e-06, "loss": 0.18, "step": 4665 }, { "epoch": 0.13612229418285782, "grad_norm": 0.7810099686966809, "learning_rate": 9.707658151507864e-06, "loss": 0.1637, "step": 4666 }, { "epoch": 0.13615146741350137, "grad_norm": 0.9086215597119266, "learning_rate": 9.707498955802343e-06, "loss": 0.1872, "step": 4667 }, { "epoch": 0.13618064064414492, "grad_norm": 0.9508524355128216, "learning_rate": 9.707339718069397e-06, "loss": 0.1891, "step": 4668 }, { "epoch": 0.1362098138747885, "grad_norm": 0.7195667254981255, "learning_rate": 9.707180438310446e-06, "loss": 0.1848, "step": 4669 }, { "epoch": 0.13623898710543206, "grad_norm": 0.8184403771567913, "learning_rate": 9.707021116526908e-06, "loss": 0.182, "step": 4670 }, { "epoch": 0.1362681603360756, "grad_norm": 0.9658791864775528, "learning_rate": 9.706861752720213e-06, "loss": 0.1559, "step": 4671 }, { "epoch": 0.1362973335667192, "grad_norm": 0.7212660341141374, "learning_rate": 9.706702346891778e-06, "loss": 0.1546, "step": 4672 }, { "epoch": 0.13632650679736275, "grad_norm": 0.8810046665043058, "learning_rate": 9.70654289904303e-06, "loss": 0.1698, "step": 4673 }, { "epoch": 0.1363556800280063, "grad_norm": 0.8165448817841481, "learning_rate": 9.70638340917539e-06, "loss": 0.1819, "step": 4674 }, { "epoch": 0.13638485325864985, "grad_norm": 1.0664393599702064, "learning_rate": 9.706223877290282e-06, "loss": 0.1813, "step": 4675 }, { "epoch": 0.13641402648929343, "grad_norm": 0.7238391073438882, "learning_rate": 9.70606430338913e-06, "loss": 0.1828, "step": 4676 }, { "epoch": 0.136443199719937, "grad_norm": 1.005694470360366, "learning_rate": 9.70590468747336e-06, "loss": 0.1973, "step": 4677 }, { "epoch": 0.13647237295058054, "grad_norm": 0.9241377635739217, "learning_rate": 9.705745029544396e-06, "loss": 0.1871, "step": 4678 }, { "epoch": 0.13650154618122412, "grad_norm": 0.8179031785167225, "learning_rate": 9.705585329603664e-06, "loss": 0.1804, "step": 4679 }, { "epoch": 0.13653071941186767, "grad_norm": 0.7308893257140879, "learning_rate": 9.705425587652589e-06, "loss": 0.1732, "step": 4680 }, { "epoch": 0.13655989264251123, "grad_norm": 0.7528912940315003, "learning_rate": 9.705265803692597e-06, "loss": 0.1722, "step": 4681 }, { "epoch": 0.13658906587315478, "grad_norm": 1.052690632300158, "learning_rate": 9.705105977725117e-06, "loss": 0.175, "step": 4682 }, { "epoch": 0.13661823910379836, "grad_norm": 0.8706043181573576, "learning_rate": 9.704946109751572e-06, "loss": 0.1625, "step": 4683 }, { "epoch": 0.13664741233444191, "grad_norm": 0.7828218637229405, "learning_rate": 9.704786199773392e-06, "loss": 0.1767, "step": 4684 }, { "epoch": 0.13667658556508547, "grad_norm": 1.6359450019303676, "learning_rate": 9.704626247792006e-06, "loss": 0.1768, "step": 4685 }, { "epoch": 0.13670575879572905, "grad_norm": 0.8522023527214693, "learning_rate": 9.704466253808837e-06, "loss": 0.1467, "step": 4686 }, { "epoch": 0.1367349320263726, "grad_norm": 0.9438185753052197, "learning_rate": 9.70430621782532e-06, "loss": 0.1943, "step": 4687 }, { "epoch": 0.13676410525701616, "grad_norm": 0.8967662864085583, "learning_rate": 9.704146139842876e-06, "loss": 0.1958, "step": 4688 }, { "epoch": 0.13679327848765974, "grad_norm": 0.8176074719163361, "learning_rate": 9.70398601986294e-06, "loss": 0.2017, "step": 4689 }, { "epoch": 0.1368224517183033, "grad_norm": 1.0065416187094867, "learning_rate": 9.70382585788694e-06, "loss": 0.2056, "step": 4690 }, { "epoch": 0.13685162494894684, "grad_norm": 0.8540913616045407, "learning_rate": 9.703665653916306e-06, "loss": 0.1881, "step": 4691 }, { "epoch": 0.1368807981795904, "grad_norm": 0.7964480986863118, "learning_rate": 9.703505407952467e-06, "loss": 0.1611, "step": 4692 }, { "epoch": 0.13690997141023398, "grad_norm": 1.0276098799259779, "learning_rate": 9.703345119996854e-06, "loss": 0.2064, "step": 4693 }, { "epoch": 0.13693914464087753, "grad_norm": 0.8780151029964625, "learning_rate": 9.7031847900509e-06, "loss": 0.181, "step": 4694 }, { "epoch": 0.13696831787152108, "grad_norm": 0.9988535269115225, "learning_rate": 9.703024418116035e-06, "loss": 0.1828, "step": 4695 }, { "epoch": 0.13699749110216466, "grad_norm": 0.8217531980589999, "learning_rate": 9.702864004193689e-06, "loss": 0.1904, "step": 4696 }, { "epoch": 0.13702666433280822, "grad_norm": 0.9369455041215813, "learning_rate": 9.702703548285297e-06, "loss": 0.194, "step": 4697 }, { "epoch": 0.13705583756345177, "grad_norm": 1.0206640713725064, "learning_rate": 9.702543050392289e-06, "loss": 0.1897, "step": 4698 }, { "epoch": 0.13708501079409532, "grad_norm": 0.905883141033335, "learning_rate": 9.702382510516101e-06, "loss": 0.1785, "step": 4699 }, { "epoch": 0.1371141840247389, "grad_norm": 0.822309423685823, "learning_rate": 9.702221928658162e-06, "loss": 0.1628, "step": 4700 }, { "epoch": 0.13714335725538246, "grad_norm": 0.830246206302412, "learning_rate": 9.702061304819912e-06, "loss": 0.1513, "step": 4701 }, { "epoch": 0.137172530486026, "grad_norm": 0.844493568165159, "learning_rate": 9.70190063900278e-06, "loss": 0.1923, "step": 4702 }, { "epoch": 0.1372017037166696, "grad_norm": 0.8159300219183769, "learning_rate": 9.701739931208199e-06, "loss": 0.1595, "step": 4703 }, { "epoch": 0.13723087694731315, "grad_norm": 0.9591670417352346, "learning_rate": 9.701579181437608e-06, "loss": 0.1657, "step": 4704 }, { "epoch": 0.1372600501779567, "grad_norm": 0.9397254357533835, "learning_rate": 9.701418389692441e-06, "loss": 0.1744, "step": 4705 }, { "epoch": 0.13728922340860028, "grad_norm": 0.8273778307663063, "learning_rate": 9.701257555974131e-06, "loss": 0.187, "step": 4706 }, { "epoch": 0.13731839663924383, "grad_norm": 1.003808430533134, "learning_rate": 9.701096680284119e-06, "loss": 0.1812, "step": 4707 }, { "epoch": 0.1373475698698874, "grad_norm": 0.7965113465415409, "learning_rate": 9.700935762623835e-06, "loss": 0.1621, "step": 4708 }, { "epoch": 0.13737674310053094, "grad_norm": 0.8984695013894336, "learning_rate": 9.700774802994721e-06, "loss": 0.1799, "step": 4709 }, { "epoch": 0.13740591633117452, "grad_norm": 1.0562346510337288, "learning_rate": 9.700613801398209e-06, "loss": 0.2136, "step": 4710 }, { "epoch": 0.13743508956181807, "grad_norm": 1.0092671338183845, "learning_rate": 9.700452757835741e-06, "loss": 0.179, "step": 4711 }, { "epoch": 0.13746426279246163, "grad_norm": 0.8689706657713749, "learning_rate": 9.700291672308752e-06, "loss": 0.1666, "step": 4712 }, { "epoch": 0.1374934360231052, "grad_norm": 0.9547444978433681, "learning_rate": 9.700130544818682e-06, "loss": 0.2214, "step": 4713 }, { "epoch": 0.13752260925374876, "grad_norm": 0.898366732252034, "learning_rate": 9.69996937536697e-06, "loss": 0.1915, "step": 4714 }, { "epoch": 0.13755178248439232, "grad_norm": 0.7665205658010267, "learning_rate": 9.69980816395505e-06, "loss": 0.1653, "step": 4715 }, { "epoch": 0.1375809557150359, "grad_norm": 0.8091459372871365, "learning_rate": 9.699646910584366e-06, "loss": 0.185, "step": 4716 }, { "epoch": 0.13761012894567945, "grad_norm": 0.9247503942827152, "learning_rate": 9.699485615256357e-06, "loss": 0.1824, "step": 4717 }, { "epoch": 0.137639302176323, "grad_norm": 0.8717922752137528, "learning_rate": 9.699324277972462e-06, "loss": 0.1819, "step": 4718 }, { "epoch": 0.13766847540696656, "grad_norm": 1.0773589783394717, "learning_rate": 9.699162898734122e-06, "loss": 0.1925, "step": 4719 }, { "epoch": 0.13769764863761014, "grad_norm": 1.0901931893968568, "learning_rate": 9.699001477542775e-06, "loss": 0.197, "step": 4720 }, { "epoch": 0.1377268218682537, "grad_norm": 1.1835489899861442, "learning_rate": 9.698840014399867e-06, "loss": 0.1804, "step": 4721 }, { "epoch": 0.13775599509889724, "grad_norm": 0.9021954966986981, "learning_rate": 9.698678509306836e-06, "loss": 0.207, "step": 4722 }, { "epoch": 0.13778516832954082, "grad_norm": 1.0469311591030825, "learning_rate": 9.698516962265125e-06, "loss": 0.175, "step": 4723 }, { "epoch": 0.13781434156018438, "grad_norm": 0.8073322173220286, "learning_rate": 9.698355373276178e-06, "loss": 0.1755, "step": 4724 }, { "epoch": 0.13784351479082793, "grad_norm": 0.8685317986389969, "learning_rate": 9.698193742341434e-06, "loss": 0.173, "step": 4725 }, { "epoch": 0.13787268802147148, "grad_norm": 0.8256525508359017, "learning_rate": 9.698032069462338e-06, "loss": 0.1755, "step": 4726 }, { "epoch": 0.13790186125211507, "grad_norm": 0.9431652155983686, "learning_rate": 9.697870354640334e-06, "loss": 0.1899, "step": 4727 }, { "epoch": 0.13793103448275862, "grad_norm": 1.0830611080645738, "learning_rate": 9.697708597876863e-06, "loss": 0.1504, "step": 4728 }, { "epoch": 0.13796020771340217, "grad_norm": 0.7177082561141378, "learning_rate": 9.697546799173372e-06, "loss": 0.1696, "step": 4729 }, { "epoch": 0.13798938094404575, "grad_norm": 1.0145093897760615, "learning_rate": 9.697384958531307e-06, "loss": 0.1684, "step": 4730 }, { "epoch": 0.1380185541746893, "grad_norm": 0.9537271286997424, "learning_rate": 9.697223075952107e-06, "loss": 0.2042, "step": 4731 }, { "epoch": 0.13804772740533286, "grad_norm": 0.8615635927849443, "learning_rate": 9.697061151437223e-06, "loss": 0.1744, "step": 4732 }, { "epoch": 0.13807690063597644, "grad_norm": 1.0196870703943546, "learning_rate": 9.696899184988097e-06, "loss": 0.164, "step": 4733 }, { "epoch": 0.13810607386662, "grad_norm": 0.7820486500981627, "learning_rate": 9.696737176606177e-06, "loss": 0.1603, "step": 4734 }, { "epoch": 0.13813524709726355, "grad_norm": 1.0072738160492847, "learning_rate": 9.696575126292908e-06, "loss": 0.1887, "step": 4735 }, { "epoch": 0.1381644203279071, "grad_norm": 1.002552660392895, "learning_rate": 9.696413034049738e-06, "loss": 0.1781, "step": 4736 }, { "epoch": 0.13819359355855068, "grad_norm": 0.8051411523838466, "learning_rate": 9.696250899878114e-06, "loss": 0.174, "step": 4737 }, { "epoch": 0.13822276678919423, "grad_norm": 0.8422472358201145, "learning_rate": 9.696088723779481e-06, "loss": 0.2058, "step": 4738 }, { "epoch": 0.1382519400198378, "grad_norm": 1.0801041751659373, "learning_rate": 9.695926505755291e-06, "loss": 0.1796, "step": 4739 }, { "epoch": 0.13828111325048137, "grad_norm": 0.7600740521139456, "learning_rate": 9.695764245806989e-06, "loss": 0.1877, "step": 4740 }, { "epoch": 0.13831028648112492, "grad_norm": 0.7945319040741914, "learning_rate": 9.695601943936026e-06, "loss": 0.1549, "step": 4741 }, { "epoch": 0.13833945971176848, "grad_norm": 0.992848090373319, "learning_rate": 9.69543960014385e-06, "loss": 0.1774, "step": 4742 }, { "epoch": 0.13836863294241206, "grad_norm": 0.7954009770454634, "learning_rate": 9.695277214431909e-06, "loss": 0.1525, "step": 4743 }, { "epoch": 0.1383978061730556, "grad_norm": 1.0887109705714113, "learning_rate": 9.695114786801654e-06, "loss": 0.1813, "step": 4744 }, { "epoch": 0.13842697940369916, "grad_norm": 0.7643587553064333, "learning_rate": 9.694952317254535e-06, "loss": 0.1788, "step": 4745 }, { "epoch": 0.13845615263434272, "grad_norm": 1.371828272599082, "learning_rate": 9.694789805792001e-06, "loss": 0.1648, "step": 4746 }, { "epoch": 0.1384853258649863, "grad_norm": 0.8498365739317533, "learning_rate": 9.694627252415507e-06, "loss": 0.1668, "step": 4747 }, { "epoch": 0.13851449909562985, "grad_norm": 1.2725773426236568, "learning_rate": 9.6944646571265e-06, "loss": 0.1729, "step": 4748 }, { "epoch": 0.1385436723262734, "grad_norm": 0.8315757480574972, "learning_rate": 9.694302019926433e-06, "loss": 0.1991, "step": 4749 }, { "epoch": 0.13857284555691698, "grad_norm": 1.1038174200605573, "learning_rate": 9.69413934081676e-06, "loss": 0.1822, "step": 4750 }, { "epoch": 0.13860201878756054, "grad_norm": 0.9394178159287982, "learning_rate": 9.69397661979893e-06, "loss": 0.1935, "step": 4751 }, { "epoch": 0.1386311920182041, "grad_norm": 0.9224771336747449, "learning_rate": 9.693813856874399e-06, "loss": 0.181, "step": 4752 }, { "epoch": 0.13866036524884764, "grad_norm": 1.0966758332974174, "learning_rate": 9.693651052044617e-06, "loss": 0.201, "step": 4753 }, { "epoch": 0.13868953847949123, "grad_norm": 1.0613697191543172, "learning_rate": 9.693488205311039e-06, "loss": 0.1764, "step": 4754 }, { "epoch": 0.13871871171013478, "grad_norm": 0.8008712692693377, "learning_rate": 9.693325316675118e-06, "loss": 0.1506, "step": 4755 }, { "epoch": 0.13874788494077833, "grad_norm": 0.9747624024045326, "learning_rate": 9.69316238613831e-06, "loss": 0.1886, "step": 4756 }, { "epoch": 0.1387770581714219, "grad_norm": 1.0202675067414013, "learning_rate": 9.69299941370207e-06, "loss": 0.1833, "step": 4757 }, { "epoch": 0.13880623140206547, "grad_norm": 0.8653840356432823, "learning_rate": 9.692836399367849e-06, "loss": 0.1441, "step": 4758 }, { "epoch": 0.13883540463270902, "grad_norm": 0.8570360870727849, "learning_rate": 9.692673343137105e-06, "loss": 0.1548, "step": 4759 }, { "epoch": 0.1388645778633526, "grad_norm": 0.6883782771617556, "learning_rate": 9.692510245011295e-06, "loss": 0.1604, "step": 4760 }, { "epoch": 0.13889375109399615, "grad_norm": 0.8918098074714871, "learning_rate": 9.692347104991872e-06, "loss": 0.1833, "step": 4761 }, { "epoch": 0.1389229243246397, "grad_norm": 0.8063028934656418, "learning_rate": 9.692183923080296e-06, "loss": 0.2015, "step": 4762 }, { "epoch": 0.13895209755528326, "grad_norm": 0.8285488144862405, "learning_rate": 9.692020699278022e-06, "loss": 0.2058, "step": 4763 }, { "epoch": 0.13898127078592684, "grad_norm": 0.9826245249457514, "learning_rate": 9.691857433586506e-06, "loss": 0.1755, "step": 4764 }, { "epoch": 0.1390104440165704, "grad_norm": 1.0692972601624349, "learning_rate": 9.691694126007207e-06, "loss": 0.1621, "step": 4765 }, { "epoch": 0.13903961724721395, "grad_norm": 0.7243879299682265, "learning_rate": 9.691530776541584e-06, "loss": 0.1718, "step": 4766 }, { "epoch": 0.13906879047785753, "grad_norm": 1.469514154173167, "learning_rate": 9.691367385191092e-06, "loss": 0.1684, "step": 4767 }, { "epoch": 0.13909796370850108, "grad_norm": 1.007738393673632, "learning_rate": 9.691203951957195e-06, "loss": 0.1612, "step": 4768 }, { "epoch": 0.13912713693914464, "grad_norm": 0.7627352180329647, "learning_rate": 9.691040476841347e-06, "loss": 0.1872, "step": 4769 }, { "epoch": 0.1391563101697882, "grad_norm": 1.0794946403128456, "learning_rate": 9.69087695984501e-06, "loss": 0.1836, "step": 4770 }, { "epoch": 0.13918548340043177, "grad_norm": 0.8567896408660971, "learning_rate": 9.690713400969643e-06, "loss": 0.1692, "step": 4771 }, { "epoch": 0.13921465663107532, "grad_norm": 0.7951352571820303, "learning_rate": 9.690549800216707e-06, "loss": 0.1707, "step": 4772 }, { "epoch": 0.13924382986171888, "grad_norm": 0.8863994923542984, "learning_rate": 9.69038615758766e-06, "loss": 0.1804, "step": 4773 }, { "epoch": 0.13927300309236246, "grad_norm": 1.00564861230173, "learning_rate": 9.690222473083969e-06, "loss": 0.1781, "step": 4774 }, { "epoch": 0.139302176323006, "grad_norm": 0.940740162116202, "learning_rate": 9.690058746707088e-06, "loss": 0.157, "step": 4775 }, { "epoch": 0.13933134955364956, "grad_norm": 0.8229699866809916, "learning_rate": 9.689894978458483e-06, "loss": 0.1905, "step": 4776 }, { "epoch": 0.13936052278429314, "grad_norm": 0.8355385203041596, "learning_rate": 9.689731168339617e-06, "loss": 0.1481, "step": 4777 }, { "epoch": 0.1393896960149367, "grad_norm": 1.0131008965133432, "learning_rate": 9.689567316351948e-06, "loss": 0.1554, "step": 4778 }, { "epoch": 0.13941886924558025, "grad_norm": 0.922450578193256, "learning_rate": 9.689403422496943e-06, "loss": 0.1821, "step": 4779 }, { "epoch": 0.1394480424762238, "grad_norm": 0.8739646449136426, "learning_rate": 9.689239486776062e-06, "loss": 0.2, "step": 4780 }, { "epoch": 0.13947721570686739, "grad_norm": 0.7958490554860915, "learning_rate": 9.689075509190773e-06, "loss": 0.1892, "step": 4781 }, { "epoch": 0.13950638893751094, "grad_norm": 1.081798561341606, "learning_rate": 9.688911489742536e-06, "loss": 0.1629, "step": 4782 }, { "epoch": 0.1395355621681545, "grad_norm": 1.0034315642502445, "learning_rate": 9.688747428432817e-06, "loss": 0.1786, "step": 4783 }, { "epoch": 0.13956473539879807, "grad_norm": 1.1513257134225778, "learning_rate": 9.68858332526308e-06, "loss": 0.1918, "step": 4784 }, { "epoch": 0.13959390862944163, "grad_norm": 0.931118991810217, "learning_rate": 9.68841918023479e-06, "loss": 0.1708, "step": 4785 }, { "epoch": 0.13962308186008518, "grad_norm": 0.9032086134251692, "learning_rate": 9.688254993349413e-06, "loss": 0.1728, "step": 4786 }, { "epoch": 0.13965225509072876, "grad_norm": 0.8804102543714083, "learning_rate": 9.688090764608414e-06, "loss": 0.1724, "step": 4787 }, { "epoch": 0.13968142832137231, "grad_norm": 0.9761342170497085, "learning_rate": 9.68792649401326e-06, "loss": 0.1996, "step": 4788 }, { "epoch": 0.13971060155201587, "grad_norm": 1.1328109314819435, "learning_rate": 9.687762181565417e-06, "loss": 0.1751, "step": 4789 }, { "epoch": 0.13973977478265942, "grad_norm": 0.6669056177756585, "learning_rate": 9.687597827266355e-06, "loss": 0.1617, "step": 4790 }, { "epoch": 0.139768948013303, "grad_norm": 1.0562784085060106, "learning_rate": 9.687433431117536e-06, "loss": 0.1762, "step": 4791 }, { "epoch": 0.13979812124394655, "grad_norm": 0.9796484153565788, "learning_rate": 9.68726899312043e-06, "loss": 0.1845, "step": 4792 }, { "epoch": 0.1398272944745901, "grad_norm": 0.8837581374484309, "learning_rate": 9.687104513276506e-06, "loss": 0.1808, "step": 4793 }, { "epoch": 0.1398564677052337, "grad_norm": 0.9029351835677035, "learning_rate": 9.686939991587231e-06, "loss": 0.1809, "step": 4794 }, { "epoch": 0.13988564093587724, "grad_norm": 1.073322187155975, "learning_rate": 9.686775428054077e-06, "loss": 0.1855, "step": 4795 }, { "epoch": 0.1399148141665208, "grad_norm": 0.872777369415657, "learning_rate": 9.68661082267851e-06, "loss": 0.1758, "step": 4796 }, { "epoch": 0.13994398739716435, "grad_norm": 1.220254433923574, "learning_rate": 9.686446175462e-06, "loss": 0.1992, "step": 4797 }, { "epoch": 0.13997316062780793, "grad_norm": 1.0710973421363612, "learning_rate": 9.686281486406016e-06, "loss": 0.1957, "step": 4798 }, { "epoch": 0.14000233385845148, "grad_norm": 0.8878692918807475, "learning_rate": 9.68611675551203e-06, "loss": 0.1805, "step": 4799 }, { "epoch": 0.14003150708909504, "grad_norm": 1.2768733842676163, "learning_rate": 9.685951982781515e-06, "loss": 0.1702, "step": 4800 }, { "epoch": 0.14006068031973862, "grad_norm": 0.7965365572186565, "learning_rate": 9.685787168215936e-06, "loss": 0.1667, "step": 4801 }, { "epoch": 0.14008985355038217, "grad_norm": 0.9463080448399828, "learning_rate": 9.68562231181677e-06, "loss": 0.1803, "step": 4802 }, { "epoch": 0.14011902678102572, "grad_norm": 0.8209363619499964, "learning_rate": 9.685457413585485e-06, "loss": 0.1546, "step": 4803 }, { "epoch": 0.1401482000116693, "grad_norm": 0.9216286184168969, "learning_rate": 9.685292473523556e-06, "loss": 0.195, "step": 4804 }, { "epoch": 0.14017737324231286, "grad_norm": 0.7379346868485538, "learning_rate": 9.685127491632453e-06, "loss": 0.2126, "step": 4805 }, { "epoch": 0.1402065464729564, "grad_norm": 0.8735232330808408, "learning_rate": 9.68496246791365e-06, "loss": 0.1843, "step": 4806 }, { "epoch": 0.14023571970359996, "grad_norm": 0.7222127071955207, "learning_rate": 9.684797402368622e-06, "loss": 0.1806, "step": 4807 }, { "epoch": 0.14026489293424355, "grad_norm": 0.7931973528469856, "learning_rate": 9.684632294998839e-06, "loss": 0.1766, "step": 4808 }, { "epoch": 0.1402940661648871, "grad_norm": 0.8743287978832351, "learning_rate": 9.68446714580578e-06, "loss": 0.1581, "step": 4809 }, { "epoch": 0.14032323939553065, "grad_norm": 0.8218092003905312, "learning_rate": 9.684301954790914e-06, "loss": 0.1527, "step": 4810 }, { "epoch": 0.14035241262617423, "grad_norm": 0.8003358263025518, "learning_rate": 9.68413672195572e-06, "loss": 0.1785, "step": 4811 }, { "epoch": 0.1403815858568178, "grad_norm": 0.8468720761596434, "learning_rate": 9.683971447301672e-06, "loss": 0.1701, "step": 4812 }, { "epoch": 0.14041075908746134, "grad_norm": 0.8887877124128191, "learning_rate": 9.683806130830243e-06, "loss": 0.1741, "step": 4813 }, { "epoch": 0.1404399323181049, "grad_norm": 1.0279176973479827, "learning_rate": 9.683640772542913e-06, "loss": 0.1731, "step": 4814 }, { "epoch": 0.14046910554874847, "grad_norm": 0.7195582202001497, "learning_rate": 9.683475372441154e-06, "loss": 0.1852, "step": 4815 }, { "epoch": 0.14049827877939203, "grad_norm": 0.8020805476616278, "learning_rate": 9.683309930526447e-06, "loss": 0.1855, "step": 4816 }, { "epoch": 0.14052745201003558, "grad_norm": 0.9048510224345565, "learning_rate": 9.683144446800265e-06, "loss": 0.195, "step": 4817 }, { "epoch": 0.14055662524067916, "grad_norm": 0.9783988267007112, "learning_rate": 9.682978921264091e-06, "loss": 0.2009, "step": 4818 }, { "epoch": 0.14058579847132271, "grad_norm": 0.854084900659183, "learning_rate": 9.682813353919395e-06, "loss": 0.1682, "step": 4819 }, { "epoch": 0.14061497170196627, "grad_norm": 0.9144785598322361, "learning_rate": 9.68264774476766e-06, "loss": 0.1763, "step": 4820 }, { "epoch": 0.14064414493260985, "grad_norm": 0.8744938036025505, "learning_rate": 9.682482093810366e-06, "loss": 0.1974, "step": 4821 }, { "epoch": 0.1406733181632534, "grad_norm": 0.9891754775443123, "learning_rate": 9.682316401048988e-06, "loss": 0.2059, "step": 4822 }, { "epoch": 0.14070249139389696, "grad_norm": 0.8074593601049309, "learning_rate": 9.682150666485007e-06, "loss": 0.1657, "step": 4823 }, { "epoch": 0.1407316646245405, "grad_norm": 0.8042886657050788, "learning_rate": 9.681984890119903e-06, "loss": 0.1729, "step": 4824 }, { "epoch": 0.1407608378551841, "grad_norm": 1.025908523614656, "learning_rate": 9.681819071955155e-06, "loss": 0.2028, "step": 4825 }, { "epoch": 0.14079001108582764, "grad_norm": 0.8273014432705452, "learning_rate": 9.681653211992244e-06, "loss": 0.1789, "step": 4826 }, { "epoch": 0.1408191843164712, "grad_norm": 1.0057024319220127, "learning_rate": 9.68148731023265e-06, "loss": 0.1711, "step": 4827 }, { "epoch": 0.14084835754711478, "grad_norm": 1.168127807007021, "learning_rate": 9.681321366677858e-06, "loss": 0.1922, "step": 4828 }, { "epoch": 0.14087753077775833, "grad_norm": 1.0477758856695032, "learning_rate": 9.681155381329344e-06, "loss": 0.1857, "step": 4829 }, { "epoch": 0.14090670400840188, "grad_norm": 0.8195480953049104, "learning_rate": 9.680989354188593e-06, "loss": 0.1767, "step": 4830 }, { "epoch": 0.14093587723904547, "grad_norm": 0.9603507354533929, "learning_rate": 9.680823285257087e-06, "loss": 0.1824, "step": 4831 }, { "epoch": 0.14096505046968902, "grad_norm": 1.1537190976081713, "learning_rate": 9.680657174536305e-06, "loss": 0.1955, "step": 4832 }, { "epoch": 0.14099422370033257, "grad_norm": 0.8363877872012376, "learning_rate": 9.680491022027736e-06, "loss": 0.1614, "step": 4833 }, { "epoch": 0.14102339693097612, "grad_norm": 0.910452640839773, "learning_rate": 9.68032482773286e-06, "loss": 0.1586, "step": 4834 }, { "epoch": 0.1410525701616197, "grad_norm": 1.0384120108467383, "learning_rate": 9.680158591653162e-06, "loss": 0.1877, "step": 4835 }, { "epoch": 0.14108174339226326, "grad_norm": 0.8799418821600672, "learning_rate": 9.679992313790123e-06, "loss": 0.181, "step": 4836 }, { "epoch": 0.1411109166229068, "grad_norm": 1.1052277610346195, "learning_rate": 9.679825994145232e-06, "loss": 0.1696, "step": 4837 }, { "epoch": 0.1411400898535504, "grad_norm": 0.8070947761707465, "learning_rate": 9.67965963271997e-06, "loss": 0.1689, "step": 4838 }, { "epoch": 0.14116926308419395, "grad_norm": 0.8348709118117703, "learning_rate": 9.679493229515825e-06, "loss": 0.1831, "step": 4839 }, { "epoch": 0.1411984363148375, "grad_norm": 0.883498083009703, "learning_rate": 9.679326784534283e-06, "loss": 0.1851, "step": 4840 }, { "epoch": 0.14122760954548105, "grad_norm": 0.8407038467802971, "learning_rate": 9.679160297776826e-06, "loss": 0.1669, "step": 4841 }, { "epoch": 0.14125678277612463, "grad_norm": 0.8112263115764934, "learning_rate": 9.678993769244942e-06, "loss": 0.185, "step": 4842 }, { "epoch": 0.1412859560067682, "grad_norm": 0.8819726568891366, "learning_rate": 9.678827198940121e-06, "loss": 0.172, "step": 4843 }, { "epoch": 0.14131512923741174, "grad_norm": 0.8196877100230318, "learning_rate": 9.678660586863847e-06, "loss": 0.1838, "step": 4844 }, { "epoch": 0.14134430246805532, "grad_norm": 0.8916286389589517, "learning_rate": 9.678493933017608e-06, "loss": 0.1889, "step": 4845 }, { "epoch": 0.14137347569869887, "grad_norm": 0.8081831430889657, "learning_rate": 9.678327237402892e-06, "loss": 0.1844, "step": 4846 }, { "epoch": 0.14140264892934243, "grad_norm": 0.8996943013924475, "learning_rate": 9.678160500021188e-06, "loss": 0.2088, "step": 4847 }, { "epoch": 0.141431822159986, "grad_norm": 1.1877947274995793, "learning_rate": 9.677993720873983e-06, "loss": 0.1849, "step": 4848 }, { "epoch": 0.14146099539062956, "grad_norm": 0.8454475429653244, "learning_rate": 9.677826899962767e-06, "loss": 0.181, "step": 4849 }, { "epoch": 0.14149016862127312, "grad_norm": 0.9777729656039873, "learning_rate": 9.677660037289029e-06, "loss": 0.2063, "step": 4850 }, { "epoch": 0.14151934185191667, "grad_norm": 0.8346924308437704, "learning_rate": 9.67749313285426e-06, "loss": 0.1797, "step": 4851 }, { "epoch": 0.14154851508256025, "grad_norm": 0.8235078319478367, "learning_rate": 9.677326186659947e-06, "loss": 0.1828, "step": 4852 }, { "epoch": 0.1415776883132038, "grad_norm": 0.8134056400223716, "learning_rate": 9.677159198707582e-06, "loss": 0.1679, "step": 4853 }, { "epoch": 0.14160686154384736, "grad_norm": 0.9372396523329549, "learning_rate": 9.676992168998657e-06, "loss": 0.1782, "step": 4854 }, { "epoch": 0.14163603477449094, "grad_norm": 0.75538528348334, "learning_rate": 9.676825097534663e-06, "loss": 0.1568, "step": 4855 }, { "epoch": 0.1416652080051345, "grad_norm": 0.7854605906588132, "learning_rate": 9.676657984317092e-06, "loss": 0.1759, "step": 4856 }, { "epoch": 0.14169438123577804, "grad_norm": 0.9421707495263524, "learning_rate": 9.676490829347434e-06, "loss": 0.1794, "step": 4857 }, { "epoch": 0.14172355446642163, "grad_norm": 0.7743067145071496, "learning_rate": 9.67632363262718e-06, "loss": 0.1703, "step": 4858 }, { "epoch": 0.14175272769706518, "grad_norm": 0.930500697699094, "learning_rate": 9.676156394157829e-06, "loss": 0.1743, "step": 4859 }, { "epoch": 0.14178190092770873, "grad_norm": 0.7814997975949639, "learning_rate": 9.675989113940866e-06, "loss": 0.1612, "step": 4860 }, { "epoch": 0.14181107415835228, "grad_norm": 0.8865724413664395, "learning_rate": 9.67582179197779e-06, "loss": 0.1699, "step": 4861 }, { "epoch": 0.14184024738899587, "grad_norm": 1.1884952321978883, "learning_rate": 9.675654428270094e-06, "loss": 0.183, "step": 4862 }, { "epoch": 0.14186942061963942, "grad_norm": 0.8074043648783656, "learning_rate": 9.675487022819273e-06, "loss": 0.1713, "step": 4863 }, { "epoch": 0.14189859385028297, "grad_norm": 0.8235702417362052, "learning_rate": 9.675319575626817e-06, "loss": 0.1705, "step": 4864 }, { "epoch": 0.14192776708092655, "grad_norm": 1.2325535207631664, "learning_rate": 9.675152086694226e-06, "loss": 0.1851, "step": 4865 }, { "epoch": 0.1419569403115701, "grad_norm": 1.0524729024408745, "learning_rate": 9.67498455602299e-06, "loss": 0.183, "step": 4866 }, { "epoch": 0.14198611354221366, "grad_norm": 1.0442032804423818, "learning_rate": 9.674816983614611e-06, "loss": 0.194, "step": 4867 }, { "epoch": 0.1420152867728572, "grad_norm": 0.958524240553488, "learning_rate": 9.67464936947058e-06, "loss": 0.1929, "step": 4868 }, { "epoch": 0.1420444600035008, "grad_norm": 0.9250214990383446, "learning_rate": 9.674481713592398e-06, "loss": 0.1904, "step": 4869 }, { "epoch": 0.14207363323414435, "grad_norm": 0.8600711976437861, "learning_rate": 9.674314015981557e-06, "loss": 0.2122, "step": 4870 }, { "epoch": 0.1421028064647879, "grad_norm": 0.8270568012751615, "learning_rate": 9.674146276639556e-06, "loss": 0.1628, "step": 4871 }, { "epoch": 0.14213197969543148, "grad_norm": 0.891043897789974, "learning_rate": 9.673978495567895e-06, "loss": 0.1797, "step": 4872 }, { "epoch": 0.14216115292607504, "grad_norm": 0.9007499180467402, "learning_rate": 9.673810672768068e-06, "loss": 0.1758, "step": 4873 }, { "epoch": 0.1421903261567186, "grad_norm": 0.9840940820256868, "learning_rate": 9.673642808241574e-06, "loss": 0.1674, "step": 4874 }, { "epoch": 0.14221949938736217, "grad_norm": 1.0706059805118107, "learning_rate": 9.673474901989916e-06, "loss": 0.1663, "step": 4875 }, { "epoch": 0.14224867261800572, "grad_norm": 0.7539907717704584, "learning_rate": 9.673306954014588e-06, "loss": 0.1641, "step": 4876 }, { "epoch": 0.14227784584864928, "grad_norm": 0.9564622588789834, "learning_rate": 9.673138964317091e-06, "loss": 0.1917, "step": 4877 }, { "epoch": 0.14230701907929283, "grad_norm": 1.072023018231923, "learning_rate": 9.672970932898923e-06, "loss": 0.1669, "step": 4878 }, { "epoch": 0.1423361923099364, "grad_norm": 0.876912322391861, "learning_rate": 9.67280285976159e-06, "loss": 0.1719, "step": 4879 }, { "epoch": 0.14236536554057996, "grad_norm": 1.0984522305127342, "learning_rate": 9.672634744906585e-06, "loss": 0.1677, "step": 4880 }, { "epoch": 0.14239453877122352, "grad_norm": 0.9969050836869188, "learning_rate": 9.672466588335414e-06, "loss": 0.1843, "step": 4881 }, { "epoch": 0.1424237120018671, "grad_norm": 0.8220361004029493, "learning_rate": 9.672298390049577e-06, "loss": 0.1782, "step": 4882 }, { "epoch": 0.14245288523251065, "grad_norm": 1.0229843634298637, "learning_rate": 9.672130150050576e-06, "loss": 0.1809, "step": 4883 }, { "epoch": 0.1424820584631542, "grad_norm": 0.970871248674842, "learning_rate": 9.67196186833991e-06, "loss": 0.1731, "step": 4884 }, { "epoch": 0.14251123169379776, "grad_norm": 1.1818675378503378, "learning_rate": 9.671793544919086e-06, "loss": 0.1873, "step": 4885 }, { "epoch": 0.14254040492444134, "grad_norm": 0.9214167530946143, "learning_rate": 9.671625179789603e-06, "loss": 0.1758, "step": 4886 }, { "epoch": 0.1425695781550849, "grad_norm": 0.807857849013604, "learning_rate": 9.671456772952967e-06, "loss": 0.1707, "step": 4887 }, { "epoch": 0.14259875138572844, "grad_norm": 1.1217031758878113, "learning_rate": 9.671288324410678e-06, "loss": 0.1761, "step": 4888 }, { "epoch": 0.14262792461637203, "grad_norm": 0.7196737452829093, "learning_rate": 9.671119834164245e-06, "loss": 0.1554, "step": 4889 }, { "epoch": 0.14265709784701558, "grad_norm": 0.8343552659400554, "learning_rate": 9.670951302215166e-06, "loss": 0.2039, "step": 4890 }, { "epoch": 0.14268627107765913, "grad_norm": 0.967408695123973, "learning_rate": 9.67078272856495e-06, "loss": 0.1932, "step": 4891 }, { "epoch": 0.1427154443083027, "grad_norm": 0.892280770306024, "learning_rate": 9.670614113215102e-06, "loss": 0.1958, "step": 4892 }, { "epoch": 0.14274461753894627, "grad_norm": 0.8341263048425164, "learning_rate": 9.670445456167125e-06, "loss": 0.177, "step": 4893 }, { "epoch": 0.14277379076958982, "grad_norm": 0.8470140746338892, "learning_rate": 9.670276757422525e-06, "loss": 0.1557, "step": 4894 }, { "epoch": 0.14280296400023337, "grad_norm": 0.7642882308535696, "learning_rate": 9.670108016982812e-06, "loss": 0.1607, "step": 4895 }, { "epoch": 0.14283213723087695, "grad_norm": 0.9228984753411509, "learning_rate": 9.669939234849485e-06, "loss": 0.1648, "step": 4896 }, { "epoch": 0.1428613104615205, "grad_norm": 0.9083247188440113, "learning_rate": 9.66977041102406e-06, "loss": 0.1886, "step": 4897 }, { "epoch": 0.14289048369216406, "grad_norm": 0.9890740184703097, "learning_rate": 9.669601545508037e-06, "loss": 0.1827, "step": 4898 }, { "epoch": 0.14291965692280764, "grad_norm": 0.952680685214979, "learning_rate": 9.669432638302926e-06, "loss": 0.1732, "step": 4899 }, { "epoch": 0.1429488301534512, "grad_norm": 0.9701128725983815, "learning_rate": 9.669263689410236e-06, "loss": 0.1736, "step": 4900 }, { "epoch": 0.14297800338409475, "grad_norm": 0.9325190999545404, "learning_rate": 9.669094698831474e-06, "loss": 0.1794, "step": 4901 }, { "epoch": 0.14300717661473833, "grad_norm": 0.9819126438419686, "learning_rate": 9.66892566656815e-06, "loss": 0.1813, "step": 4902 }, { "epoch": 0.14303634984538188, "grad_norm": 0.7815883657284478, "learning_rate": 9.668756592621771e-06, "loss": 0.1745, "step": 4903 }, { "epoch": 0.14306552307602544, "grad_norm": 1.0241138247997523, "learning_rate": 9.668587476993847e-06, "loss": 0.1786, "step": 4904 }, { "epoch": 0.143094696306669, "grad_norm": 0.8584384943611965, "learning_rate": 9.66841831968589e-06, "loss": 0.1738, "step": 4905 }, { "epoch": 0.14312386953731257, "grad_norm": 0.8130231945414756, "learning_rate": 9.668249120699409e-06, "loss": 0.1784, "step": 4906 }, { "epoch": 0.14315304276795612, "grad_norm": 0.9657054584040975, "learning_rate": 9.668079880035911e-06, "loss": 0.1666, "step": 4907 }, { "epoch": 0.14318221599859968, "grad_norm": 0.7926088134825279, "learning_rate": 9.667910597696914e-06, "loss": 0.1716, "step": 4908 }, { "epoch": 0.14321138922924326, "grad_norm": 0.7750102848427548, "learning_rate": 9.667741273683924e-06, "loss": 0.172, "step": 4909 }, { "epoch": 0.1432405624598868, "grad_norm": 0.945000267560802, "learning_rate": 9.667571907998455e-06, "loss": 0.2008, "step": 4910 }, { "epoch": 0.14326973569053036, "grad_norm": 0.8321070111201656, "learning_rate": 9.667402500642017e-06, "loss": 0.1951, "step": 4911 }, { "epoch": 0.14329890892117392, "grad_norm": 0.7348755939022059, "learning_rate": 9.667233051616124e-06, "loss": 0.1851, "step": 4912 }, { "epoch": 0.1433280821518175, "grad_norm": 0.998817677821531, "learning_rate": 9.66706356092229e-06, "loss": 0.211, "step": 4913 }, { "epoch": 0.14335725538246105, "grad_norm": 1.2869541877033552, "learning_rate": 9.666894028562025e-06, "loss": 0.1577, "step": 4914 }, { "epoch": 0.1433864286131046, "grad_norm": 0.810689609054851, "learning_rate": 9.666724454536844e-06, "loss": 0.1902, "step": 4915 }, { "epoch": 0.14341560184374819, "grad_norm": 0.7782743730720433, "learning_rate": 9.666554838848262e-06, "loss": 0.1869, "step": 4916 }, { "epoch": 0.14344477507439174, "grad_norm": 0.7901143669137367, "learning_rate": 9.666385181497793e-06, "loss": 0.1727, "step": 4917 }, { "epoch": 0.1434739483050353, "grad_norm": 0.8631548557260729, "learning_rate": 9.66621548248695e-06, "loss": 0.1963, "step": 4918 }, { "epoch": 0.14350312153567887, "grad_norm": 0.9935157262006136, "learning_rate": 9.666045741817249e-06, "loss": 0.2079, "step": 4919 }, { "epoch": 0.14353229476632243, "grad_norm": 0.9220926059322345, "learning_rate": 9.665875959490205e-06, "loss": 0.1619, "step": 4920 }, { "epoch": 0.14356146799696598, "grad_norm": 0.8758027143288649, "learning_rate": 9.665706135507336e-06, "loss": 0.1579, "step": 4921 }, { "epoch": 0.14359064122760953, "grad_norm": 1.0016470592695905, "learning_rate": 9.665536269870155e-06, "loss": 0.171, "step": 4922 }, { "epoch": 0.14361981445825311, "grad_norm": 0.8461107158112773, "learning_rate": 9.665366362580179e-06, "loss": 0.1906, "step": 4923 }, { "epoch": 0.14364898768889667, "grad_norm": 0.8362304161899768, "learning_rate": 9.665196413638929e-06, "loss": 0.1687, "step": 4924 }, { "epoch": 0.14367816091954022, "grad_norm": 0.9839200656933649, "learning_rate": 9.665026423047916e-06, "loss": 0.1795, "step": 4925 }, { "epoch": 0.1437073341501838, "grad_norm": 0.7722141779186894, "learning_rate": 9.664856390808661e-06, "loss": 0.1687, "step": 4926 }, { "epoch": 0.14373650738082736, "grad_norm": 0.9790159364229566, "learning_rate": 9.664686316922684e-06, "loss": 0.2082, "step": 4927 }, { "epoch": 0.1437656806114709, "grad_norm": 0.7355735603486906, "learning_rate": 9.664516201391501e-06, "loss": 0.1859, "step": 4928 }, { "epoch": 0.1437948538421145, "grad_norm": 0.7096188215676326, "learning_rate": 9.664346044216628e-06, "loss": 0.1685, "step": 4929 }, { "epoch": 0.14382402707275804, "grad_norm": 0.7527802600599048, "learning_rate": 9.66417584539959e-06, "loss": 0.1758, "step": 4930 }, { "epoch": 0.1438532003034016, "grad_norm": 0.9988017797907917, "learning_rate": 9.664005604941901e-06, "loss": 0.1843, "step": 4931 }, { "epoch": 0.14388237353404515, "grad_norm": 0.8848600437266344, "learning_rate": 9.663835322845086e-06, "loss": 0.1741, "step": 4932 }, { "epoch": 0.14391154676468873, "grad_norm": 0.8896283950204757, "learning_rate": 9.66366499911066e-06, "loss": 0.1653, "step": 4933 }, { "epoch": 0.14394071999533228, "grad_norm": 0.9126068563691186, "learning_rate": 9.663494633740148e-06, "loss": 0.1836, "step": 4934 }, { "epoch": 0.14396989322597584, "grad_norm": 0.9289381424332497, "learning_rate": 9.663324226735069e-06, "loss": 0.1843, "step": 4935 }, { "epoch": 0.14399906645661942, "grad_norm": 0.9131368057191285, "learning_rate": 9.663153778096943e-06, "loss": 0.1585, "step": 4936 }, { "epoch": 0.14402823968726297, "grad_norm": 0.9669098843130987, "learning_rate": 9.662983287827295e-06, "loss": 0.1739, "step": 4937 }, { "epoch": 0.14405741291790652, "grad_norm": 0.9790305801935686, "learning_rate": 9.662812755927645e-06, "loss": 0.1903, "step": 4938 }, { "epoch": 0.14408658614855008, "grad_norm": 0.899433487635994, "learning_rate": 9.662642182399514e-06, "loss": 0.1882, "step": 4939 }, { "epoch": 0.14411575937919366, "grad_norm": 1.0199047256431513, "learning_rate": 9.662471567244428e-06, "loss": 0.1937, "step": 4940 }, { "epoch": 0.1441449326098372, "grad_norm": 0.8457275564071992, "learning_rate": 9.662300910463908e-06, "loss": 0.1606, "step": 4941 }, { "epoch": 0.14417410584048077, "grad_norm": 1.0844181886077786, "learning_rate": 9.662130212059481e-06, "loss": 0.1799, "step": 4942 }, { "epoch": 0.14420327907112435, "grad_norm": 1.225013607112546, "learning_rate": 9.661959472032667e-06, "loss": 0.1576, "step": 4943 }, { "epoch": 0.1442324523017679, "grad_norm": 0.88643665251535, "learning_rate": 9.66178869038499e-06, "loss": 0.183, "step": 4944 }, { "epoch": 0.14426162553241145, "grad_norm": 1.0884878450603226, "learning_rate": 9.661617867117978e-06, "loss": 0.2036, "step": 4945 }, { "epoch": 0.14429079876305503, "grad_norm": 1.194654672655845, "learning_rate": 9.661447002233156e-06, "loss": 0.1587, "step": 4946 }, { "epoch": 0.1443199719936986, "grad_norm": 0.9949113935387659, "learning_rate": 9.661276095732046e-06, "loss": 0.1819, "step": 4947 }, { "epoch": 0.14434914522434214, "grad_norm": 0.9433703823494165, "learning_rate": 9.661105147616177e-06, "loss": 0.1672, "step": 4948 }, { "epoch": 0.1443783184549857, "grad_norm": 1.1335559624318436, "learning_rate": 9.660934157887072e-06, "loss": 0.1802, "step": 4949 }, { "epoch": 0.14440749168562927, "grad_norm": 1.0942795572129844, "learning_rate": 9.66076312654626e-06, "loss": 0.1969, "step": 4950 }, { "epoch": 0.14443666491627283, "grad_norm": 0.7748508902390537, "learning_rate": 9.660592053595268e-06, "loss": 0.1666, "step": 4951 }, { "epoch": 0.14446583814691638, "grad_norm": 1.1553100246260095, "learning_rate": 9.660420939035624e-06, "loss": 0.1774, "step": 4952 }, { "epoch": 0.14449501137755996, "grad_norm": 0.9604119024938216, "learning_rate": 9.660249782868853e-06, "loss": 0.2019, "step": 4953 }, { "epoch": 0.14452418460820352, "grad_norm": 0.9544521640614683, "learning_rate": 9.660078585096484e-06, "loss": 0.1765, "step": 4954 }, { "epoch": 0.14455335783884707, "grad_norm": 1.1864981680288276, "learning_rate": 9.659907345720046e-06, "loss": 0.176, "step": 4955 }, { "epoch": 0.14458253106949062, "grad_norm": 0.8692202165735067, "learning_rate": 9.659736064741068e-06, "loss": 0.2009, "step": 4956 }, { "epoch": 0.1446117043001342, "grad_norm": 0.646865467793404, "learning_rate": 9.65956474216108e-06, "loss": 0.1424, "step": 4957 }, { "epoch": 0.14464087753077776, "grad_norm": 1.0337038399625629, "learning_rate": 9.659393377981609e-06, "loss": 0.1845, "step": 4958 }, { "epoch": 0.1446700507614213, "grad_norm": 1.0443727583431346, "learning_rate": 9.659221972204186e-06, "loss": 0.1727, "step": 4959 }, { "epoch": 0.1446992239920649, "grad_norm": 0.8110179707939673, "learning_rate": 9.65905052483034e-06, "loss": 0.1673, "step": 4960 }, { "epoch": 0.14472839722270844, "grad_norm": 0.7813388231834524, "learning_rate": 9.658879035861606e-06, "loss": 0.1705, "step": 4961 }, { "epoch": 0.144757570453352, "grad_norm": 0.9773009102407664, "learning_rate": 9.65870750529951e-06, "loss": 0.1749, "step": 4962 }, { "epoch": 0.14478674368399558, "grad_norm": 1.0953635856464539, "learning_rate": 9.658535933145588e-06, "loss": 0.2042, "step": 4963 }, { "epoch": 0.14481591691463913, "grad_norm": 0.8310078695169273, "learning_rate": 9.658364319401368e-06, "loss": 0.1684, "step": 4964 }, { "epoch": 0.14484509014528268, "grad_norm": 0.8966136224520148, "learning_rate": 9.658192664068382e-06, "loss": 0.1942, "step": 4965 }, { "epoch": 0.14487426337592624, "grad_norm": 0.9646429390948612, "learning_rate": 9.658020967148166e-06, "loss": 0.1953, "step": 4966 }, { "epoch": 0.14490343660656982, "grad_norm": 0.9060359924917519, "learning_rate": 9.65784922864225e-06, "loss": 0.188, "step": 4967 }, { "epoch": 0.14493260983721337, "grad_norm": 1.21107510751277, "learning_rate": 9.657677448552167e-06, "loss": 0.1605, "step": 4968 }, { "epoch": 0.14496178306785693, "grad_norm": 0.8668860283561532, "learning_rate": 9.657505626879452e-06, "loss": 0.1642, "step": 4969 }, { "epoch": 0.1449909562985005, "grad_norm": 1.0359011983742257, "learning_rate": 9.65733376362564e-06, "loss": 0.1879, "step": 4970 }, { "epoch": 0.14502012952914406, "grad_norm": 0.7821949860670446, "learning_rate": 9.657161858792263e-06, "loss": 0.1684, "step": 4971 }, { "epoch": 0.1450493027597876, "grad_norm": 0.7176589228105676, "learning_rate": 9.656989912380857e-06, "loss": 0.1677, "step": 4972 }, { "epoch": 0.1450784759904312, "grad_norm": 0.8690548931633352, "learning_rate": 9.656817924392958e-06, "loss": 0.1957, "step": 4973 }, { "epoch": 0.14510764922107475, "grad_norm": 0.8615092289222902, "learning_rate": 9.656645894830098e-06, "loss": 0.2092, "step": 4974 }, { "epoch": 0.1451368224517183, "grad_norm": 0.9381894088351447, "learning_rate": 9.656473823693814e-06, "loss": 0.1725, "step": 4975 }, { "epoch": 0.14516599568236185, "grad_norm": 0.7216487457606322, "learning_rate": 9.656301710985646e-06, "loss": 0.1581, "step": 4976 }, { "epoch": 0.14519516891300543, "grad_norm": 0.9951387042927817, "learning_rate": 9.656129556707127e-06, "loss": 0.1655, "step": 4977 }, { "epoch": 0.145224342143649, "grad_norm": 0.6943663219266645, "learning_rate": 9.655957360859796e-06, "loss": 0.1642, "step": 4978 }, { "epoch": 0.14525351537429254, "grad_norm": 0.745824175529699, "learning_rate": 9.655785123445186e-06, "loss": 0.163, "step": 4979 }, { "epoch": 0.14528268860493612, "grad_norm": 0.7571817541508856, "learning_rate": 9.65561284446484e-06, "loss": 0.1852, "step": 4980 }, { "epoch": 0.14531186183557968, "grad_norm": 0.8569921722541516, "learning_rate": 9.655440523920295e-06, "loss": 0.1658, "step": 4981 }, { "epoch": 0.14534103506622323, "grad_norm": 0.7833330263188776, "learning_rate": 9.655268161813088e-06, "loss": 0.1627, "step": 4982 }, { "epoch": 0.14537020829686678, "grad_norm": 0.7938495250166756, "learning_rate": 9.655095758144757e-06, "loss": 0.1823, "step": 4983 }, { "epoch": 0.14539938152751036, "grad_norm": 0.8190841000349872, "learning_rate": 9.654923312916842e-06, "loss": 0.1646, "step": 4984 }, { "epoch": 0.14542855475815392, "grad_norm": 1.2172420550336633, "learning_rate": 9.654750826130882e-06, "loss": 0.1607, "step": 4985 }, { "epoch": 0.14545772798879747, "grad_norm": 0.8853725881410209, "learning_rate": 9.654578297788421e-06, "loss": 0.161, "step": 4986 }, { "epoch": 0.14548690121944105, "grad_norm": 0.83162721813925, "learning_rate": 9.654405727890994e-06, "loss": 0.1581, "step": 4987 }, { "epoch": 0.1455160744500846, "grad_norm": 1.23119552734588, "learning_rate": 9.654233116440144e-06, "loss": 0.1761, "step": 4988 }, { "epoch": 0.14554524768072816, "grad_norm": 0.8811144445160094, "learning_rate": 9.654060463437411e-06, "loss": 0.176, "step": 4989 }, { "epoch": 0.14557442091137174, "grad_norm": 0.7174372214501783, "learning_rate": 9.65388776888434e-06, "loss": 0.1919, "step": 4990 }, { "epoch": 0.1456035941420153, "grad_norm": 0.99449911960273, "learning_rate": 9.653715032782467e-06, "loss": 0.1632, "step": 4991 }, { "epoch": 0.14563276737265884, "grad_norm": 0.8985792692934712, "learning_rate": 9.653542255133339e-06, "loss": 0.2133, "step": 4992 }, { "epoch": 0.1456619406033024, "grad_norm": 0.8439128817625573, "learning_rate": 9.653369435938495e-06, "loss": 0.1738, "step": 4993 }, { "epoch": 0.14569111383394598, "grad_norm": 0.9013739350288998, "learning_rate": 9.65319657519948e-06, "loss": 0.2177, "step": 4994 }, { "epoch": 0.14572028706458953, "grad_norm": 1.1169097052433632, "learning_rate": 9.653023672917839e-06, "loss": 0.1606, "step": 4995 }, { "epoch": 0.14574946029523309, "grad_norm": 0.8952326065505679, "learning_rate": 9.65285072909511e-06, "loss": 0.1931, "step": 4996 }, { "epoch": 0.14577863352587667, "grad_norm": 0.8400266765729321, "learning_rate": 9.652677743732843e-06, "loss": 0.1632, "step": 4997 }, { "epoch": 0.14580780675652022, "grad_norm": 1.0135250732946663, "learning_rate": 9.652504716832578e-06, "loss": 0.1917, "step": 4998 }, { "epoch": 0.14583697998716377, "grad_norm": 0.8595442628576205, "learning_rate": 9.652331648395863e-06, "loss": 0.1656, "step": 4999 }, { "epoch": 0.14586615321780733, "grad_norm": 0.9970121540001996, "learning_rate": 9.65215853842424e-06, "loss": 0.1969, "step": 5000 }, { "epoch": 0.1458953264484509, "grad_norm": 0.9562353762307105, "learning_rate": 9.651985386919257e-06, "loss": 0.174, "step": 5001 }, { "epoch": 0.14592449967909446, "grad_norm": 0.8746215172434272, "learning_rate": 9.65181219388246e-06, "loss": 0.1831, "step": 5002 }, { "epoch": 0.145953672909738, "grad_norm": 0.834023705470222, "learning_rate": 9.651638959315392e-06, "loss": 0.1906, "step": 5003 }, { "epoch": 0.1459828461403816, "grad_norm": 0.8542850057204772, "learning_rate": 9.651465683219603e-06, "loss": 0.1756, "step": 5004 }, { "epoch": 0.14601201937102515, "grad_norm": 0.9872404766398125, "learning_rate": 9.65129236559664e-06, "loss": 0.1664, "step": 5005 }, { "epoch": 0.1460411926016687, "grad_norm": 0.9819310883974036, "learning_rate": 9.651119006448047e-06, "loss": 0.1721, "step": 5006 }, { "epoch": 0.14607036583231228, "grad_norm": 0.7927647989876684, "learning_rate": 9.650945605775374e-06, "loss": 0.1691, "step": 5007 }, { "epoch": 0.14609953906295584, "grad_norm": 0.8871799854751007, "learning_rate": 9.650772163580171e-06, "loss": 0.1719, "step": 5008 }, { "epoch": 0.1461287122935994, "grad_norm": 1.0809928484274853, "learning_rate": 9.650598679863983e-06, "loss": 0.2095, "step": 5009 }, { "epoch": 0.14615788552424294, "grad_norm": 0.7793813813735866, "learning_rate": 9.65042515462836e-06, "loss": 0.1676, "step": 5010 }, { "epoch": 0.14618705875488652, "grad_norm": 1.0455750202298337, "learning_rate": 9.65025158787485e-06, "loss": 0.1879, "step": 5011 }, { "epoch": 0.14621623198553008, "grad_norm": 0.8515921729199322, "learning_rate": 9.650077979605008e-06, "loss": 0.1956, "step": 5012 }, { "epoch": 0.14624540521617363, "grad_norm": 0.9358857516516348, "learning_rate": 9.649904329820377e-06, "loss": 0.1838, "step": 5013 }, { "epoch": 0.1462745784468172, "grad_norm": 1.471933046679515, "learning_rate": 9.64973063852251e-06, "loss": 0.1612, "step": 5014 }, { "epoch": 0.14630375167746076, "grad_norm": 0.7511996901125207, "learning_rate": 9.649556905712958e-06, "loss": 0.1826, "step": 5015 }, { "epoch": 0.14633292490810432, "grad_norm": 1.0837699750013672, "learning_rate": 9.649383131393273e-06, "loss": 0.1909, "step": 5016 }, { "epoch": 0.1463620981387479, "grad_norm": 0.9137538164715912, "learning_rate": 9.649209315565005e-06, "loss": 0.1615, "step": 5017 }, { "epoch": 0.14639127136939145, "grad_norm": 0.9160622542964243, "learning_rate": 9.649035458229706e-06, "loss": 0.2022, "step": 5018 }, { "epoch": 0.146420444600035, "grad_norm": 0.9417295540153825, "learning_rate": 9.648861559388927e-06, "loss": 0.1823, "step": 5019 }, { "epoch": 0.14644961783067856, "grad_norm": 0.9319882259605891, "learning_rate": 9.648687619044222e-06, "loss": 0.2048, "step": 5020 }, { "epoch": 0.14647879106132214, "grad_norm": 0.90146406917979, "learning_rate": 9.648513637197145e-06, "loss": 0.1749, "step": 5021 }, { "epoch": 0.1465079642919657, "grad_norm": 1.0829929748917295, "learning_rate": 9.648339613849246e-06, "loss": 0.1848, "step": 5022 }, { "epoch": 0.14653713752260925, "grad_norm": 1.0302658800200448, "learning_rate": 9.648165549002082e-06, "loss": 0.19, "step": 5023 }, { "epoch": 0.14656631075325283, "grad_norm": 0.911838579645531, "learning_rate": 9.647991442657206e-06, "loss": 0.1965, "step": 5024 }, { "epoch": 0.14659548398389638, "grad_norm": 1.0274456874566988, "learning_rate": 9.647817294816171e-06, "loss": 0.1928, "step": 5025 }, { "epoch": 0.14662465721453993, "grad_norm": 0.9573276296487475, "learning_rate": 9.647643105480533e-06, "loss": 0.1619, "step": 5026 }, { "epoch": 0.1466538304451835, "grad_norm": 0.8000526587833882, "learning_rate": 9.647468874651847e-06, "loss": 0.1878, "step": 5027 }, { "epoch": 0.14668300367582707, "grad_norm": 1.0577708390316358, "learning_rate": 9.64729460233167e-06, "loss": 0.1555, "step": 5028 }, { "epoch": 0.14671217690647062, "grad_norm": 1.000180504334239, "learning_rate": 9.647120288521552e-06, "loss": 0.1695, "step": 5029 }, { "epoch": 0.14674135013711417, "grad_norm": 0.862740318043579, "learning_rate": 9.646945933223058e-06, "loss": 0.2048, "step": 5030 }, { "epoch": 0.14677052336775775, "grad_norm": 0.9669313044036966, "learning_rate": 9.646771536437737e-06, "loss": 0.1508, "step": 5031 }, { "epoch": 0.1467996965984013, "grad_norm": 0.9811796630238051, "learning_rate": 9.64659709816715e-06, "loss": 0.1808, "step": 5032 }, { "epoch": 0.14682886982904486, "grad_norm": 0.7942673025650904, "learning_rate": 9.646422618412853e-06, "loss": 0.1899, "step": 5033 }, { "epoch": 0.14685804305968844, "grad_norm": 0.8135131995389027, "learning_rate": 9.646248097176404e-06, "loss": 0.1431, "step": 5034 }, { "epoch": 0.146887216290332, "grad_norm": 0.7968946027923035, "learning_rate": 9.646073534459362e-06, "loss": 0.1833, "step": 5035 }, { "epoch": 0.14691638952097555, "grad_norm": 0.7283403021107399, "learning_rate": 9.645898930263284e-06, "loss": 0.1513, "step": 5036 }, { "epoch": 0.1469455627516191, "grad_norm": 0.876018090340404, "learning_rate": 9.64572428458973e-06, "loss": 0.1943, "step": 5037 }, { "epoch": 0.14697473598226268, "grad_norm": 0.942450992235393, "learning_rate": 9.645549597440258e-06, "loss": 0.193, "step": 5038 }, { "epoch": 0.14700390921290624, "grad_norm": 1.3212259219432179, "learning_rate": 9.645374868816427e-06, "loss": 0.1722, "step": 5039 }, { "epoch": 0.1470330824435498, "grad_norm": 0.9321174895484561, "learning_rate": 9.6452000987198e-06, "loss": 0.1932, "step": 5040 }, { "epoch": 0.14706225567419337, "grad_norm": 0.8246588295826806, "learning_rate": 9.645025287151935e-06, "loss": 0.1897, "step": 5041 }, { "epoch": 0.14709142890483692, "grad_norm": 1.1071089887580734, "learning_rate": 9.644850434114392e-06, "loss": 0.1841, "step": 5042 }, { "epoch": 0.14712060213548048, "grad_norm": 0.7533652903409431, "learning_rate": 9.644675539608735e-06, "loss": 0.1802, "step": 5043 }, { "epoch": 0.14714977536612406, "grad_norm": 0.7793821199418741, "learning_rate": 9.644500603636521e-06, "loss": 0.192, "step": 5044 }, { "epoch": 0.1471789485967676, "grad_norm": 1.0249558726678674, "learning_rate": 9.644325626199315e-06, "loss": 0.1674, "step": 5045 }, { "epoch": 0.14720812182741116, "grad_norm": 0.9035026720752898, "learning_rate": 9.64415060729868e-06, "loss": 0.1565, "step": 5046 }, { "epoch": 0.14723729505805472, "grad_norm": 0.9098129543276077, "learning_rate": 9.643975546936177e-06, "loss": 0.2052, "step": 5047 }, { "epoch": 0.1472664682886983, "grad_norm": 1.145881702321572, "learning_rate": 9.64380044511337e-06, "loss": 0.2131, "step": 5048 }, { "epoch": 0.14729564151934185, "grad_norm": 1.0054627880130236, "learning_rate": 9.643625301831819e-06, "loss": 0.1987, "step": 5049 }, { "epoch": 0.1473248147499854, "grad_norm": 0.8709340744090021, "learning_rate": 9.64345011709309e-06, "loss": 0.1869, "step": 5050 }, { "epoch": 0.147353987980629, "grad_norm": 0.8013928944623968, "learning_rate": 9.643274890898746e-06, "loss": 0.1622, "step": 5051 }, { "epoch": 0.14738316121127254, "grad_norm": 1.1178847635819615, "learning_rate": 9.643099623250354e-06, "loss": 0.167, "step": 5052 }, { "epoch": 0.1474123344419161, "grad_norm": 0.8633938310892648, "learning_rate": 9.642924314149476e-06, "loss": 0.1725, "step": 5053 }, { "epoch": 0.14744150767255965, "grad_norm": 0.8510786116023561, "learning_rate": 9.642748963597679e-06, "loss": 0.1859, "step": 5054 }, { "epoch": 0.14747068090320323, "grad_norm": 0.8381533064398632, "learning_rate": 9.642573571596526e-06, "loss": 0.1587, "step": 5055 }, { "epoch": 0.14749985413384678, "grad_norm": 0.8260893979067963, "learning_rate": 9.642398138147586e-06, "loss": 0.1804, "step": 5056 }, { "epoch": 0.14752902736449033, "grad_norm": 1.0002854630513247, "learning_rate": 9.642222663252423e-06, "loss": 0.1834, "step": 5057 }, { "epoch": 0.14755820059513391, "grad_norm": 0.7813155893855992, "learning_rate": 9.642047146912605e-06, "loss": 0.1655, "step": 5058 }, { "epoch": 0.14758737382577747, "grad_norm": 1.0238358688806195, "learning_rate": 9.641871589129696e-06, "loss": 0.1804, "step": 5059 }, { "epoch": 0.14761654705642102, "grad_norm": 1.0137480806042538, "learning_rate": 9.641695989905268e-06, "loss": 0.1947, "step": 5060 }, { "epoch": 0.1476457202870646, "grad_norm": 0.8568766580525737, "learning_rate": 9.641520349240885e-06, "loss": 0.1635, "step": 5061 }, { "epoch": 0.14767489351770816, "grad_norm": 1.0123927327613185, "learning_rate": 9.641344667138117e-06, "loss": 0.1587, "step": 5062 }, { "epoch": 0.1477040667483517, "grad_norm": 0.7770446009954005, "learning_rate": 9.641168943598531e-06, "loss": 0.1828, "step": 5063 }, { "epoch": 0.14773323997899526, "grad_norm": 1.0421396794898743, "learning_rate": 9.640993178623698e-06, "loss": 0.2229, "step": 5064 }, { "epoch": 0.14776241320963884, "grad_norm": 0.945971704493877, "learning_rate": 9.640817372215184e-06, "loss": 0.1589, "step": 5065 }, { "epoch": 0.1477915864402824, "grad_norm": 0.8100496290166211, "learning_rate": 9.640641524374561e-06, "loss": 0.167, "step": 5066 }, { "epoch": 0.14782075967092595, "grad_norm": 0.9022934117122732, "learning_rate": 9.6404656351034e-06, "loss": 0.2038, "step": 5067 }, { "epoch": 0.14784993290156953, "grad_norm": 0.940857312978776, "learning_rate": 9.640289704403268e-06, "loss": 0.1746, "step": 5068 }, { "epoch": 0.14787910613221308, "grad_norm": 1.1533203706367532, "learning_rate": 9.640113732275736e-06, "loss": 0.1631, "step": 5069 }, { "epoch": 0.14790827936285664, "grad_norm": 0.726215537350509, "learning_rate": 9.639937718722379e-06, "loss": 0.1671, "step": 5070 }, { "epoch": 0.1479374525935002, "grad_norm": 0.94110480875373, "learning_rate": 9.639761663744764e-06, "loss": 0.1644, "step": 5071 }, { "epoch": 0.14796662582414377, "grad_norm": 0.8060087313547423, "learning_rate": 9.639585567344464e-06, "loss": 0.1642, "step": 5072 }, { "epoch": 0.14799579905478732, "grad_norm": 0.8795655083349191, "learning_rate": 9.639409429523053e-06, "loss": 0.1669, "step": 5073 }, { "epoch": 0.14802497228543088, "grad_norm": 0.8783713990715803, "learning_rate": 9.639233250282101e-06, "loss": 0.1813, "step": 5074 }, { "epoch": 0.14805414551607446, "grad_norm": 0.923042653322279, "learning_rate": 9.639057029623183e-06, "loss": 0.1919, "step": 5075 }, { "epoch": 0.148083318746718, "grad_norm": 0.8036545791712385, "learning_rate": 9.63888076754787e-06, "loss": 0.1818, "step": 5076 }, { "epoch": 0.14811249197736157, "grad_norm": 0.8408184672385143, "learning_rate": 9.63870446405774e-06, "loss": 0.1742, "step": 5077 }, { "epoch": 0.14814166520800515, "grad_norm": 0.8302103753326288, "learning_rate": 9.63852811915436e-06, "loss": 0.1864, "step": 5078 }, { "epoch": 0.1481708384386487, "grad_norm": 0.987029581812354, "learning_rate": 9.638351732839311e-06, "loss": 0.186, "step": 5079 }, { "epoch": 0.14820001166929225, "grad_norm": 0.7762550095069394, "learning_rate": 9.638175305114163e-06, "loss": 0.189, "step": 5080 }, { "epoch": 0.1482291848999358, "grad_norm": 0.948031984001899, "learning_rate": 9.637998835980493e-06, "loss": 0.1626, "step": 5081 }, { "epoch": 0.1482583581305794, "grad_norm": 0.8432231161022153, "learning_rate": 9.637822325439878e-06, "loss": 0.1575, "step": 5082 }, { "epoch": 0.14828753136122294, "grad_norm": 0.8277721859028909, "learning_rate": 9.637645773493893e-06, "loss": 0.1795, "step": 5083 }, { "epoch": 0.1483167045918665, "grad_norm": 0.8421227542506831, "learning_rate": 9.637469180144112e-06, "loss": 0.1863, "step": 5084 }, { "epoch": 0.14834587782251007, "grad_norm": 0.9640066115636375, "learning_rate": 9.637292545392114e-06, "loss": 0.1518, "step": 5085 }, { "epoch": 0.14837505105315363, "grad_norm": 0.7376568881964116, "learning_rate": 9.637115869239475e-06, "loss": 0.1631, "step": 5086 }, { "epoch": 0.14840422428379718, "grad_norm": 0.894572818155894, "learning_rate": 9.636939151687772e-06, "loss": 0.1839, "step": 5087 }, { "epoch": 0.14843339751444076, "grad_norm": 0.8624785688237763, "learning_rate": 9.636762392738583e-06, "loss": 0.1718, "step": 5088 }, { "epoch": 0.14846257074508432, "grad_norm": 0.8963683902542617, "learning_rate": 9.636585592393489e-06, "loss": 0.1636, "step": 5089 }, { "epoch": 0.14849174397572787, "grad_norm": 0.7614672069464431, "learning_rate": 9.636408750654062e-06, "loss": 0.1761, "step": 5090 }, { "epoch": 0.14852091720637142, "grad_norm": 0.8099578606723454, "learning_rate": 9.636231867521886e-06, "loss": 0.1641, "step": 5091 }, { "epoch": 0.148550090437015, "grad_norm": 0.7303341978527088, "learning_rate": 9.636054942998538e-06, "loss": 0.1641, "step": 5092 }, { "epoch": 0.14857926366765856, "grad_norm": 0.9262007731517238, "learning_rate": 9.635877977085599e-06, "loss": 0.1661, "step": 5093 }, { "epoch": 0.1486084368983021, "grad_norm": 0.8515555470352184, "learning_rate": 9.635700969784648e-06, "loss": 0.1975, "step": 5094 }, { "epoch": 0.1486376101289457, "grad_norm": 0.7646387338569562, "learning_rate": 9.635523921097265e-06, "loss": 0.1685, "step": 5095 }, { "epoch": 0.14866678335958924, "grad_norm": 0.9120152936496411, "learning_rate": 9.635346831025032e-06, "loss": 0.1685, "step": 5096 }, { "epoch": 0.1486959565902328, "grad_norm": 0.8963548524790588, "learning_rate": 9.635169699569528e-06, "loss": 0.1688, "step": 5097 }, { "epoch": 0.14872512982087635, "grad_norm": 0.8247082107867015, "learning_rate": 9.634992526732336e-06, "loss": 0.1815, "step": 5098 }, { "epoch": 0.14875430305151993, "grad_norm": 0.894825694058036, "learning_rate": 9.634815312515038e-06, "loss": 0.1901, "step": 5099 }, { "epoch": 0.14878347628216348, "grad_norm": 0.84077347909432, "learning_rate": 9.634638056919213e-06, "loss": 0.1655, "step": 5100 }, { "epoch": 0.14881264951280704, "grad_norm": 0.781795925008661, "learning_rate": 9.634460759946449e-06, "loss": 0.1801, "step": 5101 }, { "epoch": 0.14884182274345062, "grad_norm": 0.9837437794935363, "learning_rate": 9.634283421598322e-06, "loss": 0.1749, "step": 5102 }, { "epoch": 0.14887099597409417, "grad_norm": 0.8140802808114325, "learning_rate": 9.63410604187642e-06, "loss": 0.156, "step": 5103 }, { "epoch": 0.14890016920473773, "grad_norm": 0.9795870482107814, "learning_rate": 9.633928620782327e-06, "loss": 0.1786, "step": 5104 }, { "epoch": 0.1489293424353813, "grad_norm": 0.8467942217568136, "learning_rate": 9.633751158317624e-06, "loss": 0.1864, "step": 5105 }, { "epoch": 0.14895851566602486, "grad_norm": 0.9165020355845057, "learning_rate": 9.633573654483898e-06, "loss": 0.179, "step": 5106 }, { "epoch": 0.1489876888966684, "grad_norm": 0.9626048326471355, "learning_rate": 9.633396109282733e-06, "loss": 0.1839, "step": 5107 }, { "epoch": 0.14901686212731197, "grad_norm": 0.7993374003325108, "learning_rate": 9.633218522715713e-06, "loss": 0.1642, "step": 5108 }, { "epoch": 0.14904603535795555, "grad_norm": 0.8175438779764319, "learning_rate": 9.633040894784423e-06, "loss": 0.1833, "step": 5109 }, { "epoch": 0.1490752085885991, "grad_norm": 0.8280294357075404, "learning_rate": 9.63286322549045e-06, "loss": 0.1653, "step": 5110 }, { "epoch": 0.14910438181924265, "grad_norm": 0.908338783381698, "learning_rate": 9.632685514835381e-06, "loss": 0.2017, "step": 5111 }, { "epoch": 0.14913355504988624, "grad_norm": 1.0516092502100833, "learning_rate": 9.632507762820802e-06, "loss": 0.1945, "step": 5112 }, { "epoch": 0.1491627282805298, "grad_norm": 0.8242778378158175, "learning_rate": 9.632329969448297e-06, "loss": 0.214, "step": 5113 }, { "epoch": 0.14919190151117334, "grad_norm": 0.9240705779461624, "learning_rate": 9.63215213471946e-06, "loss": 0.188, "step": 5114 }, { "epoch": 0.14922107474181692, "grad_norm": 0.9747253445918209, "learning_rate": 9.631974258635872e-06, "loss": 0.176, "step": 5115 }, { "epoch": 0.14925024797246048, "grad_norm": 0.7211727458179493, "learning_rate": 9.631796341199122e-06, "loss": 0.1661, "step": 5116 }, { "epoch": 0.14927942120310403, "grad_norm": 0.8473016855698315, "learning_rate": 9.631618382410804e-06, "loss": 0.1683, "step": 5117 }, { "epoch": 0.14930859443374758, "grad_norm": 0.970757502447642, "learning_rate": 9.631440382272498e-06, "loss": 0.178, "step": 5118 }, { "epoch": 0.14933776766439116, "grad_norm": 0.6904814083838131, "learning_rate": 9.631262340785802e-06, "loss": 0.1492, "step": 5119 }, { "epoch": 0.14936694089503472, "grad_norm": 0.7421442896285018, "learning_rate": 9.6310842579523e-06, "loss": 0.1924, "step": 5120 }, { "epoch": 0.14939611412567827, "grad_norm": 1.0651467930117666, "learning_rate": 9.630906133773583e-06, "loss": 0.1639, "step": 5121 }, { "epoch": 0.14942528735632185, "grad_norm": 0.8390889273605537, "learning_rate": 9.63072796825124e-06, "loss": 0.1725, "step": 5122 }, { "epoch": 0.1494544605869654, "grad_norm": 0.9309394813860895, "learning_rate": 9.630549761386865e-06, "loss": 0.1823, "step": 5123 }, { "epoch": 0.14948363381760896, "grad_norm": 0.8677477915122752, "learning_rate": 9.630371513182047e-06, "loss": 0.1674, "step": 5124 }, { "epoch": 0.1495128070482525, "grad_norm": 0.7952579105541172, "learning_rate": 9.630193223638378e-06, "loss": 0.1645, "step": 5125 }, { "epoch": 0.1495419802788961, "grad_norm": 0.805111346894642, "learning_rate": 9.630014892757449e-06, "loss": 0.168, "step": 5126 }, { "epoch": 0.14957115350953964, "grad_norm": 0.9688313650719023, "learning_rate": 9.629836520540851e-06, "loss": 0.1874, "step": 5127 }, { "epoch": 0.1496003267401832, "grad_norm": 1.100268308538829, "learning_rate": 9.629658106990179e-06, "loss": 0.1825, "step": 5128 }, { "epoch": 0.14962949997082678, "grad_norm": 0.8652801324456252, "learning_rate": 9.629479652107024e-06, "loss": 0.1855, "step": 5129 }, { "epoch": 0.14965867320147033, "grad_norm": 0.8891167955846057, "learning_rate": 9.62930115589298e-06, "loss": 0.1559, "step": 5130 }, { "epoch": 0.14968784643211389, "grad_norm": 0.9620161577897258, "learning_rate": 9.62912261834964e-06, "loss": 0.198, "step": 5131 }, { "epoch": 0.14971701966275747, "grad_norm": 0.9557939480904629, "learning_rate": 9.628944039478599e-06, "loss": 0.1839, "step": 5132 }, { "epoch": 0.14974619289340102, "grad_norm": 0.7887926047280494, "learning_rate": 9.628765419281452e-06, "loss": 0.1862, "step": 5133 }, { "epoch": 0.14977536612404457, "grad_norm": 0.9482297956073317, "learning_rate": 9.62858675775979e-06, "loss": 0.1729, "step": 5134 }, { "epoch": 0.14980453935468813, "grad_norm": 0.8407297247504261, "learning_rate": 9.62840805491521e-06, "loss": 0.1659, "step": 5135 }, { "epoch": 0.1498337125853317, "grad_norm": 0.7981482556755108, "learning_rate": 9.62822931074931e-06, "loss": 0.1801, "step": 5136 }, { "epoch": 0.14986288581597526, "grad_norm": 0.9121878201813708, "learning_rate": 9.62805052526368e-06, "loss": 0.2001, "step": 5137 }, { "epoch": 0.14989205904661881, "grad_norm": 1.113022027779779, "learning_rate": 9.627871698459925e-06, "loss": 0.1788, "step": 5138 }, { "epoch": 0.1499212322772624, "grad_norm": 0.897779033375477, "learning_rate": 9.627692830339633e-06, "loss": 0.1934, "step": 5139 }, { "epoch": 0.14995040550790595, "grad_norm": 0.7520491511565212, "learning_rate": 9.627513920904403e-06, "loss": 0.1609, "step": 5140 }, { "epoch": 0.1499795787385495, "grad_norm": 0.8198308417034144, "learning_rate": 9.627334970155837e-06, "loss": 0.1887, "step": 5141 }, { "epoch": 0.15000875196919305, "grad_norm": 0.7567235699570818, "learning_rate": 9.627155978095526e-06, "loss": 0.1605, "step": 5142 }, { "epoch": 0.15003792519983664, "grad_norm": 0.8121192608338688, "learning_rate": 9.626976944725071e-06, "loss": 0.155, "step": 5143 }, { "epoch": 0.1500670984304802, "grad_norm": 0.8404794822023604, "learning_rate": 9.626797870046071e-06, "loss": 0.1668, "step": 5144 }, { "epoch": 0.15009627166112374, "grad_norm": 0.8568013721923577, "learning_rate": 9.626618754060127e-06, "loss": 0.1675, "step": 5145 }, { "epoch": 0.15012544489176732, "grad_norm": 0.7932146477101287, "learning_rate": 9.626439596768831e-06, "loss": 0.2013, "step": 5146 }, { "epoch": 0.15015461812241088, "grad_norm": 0.9595123753157163, "learning_rate": 9.626260398173788e-06, "loss": 0.1901, "step": 5147 }, { "epoch": 0.15018379135305443, "grad_norm": 0.8542659439878874, "learning_rate": 9.626081158276597e-06, "loss": 0.1652, "step": 5148 }, { "epoch": 0.150212964583698, "grad_norm": 0.9191254664150484, "learning_rate": 9.625901877078857e-06, "loss": 0.1802, "step": 5149 }, { "epoch": 0.15024213781434156, "grad_norm": 0.8044557749570149, "learning_rate": 9.625722554582171e-06, "loss": 0.1558, "step": 5150 }, { "epoch": 0.15027131104498512, "grad_norm": 1.1638830160852909, "learning_rate": 9.625543190788138e-06, "loss": 0.166, "step": 5151 }, { "epoch": 0.15030048427562867, "grad_norm": 0.7792950629208888, "learning_rate": 9.625363785698358e-06, "loss": 0.1691, "step": 5152 }, { "epoch": 0.15032965750627225, "grad_norm": 0.9530255030838118, "learning_rate": 9.625184339314435e-06, "loss": 0.1545, "step": 5153 }, { "epoch": 0.1503588307369158, "grad_norm": 0.7949708710489812, "learning_rate": 9.625004851637972e-06, "loss": 0.1808, "step": 5154 }, { "epoch": 0.15038800396755936, "grad_norm": 0.884317595587077, "learning_rate": 9.624825322670567e-06, "loss": 0.2024, "step": 5155 }, { "epoch": 0.15041717719820294, "grad_norm": 1.021448310381221, "learning_rate": 9.624645752413827e-06, "loss": 0.1817, "step": 5156 }, { "epoch": 0.1504463504288465, "grad_norm": 1.011998252678421, "learning_rate": 9.624466140869353e-06, "loss": 0.166, "step": 5157 }, { "epoch": 0.15047552365949005, "grad_norm": 0.8965535961246643, "learning_rate": 9.62428648803875e-06, "loss": 0.1771, "step": 5158 }, { "epoch": 0.15050469689013363, "grad_norm": 0.8663976474058169, "learning_rate": 9.624106793923622e-06, "loss": 0.1823, "step": 5159 }, { "epoch": 0.15053387012077718, "grad_norm": 0.8580090319773263, "learning_rate": 9.62392705852557e-06, "loss": 0.1517, "step": 5160 }, { "epoch": 0.15056304335142073, "grad_norm": 0.8654448618053974, "learning_rate": 9.623747281846203e-06, "loss": 0.1695, "step": 5161 }, { "epoch": 0.1505922165820643, "grad_norm": 0.7153421101554999, "learning_rate": 9.623567463887123e-06, "loss": 0.1712, "step": 5162 }, { "epoch": 0.15062138981270787, "grad_norm": 0.7000303542349987, "learning_rate": 9.623387604649937e-06, "loss": 0.1591, "step": 5163 }, { "epoch": 0.15065056304335142, "grad_norm": 1.0505270905577375, "learning_rate": 9.62320770413625e-06, "loss": 0.1674, "step": 5164 }, { "epoch": 0.15067973627399497, "grad_norm": 0.7705851421827375, "learning_rate": 9.623027762347669e-06, "loss": 0.1835, "step": 5165 }, { "epoch": 0.15070890950463856, "grad_norm": 1.0398084532315381, "learning_rate": 9.622847779285798e-06, "loss": 0.1675, "step": 5166 }, { "epoch": 0.1507380827352821, "grad_norm": 0.8384019112904402, "learning_rate": 9.622667754952246e-06, "loss": 0.1654, "step": 5167 }, { "epoch": 0.15076725596592566, "grad_norm": 0.9030474618235137, "learning_rate": 9.62248768934862e-06, "loss": 0.1583, "step": 5168 }, { "epoch": 0.15079642919656921, "grad_norm": 0.8620656330514548, "learning_rate": 9.62230758247653e-06, "loss": 0.1434, "step": 5169 }, { "epoch": 0.1508256024272128, "grad_norm": 0.9205909108268115, "learning_rate": 9.622127434337578e-06, "loss": 0.1841, "step": 5170 }, { "epoch": 0.15085477565785635, "grad_norm": 1.0488539207987573, "learning_rate": 9.621947244933377e-06, "loss": 0.1992, "step": 5171 }, { "epoch": 0.1508839488884999, "grad_norm": 0.7687755427598137, "learning_rate": 9.621767014265534e-06, "loss": 0.1772, "step": 5172 }, { "epoch": 0.15091312211914348, "grad_norm": 0.9379189448252973, "learning_rate": 9.621586742335658e-06, "loss": 0.1617, "step": 5173 }, { "epoch": 0.15094229534978704, "grad_norm": 0.9344745331471433, "learning_rate": 9.62140642914536e-06, "loss": 0.1783, "step": 5174 }, { "epoch": 0.1509714685804306, "grad_norm": 0.9544197104113092, "learning_rate": 9.621226074696249e-06, "loss": 0.1823, "step": 5175 }, { "epoch": 0.15100064181107417, "grad_norm": 0.8035634563611824, "learning_rate": 9.621045678989933e-06, "loss": 0.1848, "step": 5176 }, { "epoch": 0.15102981504171772, "grad_norm": 1.2575767680881196, "learning_rate": 9.620865242028025e-06, "loss": 0.1656, "step": 5177 }, { "epoch": 0.15105898827236128, "grad_norm": 0.9709566370302833, "learning_rate": 9.620684763812135e-06, "loss": 0.1561, "step": 5178 }, { "epoch": 0.15108816150300483, "grad_norm": 0.8131796770604164, "learning_rate": 9.620504244343875e-06, "loss": 0.1895, "step": 5179 }, { "epoch": 0.1511173347336484, "grad_norm": 0.9585894437782273, "learning_rate": 9.620323683624855e-06, "loss": 0.1597, "step": 5180 }, { "epoch": 0.15114650796429197, "grad_norm": 0.7724375926022282, "learning_rate": 9.62014308165669e-06, "loss": 0.1802, "step": 5181 }, { "epoch": 0.15117568119493552, "grad_norm": 0.7903951900843166, "learning_rate": 9.619962438440988e-06, "loss": 0.1554, "step": 5182 }, { "epoch": 0.1512048544255791, "grad_norm": 0.8380458263545416, "learning_rate": 9.619781753979367e-06, "loss": 0.1607, "step": 5183 }, { "epoch": 0.15123402765622265, "grad_norm": 1.2674925913842625, "learning_rate": 9.619601028273436e-06, "loss": 0.148, "step": 5184 }, { "epoch": 0.1512632008868662, "grad_norm": 0.6770142237354774, "learning_rate": 9.61942026132481e-06, "loss": 0.1669, "step": 5185 }, { "epoch": 0.15129237411750976, "grad_norm": 1.2654969480912075, "learning_rate": 9.619239453135103e-06, "loss": 0.1922, "step": 5186 }, { "epoch": 0.15132154734815334, "grad_norm": 0.9072409755784046, "learning_rate": 9.619058603705927e-06, "loss": 0.16, "step": 5187 }, { "epoch": 0.1513507205787969, "grad_norm": 0.9758151903911123, "learning_rate": 9.6188777130389e-06, "loss": 0.1828, "step": 5188 }, { "epoch": 0.15137989380944045, "grad_norm": 0.8384463599622914, "learning_rate": 9.618696781135635e-06, "loss": 0.1837, "step": 5189 }, { "epoch": 0.15140906704008403, "grad_norm": 1.0062725527060505, "learning_rate": 9.618515807997748e-06, "loss": 0.1779, "step": 5190 }, { "epoch": 0.15143824027072758, "grad_norm": 0.9554657667561735, "learning_rate": 9.618334793626855e-06, "loss": 0.2033, "step": 5191 }, { "epoch": 0.15146741350137113, "grad_norm": 0.7103028587542359, "learning_rate": 9.61815373802457e-06, "loss": 0.1588, "step": 5192 }, { "epoch": 0.15149658673201472, "grad_norm": 0.9075256364303415, "learning_rate": 9.617972641192513e-06, "loss": 0.165, "step": 5193 }, { "epoch": 0.15152575996265827, "grad_norm": 1.1186651509945547, "learning_rate": 9.617791503132297e-06, "loss": 0.1742, "step": 5194 }, { "epoch": 0.15155493319330182, "grad_norm": 0.916327418833708, "learning_rate": 9.617610323845539e-06, "loss": 0.223, "step": 5195 }, { "epoch": 0.15158410642394538, "grad_norm": 0.874467535661217, "learning_rate": 9.617429103333862e-06, "loss": 0.1556, "step": 5196 }, { "epoch": 0.15161327965458896, "grad_norm": 1.1124542867749956, "learning_rate": 9.617247841598877e-06, "loss": 0.1789, "step": 5197 }, { "epoch": 0.1516424528852325, "grad_norm": 0.9619492305116694, "learning_rate": 9.617066538642209e-06, "loss": 0.1895, "step": 5198 }, { "epoch": 0.15167162611587606, "grad_norm": 1.0891589166378086, "learning_rate": 9.616885194465471e-06, "loss": 0.1647, "step": 5199 }, { "epoch": 0.15170079934651964, "grad_norm": 0.8885211172842047, "learning_rate": 9.616703809070283e-06, "loss": 0.1664, "step": 5200 }, { "epoch": 0.1517299725771632, "grad_norm": 0.8612314005430644, "learning_rate": 9.616522382458268e-06, "loss": 0.1675, "step": 5201 }, { "epoch": 0.15175914580780675, "grad_norm": 0.9704916637924584, "learning_rate": 9.616340914631041e-06, "loss": 0.1864, "step": 5202 }, { "epoch": 0.15178831903845033, "grad_norm": 0.8954603361269353, "learning_rate": 9.616159405590226e-06, "loss": 0.1916, "step": 5203 }, { "epoch": 0.15181749226909388, "grad_norm": 1.0133855686822917, "learning_rate": 9.615977855337442e-06, "loss": 0.1832, "step": 5204 }, { "epoch": 0.15184666549973744, "grad_norm": 0.8147693290478469, "learning_rate": 9.615796263874308e-06, "loss": 0.1587, "step": 5205 }, { "epoch": 0.151875838730381, "grad_norm": 1.0275112228198235, "learning_rate": 9.615614631202449e-06, "loss": 0.1581, "step": 5206 }, { "epoch": 0.15190501196102457, "grad_norm": 1.1285279421671728, "learning_rate": 9.615432957323481e-06, "loss": 0.1894, "step": 5207 }, { "epoch": 0.15193418519166813, "grad_norm": 0.8877871622878019, "learning_rate": 9.615251242239033e-06, "loss": 0.1739, "step": 5208 }, { "epoch": 0.15196335842231168, "grad_norm": 1.050245295509508, "learning_rate": 9.61506948595072e-06, "loss": 0.1652, "step": 5209 }, { "epoch": 0.15199253165295526, "grad_norm": 0.9636502036667239, "learning_rate": 9.614887688460171e-06, "loss": 0.1741, "step": 5210 }, { "epoch": 0.1520217048835988, "grad_norm": 1.020172444862389, "learning_rate": 9.614705849769006e-06, "loss": 0.1775, "step": 5211 }, { "epoch": 0.15205087811424237, "grad_norm": 0.7703176891004924, "learning_rate": 9.61452396987885e-06, "loss": 0.1719, "step": 5212 }, { "epoch": 0.15208005134488592, "grad_norm": 0.7928883634593038, "learning_rate": 9.614342048791322e-06, "loss": 0.2006, "step": 5213 }, { "epoch": 0.1521092245755295, "grad_norm": 0.8622712038997947, "learning_rate": 9.614160086508053e-06, "loss": 0.1738, "step": 5214 }, { "epoch": 0.15213839780617305, "grad_norm": 1.015876504668712, "learning_rate": 9.613978083030663e-06, "loss": 0.1908, "step": 5215 }, { "epoch": 0.1521675710368166, "grad_norm": 0.9547319868253054, "learning_rate": 9.613796038360779e-06, "loss": 0.1813, "step": 5216 }, { "epoch": 0.1521967442674602, "grad_norm": 0.8052457579092854, "learning_rate": 9.613613952500024e-06, "loss": 0.178, "step": 5217 }, { "epoch": 0.15222591749810374, "grad_norm": 0.9372740219522526, "learning_rate": 9.613431825450026e-06, "loss": 0.1479, "step": 5218 }, { "epoch": 0.1522550907287473, "grad_norm": 1.0724994981092462, "learning_rate": 9.613249657212408e-06, "loss": 0.1575, "step": 5219 }, { "epoch": 0.15228426395939088, "grad_norm": 0.8514462977175778, "learning_rate": 9.613067447788802e-06, "loss": 0.1506, "step": 5220 }, { "epoch": 0.15231343719003443, "grad_norm": 1.186589938781719, "learning_rate": 9.612885197180828e-06, "loss": 0.1783, "step": 5221 }, { "epoch": 0.15234261042067798, "grad_norm": 0.9758065586189032, "learning_rate": 9.612702905390116e-06, "loss": 0.1699, "step": 5222 }, { "epoch": 0.15237178365132154, "grad_norm": 0.8549776954726223, "learning_rate": 9.612520572418296e-06, "loss": 0.1536, "step": 5223 }, { "epoch": 0.15240095688196512, "grad_norm": 0.9629543975850083, "learning_rate": 9.612338198266993e-06, "loss": 0.1806, "step": 5224 }, { "epoch": 0.15243013011260867, "grad_norm": 0.944028999761969, "learning_rate": 9.612155782937835e-06, "loss": 0.1792, "step": 5225 }, { "epoch": 0.15245930334325222, "grad_norm": 0.8341628912648352, "learning_rate": 9.61197332643245e-06, "loss": 0.1704, "step": 5226 }, { "epoch": 0.1524884765738958, "grad_norm": 0.8256586584529005, "learning_rate": 9.61179082875247e-06, "loss": 0.196, "step": 5227 }, { "epoch": 0.15251764980453936, "grad_norm": 0.8263066750080229, "learning_rate": 9.611608289899521e-06, "loss": 0.1666, "step": 5228 }, { "epoch": 0.1525468230351829, "grad_norm": 0.9071922848564402, "learning_rate": 9.611425709875234e-06, "loss": 0.1835, "step": 5229 }, { "epoch": 0.1525759962658265, "grad_norm": 0.6553228819023137, "learning_rate": 9.611243088681239e-06, "loss": 0.1432, "step": 5230 }, { "epoch": 0.15260516949647004, "grad_norm": 0.7810850295323523, "learning_rate": 9.611060426319168e-06, "loss": 0.1773, "step": 5231 }, { "epoch": 0.1526343427271136, "grad_norm": 0.9786584587614672, "learning_rate": 9.61087772279065e-06, "loss": 0.1377, "step": 5232 }, { "epoch": 0.15266351595775715, "grad_norm": 0.688277715765858, "learning_rate": 9.610694978097314e-06, "loss": 0.1647, "step": 5233 }, { "epoch": 0.15269268918840073, "grad_norm": 0.9339862118585256, "learning_rate": 9.610512192240797e-06, "loss": 0.1758, "step": 5234 }, { "epoch": 0.15272186241904429, "grad_norm": 0.9559664169701665, "learning_rate": 9.610329365222725e-06, "loss": 0.1923, "step": 5235 }, { "epoch": 0.15275103564968784, "grad_norm": 0.7826416609748704, "learning_rate": 9.610146497044736e-06, "loss": 0.1703, "step": 5236 }, { "epoch": 0.15278020888033142, "grad_norm": 0.9205570412734108, "learning_rate": 9.609963587708457e-06, "loss": 0.2068, "step": 5237 }, { "epoch": 0.15280938211097497, "grad_norm": 0.9580224749521794, "learning_rate": 9.609780637215525e-06, "loss": 0.1729, "step": 5238 }, { "epoch": 0.15283855534161853, "grad_norm": 0.7730007566000654, "learning_rate": 9.609597645567572e-06, "loss": 0.1668, "step": 5239 }, { "epoch": 0.15286772857226208, "grad_norm": 0.8259023240302131, "learning_rate": 9.609414612766231e-06, "loss": 0.1562, "step": 5240 }, { "epoch": 0.15289690180290566, "grad_norm": 0.8524120965477474, "learning_rate": 9.609231538813137e-06, "loss": 0.1703, "step": 5241 }, { "epoch": 0.1529260750335492, "grad_norm": 0.7507180816390783, "learning_rate": 9.609048423709923e-06, "loss": 0.1637, "step": 5242 }, { "epoch": 0.15295524826419277, "grad_norm": 1.003073067269039, "learning_rate": 9.608865267458227e-06, "loss": 0.1764, "step": 5243 }, { "epoch": 0.15298442149483635, "grad_norm": 0.7127185709273672, "learning_rate": 9.60868207005968e-06, "loss": 0.1496, "step": 5244 }, { "epoch": 0.1530135947254799, "grad_norm": 0.772085291176157, "learning_rate": 9.608498831515921e-06, "loss": 0.2168, "step": 5245 }, { "epoch": 0.15304276795612345, "grad_norm": 0.9274081634817349, "learning_rate": 9.608315551828584e-06, "loss": 0.1735, "step": 5246 }, { "epoch": 0.15307194118676704, "grad_norm": 0.7450579897763286, "learning_rate": 9.608132230999308e-06, "loss": 0.1768, "step": 5247 }, { "epoch": 0.1531011144174106, "grad_norm": 0.7313613290022853, "learning_rate": 9.607948869029723e-06, "loss": 0.1442, "step": 5248 }, { "epoch": 0.15313028764805414, "grad_norm": 0.8460874138905079, "learning_rate": 9.607765465921475e-06, "loss": 0.1954, "step": 5249 }, { "epoch": 0.1531594608786977, "grad_norm": 0.9068670462246586, "learning_rate": 9.607582021676193e-06, "loss": 0.1687, "step": 5250 }, { "epoch": 0.15318863410934128, "grad_norm": 0.9946540282040286, "learning_rate": 9.607398536295522e-06, "loss": 0.1773, "step": 5251 }, { "epoch": 0.15321780733998483, "grad_norm": 0.9035621429233804, "learning_rate": 9.607215009781094e-06, "loss": 0.1719, "step": 5252 }, { "epoch": 0.15324698057062838, "grad_norm": 0.9185277929085259, "learning_rate": 9.607031442134554e-06, "loss": 0.177, "step": 5253 }, { "epoch": 0.15327615380127196, "grad_norm": 0.869343631941605, "learning_rate": 9.606847833357534e-06, "loss": 0.1675, "step": 5254 }, { "epoch": 0.15330532703191552, "grad_norm": 0.7384229055781494, "learning_rate": 9.606664183451677e-06, "loss": 0.1361, "step": 5255 }, { "epoch": 0.15333450026255907, "grad_norm": 0.9312720913620172, "learning_rate": 9.606480492418622e-06, "loss": 0.1833, "step": 5256 }, { "epoch": 0.15336367349320262, "grad_norm": 0.9086396553991825, "learning_rate": 9.606296760260008e-06, "loss": 0.152, "step": 5257 }, { "epoch": 0.1533928467238462, "grad_norm": 0.8849816361045499, "learning_rate": 9.606112986977477e-06, "loss": 0.1819, "step": 5258 }, { "epoch": 0.15342201995448976, "grad_norm": 0.9573284683873251, "learning_rate": 9.605929172572668e-06, "loss": 0.1692, "step": 5259 }, { "epoch": 0.1534511931851333, "grad_norm": 0.9932776120380594, "learning_rate": 9.605745317047224e-06, "loss": 0.1778, "step": 5260 }, { "epoch": 0.1534803664157769, "grad_norm": 0.7934079782105875, "learning_rate": 9.605561420402786e-06, "loss": 0.1704, "step": 5261 }, { "epoch": 0.15350953964642045, "grad_norm": 1.0393421979411546, "learning_rate": 9.605377482640991e-06, "loss": 0.1662, "step": 5262 }, { "epoch": 0.153538712877064, "grad_norm": 0.8575229870657547, "learning_rate": 9.60519350376349e-06, "loss": 0.1714, "step": 5263 }, { "epoch": 0.15356788610770758, "grad_norm": 0.968177165950683, "learning_rate": 9.605009483771919e-06, "loss": 0.1795, "step": 5264 }, { "epoch": 0.15359705933835113, "grad_norm": 0.8745136764727854, "learning_rate": 9.604825422667921e-06, "loss": 0.1902, "step": 5265 }, { "epoch": 0.1536262325689947, "grad_norm": 0.7757863453994622, "learning_rate": 9.604641320453143e-06, "loss": 0.1536, "step": 5266 }, { "epoch": 0.15365540579963824, "grad_norm": 0.7955542026906197, "learning_rate": 9.604457177129226e-06, "loss": 0.1776, "step": 5267 }, { "epoch": 0.15368457903028182, "grad_norm": 1.4192027347687417, "learning_rate": 9.604272992697814e-06, "loss": 0.202, "step": 5268 }, { "epoch": 0.15371375226092537, "grad_norm": 1.0294363103726043, "learning_rate": 9.604088767160553e-06, "loss": 0.1645, "step": 5269 }, { "epoch": 0.15374292549156893, "grad_norm": 0.84436034845223, "learning_rate": 9.603904500519086e-06, "loss": 0.1819, "step": 5270 }, { "epoch": 0.1537720987222125, "grad_norm": 1.2536092902006988, "learning_rate": 9.603720192775057e-06, "loss": 0.1605, "step": 5271 }, { "epoch": 0.15380127195285606, "grad_norm": 1.0431662330200993, "learning_rate": 9.603535843930116e-06, "loss": 0.1796, "step": 5272 }, { "epoch": 0.15383044518349961, "grad_norm": 0.916858461356858, "learning_rate": 9.603351453985903e-06, "loss": 0.1757, "step": 5273 }, { "epoch": 0.1538596184141432, "grad_norm": 0.9931652330333578, "learning_rate": 9.603167022944069e-06, "loss": 0.1658, "step": 5274 }, { "epoch": 0.15388879164478675, "grad_norm": 1.0203396642721403, "learning_rate": 9.602982550806259e-06, "loss": 0.1544, "step": 5275 }, { "epoch": 0.1539179648754303, "grad_norm": 0.8700065891494176, "learning_rate": 9.602798037574117e-06, "loss": 0.153, "step": 5276 }, { "epoch": 0.15394713810607386, "grad_norm": 0.8711074668522925, "learning_rate": 9.602613483249297e-06, "loss": 0.1747, "step": 5277 }, { "epoch": 0.15397631133671744, "grad_norm": 0.8864803591663832, "learning_rate": 9.60242888783344e-06, "loss": 0.1628, "step": 5278 }, { "epoch": 0.154005484567361, "grad_norm": 0.8195468392838261, "learning_rate": 9.602244251328197e-06, "loss": 0.1876, "step": 5279 }, { "epoch": 0.15403465779800454, "grad_norm": 0.7120788408881081, "learning_rate": 9.602059573735216e-06, "loss": 0.1477, "step": 5280 }, { "epoch": 0.15406383102864812, "grad_norm": 1.1433381576343686, "learning_rate": 9.601874855056144e-06, "loss": 0.191, "step": 5281 }, { "epoch": 0.15409300425929168, "grad_norm": 1.1968477507990083, "learning_rate": 9.601690095292634e-06, "loss": 0.1571, "step": 5282 }, { "epoch": 0.15412217748993523, "grad_norm": 0.7344208807854702, "learning_rate": 9.601505294446333e-06, "loss": 0.1788, "step": 5283 }, { "epoch": 0.15415135072057878, "grad_norm": 0.9059529903033163, "learning_rate": 9.60132045251889e-06, "loss": 0.1953, "step": 5284 }, { "epoch": 0.15418052395122236, "grad_norm": 1.0109403347908683, "learning_rate": 9.60113556951196e-06, "loss": 0.1667, "step": 5285 }, { "epoch": 0.15420969718186592, "grad_norm": 0.7689079319908673, "learning_rate": 9.600950645427185e-06, "loss": 0.1509, "step": 5286 }, { "epoch": 0.15423887041250947, "grad_norm": 1.00848130810581, "learning_rate": 9.600765680266225e-06, "loss": 0.1778, "step": 5287 }, { "epoch": 0.15426804364315305, "grad_norm": 0.8903844184600506, "learning_rate": 9.600580674030724e-06, "loss": 0.1809, "step": 5288 }, { "epoch": 0.1542972168737966, "grad_norm": 0.9875250119902921, "learning_rate": 9.600395626722339e-06, "loss": 0.2061, "step": 5289 }, { "epoch": 0.15432639010444016, "grad_norm": 1.0680535625688914, "learning_rate": 9.60021053834272e-06, "loss": 0.1896, "step": 5290 }, { "epoch": 0.15435556333508374, "grad_norm": 0.8705081594665709, "learning_rate": 9.60002540889352e-06, "loss": 0.1605, "step": 5291 }, { "epoch": 0.1543847365657273, "grad_norm": 0.9134141055638958, "learning_rate": 9.59984023837639e-06, "loss": 0.1667, "step": 5292 }, { "epoch": 0.15441390979637085, "grad_norm": 0.9746062664402452, "learning_rate": 9.599655026792984e-06, "loss": 0.2085, "step": 5293 }, { "epoch": 0.1544430830270144, "grad_norm": 1.0177194139187573, "learning_rate": 9.599469774144958e-06, "loss": 0.1848, "step": 5294 }, { "epoch": 0.15447225625765798, "grad_norm": 0.7336467370838153, "learning_rate": 9.599284480433963e-06, "loss": 0.1558, "step": 5295 }, { "epoch": 0.15450142948830153, "grad_norm": 0.8934190069714771, "learning_rate": 9.599099145661654e-06, "loss": 0.1593, "step": 5296 }, { "epoch": 0.1545306027189451, "grad_norm": 0.7370426149551272, "learning_rate": 9.598913769829685e-06, "loss": 0.1686, "step": 5297 }, { "epoch": 0.15455977594958867, "grad_norm": 0.8063677727885334, "learning_rate": 9.598728352939713e-06, "loss": 0.1655, "step": 5298 }, { "epoch": 0.15458894918023222, "grad_norm": 0.8655458639912662, "learning_rate": 9.59854289499339e-06, "loss": 0.2007, "step": 5299 }, { "epoch": 0.15461812241087577, "grad_norm": 0.9481333670787759, "learning_rate": 9.598357395992375e-06, "loss": 0.2051, "step": 5300 }, { "epoch": 0.15464729564151936, "grad_norm": 0.9178766672636066, "learning_rate": 9.598171855938323e-06, "loss": 0.1735, "step": 5301 }, { "epoch": 0.1546764688721629, "grad_norm": 0.9383207793954956, "learning_rate": 9.597986274832891e-06, "loss": 0.1885, "step": 5302 }, { "epoch": 0.15470564210280646, "grad_norm": 0.9201225255267969, "learning_rate": 9.597800652677734e-06, "loss": 0.1754, "step": 5303 }, { "epoch": 0.15473481533345002, "grad_norm": 0.8736399135909124, "learning_rate": 9.597614989474512e-06, "loss": 0.164, "step": 5304 }, { "epoch": 0.1547639885640936, "grad_norm": 0.8028640814658534, "learning_rate": 9.597429285224879e-06, "loss": 0.1654, "step": 5305 }, { "epoch": 0.15479316179473715, "grad_norm": 0.8444657613859604, "learning_rate": 9.597243539930496e-06, "loss": 0.1777, "step": 5306 }, { "epoch": 0.1548223350253807, "grad_norm": 0.9995551637483626, "learning_rate": 9.597057753593018e-06, "loss": 0.176, "step": 5307 }, { "epoch": 0.15485150825602428, "grad_norm": 1.0362160814459496, "learning_rate": 9.59687192621411e-06, "loss": 0.1801, "step": 5308 }, { "epoch": 0.15488068148666784, "grad_norm": 0.7848692139608957, "learning_rate": 9.596686057795424e-06, "loss": 0.1802, "step": 5309 }, { "epoch": 0.1549098547173114, "grad_norm": 0.8313163715921467, "learning_rate": 9.59650014833862e-06, "loss": 0.1841, "step": 5310 }, { "epoch": 0.15493902794795494, "grad_norm": 0.9399501200895383, "learning_rate": 9.596314197845365e-06, "loss": 0.1847, "step": 5311 }, { "epoch": 0.15496820117859852, "grad_norm": 1.0063637393463205, "learning_rate": 9.59612820631731e-06, "loss": 0.1573, "step": 5312 }, { "epoch": 0.15499737440924208, "grad_norm": 0.8905078304122748, "learning_rate": 9.595942173756121e-06, "loss": 0.181, "step": 5313 }, { "epoch": 0.15502654763988563, "grad_norm": 0.8347368721986382, "learning_rate": 9.595756100163459e-06, "loss": 0.1461, "step": 5314 }, { "epoch": 0.1550557208705292, "grad_norm": 0.7521674181445719, "learning_rate": 9.59556998554098e-06, "loss": 0.1756, "step": 5315 }, { "epoch": 0.15508489410117277, "grad_norm": 0.6519258185824471, "learning_rate": 9.595383829890352e-06, "loss": 0.166, "step": 5316 }, { "epoch": 0.15511406733181632, "grad_norm": 0.8118094863671605, "learning_rate": 9.595197633213233e-06, "loss": 0.1663, "step": 5317 }, { "epoch": 0.1551432405624599, "grad_norm": 0.8365807746765657, "learning_rate": 9.595011395511288e-06, "loss": 0.1569, "step": 5318 }, { "epoch": 0.15517241379310345, "grad_norm": 0.8117970104724412, "learning_rate": 9.594825116786177e-06, "loss": 0.1725, "step": 5319 }, { "epoch": 0.155201587023747, "grad_norm": 1.0313484257999297, "learning_rate": 9.594638797039564e-06, "loss": 0.1667, "step": 5320 }, { "epoch": 0.15523076025439056, "grad_norm": 0.7814312257479537, "learning_rate": 9.594452436273113e-06, "loss": 0.1444, "step": 5321 }, { "epoch": 0.15525993348503414, "grad_norm": 0.8938854346748851, "learning_rate": 9.594266034488487e-06, "loss": 0.1819, "step": 5322 }, { "epoch": 0.1552891067156777, "grad_norm": 0.8963589527425608, "learning_rate": 9.594079591687352e-06, "loss": 0.1534, "step": 5323 }, { "epoch": 0.15531827994632125, "grad_norm": 1.015163944737604, "learning_rate": 9.593893107871371e-06, "loss": 0.1572, "step": 5324 }, { "epoch": 0.15534745317696483, "grad_norm": 0.93609118151598, "learning_rate": 9.593706583042208e-06, "loss": 0.1667, "step": 5325 }, { "epoch": 0.15537662640760838, "grad_norm": 0.9236317790651747, "learning_rate": 9.593520017201528e-06, "loss": 0.1812, "step": 5326 }, { "epoch": 0.15540579963825193, "grad_norm": 0.8454415638573578, "learning_rate": 9.593333410351e-06, "loss": 0.184, "step": 5327 }, { "epoch": 0.1554349728688955, "grad_norm": 1.3257250309757944, "learning_rate": 9.593146762492287e-06, "loss": 0.1783, "step": 5328 }, { "epoch": 0.15546414609953907, "grad_norm": 1.0337770473486, "learning_rate": 9.592960073627055e-06, "loss": 0.1603, "step": 5329 }, { "epoch": 0.15549331933018262, "grad_norm": 0.9237513937216989, "learning_rate": 9.592773343756973e-06, "loss": 0.1514, "step": 5330 }, { "epoch": 0.15552249256082618, "grad_norm": 1.1175713018548445, "learning_rate": 9.592586572883709e-06, "loss": 0.1684, "step": 5331 }, { "epoch": 0.15555166579146976, "grad_norm": 0.9757367878718678, "learning_rate": 9.592399761008925e-06, "loss": 0.1635, "step": 5332 }, { "epoch": 0.1555808390221133, "grad_norm": 1.0955431275446061, "learning_rate": 9.592212908134295e-06, "loss": 0.2119, "step": 5333 }, { "epoch": 0.15561001225275686, "grad_norm": 0.923702646775755, "learning_rate": 9.592026014261482e-06, "loss": 0.1719, "step": 5334 }, { "epoch": 0.15563918548340044, "grad_norm": 0.8292061516193554, "learning_rate": 9.59183907939216e-06, "loss": 0.1842, "step": 5335 }, { "epoch": 0.155668358714044, "grad_norm": 0.9886712419812845, "learning_rate": 9.591652103527992e-06, "loss": 0.1537, "step": 5336 }, { "epoch": 0.15569753194468755, "grad_norm": 0.7914672609825438, "learning_rate": 9.591465086670651e-06, "loss": 0.16, "step": 5337 }, { "epoch": 0.1557267051753311, "grad_norm": 0.8589726046962344, "learning_rate": 9.591278028821806e-06, "loss": 0.1973, "step": 5338 }, { "epoch": 0.15575587840597468, "grad_norm": 0.8789279455211334, "learning_rate": 9.591090929983127e-06, "loss": 0.1521, "step": 5339 }, { "epoch": 0.15578505163661824, "grad_norm": 0.8498280798734861, "learning_rate": 9.590903790156282e-06, "loss": 0.1736, "step": 5340 }, { "epoch": 0.1558142248672618, "grad_norm": 0.7567362231450246, "learning_rate": 9.590716609342947e-06, "loss": 0.1713, "step": 5341 }, { "epoch": 0.15584339809790537, "grad_norm": 0.9352837267459277, "learning_rate": 9.590529387544789e-06, "loss": 0.1797, "step": 5342 }, { "epoch": 0.15587257132854893, "grad_norm": 0.9945709590177103, "learning_rate": 9.59034212476348e-06, "loss": 0.1724, "step": 5343 }, { "epoch": 0.15590174455919248, "grad_norm": 0.8160437077233418, "learning_rate": 9.590154821000692e-06, "loss": 0.1771, "step": 5344 }, { "epoch": 0.15593091778983606, "grad_norm": 0.9219373694189473, "learning_rate": 9.5899674762581e-06, "loss": 0.1703, "step": 5345 }, { "epoch": 0.1559600910204796, "grad_norm": 0.8541268506205408, "learning_rate": 9.589780090537371e-06, "loss": 0.157, "step": 5346 }, { "epoch": 0.15598926425112317, "grad_norm": 0.8950324678253496, "learning_rate": 9.589592663840182e-06, "loss": 0.1925, "step": 5347 }, { "epoch": 0.15601843748176672, "grad_norm": 0.7035913765548862, "learning_rate": 9.589405196168204e-06, "loss": 0.1651, "step": 5348 }, { "epoch": 0.1560476107124103, "grad_norm": 1.6294715856673787, "learning_rate": 9.589217687523114e-06, "loss": 0.1657, "step": 5349 }, { "epoch": 0.15607678394305385, "grad_norm": 0.8411972847649672, "learning_rate": 9.589030137906584e-06, "loss": 0.1592, "step": 5350 }, { "epoch": 0.1561059571736974, "grad_norm": 0.7574829571818203, "learning_rate": 9.588842547320287e-06, "loss": 0.182, "step": 5351 }, { "epoch": 0.156135130404341, "grad_norm": 0.9529049001866505, "learning_rate": 9.588654915765901e-06, "loss": 0.1912, "step": 5352 }, { "epoch": 0.15616430363498454, "grad_norm": 0.895152485217695, "learning_rate": 9.588467243245099e-06, "loss": 0.1724, "step": 5353 }, { "epoch": 0.1561934768656281, "grad_norm": 0.8419340032565599, "learning_rate": 9.588279529759556e-06, "loss": 0.1535, "step": 5354 }, { "epoch": 0.15622265009627165, "grad_norm": 0.8098932874842786, "learning_rate": 9.588091775310948e-06, "loss": 0.1682, "step": 5355 }, { "epoch": 0.15625182332691523, "grad_norm": 1.0321349408698615, "learning_rate": 9.587903979900953e-06, "loss": 0.1503, "step": 5356 }, { "epoch": 0.15628099655755878, "grad_norm": 0.9084734717859104, "learning_rate": 9.587716143531248e-06, "loss": 0.1763, "step": 5357 }, { "epoch": 0.15631016978820234, "grad_norm": 0.8982855549872955, "learning_rate": 9.587528266203505e-06, "loss": 0.1778, "step": 5358 }, { "epoch": 0.15633934301884592, "grad_norm": 0.8508686200377119, "learning_rate": 9.587340347919406e-06, "loss": 0.1855, "step": 5359 }, { "epoch": 0.15636851624948947, "grad_norm": 1.0722445709322017, "learning_rate": 9.587152388680628e-06, "loss": 0.1845, "step": 5360 }, { "epoch": 0.15639768948013302, "grad_norm": 0.9633100778239042, "learning_rate": 9.586964388488849e-06, "loss": 0.1822, "step": 5361 }, { "epoch": 0.1564268627107766, "grad_norm": 1.1789964108458322, "learning_rate": 9.586776347345745e-06, "loss": 0.2008, "step": 5362 }, { "epoch": 0.15645603594142016, "grad_norm": 0.9854937256141431, "learning_rate": 9.586588265252999e-06, "loss": 0.1583, "step": 5363 }, { "epoch": 0.1564852091720637, "grad_norm": 0.8743567094915753, "learning_rate": 9.586400142212287e-06, "loss": 0.1722, "step": 5364 }, { "epoch": 0.15651438240270726, "grad_norm": 1.2650349148349425, "learning_rate": 9.58621197822529e-06, "loss": 0.1827, "step": 5365 }, { "epoch": 0.15654355563335084, "grad_norm": 0.6444370402660897, "learning_rate": 9.586023773293687e-06, "loss": 0.1711, "step": 5366 }, { "epoch": 0.1565727288639944, "grad_norm": 0.9065177099854337, "learning_rate": 9.585835527419157e-06, "loss": 0.1852, "step": 5367 }, { "epoch": 0.15660190209463795, "grad_norm": 0.9754269080467419, "learning_rate": 9.585647240603384e-06, "loss": 0.1482, "step": 5368 }, { "epoch": 0.15663107532528153, "grad_norm": 0.645413530190737, "learning_rate": 9.585458912848048e-06, "loss": 0.1816, "step": 5369 }, { "epoch": 0.15666024855592509, "grad_norm": 0.7349710144313402, "learning_rate": 9.585270544154825e-06, "loss": 0.1865, "step": 5370 }, { "epoch": 0.15668942178656864, "grad_norm": 0.8156158013215924, "learning_rate": 9.585082134525405e-06, "loss": 0.1839, "step": 5371 }, { "epoch": 0.1567185950172122, "grad_norm": 0.7953523323285103, "learning_rate": 9.584893683961464e-06, "loss": 0.2123, "step": 5372 }, { "epoch": 0.15674776824785577, "grad_norm": 0.7576460182732079, "learning_rate": 9.58470519246469e-06, "loss": 0.1591, "step": 5373 }, { "epoch": 0.15677694147849933, "grad_norm": 0.7287874841201041, "learning_rate": 9.58451666003676e-06, "loss": 0.174, "step": 5374 }, { "epoch": 0.15680611470914288, "grad_norm": 0.880261036992491, "learning_rate": 9.58432808667936e-06, "loss": 0.1652, "step": 5375 }, { "epoch": 0.15683528793978646, "grad_norm": 0.7200438830764648, "learning_rate": 9.584139472394173e-06, "loss": 0.1956, "step": 5376 }, { "epoch": 0.15686446117043001, "grad_norm": 0.8144091771627586, "learning_rate": 9.583950817182883e-06, "loss": 0.1845, "step": 5377 }, { "epoch": 0.15689363440107357, "grad_norm": 1.0797429701934214, "learning_rate": 9.583762121047175e-06, "loss": 0.1918, "step": 5378 }, { "epoch": 0.15692280763171715, "grad_norm": 0.8390675918028163, "learning_rate": 9.583573383988733e-06, "loss": 0.236, "step": 5379 }, { "epoch": 0.1569519808623607, "grad_norm": 0.830419468352586, "learning_rate": 9.583384606009243e-06, "loss": 0.1648, "step": 5380 }, { "epoch": 0.15698115409300425, "grad_norm": 0.7595095834019951, "learning_rate": 9.583195787110387e-06, "loss": 0.1763, "step": 5381 }, { "epoch": 0.1570103273236478, "grad_norm": 0.8040826984867856, "learning_rate": 9.583006927293855e-06, "loss": 0.1583, "step": 5382 }, { "epoch": 0.1570395005542914, "grad_norm": 0.9431321511186578, "learning_rate": 9.582818026561332e-06, "loss": 0.1714, "step": 5383 }, { "epoch": 0.15706867378493494, "grad_norm": 0.976150288729968, "learning_rate": 9.5826290849145e-06, "loss": 0.1668, "step": 5384 }, { "epoch": 0.1570978470155785, "grad_norm": 0.6976835131639112, "learning_rate": 9.582440102355052e-06, "loss": 0.1677, "step": 5385 }, { "epoch": 0.15712702024622208, "grad_norm": 1.0044919521364357, "learning_rate": 9.582251078884672e-06, "loss": 0.1982, "step": 5386 }, { "epoch": 0.15715619347686563, "grad_norm": 0.8706452783430578, "learning_rate": 9.58206201450505e-06, "loss": 0.1784, "step": 5387 }, { "epoch": 0.15718536670750918, "grad_norm": 0.8441239421029116, "learning_rate": 9.58187290921787e-06, "loss": 0.168, "step": 5388 }, { "epoch": 0.15721453993815276, "grad_norm": 1.0036271562359225, "learning_rate": 9.581683763024825e-06, "loss": 0.1832, "step": 5389 }, { "epoch": 0.15724371316879632, "grad_norm": 0.7096517906693629, "learning_rate": 9.5814945759276e-06, "loss": 0.1699, "step": 5390 }, { "epoch": 0.15727288639943987, "grad_norm": 0.9350427729280126, "learning_rate": 9.581305347927883e-06, "loss": 0.1939, "step": 5391 }, { "epoch": 0.15730205963008342, "grad_norm": 0.8232720239413225, "learning_rate": 9.581116079027367e-06, "loss": 0.1818, "step": 5392 }, { "epoch": 0.157331232860727, "grad_norm": 0.8040089296347235, "learning_rate": 9.580926769227741e-06, "loss": 0.1906, "step": 5393 }, { "epoch": 0.15736040609137056, "grad_norm": 0.8168039532541982, "learning_rate": 9.580737418530693e-06, "loss": 0.1559, "step": 5394 }, { "epoch": 0.1573895793220141, "grad_norm": 0.8357454538670241, "learning_rate": 9.580548026937915e-06, "loss": 0.1698, "step": 5395 }, { "epoch": 0.1574187525526577, "grad_norm": 0.8003691384268423, "learning_rate": 9.580358594451098e-06, "loss": 0.1648, "step": 5396 }, { "epoch": 0.15744792578330125, "grad_norm": 0.9345653440989051, "learning_rate": 9.580169121071934e-06, "loss": 0.182, "step": 5397 }, { "epoch": 0.1574770990139448, "grad_norm": 0.86565735878832, "learning_rate": 9.579979606802112e-06, "loss": 0.1632, "step": 5398 }, { "epoch": 0.15750627224458835, "grad_norm": 0.9821047596796536, "learning_rate": 9.579790051643325e-06, "loss": 0.2039, "step": 5399 }, { "epoch": 0.15753544547523193, "grad_norm": 0.9166169957795464, "learning_rate": 9.579600455597266e-06, "loss": 0.1609, "step": 5400 }, { "epoch": 0.1575646187058755, "grad_norm": 0.9635580581460742, "learning_rate": 9.579410818665628e-06, "loss": 0.1557, "step": 5401 }, { "epoch": 0.15759379193651904, "grad_norm": 1.080706368444192, "learning_rate": 9.579221140850104e-06, "loss": 0.1944, "step": 5402 }, { "epoch": 0.15762296516716262, "grad_norm": 0.927293969695493, "learning_rate": 9.579031422152387e-06, "loss": 0.1764, "step": 5403 }, { "epoch": 0.15765213839780617, "grad_norm": 0.9799503631241685, "learning_rate": 9.57884166257417e-06, "loss": 0.1652, "step": 5404 }, { "epoch": 0.15768131162844973, "grad_norm": 1.0106878771654144, "learning_rate": 9.578651862117148e-06, "loss": 0.188, "step": 5405 }, { "epoch": 0.1577104848590933, "grad_norm": 0.7422579840142393, "learning_rate": 9.578462020783013e-06, "loss": 0.1628, "step": 5406 }, { "epoch": 0.15773965808973686, "grad_norm": 1.3446219409699107, "learning_rate": 9.578272138573463e-06, "loss": 0.1757, "step": 5407 }, { "epoch": 0.15776883132038041, "grad_norm": 1.0520443887428077, "learning_rate": 9.578082215490194e-06, "loss": 0.1895, "step": 5408 }, { "epoch": 0.15779800455102397, "grad_norm": 0.8377729339645991, "learning_rate": 9.577892251534899e-06, "loss": 0.1933, "step": 5409 }, { "epoch": 0.15782717778166755, "grad_norm": 0.743670948833949, "learning_rate": 9.577702246709275e-06, "loss": 0.1654, "step": 5410 }, { "epoch": 0.1578563510123111, "grad_norm": 1.030117423630722, "learning_rate": 9.577512201015017e-06, "loss": 0.1672, "step": 5411 }, { "epoch": 0.15788552424295466, "grad_norm": 0.9125179739569904, "learning_rate": 9.577322114453823e-06, "loss": 0.1775, "step": 5412 }, { "epoch": 0.15791469747359824, "grad_norm": 0.9642911522184233, "learning_rate": 9.57713198702739e-06, "loss": 0.1844, "step": 5413 }, { "epoch": 0.1579438707042418, "grad_norm": 0.8439583916956757, "learning_rate": 9.576941818737417e-06, "loss": 0.159, "step": 5414 }, { "epoch": 0.15797304393488534, "grad_norm": 0.7592182479014896, "learning_rate": 9.576751609585598e-06, "loss": 0.1573, "step": 5415 }, { "epoch": 0.15800221716552892, "grad_norm": 0.9401020753462049, "learning_rate": 9.576561359573634e-06, "loss": 0.1709, "step": 5416 }, { "epoch": 0.15803139039617248, "grad_norm": 0.9758425237649551, "learning_rate": 9.576371068703223e-06, "loss": 0.1919, "step": 5417 }, { "epoch": 0.15806056362681603, "grad_norm": 0.7927983301156055, "learning_rate": 9.576180736976063e-06, "loss": 0.1742, "step": 5418 }, { "epoch": 0.15808973685745958, "grad_norm": 0.9042913679129185, "learning_rate": 9.575990364393854e-06, "loss": 0.1855, "step": 5419 }, { "epoch": 0.15811891008810317, "grad_norm": 1.0121199501897107, "learning_rate": 9.575799950958296e-06, "loss": 0.1931, "step": 5420 }, { "epoch": 0.15814808331874672, "grad_norm": 0.7928701851499547, "learning_rate": 9.575609496671087e-06, "loss": 0.1656, "step": 5421 }, { "epoch": 0.15817725654939027, "grad_norm": 0.9124917297475127, "learning_rate": 9.57541900153393e-06, "loss": 0.1773, "step": 5422 }, { "epoch": 0.15820642978003385, "grad_norm": 0.9829388883479042, "learning_rate": 9.575228465548523e-06, "loss": 0.1692, "step": 5423 }, { "epoch": 0.1582356030106774, "grad_norm": 0.7949378524931011, "learning_rate": 9.57503788871657e-06, "loss": 0.1558, "step": 5424 }, { "epoch": 0.15826477624132096, "grad_norm": 0.8954911948894755, "learning_rate": 9.57484727103977e-06, "loss": 0.1726, "step": 5425 }, { "epoch": 0.1582939494719645, "grad_norm": 0.7890650899596373, "learning_rate": 9.574656612519826e-06, "loss": 0.1642, "step": 5426 }, { "epoch": 0.1583231227026081, "grad_norm": 0.8069001450485412, "learning_rate": 9.57446591315844e-06, "loss": 0.168, "step": 5427 }, { "epoch": 0.15835229593325165, "grad_norm": 0.9666639566313857, "learning_rate": 9.574275172957312e-06, "loss": 0.209, "step": 5428 }, { "epoch": 0.1583814691638952, "grad_norm": 0.9358702572827285, "learning_rate": 9.57408439191815e-06, "loss": 0.1843, "step": 5429 }, { "epoch": 0.15841064239453878, "grad_norm": 0.7631703200514474, "learning_rate": 9.573893570042654e-06, "loss": 0.1591, "step": 5430 }, { "epoch": 0.15843981562518233, "grad_norm": 1.1370567339111364, "learning_rate": 9.573702707332527e-06, "loss": 0.1791, "step": 5431 }, { "epoch": 0.1584689888558259, "grad_norm": 0.8459786444647961, "learning_rate": 9.573511803789475e-06, "loss": 0.1593, "step": 5432 }, { "epoch": 0.15849816208646947, "grad_norm": 0.8080793621441061, "learning_rate": 9.573320859415202e-06, "loss": 0.1971, "step": 5433 }, { "epoch": 0.15852733531711302, "grad_norm": 1.074489446362773, "learning_rate": 9.573129874211411e-06, "loss": 0.1996, "step": 5434 }, { "epoch": 0.15855650854775657, "grad_norm": 0.9428179071504762, "learning_rate": 9.57293884817981e-06, "loss": 0.1816, "step": 5435 }, { "epoch": 0.15858568177840013, "grad_norm": 0.8700286428009719, "learning_rate": 9.572747781322099e-06, "loss": 0.1804, "step": 5436 }, { "epoch": 0.1586148550090437, "grad_norm": 0.8717119106477643, "learning_rate": 9.57255667363999e-06, "loss": 0.1623, "step": 5437 }, { "epoch": 0.15864402823968726, "grad_norm": 0.7060402488018908, "learning_rate": 9.572365525135185e-06, "loss": 0.1646, "step": 5438 }, { "epoch": 0.15867320147033082, "grad_norm": 0.8811567443773396, "learning_rate": 9.572174335809394e-06, "loss": 0.1633, "step": 5439 }, { "epoch": 0.1587023747009744, "grad_norm": 0.7270687461858438, "learning_rate": 9.571983105664322e-06, "loss": 0.1772, "step": 5440 }, { "epoch": 0.15873154793161795, "grad_norm": 0.8497674277569558, "learning_rate": 9.571791834701675e-06, "loss": 0.1702, "step": 5441 }, { "epoch": 0.1587607211622615, "grad_norm": 0.8674696912018066, "learning_rate": 9.571600522923163e-06, "loss": 0.2009, "step": 5442 }, { "epoch": 0.15878989439290506, "grad_norm": 0.8656224250311568, "learning_rate": 9.571409170330491e-06, "loss": 0.1604, "step": 5443 }, { "epoch": 0.15881906762354864, "grad_norm": 0.7641537478070192, "learning_rate": 9.57121777692537e-06, "loss": 0.1809, "step": 5444 }, { "epoch": 0.1588482408541922, "grad_norm": 1.1083848267124718, "learning_rate": 9.571026342709508e-06, "loss": 0.184, "step": 5445 }, { "epoch": 0.15887741408483574, "grad_norm": 0.7476783036580589, "learning_rate": 9.570834867684615e-06, "loss": 0.1665, "step": 5446 }, { "epoch": 0.15890658731547933, "grad_norm": 0.6403376710925319, "learning_rate": 9.5706433518524e-06, "loss": 0.1715, "step": 5447 }, { "epoch": 0.15893576054612288, "grad_norm": 0.9263578967867363, "learning_rate": 9.57045179521457e-06, "loss": 0.1795, "step": 5448 }, { "epoch": 0.15896493377676643, "grad_norm": 0.8908157844591182, "learning_rate": 9.570260197772838e-06, "loss": 0.1744, "step": 5449 }, { "epoch": 0.15899410700741, "grad_norm": 0.7682575593731672, "learning_rate": 9.570068559528915e-06, "loss": 0.1492, "step": 5450 }, { "epoch": 0.15902328023805357, "grad_norm": 0.920412489242452, "learning_rate": 9.56987688048451e-06, "loss": 0.1744, "step": 5451 }, { "epoch": 0.15905245346869712, "grad_norm": 0.8094421064257414, "learning_rate": 9.569685160641335e-06, "loss": 0.1601, "step": 5452 }, { "epoch": 0.15908162669934067, "grad_norm": 0.9815816069454216, "learning_rate": 9.569493400001102e-06, "loss": 0.1831, "step": 5453 }, { "epoch": 0.15911079992998425, "grad_norm": 0.8030345602944954, "learning_rate": 9.569301598565523e-06, "loss": 0.1607, "step": 5454 }, { "epoch": 0.1591399731606278, "grad_norm": 0.931746539425742, "learning_rate": 9.56910975633631e-06, "loss": 0.1863, "step": 5455 }, { "epoch": 0.15916914639127136, "grad_norm": 0.9966607132436579, "learning_rate": 9.568917873315176e-06, "loss": 0.1812, "step": 5456 }, { "epoch": 0.15919831962191494, "grad_norm": 0.861422562580085, "learning_rate": 9.568725949503834e-06, "loss": 0.1611, "step": 5457 }, { "epoch": 0.1592274928525585, "grad_norm": 0.7667715833184627, "learning_rate": 9.568533984903999e-06, "loss": 0.1589, "step": 5458 }, { "epoch": 0.15925666608320205, "grad_norm": 0.7941531497924824, "learning_rate": 9.568341979517379e-06, "loss": 0.1553, "step": 5459 }, { "epoch": 0.15928583931384563, "grad_norm": 0.8236868673519583, "learning_rate": 9.568149933345696e-06, "loss": 0.1503, "step": 5460 }, { "epoch": 0.15931501254448918, "grad_norm": 0.9462066861728884, "learning_rate": 9.567957846390659e-06, "loss": 0.1653, "step": 5461 }, { "epoch": 0.15934418577513274, "grad_norm": 0.7394685143027268, "learning_rate": 9.567765718653985e-06, "loss": 0.148, "step": 5462 }, { "epoch": 0.1593733590057763, "grad_norm": 0.8259509489049427, "learning_rate": 9.56757355013739e-06, "loss": 0.1822, "step": 5463 }, { "epoch": 0.15940253223641987, "grad_norm": 0.7205320025845744, "learning_rate": 9.567381340842587e-06, "loss": 0.1607, "step": 5464 }, { "epoch": 0.15943170546706342, "grad_norm": 0.9278171605585844, "learning_rate": 9.567189090771297e-06, "loss": 0.1819, "step": 5465 }, { "epoch": 0.15946087869770698, "grad_norm": 0.7434139517527243, "learning_rate": 9.56699679992523e-06, "loss": 0.1654, "step": 5466 }, { "epoch": 0.15949005192835056, "grad_norm": 0.7608680124730055, "learning_rate": 9.566804468306106e-06, "loss": 0.1971, "step": 5467 }, { "epoch": 0.1595192251589941, "grad_norm": 0.8194086504675923, "learning_rate": 9.566612095915645e-06, "loss": 0.1558, "step": 5468 }, { "epoch": 0.15954839838963766, "grad_norm": 0.8348174586247553, "learning_rate": 9.566419682755556e-06, "loss": 0.1753, "step": 5469 }, { "epoch": 0.15957757162028122, "grad_norm": 0.7991239621964856, "learning_rate": 9.566227228827567e-06, "loss": 0.1789, "step": 5470 }, { "epoch": 0.1596067448509248, "grad_norm": 1.142913604038722, "learning_rate": 9.566034734133389e-06, "loss": 0.1584, "step": 5471 }, { "epoch": 0.15963591808156835, "grad_norm": 1.2896668932997617, "learning_rate": 9.565842198674745e-06, "loss": 0.1798, "step": 5472 }, { "epoch": 0.1596650913122119, "grad_norm": 0.6454306703971007, "learning_rate": 9.565649622453348e-06, "loss": 0.1493, "step": 5473 }, { "epoch": 0.15969426454285549, "grad_norm": 0.815570010582798, "learning_rate": 9.565457005470924e-06, "loss": 0.1752, "step": 5474 }, { "epoch": 0.15972343777349904, "grad_norm": 0.8262825873337042, "learning_rate": 9.565264347729188e-06, "loss": 0.202, "step": 5475 }, { "epoch": 0.1597526110041426, "grad_norm": 0.7935681384870441, "learning_rate": 9.565071649229864e-06, "loss": 0.1589, "step": 5476 }, { "epoch": 0.15978178423478617, "grad_norm": 0.8986351776738177, "learning_rate": 9.564878909974668e-06, "loss": 0.1599, "step": 5477 }, { "epoch": 0.15981095746542973, "grad_norm": 1.1528515935918588, "learning_rate": 9.564686129965324e-06, "loss": 0.1857, "step": 5478 }, { "epoch": 0.15984013069607328, "grad_norm": 0.8541012777506539, "learning_rate": 9.56449330920355e-06, "loss": 0.1662, "step": 5479 }, { "epoch": 0.15986930392671683, "grad_norm": 0.8620862149959613, "learning_rate": 9.564300447691073e-06, "loss": 0.1712, "step": 5480 }, { "epoch": 0.1598984771573604, "grad_norm": 0.8990771412602441, "learning_rate": 9.564107545429609e-06, "loss": 0.1683, "step": 5481 }, { "epoch": 0.15992765038800397, "grad_norm": 0.9941917188876096, "learning_rate": 9.563914602420882e-06, "loss": 0.1797, "step": 5482 }, { "epoch": 0.15995682361864752, "grad_norm": 0.9751589699648194, "learning_rate": 9.563721618666616e-06, "loss": 0.1647, "step": 5483 }, { "epoch": 0.1599859968492911, "grad_norm": 0.7272839451291004, "learning_rate": 9.563528594168533e-06, "loss": 0.145, "step": 5484 }, { "epoch": 0.16001517007993465, "grad_norm": 1.0892100242230787, "learning_rate": 9.563335528928355e-06, "loss": 0.1563, "step": 5485 }, { "epoch": 0.1600443433105782, "grad_norm": 0.9542053547141254, "learning_rate": 9.563142422947806e-06, "loss": 0.1882, "step": 5486 }, { "epoch": 0.1600735165412218, "grad_norm": 0.7216372949107076, "learning_rate": 9.562949276228612e-06, "loss": 0.1576, "step": 5487 }, { "epoch": 0.16010268977186534, "grad_norm": 0.9598284668236613, "learning_rate": 9.562756088772496e-06, "loss": 0.164, "step": 5488 }, { "epoch": 0.1601318630025089, "grad_norm": 0.8168559170918406, "learning_rate": 9.562562860581183e-06, "loss": 0.1567, "step": 5489 }, { "epoch": 0.16016103623315245, "grad_norm": 0.9889863519411258, "learning_rate": 9.562369591656397e-06, "loss": 0.1995, "step": 5490 }, { "epoch": 0.16019020946379603, "grad_norm": 0.9228763922672527, "learning_rate": 9.562176281999866e-06, "loss": 0.1785, "step": 5491 }, { "epoch": 0.16021938269443958, "grad_norm": 1.0300716925012865, "learning_rate": 9.561982931613314e-06, "loss": 0.1806, "step": 5492 }, { "epoch": 0.16024855592508314, "grad_norm": 0.8421692313466544, "learning_rate": 9.561789540498466e-06, "loss": 0.1564, "step": 5493 }, { "epoch": 0.16027772915572672, "grad_norm": 0.7979222576177974, "learning_rate": 9.56159610865705e-06, "loss": 0.1791, "step": 5494 }, { "epoch": 0.16030690238637027, "grad_norm": 0.8987821980671169, "learning_rate": 9.561402636090795e-06, "loss": 0.1549, "step": 5495 }, { "epoch": 0.16033607561701382, "grad_norm": 0.8303855245795769, "learning_rate": 9.561209122801424e-06, "loss": 0.1734, "step": 5496 }, { "epoch": 0.16036524884765738, "grad_norm": 0.8530449356921735, "learning_rate": 9.561015568790667e-06, "loss": 0.1541, "step": 5497 }, { "epoch": 0.16039442207830096, "grad_norm": 1.0519487125777036, "learning_rate": 9.560821974060253e-06, "loss": 0.1764, "step": 5498 }, { "epoch": 0.1604235953089445, "grad_norm": 0.7882587733865246, "learning_rate": 9.56062833861191e-06, "loss": 0.1941, "step": 5499 }, { "epoch": 0.16045276853958806, "grad_norm": 0.7412433977247188, "learning_rate": 9.560434662447364e-06, "loss": 0.1807, "step": 5500 }, { "epoch": 0.16048194177023165, "grad_norm": 0.7990183668196447, "learning_rate": 9.560240945568346e-06, "loss": 0.1818, "step": 5501 }, { "epoch": 0.1605111150008752, "grad_norm": 0.8293011877629302, "learning_rate": 9.560047187976586e-06, "loss": 0.1754, "step": 5502 }, { "epoch": 0.16054028823151875, "grad_norm": 0.8004321008653155, "learning_rate": 9.559853389673814e-06, "loss": 0.1708, "step": 5503 }, { "epoch": 0.16056946146216233, "grad_norm": 0.7603853919729233, "learning_rate": 9.559659550661759e-06, "loss": 0.1696, "step": 5504 }, { "epoch": 0.16059863469280589, "grad_norm": 0.8410948698440288, "learning_rate": 9.559465670942151e-06, "loss": 0.1771, "step": 5505 }, { "epoch": 0.16062780792344944, "grad_norm": 0.784680841322243, "learning_rate": 9.559271750516723e-06, "loss": 0.168, "step": 5506 }, { "epoch": 0.160656981154093, "grad_norm": 0.8670621225688402, "learning_rate": 9.559077789387204e-06, "loss": 0.1869, "step": 5507 }, { "epoch": 0.16068615438473657, "grad_norm": 0.9298948577198906, "learning_rate": 9.558883787555328e-06, "loss": 0.1893, "step": 5508 }, { "epoch": 0.16071532761538013, "grad_norm": 1.0162196307683045, "learning_rate": 9.558689745022825e-06, "loss": 0.1788, "step": 5509 }, { "epoch": 0.16074450084602368, "grad_norm": 1.1083655323504589, "learning_rate": 9.558495661791429e-06, "loss": 0.1449, "step": 5510 }, { "epoch": 0.16077367407666726, "grad_norm": 0.9613057259010386, "learning_rate": 9.558301537862873e-06, "loss": 0.1608, "step": 5511 }, { "epoch": 0.16080284730731081, "grad_norm": 0.8995678877128398, "learning_rate": 9.558107373238887e-06, "loss": 0.1601, "step": 5512 }, { "epoch": 0.16083202053795437, "grad_norm": 1.0690399837000504, "learning_rate": 9.557913167921206e-06, "loss": 0.1552, "step": 5513 }, { "epoch": 0.16086119376859792, "grad_norm": 0.9360316503120157, "learning_rate": 9.557718921911567e-06, "loss": 0.157, "step": 5514 }, { "epoch": 0.1608903669992415, "grad_norm": 0.9575391506624618, "learning_rate": 9.5575246352117e-06, "loss": 0.1924, "step": 5515 }, { "epoch": 0.16091954022988506, "grad_norm": 0.9290381634060488, "learning_rate": 9.55733030782334e-06, "loss": 0.1749, "step": 5516 }, { "epoch": 0.1609487134605286, "grad_norm": 0.7528786554678382, "learning_rate": 9.557135939748224e-06, "loss": 0.1837, "step": 5517 }, { "epoch": 0.1609778866911722, "grad_norm": 1.0485476870540271, "learning_rate": 9.556941530988087e-06, "loss": 0.1937, "step": 5518 }, { "epoch": 0.16100705992181574, "grad_norm": 0.9967608410686682, "learning_rate": 9.556747081544663e-06, "loss": 0.1733, "step": 5519 }, { "epoch": 0.1610362331524593, "grad_norm": 0.7148948430951124, "learning_rate": 9.556552591419688e-06, "loss": 0.1674, "step": 5520 }, { "epoch": 0.16106540638310288, "grad_norm": 1.1189871928418456, "learning_rate": 9.556358060614901e-06, "loss": 0.1876, "step": 5521 }, { "epoch": 0.16109457961374643, "grad_norm": 0.9812964305816959, "learning_rate": 9.556163489132036e-06, "loss": 0.1999, "step": 5522 }, { "epoch": 0.16112375284438998, "grad_norm": 0.8065992828984858, "learning_rate": 9.55596887697283e-06, "loss": 0.1967, "step": 5523 }, { "epoch": 0.16115292607503354, "grad_norm": 1.0285633053461214, "learning_rate": 9.555774224139022e-06, "loss": 0.1568, "step": 5524 }, { "epoch": 0.16118209930567712, "grad_norm": 0.8012421283696906, "learning_rate": 9.555579530632351e-06, "loss": 0.1449, "step": 5525 }, { "epoch": 0.16121127253632067, "grad_norm": 0.8074301832246265, "learning_rate": 9.555384796454551e-06, "loss": 0.1733, "step": 5526 }, { "epoch": 0.16124044576696422, "grad_norm": 0.9412707946278771, "learning_rate": 9.555190021607364e-06, "loss": 0.1658, "step": 5527 }, { "epoch": 0.1612696189976078, "grad_norm": 1.0414637734958772, "learning_rate": 9.554995206092527e-06, "loss": 0.2106, "step": 5528 }, { "epoch": 0.16129879222825136, "grad_norm": 0.8999965230351273, "learning_rate": 9.554800349911784e-06, "loss": 0.1802, "step": 5529 }, { "epoch": 0.1613279654588949, "grad_norm": 0.9708105064062906, "learning_rate": 9.554605453066868e-06, "loss": 0.2027, "step": 5530 }, { "epoch": 0.1613571386895385, "grad_norm": 0.8076048243727747, "learning_rate": 9.55441051555952e-06, "loss": 0.169, "step": 5531 }, { "epoch": 0.16138631192018205, "grad_norm": 0.9693097447828146, "learning_rate": 9.554215537391485e-06, "loss": 0.1857, "step": 5532 }, { "epoch": 0.1614154851508256, "grad_norm": 0.68000558152758, "learning_rate": 9.5540205185645e-06, "loss": 0.1715, "step": 5533 }, { "epoch": 0.16144465838146915, "grad_norm": 0.9131535674772204, "learning_rate": 9.553825459080306e-06, "loss": 0.1742, "step": 5534 }, { "epoch": 0.16147383161211273, "grad_norm": 0.9443091267167332, "learning_rate": 9.553630358940647e-06, "loss": 0.1684, "step": 5535 }, { "epoch": 0.1615030048427563, "grad_norm": 0.8825146081504416, "learning_rate": 9.553435218147262e-06, "loss": 0.1637, "step": 5536 }, { "epoch": 0.16153217807339984, "grad_norm": 0.8556868523999357, "learning_rate": 9.553240036701893e-06, "loss": 0.181, "step": 5537 }, { "epoch": 0.16156135130404342, "grad_norm": 1.0266545781521235, "learning_rate": 9.553044814606287e-06, "loss": 0.1845, "step": 5538 }, { "epoch": 0.16159052453468697, "grad_norm": 0.8386571943758669, "learning_rate": 9.552849551862182e-06, "loss": 0.1725, "step": 5539 }, { "epoch": 0.16161969776533053, "grad_norm": 0.7416334146289594, "learning_rate": 9.552654248471323e-06, "loss": 0.1419, "step": 5540 }, { "epoch": 0.16164887099597408, "grad_norm": 0.7914754642929637, "learning_rate": 9.552458904435454e-06, "loss": 0.1788, "step": 5541 }, { "epoch": 0.16167804422661766, "grad_norm": 0.8697126259385773, "learning_rate": 9.55226351975632e-06, "loss": 0.1791, "step": 5542 }, { "epoch": 0.16170721745726122, "grad_norm": 0.8546440583131948, "learning_rate": 9.552068094435663e-06, "loss": 0.1811, "step": 5543 }, { "epoch": 0.16173639068790477, "grad_norm": 0.7984828349344645, "learning_rate": 9.551872628475227e-06, "loss": 0.1487, "step": 5544 }, { "epoch": 0.16176556391854835, "grad_norm": 0.8557930680296238, "learning_rate": 9.551677121876761e-06, "loss": 0.1632, "step": 5545 }, { "epoch": 0.1617947371491919, "grad_norm": 0.8212788916557553, "learning_rate": 9.551481574642008e-06, "loss": 0.1918, "step": 5546 }, { "epoch": 0.16182391037983546, "grad_norm": 0.893961801426182, "learning_rate": 9.551285986772714e-06, "loss": 0.1712, "step": 5547 }, { "epoch": 0.16185308361047904, "grad_norm": 1.05923268609053, "learning_rate": 9.551090358270624e-06, "loss": 0.1753, "step": 5548 }, { "epoch": 0.1618822568411226, "grad_norm": 0.8934698772171971, "learning_rate": 9.550894689137487e-06, "loss": 0.1622, "step": 5549 }, { "epoch": 0.16191143007176614, "grad_norm": 1.0534821834609347, "learning_rate": 9.550698979375046e-06, "loss": 0.2013, "step": 5550 }, { "epoch": 0.1619406033024097, "grad_norm": 0.8065592466817278, "learning_rate": 9.550503228985053e-06, "loss": 0.1754, "step": 5551 }, { "epoch": 0.16196977653305328, "grad_norm": 1.027763839163784, "learning_rate": 9.550307437969254e-06, "loss": 0.1603, "step": 5552 }, { "epoch": 0.16199894976369683, "grad_norm": 0.7821991728621691, "learning_rate": 9.550111606329396e-06, "loss": 0.185, "step": 5553 }, { "epoch": 0.16202812299434038, "grad_norm": 0.9745333289682548, "learning_rate": 9.549915734067229e-06, "loss": 0.1697, "step": 5554 }, { "epoch": 0.16205729622498397, "grad_norm": 0.959499070010955, "learning_rate": 9.549719821184498e-06, "loss": 0.1752, "step": 5555 }, { "epoch": 0.16208646945562752, "grad_norm": 0.6914850172054944, "learning_rate": 9.549523867682955e-06, "loss": 0.1589, "step": 5556 }, { "epoch": 0.16211564268627107, "grad_norm": 0.8649204270790649, "learning_rate": 9.54932787356435e-06, "loss": 0.1686, "step": 5557 }, { "epoch": 0.16214481591691463, "grad_norm": 0.9069323247739839, "learning_rate": 9.54913183883043e-06, "loss": 0.1747, "step": 5558 }, { "epoch": 0.1621739891475582, "grad_norm": 0.7860750154736431, "learning_rate": 9.548935763482949e-06, "loss": 0.1626, "step": 5559 }, { "epoch": 0.16220316237820176, "grad_norm": 0.7955310104902404, "learning_rate": 9.548739647523654e-06, "loss": 0.1506, "step": 5560 }, { "epoch": 0.1622323356088453, "grad_norm": 1.0055939065865114, "learning_rate": 9.548543490954299e-06, "loss": 0.1723, "step": 5561 }, { "epoch": 0.1622615088394889, "grad_norm": 0.8232828850756518, "learning_rate": 9.548347293776632e-06, "loss": 0.1868, "step": 5562 }, { "epoch": 0.16229068207013245, "grad_norm": 0.8850197389033428, "learning_rate": 9.548151055992407e-06, "loss": 0.1684, "step": 5563 }, { "epoch": 0.162319855300776, "grad_norm": 0.9326340762746111, "learning_rate": 9.547954777603374e-06, "loss": 0.1787, "step": 5564 }, { "epoch": 0.16234902853141958, "grad_norm": 0.80297701862277, "learning_rate": 9.547758458611287e-06, "loss": 0.1705, "step": 5565 }, { "epoch": 0.16237820176206313, "grad_norm": 0.7602025155635146, "learning_rate": 9.5475620990179e-06, "loss": 0.1827, "step": 5566 }, { "epoch": 0.1624073749927067, "grad_norm": 1.3730000806359959, "learning_rate": 9.547365698824962e-06, "loss": 0.1789, "step": 5567 }, { "epoch": 0.16243654822335024, "grad_norm": 0.8780466005884809, "learning_rate": 9.547169258034228e-06, "loss": 0.16, "step": 5568 }, { "epoch": 0.16246572145399382, "grad_norm": 0.7973441690357147, "learning_rate": 9.546972776647454e-06, "loss": 0.1688, "step": 5569 }, { "epoch": 0.16249489468463738, "grad_norm": 0.9387914187545926, "learning_rate": 9.546776254666392e-06, "loss": 0.1712, "step": 5570 }, { "epoch": 0.16252406791528093, "grad_norm": 1.0942556767713258, "learning_rate": 9.546579692092797e-06, "loss": 0.1728, "step": 5571 }, { "epoch": 0.1625532411459245, "grad_norm": 0.8994339052118556, "learning_rate": 9.546383088928423e-06, "loss": 0.1851, "step": 5572 }, { "epoch": 0.16258241437656806, "grad_norm": 0.8631096758899228, "learning_rate": 9.546186445175027e-06, "loss": 0.1838, "step": 5573 }, { "epoch": 0.16261158760721162, "grad_norm": 1.0114924830145064, "learning_rate": 9.545989760834365e-06, "loss": 0.1857, "step": 5574 }, { "epoch": 0.1626407608378552, "grad_norm": 0.8805384329134841, "learning_rate": 9.545793035908188e-06, "loss": 0.1674, "step": 5575 }, { "epoch": 0.16266993406849875, "grad_norm": 0.7961742170072749, "learning_rate": 9.545596270398258e-06, "loss": 0.1974, "step": 5576 }, { "epoch": 0.1626991072991423, "grad_norm": 0.9199810343204181, "learning_rate": 9.54539946430633e-06, "loss": 0.1811, "step": 5577 }, { "epoch": 0.16272828052978586, "grad_norm": 0.8605639682493905, "learning_rate": 9.545202617634162e-06, "loss": 0.16, "step": 5578 }, { "epoch": 0.16275745376042944, "grad_norm": 0.989559048965002, "learning_rate": 9.545005730383508e-06, "loss": 0.1785, "step": 5579 }, { "epoch": 0.162786626991073, "grad_norm": 0.8561875892868223, "learning_rate": 9.544808802556129e-06, "loss": 0.1726, "step": 5580 }, { "epoch": 0.16281580022171654, "grad_norm": 0.8761340504741546, "learning_rate": 9.544611834153781e-06, "loss": 0.1483, "step": 5581 }, { "epoch": 0.16284497345236013, "grad_norm": 0.9637252105261748, "learning_rate": 9.544414825178223e-06, "loss": 0.1778, "step": 5582 }, { "epoch": 0.16287414668300368, "grad_norm": 0.8010618817615577, "learning_rate": 9.544217775631215e-06, "loss": 0.1713, "step": 5583 }, { "epoch": 0.16290331991364723, "grad_norm": 0.8309504926748028, "learning_rate": 9.544020685514515e-06, "loss": 0.1451, "step": 5584 }, { "epoch": 0.16293249314429079, "grad_norm": 0.8439330817438556, "learning_rate": 9.543823554829884e-06, "loss": 0.1656, "step": 5585 }, { "epoch": 0.16296166637493437, "grad_norm": 0.6605145046534995, "learning_rate": 9.54362638357908e-06, "loss": 0.1534, "step": 5586 }, { "epoch": 0.16299083960557792, "grad_norm": 0.7667144900167461, "learning_rate": 9.543429171763865e-06, "loss": 0.1763, "step": 5587 }, { "epoch": 0.16302001283622147, "grad_norm": 0.8680758585249889, "learning_rate": 9.543231919385999e-06, "loss": 0.1585, "step": 5588 }, { "epoch": 0.16304918606686505, "grad_norm": 0.7830878767600102, "learning_rate": 9.543034626447243e-06, "loss": 0.1641, "step": 5589 }, { "epoch": 0.1630783592975086, "grad_norm": 0.863822806015243, "learning_rate": 9.542837292949358e-06, "loss": 0.1905, "step": 5590 }, { "epoch": 0.16310753252815216, "grad_norm": 0.7331609304442888, "learning_rate": 9.542639918894105e-06, "loss": 0.171, "step": 5591 }, { "epoch": 0.16313670575879574, "grad_norm": 0.8984095274978744, "learning_rate": 9.542442504283249e-06, "loss": 0.1641, "step": 5592 }, { "epoch": 0.1631658789894393, "grad_norm": 0.8234488756856335, "learning_rate": 9.542245049118551e-06, "loss": 0.1696, "step": 5593 }, { "epoch": 0.16319505222008285, "grad_norm": 0.7996534533990739, "learning_rate": 9.542047553401773e-06, "loss": 0.174, "step": 5594 }, { "epoch": 0.1632242254507264, "grad_norm": 0.9163249519287838, "learning_rate": 9.541850017134678e-06, "loss": 0.174, "step": 5595 }, { "epoch": 0.16325339868136998, "grad_norm": 0.8898491930885384, "learning_rate": 9.54165244031903e-06, "loss": 0.191, "step": 5596 }, { "epoch": 0.16328257191201354, "grad_norm": 0.7146522455286817, "learning_rate": 9.541454822956592e-06, "loss": 0.1731, "step": 5597 }, { "epoch": 0.1633117451426571, "grad_norm": 0.9071124784826288, "learning_rate": 9.541257165049132e-06, "loss": 0.1873, "step": 5598 }, { "epoch": 0.16334091837330067, "grad_norm": 1.0050295452380662, "learning_rate": 9.541059466598413e-06, "loss": 0.1836, "step": 5599 }, { "epoch": 0.16337009160394422, "grad_norm": 0.960905206118692, "learning_rate": 9.540861727606196e-06, "loss": 0.1866, "step": 5600 }, { "epoch": 0.16339926483458778, "grad_norm": 1.0512020080564881, "learning_rate": 9.540663948074251e-06, "loss": 0.1609, "step": 5601 }, { "epoch": 0.16342843806523136, "grad_norm": 0.9435689980908454, "learning_rate": 9.540466128004342e-06, "loss": 0.1866, "step": 5602 }, { "epoch": 0.1634576112958749, "grad_norm": 1.1703447518077454, "learning_rate": 9.540268267398237e-06, "loss": 0.1726, "step": 5603 }, { "epoch": 0.16348678452651846, "grad_norm": 1.488296337065139, "learning_rate": 9.540070366257699e-06, "loss": 0.1829, "step": 5604 }, { "epoch": 0.16351595775716202, "grad_norm": 0.922847693332177, "learning_rate": 9.539872424584496e-06, "loss": 0.1969, "step": 5605 }, { "epoch": 0.1635451309878056, "grad_norm": 0.9796080261896619, "learning_rate": 9.539674442380397e-06, "loss": 0.1792, "step": 5606 }, { "epoch": 0.16357430421844915, "grad_norm": 1.1559993570626443, "learning_rate": 9.539476419647168e-06, "loss": 0.1811, "step": 5607 }, { "epoch": 0.1636034774490927, "grad_norm": 1.002853243594281, "learning_rate": 9.539278356386577e-06, "loss": 0.1813, "step": 5608 }, { "epoch": 0.16363265067973629, "grad_norm": 1.042737315841317, "learning_rate": 9.539080252600392e-06, "loss": 0.1737, "step": 5609 }, { "epoch": 0.16366182391037984, "grad_norm": 0.8994279842767887, "learning_rate": 9.538882108290384e-06, "loss": 0.1659, "step": 5610 }, { "epoch": 0.1636909971410234, "grad_norm": 1.1451182307194003, "learning_rate": 9.538683923458319e-06, "loss": 0.1722, "step": 5611 }, { "epoch": 0.16372017037166695, "grad_norm": 0.8847074219193041, "learning_rate": 9.538485698105965e-06, "loss": 0.1794, "step": 5612 }, { "epoch": 0.16374934360231053, "grad_norm": 0.8445970893768949, "learning_rate": 9.538287432235096e-06, "loss": 0.1737, "step": 5613 }, { "epoch": 0.16377851683295408, "grad_norm": 0.9592856169076226, "learning_rate": 9.53808912584748e-06, "loss": 0.1914, "step": 5614 }, { "epoch": 0.16380769006359763, "grad_norm": 0.7155258987602527, "learning_rate": 9.53789077894489e-06, "loss": 0.1689, "step": 5615 }, { "epoch": 0.16383686329424121, "grad_norm": 0.7297140127836609, "learning_rate": 9.537692391529093e-06, "loss": 0.1556, "step": 5616 }, { "epoch": 0.16386603652488477, "grad_norm": 0.98604310466573, "learning_rate": 9.53749396360186e-06, "loss": 0.1589, "step": 5617 }, { "epoch": 0.16389520975552832, "grad_norm": 0.7811506326450793, "learning_rate": 9.537295495164965e-06, "loss": 0.1926, "step": 5618 }, { "epoch": 0.1639243829861719, "grad_norm": 0.828578056936563, "learning_rate": 9.537096986220177e-06, "loss": 0.1641, "step": 5619 }, { "epoch": 0.16395355621681545, "grad_norm": 0.9639280325185331, "learning_rate": 9.536898436769273e-06, "loss": 0.1866, "step": 5620 }, { "epoch": 0.163982729447459, "grad_norm": 0.7248176740815467, "learning_rate": 9.536699846814023e-06, "loss": 0.1743, "step": 5621 }, { "epoch": 0.16401190267810256, "grad_norm": 0.8192479076642077, "learning_rate": 9.536501216356198e-06, "loss": 0.1711, "step": 5622 }, { "epoch": 0.16404107590874614, "grad_norm": 0.8799247150203047, "learning_rate": 9.536302545397575e-06, "loss": 0.2011, "step": 5623 }, { "epoch": 0.1640702491393897, "grad_norm": 0.7990696505438856, "learning_rate": 9.536103833939924e-06, "loss": 0.1714, "step": 5624 }, { "epoch": 0.16409942237003325, "grad_norm": 0.8583153072459949, "learning_rate": 9.535905081985022e-06, "loss": 0.1907, "step": 5625 }, { "epoch": 0.16412859560067683, "grad_norm": 0.8126369465430842, "learning_rate": 9.53570628953464e-06, "loss": 0.149, "step": 5626 }, { "epoch": 0.16415776883132038, "grad_norm": 0.8111059379127915, "learning_rate": 9.535507456590559e-06, "loss": 0.1939, "step": 5627 }, { "epoch": 0.16418694206196394, "grad_norm": 0.7953149377586634, "learning_rate": 9.535308583154546e-06, "loss": 0.1913, "step": 5628 }, { "epoch": 0.1642161152926075, "grad_norm": 1.0358783893962589, "learning_rate": 9.535109669228383e-06, "loss": 0.1657, "step": 5629 }, { "epoch": 0.16424528852325107, "grad_norm": 0.9209626254686679, "learning_rate": 9.534910714813843e-06, "loss": 0.1766, "step": 5630 }, { "epoch": 0.16427446175389462, "grad_norm": 0.7619437338287154, "learning_rate": 9.534711719912701e-06, "loss": 0.171, "step": 5631 }, { "epoch": 0.16430363498453818, "grad_norm": 1.101362466756387, "learning_rate": 9.534512684526738e-06, "loss": 0.1589, "step": 5632 }, { "epoch": 0.16433280821518176, "grad_norm": 0.9187218551616347, "learning_rate": 9.534313608657728e-06, "loss": 0.1726, "step": 5633 }, { "epoch": 0.1643619814458253, "grad_norm": 0.6625857365164486, "learning_rate": 9.534114492307447e-06, "loss": 0.1282, "step": 5634 }, { "epoch": 0.16439115467646886, "grad_norm": 1.0032110101859075, "learning_rate": 9.533915335477675e-06, "loss": 0.2211, "step": 5635 }, { "epoch": 0.16442032790711245, "grad_norm": 0.9061869707204406, "learning_rate": 9.53371613817019e-06, "loss": 0.1721, "step": 5636 }, { "epoch": 0.164449501137756, "grad_norm": 0.8174193509889471, "learning_rate": 9.533516900386768e-06, "loss": 0.1486, "step": 5637 }, { "epoch": 0.16447867436839955, "grad_norm": 0.848974270108626, "learning_rate": 9.53331762212919e-06, "loss": 0.195, "step": 5638 }, { "epoch": 0.1645078475990431, "grad_norm": 0.8537514937918482, "learning_rate": 9.533118303399234e-06, "loss": 0.1512, "step": 5639 }, { "epoch": 0.1645370208296867, "grad_norm": 0.7619147168312305, "learning_rate": 9.53291894419868e-06, "loss": 0.1543, "step": 5640 }, { "epoch": 0.16456619406033024, "grad_norm": 0.7312178056896909, "learning_rate": 9.53271954452931e-06, "loss": 0.1475, "step": 5641 }, { "epoch": 0.1645953672909738, "grad_norm": 0.8892306734556975, "learning_rate": 9.5325201043929e-06, "loss": 0.1967, "step": 5642 }, { "epoch": 0.16462454052161737, "grad_norm": 0.8135042187040571, "learning_rate": 9.53232062379123e-06, "loss": 0.1904, "step": 5643 }, { "epoch": 0.16465371375226093, "grad_norm": 0.8037126020990946, "learning_rate": 9.532121102726088e-06, "loss": 0.1686, "step": 5644 }, { "epoch": 0.16468288698290448, "grad_norm": 0.8641625415937917, "learning_rate": 9.531921541199249e-06, "loss": 0.1906, "step": 5645 }, { "epoch": 0.16471206021354806, "grad_norm": 1.0199354918080799, "learning_rate": 9.531721939212497e-06, "loss": 0.1827, "step": 5646 }, { "epoch": 0.16474123344419161, "grad_norm": 0.8532780844787409, "learning_rate": 9.53152229676761e-06, "loss": 0.1537, "step": 5647 }, { "epoch": 0.16477040667483517, "grad_norm": 0.9380098937926005, "learning_rate": 9.531322613866378e-06, "loss": 0.1719, "step": 5648 }, { "epoch": 0.16479957990547872, "grad_norm": 0.7853122808500244, "learning_rate": 9.531122890510577e-06, "loss": 0.167, "step": 5649 }, { "epoch": 0.1648287531361223, "grad_norm": 0.9987451694364654, "learning_rate": 9.530923126701994e-06, "loss": 0.1777, "step": 5650 }, { "epoch": 0.16485792636676586, "grad_norm": 1.13444692949112, "learning_rate": 9.530723322442408e-06, "loss": 0.1762, "step": 5651 }, { "epoch": 0.1648870995974094, "grad_norm": 0.9127974602991187, "learning_rate": 9.530523477733608e-06, "loss": 0.1584, "step": 5652 }, { "epoch": 0.164916272828053, "grad_norm": 0.98414473118197, "learning_rate": 9.530323592577376e-06, "loss": 0.1649, "step": 5653 }, { "epoch": 0.16494544605869654, "grad_norm": 0.9241482100282952, "learning_rate": 9.530123666975498e-06, "loss": 0.1655, "step": 5654 }, { "epoch": 0.1649746192893401, "grad_norm": 0.8354171263977225, "learning_rate": 9.529923700929753e-06, "loss": 0.1782, "step": 5655 }, { "epoch": 0.16500379251998365, "grad_norm": 1.1181879737222529, "learning_rate": 9.529723694441935e-06, "loss": 0.1907, "step": 5656 }, { "epoch": 0.16503296575062723, "grad_norm": 0.7588624883101615, "learning_rate": 9.529523647513824e-06, "loss": 0.1565, "step": 5657 }, { "epoch": 0.16506213898127078, "grad_norm": 0.9046596801968325, "learning_rate": 9.529323560147204e-06, "loss": 0.1788, "step": 5658 }, { "epoch": 0.16509131221191434, "grad_norm": 0.8841903567900284, "learning_rate": 9.529123432343868e-06, "loss": 0.1992, "step": 5659 }, { "epoch": 0.16512048544255792, "grad_norm": 0.8972858405509329, "learning_rate": 9.528923264105597e-06, "loss": 0.158, "step": 5660 }, { "epoch": 0.16514965867320147, "grad_norm": 0.8204773792832021, "learning_rate": 9.528723055434182e-06, "loss": 0.1641, "step": 5661 }, { "epoch": 0.16517883190384502, "grad_norm": 0.8422016494152981, "learning_rate": 9.528522806331409e-06, "loss": 0.186, "step": 5662 }, { "epoch": 0.1652080051344886, "grad_norm": 1.0036955772986154, "learning_rate": 9.528322516799064e-06, "loss": 0.1788, "step": 5663 }, { "epoch": 0.16523717836513216, "grad_norm": 0.8658475774572192, "learning_rate": 9.528122186838935e-06, "loss": 0.1682, "step": 5664 }, { "epoch": 0.1652663515957757, "grad_norm": 0.9343986033765772, "learning_rate": 9.527921816452815e-06, "loss": 0.196, "step": 5665 }, { "epoch": 0.16529552482641927, "grad_norm": 0.9523178297434219, "learning_rate": 9.527721405642489e-06, "loss": 0.1598, "step": 5666 }, { "epoch": 0.16532469805706285, "grad_norm": 0.8053235060679232, "learning_rate": 9.527520954409748e-06, "loss": 0.1837, "step": 5667 }, { "epoch": 0.1653538712877064, "grad_norm": 0.9254610811105273, "learning_rate": 9.527320462756379e-06, "loss": 0.1632, "step": 5668 }, { "epoch": 0.16538304451834995, "grad_norm": 0.8281553470358143, "learning_rate": 9.527119930684174e-06, "loss": 0.1702, "step": 5669 }, { "epoch": 0.16541221774899353, "grad_norm": 0.6681916431569204, "learning_rate": 9.526919358194923e-06, "loss": 0.1847, "step": 5670 }, { "epoch": 0.1654413909796371, "grad_norm": 0.8700015870060569, "learning_rate": 9.526718745290418e-06, "loss": 0.1643, "step": 5671 }, { "epoch": 0.16547056421028064, "grad_norm": 0.9119618151662121, "learning_rate": 9.526518091972447e-06, "loss": 0.188, "step": 5672 }, { "epoch": 0.16549973744092422, "grad_norm": 0.7895264606281064, "learning_rate": 9.526317398242803e-06, "loss": 0.1717, "step": 5673 }, { "epoch": 0.16552891067156777, "grad_norm": 0.8419253020973396, "learning_rate": 9.52611666410328e-06, "loss": 0.1682, "step": 5674 }, { "epoch": 0.16555808390221133, "grad_norm": 0.9489149000767232, "learning_rate": 9.525915889555666e-06, "loss": 0.1559, "step": 5675 }, { "epoch": 0.16558725713285488, "grad_norm": 1.1362243233797817, "learning_rate": 9.525715074601756e-06, "loss": 0.1551, "step": 5676 }, { "epoch": 0.16561643036349846, "grad_norm": 0.8007023063084524, "learning_rate": 9.525514219243342e-06, "loss": 0.1671, "step": 5677 }, { "epoch": 0.16564560359414202, "grad_norm": 0.8760805163055179, "learning_rate": 9.525313323482217e-06, "loss": 0.1884, "step": 5678 }, { "epoch": 0.16567477682478557, "grad_norm": 0.7956839749398408, "learning_rate": 9.525112387320177e-06, "loss": 0.1659, "step": 5679 }, { "epoch": 0.16570395005542915, "grad_norm": 0.970887167937794, "learning_rate": 9.524911410759012e-06, "loss": 0.1632, "step": 5680 }, { "epoch": 0.1657331232860727, "grad_norm": 1.3408322576460419, "learning_rate": 9.524710393800518e-06, "loss": 0.1861, "step": 5681 }, { "epoch": 0.16576229651671626, "grad_norm": 0.9953309589317046, "learning_rate": 9.524509336446489e-06, "loss": 0.1553, "step": 5682 }, { "epoch": 0.1657914697473598, "grad_norm": 1.0691731422030517, "learning_rate": 9.524308238698723e-06, "loss": 0.1746, "step": 5683 }, { "epoch": 0.1658206429780034, "grad_norm": 0.9751236644786784, "learning_rate": 9.52410710055901e-06, "loss": 0.1469, "step": 5684 }, { "epoch": 0.16584981620864694, "grad_norm": 1.376478960450591, "learning_rate": 9.52390592202915e-06, "loss": 0.1604, "step": 5685 }, { "epoch": 0.1658789894392905, "grad_norm": 0.9059184036656142, "learning_rate": 9.523704703110939e-06, "loss": 0.181, "step": 5686 }, { "epoch": 0.16590816266993408, "grad_norm": 0.9531920355015756, "learning_rate": 9.523503443806173e-06, "loss": 0.1871, "step": 5687 }, { "epoch": 0.16593733590057763, "grad_norm": 1.3610865414447926, "learning_rate": 9.523302144116647e-06, "loss": 0.1468, "step": 5688 }, { "epoch": 0.16596650913122118, "grad_norm": 0.9879132279272322, "learning_rate": 9.523100804044159e-06, "loss": 0.1886, "step": 5689 }, { "epoch": 0.16599568236186477, "grad_norm": 0.9880328281509605, "learning_rate": 9.522899423590507e-06, "loss": 0.17, "step": 5690 }, { "epoch": 0.16602485559250832, "grad_norm": 0.8304979236237056, "learning_rate": 9.52269800275749e-06, "loss": 0.1827, "step": 5691 }, { "epoch": 0.16605402882315187, "grad_norm": 0.9552655816701444, "learning_rate": 9.522496541546901e-06, "loss": 0.1884, "step": 5692 }, { "epoch": 0.16608320205379543, "grad_norm": 0.8358397925988778, "learning_rate": 9.522295039960544e-06, "loss": 0.1643, "step": 5693 }, { "epoch": 0.166112375284439, "grad_norm": 0.6558762570348208, "learning_rate": 9.522093498000218e-06, "loss": 0.1639, "step": 5694 }, { "epoch": 0.16614154851508256, "grad_norm": 0.9007862062668994, "learning_rate": 9.521891915667722e-06, "loss": 0.1665, "step": 5695 }, { "epoch": 0.1661707217457261, "grad_norm": 0.8837016889555012, "learning_rate": 9.52169029296485e-06, "loss": 0.1949, "step": 5696 }, { "epoch": 0.1661998949763697, "grad_norm": 0.7108733949655556, "learning_rate": 9.521488629893411e-06, "loss": 0.1772, "step": 5697 }, { "epoch": 0.16622906820701325, "grad_norm": 1.7374926645259825, "learning_rate": 9.521286926455198e-06, "loss": 0.1789, "step": 5698 }, { "epoch": 0.1662582414376568, "grad_norm": 1.0873970809436402, "learning_rate": 9.521085182652016e-06, "loss": 0.1643, "step": 5699 }, { "epoch": 0.16628741466830035, "grad_norm": 0.9235896302946089, "learning_rate": 9.520883398485665e-06, "loss": 0.1598, "step": 5700 }, { "epoch": 0.16631658789894394, "grad_norm": 0.8480519275285644, "learning_rate": 9.520681573957944e-06, "loss": 0.1547, "step": 5701 }, { "epoch": 0.1663457611295875, "grad_norm": 0.8684837851224382, "learning_rate": 9.520479709070661e-06, "loss": 0.1657, "step": 5702 }, { "epoch": 0.16637493436023104, "grad_norm": 0.8678892323263708, "learning_rate": 9.52027780382561e-06, "loss": 0.209, "step": 5703 }, { "epoch": 0.16640410759087462, "grad_norm": 0.7701939936552961, "learning_rate": 9.5200758582246e-06, "loss": 0.1808, "step": 5704 }, { "epoch": 0.16643328082151818, "grad_norm": 0.7743930184201232, "learning_rate": 9.519873872269431e-06, "loss": 0.1687, "step": 5705 }, { "epoch": 0.16646245405216173, "grad_norm": 0.8668160425028448, "learning_rate": 9.519671845961908e-06, "loss": 0.1751, "step": 5706 }, { "epoch": 0.1664916272828053, "grad_norm": 1.0076935631153972, "learning_rate": 9.519469779303833e-06, "loss": 0.1701, "step": 5707 }, { "epoch": 0.16652080051344886, "grad_norm": 0.7363457026736757, "learning_rate": 9.519267672297013e-06, "loss": 0.1487, "step": 5708 }, { "epoch": 0.16654997374409242, "grad_norm": 0.8646562493640063, "learning_rate": 9.519065524943247e-06, "loss": 0.1532, "step": 5709 }, { "epoch": 0.16657914697473597, "grad_norm": 0.9669843846088726, "learning_rate": 9.518863337244344e-06, "loss": 0.1767, "step": 5710 }, { "epoch": 0.16660832020537955, "grad_norm": 0.7631156015200443, "learning_rate": 9.518661109202107e-06, "loss": 0.149, "step": 5711 }, { "epoch": 0.1666374934360231, "grad_norm": 0.9086542199023313, "learning_rate": 9.518458840818343e-06, "loss": 0.1494, "step": 5712 }, { "epoch": 0.16666666666666666, "grad_norm": 0.9339741301798014, "learning_rate": 9.518256532094859e-06, "loss": 0.1917, "step": 5713 }, { "epoch": 0.16669583989731024, "grad_norm": 0.9002561742502497, "learning_rate": 9.518054183033456e-06, "loss": 0.1496, "step": 5714 }, { "epoch": 0.1667250131279538, "grad_norm": 0.8132471465484027, "learning_rate": 9.517851793635946e-06, "loss": 0.1735, "step": 5715 }, { "epoch": 0.16675418635859734, "grad_norm": 0.8527596315922845, "learning_rate": 9.517649363904132e-06, "loss": 0.1886, "step": 5716 }, { "epoch": 0.16678335958924093, "grad_norm": 0.8714980015972134, "learning_rate": 9.517446893839824e-06, "loss": 0.1692, "step": 5717 }, { "epoch": 0.16681253281988448, "grad_norm": 0.8587323457269065, "learning_rate": 9.517244383444829e-06, "loss": 0.1681, "step": 5718 }, { "epoch": 0.16684170605052803, "grad_norm": 0.9271190757581552, "learning_rate": 9.517041832720953e-06, "loss": 0.1441, "step": 5719 }, { "epoch": 0.16687087928117159, "grad_norm": 0.9111026655110478, "learning_rate": 9.516839241670006e-06, "loss": 0.1813, "step": 5720 }, { "epoch": 0.16690005251181517, "grad_norm": 0.8138309657015205, "learning_rate": 9.516636610293798e-06, "loss": 0.1993, "step": 5721 }, { "epoch": 0.16692922574245872, "grad_norm": 0.9979093129062674, "learning_rate": 9.516433938594137e-06, "loss": 0.1764, "step": 5722 }, { "epoch": 0.16695839897310227, "grad_norm": 0.8876233363953347, "learning_rate": 9.51623122657283e-06, "loss": 0.1888, "step": 5723 }, { "epoch": 0.16698757220374585, "grad_norm": 0.7661865290715371, "learning_rate": 9.516028474231689e-06, "loss": 0.1652, "step": 5724 }, { "epoch": 0.1670167454343894, "grad_norm": 0.7970528798299262, "learning_rate": 9.515825681572523e-06, "loss": 0.1705, "step": 5725 }, { "epoch": 0.16704591866503296, "grad_norm": 0.8731151303887534, "learning_rate": 9.515622848597145e-06, "loss": 0.1685, "step": 5726 }, { "epoch": 0.16707509189567651, "grad_norm": 0.8423270951003244, "learning_rate": 9.515419975307365e-06, "loss": 0.2072, "step": 5727 }, { "epoch": 0.1671042651263201, "grad_norm": 0.9563534421355293, "learning_rate": 9.515217061704991e-06, "loss": 0.1861, "step": 5728 }, { "epoch": 0.16713343835696365, "grad_norm": 1.0794263591197344, "learning_rate": 9.515014107791839e-06, "loss": 0.1862, "step": 5729 }, { "epoch": 0.1671626115876072, "grad_norm": 0.8482002265219688, "learning_rate": 9.514811113569718e-06, "loss": 0.167, "step": 5730 }, { "epoch": 0.16719178481825078, "grad_norm": 0.8831824460817326, "learning_rate": 9.514608079040441e-06, "loss": 0.1649, "step": 5731 }, { "epoch": 0.16722095804889434, "grad_norm": 0.8771172281238426, "learning_rate": 9.51440500420582e-06, "loss": 0.168, "step": 5732 }, { "epoch": 0.1672501312795379, "grad_norm": 0.801982788928741, "learning_rate": 9.51420188906767e-06, "loss": 0.1793, "step": 5733 }, { "epoch": 0.16727930451018147, "grad_norm": 0.9045361378829273, "learning_rate": 9.513998733627802e-06, "loss": 0.179, "step": 5734 }, { "epoch": 0.16730847774082502, "grad_norm": 0.9572368123826093, "learning_rate": 9.513795537888032e-06, "loss": 0.1709, "step": 5735 }, { "epoch": 0.16733765097146858, "grad_norm": 0.9885183697072522, "learning_rate": 9.513592301850174e-06, "loss": 0.1658, "step": 5736 }, { "epoch": 0.16736682420211213, "grad_norm": 0.8311614188640513, "learning_rate": 9.51338902551604e-06, "loss": 0.1757, "step": 5737 }, { "epoch": 0.1673959974327557, "grad_norm": 0.9266043594963513, "learning_rate": 9.513185708887445e-06, "loss": 0.1685, "step": 5738 }, { "epoch": 0.16742517066339926, "grad_norm": 0.8195980583339016, "learning_rate": 9.512982351966207e-06, "loss": 0.1695, "step": 5739 }, { "epoch": 0.16745434389404282, "grad_norm": 0.8522298747866902, "learning_rate": 9.51277895475414e-06, "loss": 0.173, "step": 5740 }, { "epoch": 0.1674835171246864, "grad_norm": 0.8191427424235472, "learning_rate": 9.51257551725306e-06, "loss": 0.1659, "step": 5741 }, { "epoch": 0.16751269035532995, "grad_norm": 0.883421807881772, "learning_rate": 9.512372039464782e-06, "loss": 0.1802, "step": 5742 }, { "epoch": 0.1675418635859735, "grad_norm": 0.8813050107409939, "learning_rate": 9.512168521391123e-06, "loss": 0.1639, "step": 5743 }, { "epoch": 0.16757103681661709, "grad_norm": 0.9627359759151015, "learning_rate": 9.511964963033902e-06, "loss": 0.194, "step": 5744 }, { "epoch": 0.16760021004726064, "grad_norm": 0.7925154895715254, "learning_rate": 9.511761364394935e-06, "loss": 0.1694, "step": 5745 }, { "epoch": 0.1676293832779042, "grad_norm": 0.850554932296648, "learning_rate": 9.51155772547604e-06, "loss": 0.1515, "step": 5746 }, { "epoch": 0.16765855650854775, "grad_norm": 0.8708859149242727, "learning_rate": 9.511354046279032e-06, "loss": 0.1725, "step": 5747 }, { "epoch": 0.16768772973919133, "grad_norm": 0.909647459641582, "learning_rate": 9.511150326805734e-06, "loss": 0.1995, "step": 5748 }, { "epoch": 0.16771690296983488, "grad_norm": 0.8624369587185624, "learning_rate": 9.510946567057963e-06, "loss": 0.1752, "step": 5749 }, { "epoch": 0.16774607620047843, "grad_norm": 0.8705414073551789, "learning_rate": 9.510742767037538e-06, "loss": 0.1655, "step": 5750 }, { "epoch": 0.16777524943112201, "grad_norm": 0.8458393316577807, "learning_rate": 9.510538926746276e-06, "loss": 0.1691, "step": 5751 }, { "epoch": 0.16780442266176557, "grad_norm": 0.7781652817375657, "learning_rate": 9.510335046186001e-06, "loss": 0.1847, "step": 5752 }, { "epoch": 0.16783359589240912, "grad_norm": 0.7684798531988254, "learning_rate": 9.510131125358532e-06, "loss": 0.1997, "step": 5753 }, { "epoch": 0.16786276912305267, "grad_norm": 0.7430952463355108, "learning_rate": 9.509927164265688e-06, "loss": 0.1543, "step": 5754 }, { "epoch": 0.16789194235369626, "grad_norm": 0.7541261204553376, "learning_rate": 9.509723162909292e-06, "loss": 0.1617, "step": 5755 }, { "epoch": 0.1679211155843398, "grad_norm": 1.0395113794202016, "learning_rate": 9.509519121291164e-06, "loss": 0.168, "step": 5756 }, { "epoch": 0.16795028881498336, "grad_norm": 0.8457258943799788, "learning_rate": 9.509315039413126e-06, "loss": 0.1668, "step": 5757 }, { "epoch": 0.16797946204562694, "grad_norm": 0.8008591685751763, "learning_rate": 9.509110917276997e-06, "loss": 0.1658, "step": 5758 }, { "epoch": 0.1680086352762705, "grad_norm": 1.0079926273568525, "learning_rate": 9.508906754884603e-06, "loss": 0.1943, "step": 5759 }, { "epoch": 0.16803780850691405, "grad_norm": 0.796659282858451, "learning_rate": 9.508702552237768e-06, "loss": 0.1741, "step": 5760 }, { "epoch": 0.16806698173755763, "grad_norm": 0.9530830112998195, "learning_rate": 9.508498309338313e-06, "loss": 0.1626, "step": 5761 }, { "epoch": 0.16809615496820118, "grad_norm": 0.8356891526275132, "learning_rate": 9.50829402618806e-06, "loss": 0.2005, "step": 5762 }, { "epoch": 0.16812532819884474, "grad_norm": 0.9700998337612351, "learning_rate": 9.508089702788835e-06, "loss": 0.1966, "step": 5763 }, { "epoch": 0.1681545014294883, "grad_norm": 0.7978492663470175, "learning_rate": 9.50788533914246e-06, "loss": 0.1479, "step": 5764 }, { "epoch": 0.16818367466013187, "grad_norm": 0.8288991693189907, "learning_rate": 9.507680935250762e-06, "loss": 0.1873, "step": 5765 }, { "epoch": 0.16821284789077542, "grad_norm": 0.7963707692136316, "learning_rate": 9.507476491115564e-06, "loss": 0.1828, "step": 5766 }, { "epoch": 0.16824202112141898, "grad_norm": 0.8758297637996928, "learning_rate": 9.507272006738692e-06, "loss": 0.1466, "step": 5767 }, { "epoch": 0.16827119435206256, "grad_norm": 0.8754145738446034, "learning_rate": 9.50706748212197e-06, "loss": 0.1904, "step": 5768 }, { "epoch": 0.1683003675827061, "grad_norm": 0.7414681198536065, "learning_rate": 9.506862917267228e-06, "loss": 0.1713, "step": 5769 }, { "epoch": 0.16832954081334967, "grad_norm": 0.7989072402561423, "learning_rate": 9.506658312176288e-06, "loss": 0.1419, "step": 5770 }, { "epoch": 0.16835871404399322, "grad_norm": 0.7659770789703081, "learning_rate": 9.506453666850982e-06, "loss": 0.1562, "step": 5771 }, { "epoch": 0.1683878872746368, "grad_norm": 0.9861733022695788, "learning_rate": 9.50624898129313e-06, "loss": 0.1646, "step": 5772 }, { "epoch": 0.16841706050528035, "grad_norm": 0.9031594025140651, "learning_rate": 9.506044255504563e-06, "loss": 0.1901, "step": 5773 }, { "epoch": 0.1684462337359239, "grad_norm": 0.9649187871318449, "learning_rate": 9.50583948948711e-06, "loss": 0.1716, "step": 5774 }, { "epoch": 0.1684754069665675, "grad_norm": 0.9163837986095745, "learning_rate": 9.505634683242595e-06, "loss": 0.1537, "step": 5775 }, { "epoch": 0.16850458019721104, "grad_norm": 0.7913032036590982, "learning_rate": 9.505429836772852e-06, "loss": 0.16, "step": 5776 }, { "epoch": 0.1685337534278546, "grad_norm": 1.0419250495818513, "learning_rate": 9.505224950079705e-06, "loss": 0.1684, "step": 5777 }, { "epoch": 0.16856292665849817, "grad_norm": 0.8186132470280999, "learning_rate": 9.505020023164985e-06, "loss": 0.1549, "step": 5778 }, { "epoch": 0.16859209988914173, "grad_norm": 0.9145764220434629, "learning_rate": 9.504815056030523e-06, "loss": 0.1798, "step": 5779 }, { "epoch": 0.16862127311978528, "grad_norm": 0.7939721453720862, "learning_rate": 9.504610048678148e-06, "loss": 0.1757, "step": 5780 }, { "epoch": 0.16865044635042883, "grad_norm": 0.9123615104748429, "learning_rate": 9.504405001109688e-06, "loss": 0.1634, "step": 5781 }, { "epoch": 0.16867961958107242, "grad_norm": 0.7837052496776843, "learning_rate": 9.504199913326977e-06, "loss": 0.1433, "step": 5782 }, { "epoch": 0.16870879281171597, "grad_norm": 0.9136821640560577, "learning_rate": 9.503994785331845e-06, "loss": 0.1827, "step": 5783 }, { "epoch": 0.16873796604235952, "grad_norm": 0.8988171773056757, "learning_rate": 9.50378961712612e-06, "loss": 0.1697, "step": 5784 }, { "epoch": 0.1687671392730031, "grad_norm": 0.8454932841127984, "learning_rate": 9.50358440871164e-06, "loss": 0.1913, "step": 5785 }, { "epoch": 0.16879631250364666, "grad_norm": 0.9256260902701139, "learning_rate": 9.50337916009023e-06, "loss": 0.1785, "step": 5786 }, { "epoch": 0.1688254857342902, "grad_norm": 0.8611191499739325, "learning_rate": 9.503173871263728e-06, "loss": 0.1585, "step": 5787 }, { "epoch": 0.1688546589649338, "grad_norm": 1.1466914106383315, "learning_rate": 9.502968542233963e-06, "loss": 0.188, "step": 5788 }, { "epoch": 0.16888383219557734, "grad_norm": 0.855778321669791, "learning_rate": 9.502763173002772e-06, "loss": 0.1756, "step": 5789 }, { "epoch": 0.1689130054262209, "grad_norm": 1.1559620482353958, "learning_rate": 9.502557763571984e-06, "loss": 0.1736, "step": 5790 }, { "epoch": 0.16894217865686445, "grad_norm": 0.8539528088437137, "learning_rate": 9.502352313943437e-06, "loss": 0.1652, "step": 5791 }, { "epoch": 0.16897135188750803, "grad_norm": 1.0127769894227874, "learning_rate": 9.502146824118964e-06, "loss": 0.1729, "step": 5792 }, { "epoch": 0.16900052511815158, "grad_norm": 1.0727713735757451, "learning_rate": 9.501941294100397e-06, "loss": 0.1648, "step": 5793 }, { "epoch": 0.16902969834879514, "grad_norm": 0.9718664482621123, "learning_rate": 9.501735723889573e-06, "loss": 0.159, "step": 5794 }, { "epoch": 0.16905887157943872, "grad_norm": 1.3117615410056271, "learning_rate": 9.501530113488326e-06, "loss": 0.1744, "step": 5795 }, { "epoch": 0.16908804481008227, "grad_norm": 1.0712446014810333, "learning_rate": 9.501324462898495e-06, "loss": 0.1749, "step": 5796 }, { "epoch": 0.16911721804072583, "grad_norm": 0.8634629738870782, "learning_rate": 9.501118772121913e-06, "loss": 0.1892, "step": 5797 }, { "epoch": 0.16914639127136938, "grad_norm": 1.0273353474251192, "learning_rate": 9.500913041160417e-06, "loss": 0.1707, "step": 5798 }, { "epoch": 0.16917556450201296, "grad_norm": 1.0731080401598547, "learning_rate": 9.500707270015846e-06, "loss": 0.1939, "step": 5799 }, { "epoch": 0.1692047377326565, "grad_norm": 1.0145773911709222, "learning_rate": 9.500501458690031e-06, "loss": 0.186, "step": 5800 }, { "epoch": 0.16923391096330007, "grad_norm": 0.8661426553472247, "learning_rate": 9.500295607184815e-06, "loss": 0.1807, "step": 5801 }, { "epoch": 0.16926308419394365, "grad_norm": 0.8685623315834823, "learning_rate": 9.500089715502035e-06, "loss": 0.166, "step": 5802 }, { "epoch": 0.1692922574245872, "grad_norm": 0.8529004373329677, "learning_rate": 9.499883783643526e-06, "loss": 0.1672, "step": 5803 }, { "epoch": 0.16932143065523075, "grad_norm": 0.6636763544981219, "learning_rate": 9.499677811611133e-06, "loss": 0.162, "step": 5804 }, { "epoch": 0.16935060388587433, "grad_norm": 0.7199486842416523, "learning_rate": 9.499471799406687e-06, "loss": 0.168, "step": 5805 }, { "epoch": 0.1693797771165179, "grad_norm": 0.7058273381665765, "learning_rate": 9.49926574703203e-06, "loss": 0.1727, "step": 5806 }, { "epoch": 0.16940895034716144, "grad_norm": 0.7683216964440348, "learning_rate": 9.499059654489005e-06, "loss": 0.1457, "step": 5807 }, { "epoch": 0.169438123577805, "grad_norm": 0.7008463940438889, "learning_rate": 9.498853521779449e-06, "loss": 0.1513, "step": 5808 }, { "epoch": 0.16946729680844858, "grad_norm": 0.77511154420062, "learning_rate": 9.498647348905203e-06, "loss": 0.186, "step": 5809 }, { "epoch": 0.16949647003909213, "grad_norm": 0.8665380179551119, "learning_rate": 9.498441135868107e-06, "loss": 0.1804, "step": 5810 }, { "epoch": 0.16952564326973568, "grad_norm": 0.864757819281507, "learning_rate": 9.498234882670003e-06, "loss": 0.1512, "step": 5811 }, { "epoch": 0.16955481650037926, "grad_norm": 0.670650331227537, "learning_rate": 9.49802858931273e-06, "loss": 0.1806, "step": 5812 }, { "epoch": 0.16958398973102282, "grad_norm": 0.9059467872314915, "learning_rate": 9.497822255798132e-06, "loss": 0.1798, "step": 5813 }, { "epoch": 0.16961316296166637, "grad_norm": 0.936561903060316, "learning_rate": 9.497615882128053e-06, "loss": 0.1619, "step": 5814 }, { "epoch": 0.16964233619230992, "grad_norm": 0.8561048927543008, "learning_rate": 9.497409468304331e-06, "loss": 0.1629, "step": 5815 }, { "epoch": 0.1696715094229535, "grad_norm": 1.0939282699945563, "learning_rate": 9.49720301432881e-06, "loss": 0.1657, "step": 5816 }, { "epoch": 0.16970068265359706, "grad_norm": 1.3073161896135992, "learning_rate": 9.496996520203336e-06, "loss": 0.1787, "step": 5817 }, { "epoch": 0.1697298558842406, "grad_norm": 1.4015824687904928, "learning_rate": 9.496789985929749e-06, "loss": 0.1585, "step": 5818 }, { "epoch": 0.1697590291148842, "grad_norm": 0.9853956862235628, "learning_rate": 9.496583411509897e-06, "loss": 0.156, "step": 5819 }, { "epoch": 0.16978820234552774, "grad_norm": 0.7535386398845096, "learning_rate": 9.49637679694562e-06, "loss": 0.166, "step": 5820 }, { "epoch": 0.1698173755761713, "grad_norm": 1.1852578585965288, "learning_rate": 9.496170142238763e-06, "loss": 0.2054, "step": 5821 }, { "epoch": 0.16984654880681488, "grad_norm": 0.6938151094907697, "learning_rate": 9.495963447391174e-06, "loss": 0.1493, "step": 5822 }, { "epoch": 0.16987572203745843, "grad_norm": 0.825948110000058, "learning_rate": 9.495756712404695e-06, "loss": 0.1709, "step": 5823 }, { "epoch": 0.16990489526810199, "grad_norm": 0.8517842125407908, "learning_rate": 9.495549937281177e-06, "loss": 0.1946, "step": 5824 }, { "epoch": 0.16993406849874554, "grad_norm": 0.8141468243310551, "learning_rate": 9.495343122022458e-06, "loss": 0.146, "step": 5825 }, { "epoch": 0.16996324172938912, "grad_norm": 0.8520375754287067, "learning_rate": 9.495136266630392e-06, "loss": 0.1805, "step": 5826 }, { "epoch": 0.16999241496003267, "grad_norm": 0.8924959092288862, "learning_rate": 9.49492937110682e-06, "loss": 0.2049, "step": 5827 }, { "epoch": 0.17002158819067623, "grad_norm": 0.7921049782065184, "learning_rate": 9.494722435453593e-06, "loss": 0.1619, "step": 5828 }, { "epoch": 0.1700507614213198, "grad_norm": 0.8075579509681048, "learning_rate": 9.494515459672557e-06, "loss": 0.1754, "step": 5829 }, { "epoch": 0.17007993465196336, "grad_norm": 0.924799922280922, "learning_rate": 9.49430844376556e-06, "loss": 0.169, "step": 5830 }, { "epoch": 0.1701091078826069, "grad_norm": 0.7704035544183194, "learning_rate": 9.494101387734448e-06, "loss": 0.1633, "step": 5831 }, { "epoch": 0.1701382811132505, "grad_norm": 0.7869394969098592, "learning_rate": 9.493894291581074e-06, "loss": 0.1566, "step": 5832 }, { "epoch": 0.17016745434389405, "grad_norm": 0.9816539631901909, "learning_rate": 9.493687155307285e-06, "loss": 0.1649, "step": 5833 }, { "epoch": 0.1701966275745376, "grad_norm": 0.8285838160023006, "learning_rate": 9.493479978914928e-06, "loss": 0.173, "step": 5834 }, { "epoch": 0.17022580080518115, "grad_norm": 0.7251819896058284, "learning_rate": 9.493272762405856e-06, "loss": 0.1633, "step": 5835 }, { "epoch": 0.17025497403582474, "grad_norm": 0.8053394908046079, "learning_rate": 9.493065505781916e-06, "loss": 0.1884, "step": 5836 }, { "epoch": 0.1702841472664683, "grad_norm": 0.8806605956908382, "learning_rate": 9.49285820904496e-06, "loss": 0.1711, "step": 5837 }, { "epoch": 0.17031332049711184, "grad_norm": 0.7246739029732067, "learning_rate": 9.492650872196839e-06, "loss": 0.1613, "step": 5838 }, { "epoch": 0.17034249372775542, "grad_norm": 0.8886602726063461, "learning_rate": 9.492443495239404e-06, "loss": 0.1658, "step": 5839 }, { "epoch": 0.17037166695839898, "grad_norm": 0.8578512789545912, "learning_rate": 9.492236078174504e-06, "loss": 0.1753, "step": 5840 }, { "epoch": 0.17040084018904253, "grad_norm": 1.030892003656916, "learning_rate": 9.492028621003994e-06, "loss": 0.1645, "step": 5841 }, { "epoch": 0.17043001341968608, "grad_norm": 1.062780621892427, "learning_rate": 9.491821123729725e-06, "loss": 0.1726, "step": 5842 }, { "epoch": 0.17045918665032966, "grad_norm": 0.944916376038252, "learning_rate": 9.49161358635355e-06, "loss": 0.1743, "step": 5843 }, { "epoch": 0.17048835988097322, "grad_norm": 0.9317109319704673, "learning_rate": 9.49140600887732e-06, "loss": 0.198, "step": 5844 }, { "epoch": 0.17051753311161677, "grad_norm": 0.8489490810089662, "learning_rate": 9.49119839130289e-06, "loss": 0.1691, "step": 5845 }, { "epoch": 0.17054670634226035, "grad_norm": 0.9000019676171469, "learning_rate": 9.49099073363211e-06, "loss": 0.1644, "step": 5846 }, { "epoch": 0.1705758795729039, "grad_norm": 0.6412596210619251, "learning_rate": 9.49078303586684e-06, "loss": 0.1541, "step": 5847 }, { "epoch": 0.17060505280354746, "grad_norm": 0.8121724387267714, "learning_rate": 9.49057529800893e-06, "loss": 0.1775, "step": 5848 }, { "epoch": 0.17063422603419104, "grad_norm": 0.7549442879290451, "learning_rate": 9.490367520060236e-06, "loss": 0.1587, "step": 5849 }, { "epoch": 0.1706633992648346, "grad_norm": 0.8276613389561046, "learning_rate": 9.490159702022611e-06, "loss": 0.1524, "step": 5850 }, { "epoch": 0.17069257249547815, "grad_norm": 0.6336513971427156, "learning_rate": 9.489951843897916e-06, "loss": 0.1403, "step": 5851 }, { "epoch": 0.1707217457261217, "grad_norm": 0.835059243274728, "learning_rate": 9.489743945688e-06, "loss": 0.1801, "step": 5852 }, { "epoch": 0.17075091895676528, "grad_norm": 0.8635128132339861, "learning_rate": 9.489536007394721e-06, "loss": 0.1545, "step": 5853 }, { "epoch": 0.17078009218740883, "grad_norm": 0.937145630045578, "learning_rate": 9.489328029019939e-06, "loss": 0.1942, "step": 5854 }, { "epoch": 0.1708092654180524, "grad_norm": 0.8666681316470812, "learning_rate": 9.489120010565506e-06, "loss": 0.1652, "step": 5855 }, { "epoch": 0.17083843864869597, "grad_norm": 0.9248298409223987, "learning_rate": 9.488911952033283e-06, "loss": 0.159, "step": 5856 }, { "epoch": 0.17086761187933952, "grad_norm": 1.0064340540443737, "learning_rate": 9.488703853425125e-06, "loss": 0.1645, "step": 5857 }, { "epoch": 0.17089678510998307, "grad_norm": 0.9921130219170589, "learning_rate": 9.48849571474289e-06, "loss": 0.1615, "step": 5858 }, { "epoch": 0.17092595834062665, "grad_norm": 0.7842710062827105, "learning_rate": 9.488287535988437e-06, "loss": 0.1696, "step": 5859 }, { "epoch": 0.1709551315712702, "grad_norm": 1.2510016437280238, "learning_rate": 9.488079317163624e-06, "loss": 0.1512, "step": 5860 }, { "epoch": 0.17098430480191376, "grad_norm": 1.1097337672361536, "learning_rate": 9.48787105827031e-06, "loss": 0.1964, "step": 5861 }, { "epoch": 0.17101347803255731, "grad_norm": 0.9184401922265988, "learning_rate": 9.487662759310354e-06, "loss": 0.188, "step": 5862 }, { "epoch": 0.1710426512632009, "grad_norm": 0.8779127180149895, "learning_rate": 9.487454420285618e-06, "loss": 0.1921, "step": 5863 }, { "epoch": 0.17107182449384445, "grad_norm": 1.0451577051652654, "learning_rate": 9.48724604119796e-06, "loss": 0.1722, "step": 5864 }, { "epoch": 0.171100997724488, "grad_norm": 1.0543682482136674, "learning_rate": 9.487037622049238e-06, "loss": 0.1486, "step": 5865 }, { "epoch": 0.17113017095513158, "grad_norm": 0.9540005552983775, "learning_rate": 9.486829162841318e-06, "loss": 0.161, "step": 5866 }, { "epoch": 0.17115934418577514, "grad_norm": 0.9103475364595928, "learning_rate": 9.486620663576058e-06, "loss": 0.1627, "step": 5867 }, { "epoch": 0.1711885174164187, "grad_norm": 0.9717469794001211, "learning_rate": 9.486412124255318e-06, "loss": 0.1894, "step": 5868 }, { "epoch": 0.17121769064706224, "grad_norm": 0.7894868945012453, "learning_rate": 9.486203544880963e-06, "loss": 0.1707, "step": 5869 }, { "epoch": 0.17124686387770582, "grad_norm": 0.9958722679967967, "learning_rate": 9.485994925454853e-06, "loss": 0.1624, "step": 5870 }, { "epoch": 0.17127603710834938, "grad_norm": 0.9983706914619377, "learning_rate": 9.485786265978852e-06, "loss": 0.1894, "step": 5871 }, { "epoch": 0.17130521033899293, "grad_norm": 0.8709152103796894, "learning_rate": 9.485577566454822e-06, "loss": 0.187, "step": 5872 }, { "epoch": 0.1713343835696365, "grad_norm": 0.7704062122345586, "learning_rate": 9.485368826884625e-06, "loss": 0.1687, "step": 5873 }, { "epoch": 0.17136355680028006, "grad_norm": 0.9113342942131873, "learning_rate": 9.485160047270128e-06, "loss": 0.1797, "step": 5874 }, { "epoch": 0.17139273003092362, "grad_norm": 0.8294673446858788, "learning_rate": 9.48495122761319e-06, "loss": 0.1787, "step": 5875 }, { "epoch": 0.1714219032615672, "grad_norm": 0.7857387256772692, "learning_rate": 9.48474236791568e-06, "loss": 0.1792, "step": 5876 }, { "epoch": 0.17145107649221075, "grad_norm": 0.7558495006071084, "learning_rate": 9.484533468179461e-06, "loss": 0.179, "step": 5877 }, { "epoch": 0.1714802497228543, "grad_norm": 0.7936706599984249, "learning_rate": 9.484324528406397e-06, "loss": 0.1888, "step": 5878 }, { "epoch": 0.17150942295349786, "grad_norm": 0.7705994564390533, "learning_rate": 9.484115548598353e-06, "loss": 0.1719, "step": 5879 }, { "epoch": 0.17153859618414144, "grad_norm": 0.7123561091210682, "learning_rate": 9.483906528757199e-06, "loss": 0.169, "step": 5880 }, { "epoch": 0.171567769414785, "grad_norm": 0.7658641762756876, "learning_rate": 9.483697468884795e-06, "loss": 0.1585, "step": 5881 }, { "epoch": 0.17159694264542855, "grad_norm": 0.7731858572594492, "learning_rate": 9.483488368983012e-06, "loss": 0.1663, "step": 5882 }, { "epoch": 0.17162611587607213, "grad_norm": 0.723524529100536, "learning_rate": 9.483279229053715e-06, "loss": 0.1838, "step": 5883 }, { "epoch": 0.17165528910671568, "grad_norm": 0.837544800571379, "learning_rate": 9.48307004909877e-06, "loss": 0.156, "step": 5884 }, { "epoch": 0.17168446233735923, "grad_norm": 0.7445408836660294, "learning_rate": 9.482860829120046e-06, "loss": 0.1569, "step": 5885 }, { "epoch": 0.1717136355680028, "grad_norm": 0.9026515150135537, "learning_rate": 9.482651569119412e-06, "loss": 0.1529, "step": 5886 }, { "epoch": 0.17174280879864637, "grad_norm": 0.8318603556547081, "learning_rate": 9.482442269098734e-06, "loss": 0.1881, "step": 5887 }, { "epoch": 0.17177198202928992, "grad_norm": 0.8781507598297048, "learning_rate": 9.482232929059882e-06, "loss": 0.1774, "step": 5888 }, { "epoch": 0.17180115525993347, "grad_norm": 0.8663416585325672, "learning_rate": 9.482023549004725e-06, "loss": 0.1579, "step": 5889 }, { "epoch": 0.17183032849057706, "grad_norm": 0.9783300054513878, "learning_rate": 9.48181412893513e-06, "loss": 0.163, "step": 5890 }, { "epoch": 0.1718595017212206, "grad_norm": 1.0992628919712968, "learning_rate": 9.481604668852969e-06, "loss": 0.1858, "step": 5891 }, { "epoch": 0.17188867495186416, "grad_norm": 1.474379319261719, "learning_rate": 9.48139516876011e-06, "loss": 0.1917, "step": 5892 }, { "epoch": 0.17191784818250774, "grad_norm": 0.9871554734600607, "learning_rate": 9.481185628658427e-06, "loss": 0.1567, "step": 5893 }, { "epoch": 0.1719470214131513, "grad_norm": 0.8606364597631231, "learning_rate": 9.480976048549788e-06, "loss": 0.1584, "step": 5894 }, { "epoch": 0.17197619464379485, "grad_norm": 1.0110607347591787, "learning_rate": 9.480766428436064e-06, "loss": 0.1546, "step": 5895 }, { "epoch": 0.1720053678744384, "grad_norm": 0.8948162546528683, "learning_rate": 9.480556768319127e-06, "loss": 0.1724, "step": 5896 }, { "epoch": 0.17203454110508198, "grad_norm": 1.041592621819511, "learning_rate": 9.480347068200848e-06, "loss": 0.1653, "step": 5897 }, { "epoch": 0.17206371433572554, "grad_norm": 0.8787071543385294, "learning_rate": 9.480137328083102e-06, "loss": 0.1618, "step": 5898 }, { "epoch": 0.1720928875663691, "grad_norm": 0.9065587370815846, "learning_rate": 9.479927547967758e-06, "loss": 0.1756, "step": 5899 }, { "epoch": 0.17212206079701267, "grad_norm": 1.1462145546323987, "learning_rate": 9.47971772785669e-06, "loss": 0.1534, "step": 5900 }, { "epoch": 0.17215123402765622, "grad_norm": 0.8907073665411045, "learning_rate": 9.479507867751772e-06, "loss": 0.1838, "step": 5901 }, { "epoch": 0.17218040725829978, "grad_norm": 0.8302950690582827, "learning_rate": 9.479297967654877e-06, "loss": 0.1477, "step": 5902 }, { "epoch": 0.17220958048894336, "grad_norm": 1.0954433713833198, "learning_rate": 9.479088027567879e-06, "loss": 0.1661, "step": 5903 }, { "epoch": 0.1722387537195869, "grad_norm": 0.8691451504944712, "learning_rate": 9.478878047492653e-06, "loss": 0.1754, "step": 5904 }, { "epoch": 0.17226792695023047, "grad_norm": 1.0099113901889492, "learning_rate": 9.478668027431071e-06, "loss": 0.1695, "step": 5905 }, { "epoch": 0.17229710018087402, "grad_norm": 0.9850315987586054, "learning_rate": 9.478457967385013e-06, "loss": 0.1894, "step": 5906 }, { "epoch": 0.1723262734115176, "grad_norm": 0.7299713781503557, "learning_rate": 9.47824786735635e-06, "loss": 0.1422, "step": 5907 }, { "epoch": 0.17235544664216115, "grad_norm": 0.8527877793748475, "learning_rate": 9.478037727346959e-06, "loss": 0.1688, "step": 5908 }, { "epoch": 0.1723846198728047, "grad_norm": 0.7605342388667926, "learning_rate": 9.477827547358716e-06, "loss": 0.1654, "step": 5909 }, { "epoch": 0.1724137931034483, "grad_norm": 0.7355909282845711, "learning_rate": 9.477617327393496e-06, "loss": 0.1578, "step": 5910 }, { "epoch": 0.17244296633409184, "grad_norm": 0.895120318100169, "learning_rate": 9.47740706745318e-06, "loss": 0.1666, "step": 5911 }, { "epoch": 0.1724721395647354, "grad_norm": 0.7411542237171944, "learning_rate": 9.47719676753964e-06, "loss": 0.185, "step": 5912 }, { "epoch": 0.17250131279537895, "grad_norm": 0.710531623170141, "learning_rate": 9.476986427654759e-06, "loss": 0.1589, "step": 5913 }, { "epoch": 0.17253048602602253, "grad_norm": 0.7799338090969948, "learning_rate": 9.476776047800412e-06, "loss": 0.1717, "step": 5914 }, { "epoch": 0.17255965925666608, "grad_norm": 0.6765085921456918, "learning_rate": 9.476565627978473e-06, "loss": 0.1505, "step": 5915 }, { "epoch": 0.17258883248730963, "grad_norm": 1.1168412809581576, "learning_rate": 9.47635516819083e-06, "loss": 0.1527, "step": 5916 }, { "epoch": 0.17261800571795322, "grad_norm": 0.776208522254885, "learning_rate": 9.476144668439353e-06, "loss": 0.1811, "step": 5917 }, { "epoch": 0.17264717894859677, "grad_norm": 0.8716613828854245, "learning_rate": 9.475934128725926e-06, "loss": 0.164, "step": 5918 }, { "epoch": 0.17267635217924032, "grad_norm": 0.7464377444385695, "learning_rate": 9.475723549052427e-06, "loss": 0.1558, "step": 5919 }, { "epoch": 0.1727055254098839, "grad_norm": 0.8552805410092627, "learning_rate": 9.475512929420739e-06, "loss": 0.1651, "step": 5920 }, { "epoch": 0.17273469864052746, "grad_norm": 0.8271860599322377, "learning_rate": 9.475302269832736e-06, "loss": 0.1598, "step": 5921 }, { "epoch": 0.172763871871171, "grad_norm": 0.9967403670391106, "learning_rate": 9.475091570290306e-06, "loss": 0.1584, "step": 5922 }, { "epoch": 0.17279304510181456, "grad_norm": 0.9750872399469732, "learning_rate": 9.474880830795326e-06, "loss": 0.1686, "step": 5923 }, { "epoch": 0.17282221833245814, "grad_norm": 0.9847673654342468, "learning_rate": 9.474670051349677e-06, "loss": 0.1659, "step": 5924 }, { "epoch": 0.1728513915631017, "grad_norm": 1.5394662452136487, "learning_rate": 9.474459231955243e-06, "loss": 0.1882, "step": 5925 }, { "epoch": 0.17288056479374525, "grad_norm": 0.8466411139553673, "learning_rate": 9.474248372613904e-06, "loss": 0.1705, "step": 5926 }, { "epoch": 0.17290973802438883, "grad_norm": 0.797873163935156, "learning_rate": 9.474037473327546e-06, "loss": 0.1929, "step": 5927 }, { "epoch": 0.17293891125503238, "grad_norm": 1.0790938186326624, "learning_rate": 9.473826534098048e-06, "loss": 0.1621, "step": 5928 }, { "epoch": 0.17296808448567594, "grad_norm": 0.9646935693715456, "learning_rate": 9.473615554927294e-06, "loss": 0.1808, "step": 5929 }, { "epoch": 0.17299725771631952, "grad_norm": 0.8499971109968554, "learning_rate": 9.473404535817168e-06, "loss": 0.1757, "step": 5930 }, { "epoch": 0.17302643094696307, "grad_norm": 0.8560382926739467, "learning_rate": 9.473193476769556e-06, "loss": 0.1693, "step": 5931 }, { "epoch": 0.17305560417760663, "grad_norm": 1.0118514806139574, "learning_rate": 9.47298237778634e-06, "loss": 0.1765, "step": 5932 }, { "epoch": 0.17308477740825018, "grad_norm": 0.8925696467654138, "learning_rate": 9.472771238869404e-06, "loss": 0.1825, "step": 5933 }, { "epoch": 0.17311395063889376, "grad_norm": 1.1306236346184735, "learning_rate": 9.472560060020635e-06, "loss": 0.1787, "step": 5934 }, { "epoch": 0.1731431238695373, "grad_norm": 1.1127923557527213, "learning_rate": 9.472348841241917e-06, "loss": 0.1656, "step": 5935 }, { "epoch": 0.17317229710018087, "grad_norm": 0.9330077890733786, "learning_rate": 9.472137582535137e-06, "loss": 0.1771, "step": 5936 }, { "epoch": 0.17320147033082445, "grad_norm": 0.8874432218424393, "learning_rate": 9.47192628390218e-06, "loss": 0.1713, "step": 5937 }, { "epoch": 0.173230643561468, "grad_norm": 0.7874904272003118, "learning_rate": 9.471714945344932e-06, "loss": 0.1793, "step": 5938 }, { "epoch": 0.17325981679211155, "grad_norm": 1.0186968902520714, "learning_rate": 9.471503566865281e-06, "loss": 0.1784, "step": 5939 }, { "epoch": 0.1732889900227551, "grad_norm": 0.7667923188504596, "learning_rate": 9.471292148465113e-06, "loss": 0.1898, "step": 5940 }, { "epoch": 0.1733181632533987, "grad_norm": 0.7736002263705937, "learning_rate": 9.471080690146316e-06, "loss": 0.1546, "step": 5941 }, { "epoch": 0.17334733648404224, "grad_norm": 0.8722639941577405, "learning_rate": 9.470869191910779e-06, "loss": 0.1596, "step": 5942 }, { "epoch": 0.1733765097146858, "grad_norm": 0.9742425619725633, "learning_rate": 9.47065765376039e-06, "loss": 0.1701, "step": 5943 }, { "epoch": 0.17340568294532938, "grad_norm": 0.8977959599437407, "learning_rate": 9.470446075697033e-06, "loss": 0.1874, "step": 5944 }, { "epoch": 0.17343485617597293, "grad_norm": 0.8078948420257451, "learning_rate": 9.470234457722604e-06, "loss": 0.1744, "step": 5945 }, { "epoch": 0.17346402940661648, "grad_norm": 0.8299434972604273, "learning_rate": 9.470022799838986e-06, "loss": 0.1822, "step": 5946 }, { "epoch": 0.17349320263726006, "grad_norm": 0.9917905356144868, "learning_rate": 9.469811102048074e-06, "loss": 0.1774, "step": 5947 }, { "epoch": 0.17352237586790362, "grad_norm": 0.7140443255715782, "learning_rate": 9.469599364351756e-06, "loss": 0.153, "step": 5948 }, { "epoch": 0.17355154909854717, "grad_norm": 0.9216111115796793, "learning_rate": 9.46938758675192e-06, "loss": 0.1676, "step": 5949 }, { "epoch": 0.17358072232919072, "grad_norm": 0.8813427707493571, "learning_rate": 9.46917576925046e-06, "loss": 0.1472, "step": 5950 }, { "epoch": 0.1736098955598343, "grad_norm": 0.745755009849987, "learning_rate": 9.468963911849264e-06, "loss": 0.1761, "step": 5951 }, { "epoch": 0.17363906879047786, "grad_norm": 0.9162878895306341, "learning_rate": 9.468752014550227e-06, "loss": 0.1837, "step": 5952 }, { "epoch": 0.1736682420211214, "grad_norm": 0.9514592588014141, "learning_rate": 9.468540077355237e-06, "loss": 0.1837, "step": 5953 }, { "epoch": 0.173697415251765, "grad_norm": 0.8191123760490736, "learning_rate": 9.468328100266189e-06, "loss": 0.1692, "step": 5954 }, { "epoch": 0.17372658848240854, "grad_norm": 0.8295743832141615, "learning_rate": 9.468116083284972e-06, "loss": 0.1745, "step": 5955 }, { "epoch": 0.1737557617130521, "grad_norm": 0.9984093734614587, "learning_rate": 9.467904026413485e-06, "loss": 0.1653, "step": 5956 }, { "epoch": 0.17378493494369565, "grad_norm": 0.8485915407094916, "learning_rate": 9.467691929653615e-06, "loss": 0.1516, "step": 5957 }, { "epoch": 0.17381410817433923, "grad_norm": 0.8760360025884906, "learning_rate": 9.46747979300726e-06, "loss": 0.1734, "step": 5958 }, { "epoch": 0.17384328140498279, "grad_norm": 0.974936331869323, "learning_rate": 9.46726761647631e-06, "loss": 0.1604, "step": 5959 }, { "epoch": 0.17387245463562634, "grad_norm": 0.8487539303292866, "learning_rate": 9.467055400062661e-06, "loss": 0.1659, "step": 5960 }, { "epoch": 0.17390162786626992, "grad_norm": 0.879925549034562, "learning_rate": 9.466843143768208e-06, "loss": 0.1537, "step": 5961 }, { "epoch": 0.17393080109691347, "grad_norm": 1.0531656602568154, "learning_rate": 9.466630847594846e-06, "loss": 0.1588, "step": 5962 }, { "epoch": 0.17395997432755703, "grad_norm": 0.8055294366853316, "learning_rate": 9.46641851154447e-06, "loss": 0.1529, "step": 5963 }, { "epoch": 0.1739891475582006, "grad_norm": 0.8963424247946293, "learning_rate": 9.466206135618976e-06, "loss": 0.1388, "step": 5964 }, { "epoch": 0.17401832078884416, "grad_norm": 0.8520309048234174, "learning_rate": 9.46599371982026e-06, "loss": 0.1421, "step": 5965 }, { "epoch": 0.17404749401948771, "grad_norm": 0.9403170645611213, "learning_rate": 9.465781264150218e-06, "loss": 0.145, "step": 5966 }, { "epoch": 0.17407666725013127, "grad_norm": 0.7752863862495293, "learning_rate": 9.465568768610746e-06, "loss": 0.1815, "step": 5967 }, { "epoch": 0.17410584048077485, "grad_norm": 0.7940347796017446, "learning_rate": 9.465356233203744e-06, "loss": 0.1653, "step": 5968 }, { "epoch": 0.1741350137114184, "grad_norm": 0.9184169748005556, "learning_rate": 9.465143657931107e-06, "loss": 0.155, "step": 5969 }, { "epoch": 0.17416418694206195, "grad_norm": 0.8519137706543816, "learning_rate": 9.464931042794732e-06, "loss": 0.1706, "step": 5970 }, { "epoch": 0.17419336017270554, "grad_norm": 1.0760462073750925, "learning_rate": 9.464718387796519e-06, "loss": 0.1962, "step": 5971 }, { "epoch": 0.1742225334033491, "grad_norm": 0.9306352113149406, "learning_rate": 9.464505692938366e-06, "loss": 0.1602, "step": 5972 }, { "epoch": 0.17425170663399264, "grad_norm": 0.9626143433325164, "learning_rate": 9.464292958222173e-06, "loss": 0.1799, "step": 5973 }, { "epoch": 0.17428087986463622, "grad_norm": 0.8169859941832072, "learning_rate": 9.464080183649838e-06, "loss": 0.2107, "step": 5974 }, { "epoch": 0.17431005309527978, "grad_norm": 0.8663734991313604, "learning_rate": 9.46386736922326e-06, "loss": 0.1526, "step": 5975 }, { "epoch": 0.17433922632592333, "grad_norm": 0.9345566173290707, "learning_rate": 9.46365451494434e-06, "loss": 0.1616, "step": 5976 }, { "epoch": 0.17436839955656688, "grad_norm": 0.768155821852064, "learning_rate": 9.463441620814978e-06, "loss": 0.1564, "step": 5977 }, { "epoch": 0.17439757278721046, "grad_norm": 0.9104263530019562, "learning_rate": 9.463228686837073e-06, "loss": 0.1618, "step": 5978 }, { "epoch": 0.17442674601785402, "grad_norm": 0.7977559775174695, "learning_rate": 9.463015713012531e-06, "loss": 0.1752, "step": 5979 }, { "epoch": 0.17445591924849757, "grad_norm": 0.8462361553626403, "learning_rate": 9.462802699343248e-06, "loss": 0.1526, "step": 5980 }, { "epoch": 0.17448509247914115, "grad_norm": 0.9133740290586382, "learning_rate": 9.462589645831128e-06, "loss": 0.2182, "step": 5981 }, { "epoch": 0.1745142657097847, "grad_norm": 0.7961643263590523, "learning_rate": 9.462376552478074e-06, "loss": 0.1599, "step": 5982 }, { "epoch": 0.17454343894042826, "grad_norm": 0.8578588468885399, "learning_rate": 9.462163419285987e-06, "loss": 0.1813, "step": 5983 }, { "epoch": 0.1745726121710718, "grad_norm": 1.0301119862778012, "learning_rate": 9.46195024625677e-06, "loss": 0.1771, "step": 5984 }, { "epoch": 0.1746017854017154, "grad_norm": 0.9098537210614265, "learning_rate": 9.461737033392327e-06, "loss": 0.154, "step": 5985 }, { "epoch": 0.17463095863235895, "grad_norm": 1.021547156446205, "learning_rate": 9.461523780694559e-06, "loss": 0.1792, "step": 5986 }, { "epoch": 0.1746601318630025, "grad_norm": 1.0632900295887269, "learning_rate": 9.461310488165373e-06, "loss": 0.1887, "step": 5987 }, { "epoch": 0.17468930509364608, "grad_norm": 1.296644032438599, "learning_rate": 9.461097155806673e-06, "loss": 0.1698, "step": 5988 }, { "epoch": 0.17471847832428963, "grad_norm": 0.9021366184339357, "learning_rate": 9.46088378362036e-06, "loss": 0.1681, "step": 5989 }, { "epoch": 0.1747476515549332, "grad_norm": 1.1251424074743566, "learning_rate": 9.460670371608345e-06, "loss": 0.2058, "step": 5990 }, { "epoch": 0.17477682478557677, "grad_norm": 0.9416358799561906, "learning_rate": 9.460456919772527e-06, "loss": 0.1645, "step": 5991 }, { "epoch": 0.17480599801622032, "grad_norm": 1.1801873635148534, "learning_rate": 9.460243428114815e-06, "loss": 0.1857, "step": 5992 }, { "epoch": 0.17483517124686387, "grad_norm": 0.9746959799548469, "learning_rate": 9.460029896637115e-06, "loss": 0.1506, "step": 5993 }, { "epoch": 0.17486434447750743, "grad_norm": 1.036935427850256, "learning_rate": 9.459816325341331e-06, "loss": 0.1684, "step": 5994 }, { "epoch": 0.174893517708151, "grad_norm": 0.7227916773233188, "learning_rate": 9.459602714229373e-06, "loss": 0.1819, "step": 5995 }, { "epoch": 0.17492269093879456, "grad_norm": 0.7485200504083205, "learning_rate": 9.459389063303147e-06, "loss": 0.1471, "step": 5996 }, { "epoch": 0.17495186416943811, "grad_norm": 0.9782731977611967, "learning_rate": 9.45917537256456e-06, "loss": 0.191, "step": 5997 }, { "epoch": 0.1749810374000817, "grad_norm": 0.8880707327511158, "learning_rate": 9.458961642015518e-06, "loss": 0.1485, "step": 5998 }, { "epoch": 0.17501021063072525, "grad_norm": 0.8374546938347205, "learning_rate": 9.458747871657931e-06, "loss": 0.1749, "step": 5999 }, { "epoch": 0.1750393838613688, "grad_norm": 1.3244317301623223, "learning_rate": 9.45853406149371e-06, "loss": 0.157, "step": 6000 }, { "epoch": 0.17506855709201236, "grad_norm": 1.331792773017602, "learning_rate": 9.45832021152476e-06, "loss": 0.1953, "step": 6001 }, { "epoch": 0.17509773032265594, "grad_norm": 0.8670937692733209, "learning_rate": 9.458106321752992e-06, "loss": 0.1745, "step": 6002 }, { "epoch": 0.1751269035532995, "grad_norm": 1.064234831429125, "learning_rate": 9.457892392180313e-06, "loss": 0.1731, "step": 6003 }, { "epoch": 0.17515607678394304, "grad_norm": 0.9256814528488202, "learning_rate": 9.457678422808636e-06, "loss": 0.1863, "step": 6004 }, { "epoch": 0.17518525001458662, "grad_norm": 1.1655334845881768, "learning_rate": 9.45746441363987e-06, "loss": 0.1429, "step": 6005 }, { "epoch": 0.17521442324523018, "grad_norm": 0.9539661582528656, "learning_rate": 9.457250364675926e-06, "loss": 0.1785, "step": 6006 }, { "epoch": 0.17524359647587373, "grad_norm": 0.8417616364845183, "learning_rate": 9.457036275918714e-06, "loss": 0.1558, "step": 6007 }, { "epoch": 0.1752727697065173, "grad_norm": 1.041565807270326, "learning_rate": 9.456822147370149e-06, "loss": 0.1854, "step": 6008 }, { "epoch": 0.17530194293716087, "grad_norm": 0.7819352945132542, "learning_rate": 9.456607979032137e-06, "loss": 0.1599, "step": 6009 }, { "epoch": 0.17533111616780442, "grad_norm": 0.9423586350948675, "learning_rate": 9.456393770906594e-06, "loss": 0.1703, "step": 6010 }, { "epoch": 0.17536028939844797, "grad_norm": 0.8588015736703734, "learning_rate": 9.45617952299543e-06, "loss": 0.1773, "step": 6011 }, { "epoch": 0.17538946262909155, "grad_norm": 0.7507797498074688, "learning_rate": 9.455965235300559e-06, "loss": 0.1753, "step": 6012 }, { "epoch": 0.1754186358597351, "grad_norm": 1.2850760496932208, "learning_rate": 9.455750907823895e-06, "loss": 0.18, "step": 6013 }, { "epoch": 0.17544780909037866, "grad_norm": 0.8619489869393079, "learning_rate": 9.45553654056735e-06, "loss": 0.1579, "step": 6014 }, { "epoch": 0.17547698232102224, "grad_norm": 0.7063174720726706, "learning_rate": 9.45532213353284e-06, "loss": 0.1606, "step": 6015 }, { "epoch": 0.1755061555516658, "grad_norm": 0.7749751225775768, "learning_rate": 9.455107686722276e-06, "loss": 0.1626, "step": 6016 }, { "epoch": 0.17553532878230935, "grad_norm": 0.8938572402846047, "learning_rate": 9.454893200137574e-06, "loss": 0.1955, "step": 6017 }, { "epoch": 0.17556450201295293, "grad_norm": 0.7553586630209235, "learning_rate": 9.45467867378065e-06, "loss": 0.1532, "step": 6018 }, { "epoch": 0.17559367524359648, "grad_norm": 0.8913931513797135, "learning_rate": 9.454464107653418e-06, "loss": 0.1577, "step": 6019 }, { "epoch": 0.17562284847424003, "grad_norm": 0.8958707414497841, "learning_rate": 9.454249501757794e-06, "loss": 0.1942, "step": 6020 }, { "epoch": 0.1756520217048836, "grad_norm": 1.0879702654646908, "learning_rate": 9.454034856095693e-06, "loss": 0.1688, "step": 6021 }, { "epoch": 0.17568119493552717, "grad_norm": 0.7765588735381302, "learning_rate": 9.45382017066903e-06, "loss": 0.1803, "step": 6022 }, { "epoch": 0.17571036816617072, "grad_norm": 0.8144183889369842, "learning_rate": 9.453605445479727e-06, "loss": 0.1716, "step": 6023 }, { "epoch": 0.17573954139681427, "grad_norm": 0.7901507030206922, "learning_rate": 9.453390680529696e-06, "loss": 0.16, "step": 6024 }, { "epoch": 0.17576871462745786, "grad_norm": 0.9581655271792159, "learning_rate": 9.453175875820857e-06, "loss": 0.181, "step": 6025 }, { "epoch": 0.1757978878581014, "grad_norm": 0.7957862568945363, "learning_rate": 9.452961031355128e-06, "loss": 0.1694, "step": 6026 }, { "epoch": 0.17582706108874496, "grad_norm": 0.8253825296684637, "learning_rate": 9.452746147134423e-06, "loss": 0.1731, "step": 6027 }, { "epoch": 0.17585623431938852, "grad_norm": 0.8902491986234154, "learning_rate": 9.452531223160665e-06, "loss": 0.148, "step": 6028 }, { "epoch": 0.1758854075500321, "grad_norm": 0.860239722845713, "learning_rate": 9.452316259435771e-06, "loss": 0.1761, "step": 6029 }, { "epoch": 0.17591458078067565, "grad_norm": 1.2344045246556863, "learning_rate": 9.45210125596166e-06, "loss": 0.1718, "step": 6030 }, { "epoch": 0.1759437540113192, "grad_norm": 0.9847896196009573, "learning_rate": 9.451886212740253e-06, "loss": 0.1543, "step": 6031 }, { "epoch": 0.17597292724196278, "grad_norm": 0.8664705599311466, "learning_rate": 9.45167112977347e-06, "loss": 0.1601, "step": 6032 }, { "epoch": 0.17600210047260634, "grad_norm": 1.0049382101091084, "learning_rate": 9.451456007063227e-06, "loss": 0.1798, "step": 6033 }, { "epoch": 0.1760312737032499, "grad_norm": 0.8925356302001541, "learning_rate": 9.451240844611447e-06, "loss": 0.167, "step": 6034 }, { "epoch": 0.17606044693389347, "grad_norm": 0.7737694120149674, "learning_rate": 9.451025642420053e-06, "loss": 0.1752, "step": 6035 }, { "epoch": 0.17608962016453703, "grad_norm": 0.997220088503365, "learning_rate": 9.450810400490964e-06, "loss": 0.1857, "step": 6036 }, { "epoch": 0.17611879339518058, "grad_norm": 0.5647631873998366, "learning_rate": 9.450595118826102e-06, "loss": 0.1308, "step": 6037 }, { "epoch": 0.17614796662582413, "grad_norm": 0.8906089630960201, "learning_rate": 9.450379797427389e-06, "loss": 0.1719, "step": 6038 }, { "epoch": 0.1761771398564677, "grad_norm": 0.8681627581345246, "learning_rate": 9.450164436296749e-06, "loss": 0.1563, "step": 6039 }, { "epoch": 0.17620631308711127, "grad_norm": 1.049205464938567, "learning_rate": 9.449949035436103e-06, "loss": 0.1704, "step": 6040 }, { "epoch": 0.17623548631775482, "grad_norm": 0.8109953914533022, "learning_rate": 9.449733594847372e-06, "loss": 0.1629, "step": 6041 }, { "epoch": 0.1762646595483984, "grad_norm": 0.8814713554775707, "learning_rate": 9.449518114532484e-06, "loss": 0.1474, "step": 6042 }, { "epoch": 0.17629383277904195, "grad_norm": 0.8461899653006055, "learning_rate": 9.449302594493359e-06, "loss": 0.1661, "step": 6043 }, { "epoch": 0.1763230060096855, "grad_norm": 0.8465339571038941, "learning_rate": 9.449087034731924e-06, "loss": 0.166, "step": 6044 }, { "epoch": 0.1763521792403291, "grad_norm": 0.8806843254973223, "learning_rate": 9.448871435250102e-06, "loss": 0.1616, "step": 6045 }, { "epoch": 0.17638135247097264, "grad_norm": 0.89434729703085, "learning_rate": 9.448655796049817e-06, "loss": 0.1627, "step": 6046 }, { "epoch": 0.1764105257016162, "grad_norm": 0.6465515443049981, "learning_rate": 9.448440117132995e-06, "loss": 0.1562, "step": 6047 }, { "epoch": 0.17643969893225975, "grad_norm": 0.9649083594734409, "learning_rate": 9.448224398501562e-06, "loss": 0.2078, "step": 6048 }, { "epoch": 0.17646887216290333, "grad_norm": 1.0466966296126328, "learning_rate": 9.448008640157444e-06, "loss": 0.1997, "step": 6049 }, { "epoch": 0.17649804539354688, "grad_norm": 0.8800708082618894, "learning_rate": 9.447792842102566e-06, "loss": 0.1664, "step": 6050 }, { "epoch": 0.17652721862419044, "grad_norm": 1.063802355330643, "learning_rate": 9.447577004338855e-06, "loss": 0.1781, "step": 6051 }, { "epoch": 0.17655639185483402, "grad_norm": 0.9923137827728826, "learning_rate": 9.447361126868238e-06, "loss": 0.1667, "step": 6052 }, { "epoch": 0.17658556508547757, "grad_norm": 0.7367424048833521, "learning_rate": 9.447145209692643e-06, "loss": 0.1577, "step": 6053 }, { "epoch": 0.17661473831612112, "grad_norm": 0.8162899202253272, "learning_rate": 9.446929252813997e-06, "loss": 0.1612, "step": 6054 }, { "epoch": 0.17664391154676468, "grad_norm": 0.7287134917054725, "learning_rate": 9.446713256234229e-06, "loss": 0.1548, "step": 6055 }, { "epoch": 0.17667308477740826, "grad_norm": 0.9588223252770661, "learning_rate": 9.446497219955266e-06, "loss": 0.1878, "step": 6056 }, { "epoch": 0.1767022580080518, "grad_norm": 0.7985875635412174, "learning_rate": 9.446281143979038e-06, "loss": 0.1373, "step": 6057 }, { "epoch": 0.17673143123869536, "grad_norm": 0.8723456676721568, "learning_rate": 9.446065028307472e-06, "loss": 0.1828, "step": 6058 }, { "epoch": 0.17676060446933894, "grad_norm": 0.8644260320732804, "learning_rate": 9.4458488729425e-06, "loss": 0.1608, "step": 6059 }, { "epoch": 0.1767897776999825, "grad_norm": 0.8467008843915851, "learning_rate": 9.44563267788605e-06, "loss": 0.1579, "step": 6060 }, { "epoch": 0.17681895093062605, "grad_norm": 0.8381171799356435, "learning_rate": 9.445416443140052e-06, "loss": 0.176, "step": 6061 }, { "epoch": 0.17684812416126963, "grad_norm": 0.6937196730004178, "learning_rate": 9.445200168706438e-06, "loss": 0.1741, "step": 6062 }, { "epoch": 0.17687729739191319, "grad_norm": 1.3537163352995736, "learning_rate": 9.444983854587138e-06, "loss": 0.1656, "step": 6063 }, { "epoch": 0.17690647062255674, "grad_norm": 0.883871849743838, "learning_rate": 9.444767500784084e-06, "loss": 0.1757, "step": 6064 }, { "epoch": 0.1769356438532003, "grad_norm": 0.7363846114493716, "learning_rate": 9.444551107299205e-06, "loss": 0.1602, "step": 6065 }, { "epoch": 0.17696481708384387, "grad_norm": 0.8035527678014244, "learning_rate": 9.444334674134437e-06, "loss": 0.1573, "step": 6066 }, { "epoch": 0.17699399031448743, "grad_norm": 0.9608067326938806, "learning_rate": 9.444118201291707e-06, "loss": 0.1568, "step": 6067 }, { "epoch": 0.17702316354513098, "grad_norm": 0.957312868090363, "learning_rate": 9.443901688772953e-06, "loss": 0.1816, "step": 6068 }, { "epoch": 0.17705233677577456, "grad_norm": 0.8634941496582655, "learning_rate": 9.443685136580105e-06, "loss": 0.1401, "step": 6069 }, { "epoch": 0.1770815100064181, "grad_norm": 0.8787652676909244, "learning_rate": 9.443468544715097e-06, "loss": 0.1834, "step": 6070 }, { "epoch": 0.17711068323706167, "grad_norm": 0.8212803919043005, "learning_rate": 9.443251913179862e-06, "loss": 0.1573, "step": 6071 }, { "epoch": 0.17713985646770522, "grad_norm": 0.73746500851273, "learning_rate": 9.443035241976335e-06, "loss": 0.1568, "step": 6072 }, { "epoch": 0.1771690296983488, "grad_norm": 0.7926757313992104, "learning_rate": 9.442818531106451e-06, "loss": 0.1791, "step": 6073 }, { "epoch": 0.17719820292899235, "grad_norm": 0.9429842205284127, "learning_rate": 9.442601780572141e-06, "loss": 0.1981, "step": 6074 }, { "epoch": 0.1772273761596359, "grad_norm": 0.833712576794287, "learning_rate": 9.442384990375344e-06, "loss": 0.1437, "step": 6075 }, { "epoch": 0.1772565493902795, "grad_norm": 0.7937003278803161, "learning_rate": 9.442168160517995e-06, "loss": 0.1653, "step": 6076 }, { "epoch": 0.17728572262092304, "grad_norm": 0.9252279333681923, "learning_rate": 9.44195129100203e-06, "loss": 0.1773, "step": 6077 }, { "epoch": 0.1773148958515666, "grad_norm": 1.01782313691626, "learning_rate": 9.441734381829382e-06, "loss": 0.1501, "step": 6078 }, { "epoch": 0.17734406908221018, "grad_norm": 0.9169277443816802, "learning_rate": 9.441517433001992e-06, "loss": 0.1502, "step": 6079 }, { "epoch": 0.17737324231285373, "grad_norm": 0.6476156203642134, "learning_rate": 9.441300444521792e-06, "loss": 0.1448, "step": 6080 }, { "epoch": 0.17740241554349728, "grad_norm": 0.7252667035537854, "learning_rate": 9.441083416390725e-06, "loss": 0.162, "step": 6081 }, { "epoch": 0.17743158877414084, "grad_norm": 0.9443395614628517, "learning_rate": 9.440866348610723e-06, "loss": 0.1607, "step": 6082 }, { "epoch": 0.17746076200478442, "grad_norm": 0.7600945171755237, "learning_rate": 9.440649241183727e-06, "loss": 0.1833, "step": 6083 }, { "epoch": 0.17748993523542797, "grad_norm": 0.931479269840474, "learning_rate": 9.440432094111675e-06, "loss": 0.1689, "step": 6084 }, { "epoch": 0.17751910846607152, "grad_norm": 0.7627085732521647, "learning_rate": 9.440214907396506e-06, "loss": 0.1665, "step": 6085 }, { "epoch": 0.1775482816967151, "grad_norm": 0.7402417189600208, "learning_rate": 9.439997681040156e-06, "loss": 0.1539, "step": 6086 }, { "epoch": 0.17757745492735866, "grad_norm": 0.7909033891310475, "learning_rate": 9.439780415044568e-06, "loss": 0.1491, "step": 6087 }, { "epoch": 0.1776066281580022, "grad_norm": 0.9789034030402775, "learning_rate": 9.439563109411682e-06, "loss": 0.1773, "step": 6088 }, { "epoch": 0.1776358013886458, "grad_norm": 1.0910629320412395, "learning_rate": 9.439345764143434e-06, "loss": 0.1579, "step": 6089 }, { "epoch": 0.17766497461928935, "grad_norm": 0.8578377522873836, "learning_rate": 9.439128379241767e-06, "loss": 0.1447, "step": 6090 }, { "epoch": 0.1776941478499329, "grad_norm": 1.120834663212433, "learning_rate": 9.438910954708622e-06, "loss": 0.1731, "step": 6091 }, { "epoch": 0.17772332108057645, "grad_norm": 0.7903505977482612, "learning_rate": 9.43869349054594e-06, "loss": 0.1675, "step": 6092 }, { "epoch": 0.17775249431122003, "grad_norm": 0.8174964146133391, "learning_rate": 9.438475986755661e-06, "loss": 0.1647, "step": 6093 }, { "epoch": 0.17778166754186359, "grad_norm": 0.6848338727131013, "learning_rate": 9.438258443339729e-06, "loss": 0.1748, "step": 6094 }, { "epoch": 0.17781084077250714, "grad_norm": 0.8040857348148285, "learning_rate": 9.438040860300085e-06, "loss": 0.18, "step": 6095 }, { "epoch": 0.17784001400315072, "grad_norm": 0.9080380906748584, "learning_rate": 9.437823237638672e-06, "loss": 0.1864, "step": 6096 }, { "epoch": 0.17786918723379427, "grad_norm": 0.7563824475411975, "learning_rate": 9.43760557535743e-06, "loss": 0.1682, "step": 6097 }, { "epoch": 0.17789836046443783, "grad_norm": 0.7708359204805659, "learning_rate": 9.437387873458308e-06, "loss": 0.1731, "step": 6098 }, { "epoch": 0.17792753369508138, "grad_norm": 0.8988494689525325, "learning_rate": 9.437170131943245e-06, "loss": 0.1842, "step": 6099 }, { "epoch": 0.17795670692572496, "grad_norm": 0.881316888063764, "learning_rate": 9.436952350814187e-06, "loss": 0.1932, "step": 6100 }, { "epoch": 0.17798588015636851, "grad_norm": 0.8412463896369021, "learning_rate": 9.436734530073078e-06, "loss": 0.1842, "step": 6101 }, { "epoch": 0.17801505338701207, "grad_norm": 0.7500046491642948, "learning_rate": 9.43651666972186e-06, "loss": 0.139, "step": 6102 }, { "epoch": 0.17804422661765565, "grad_norm": 1.1570837505681435, "learning_rate": 9.436298769762481e-06, "loss": 0.1901, "step": 6103 }, { "epoch": 0.1780733998482992, "grad_norm": 0.7852084294455182, "learning_rate": 9.436080830196888e-06, "loss": 0.1489, "step": 6104 }, { "epoch": 0.17810257307894276, "grad_norm": 0.7920356544226356, "learning_rate": 9.435862851027023e-06, "loss": 0.1664, "step": 6105 }, { "epoch": 0.17813174630958634, "grad_norm": 0.8037907284038206, "learning_rate": 9.435644832254831e-06, "loss": 0.1818, "step": 6106 }, { "epoch": 0.1781609195402299, "grad_norm": 0.9146267883701465, "learning_rate": 9.435426773882264e-06, "loss": 0.1751, "step": 6107 }, { "epoch": 0.17819009277087344, "grad_norm": 0.8506247433730693, "learning_rate": 9.435208675911263e-06, "loss": 0.1675, "step": 6108 }, { "epoch": 0.178219266001517, "grad_norm": 1.2478055350424464, "learning_rate": 9.43499053834378e-06, "loss": 0.1766, "step": 6109 }, { "epoch": 0.17824843923216058, "grad_norm": 0.7861486485923962, "learning_rate": 9.434772361181759e-06, "loss": 0.1632, "step": 6110 }, { "epoch": 0.17827761246280413, "grad_norm": 0.9131489960481203, "learning_rate": 9.434554144427148e-06, "loss": 0.1731, "step": 6111 }, { "epoch": 0.17830678569344768, "grad_norm": 1.1593414358106822, "learning_rate": 9.434335888081898e-06, "loss": 0.1839, "step": 6112 }, { "epoch": 0.17833595892409126, "grad_norm": 0.847344521852335, "learning_rate": 9.434117592147955e-06, "loss": 0.1533, "step": 6113 }, { "epoch": 0.17836513215473482, "grad_norm": 1.0958624282003797, "learning_rate": 9.43389925662727e-06, "loss": 0.1768, "step": 6114 }, { "epoch": 0.17839430538537837, "grad_norm": 1.2150248763347455, "learning_rate": 9.433680881521789e-06, "loss": 0.1707, "step": 6115 }, { "epoch": 0.17842347861602195, "grad_norm": 0.898320883463539, "learning_rate": 9.433462466833462e-06, "loss": 0.169, "step": 6116 }, { "epoch": 0.1784526518466655, "grad_norm": 1.0818057778123038, "learning_rate": 9.433244012564245e-06, "loss": 0.1774, "step": 6117 }, { "epoch": 0.17848182507730906, "grad_norm": 1.093611499526309, "learning_rate": 9.433025518716081e-06, "loss": 0.1719, "step": 6118 }, { "epoch": 0.1785109983079526, "grad_norm": 1.04629029446073, "learning_rate": 9.432806985290924e-06, "loss": 0.1773, "step": 6119 }, { "epoch": 0.1785401715385962, "grad_norm": 0.9329986990371699, "learning_rate": 9.432588412290725e-06, "loss": 0.1721, "step": 6120 }, { "epoch": 0.17856934476923975, "grad_norm": 0.8700545346597031, "learning_rate": 9.432369799717434e-06, "loss": 0.1983, "step": 6121 }, { "epoch": 0.1785985179998833, "grad_norm": 1.0542415237391032, "learning_rate": 9.432151147573003e-06, "loss": 0.1828, "step": 6122 }, { "epoch": 0.17862769123052688, "grad_norm": 0.838955245856994, "learning_rate": 9.431932455859384e-06, "loss": 0.1351, "step": 6123 }, { "epoch": 0.17865686446117043, "grad_norm": 1.3967139817878313, "learning_rate": 9.431713724578531e-06, "loss": 0.2086, "step": 6124 }, { "epoch": 0.178686037691814, "grad_norm": 0.9763115375116108, "learning_rate": 9.431494953732396e-06, "loss": 0.1567, "step": 6125 }, { "epoch": 0.17871521092245754, "grad_norm": 0.849450274946255, "learning_rate": 9.431276143322933e-06, "loss": 0.1768, "step": 6126 }, { "epoch": 0.17874438415310112, "grad_norm": 1.176015247895556, "learning_rate": 9.431057293352093e-06, "loss": 0.1791, "step": 6127 }, { "epoch": 0.17877355738374467, "grad_norm": 1.2749422903424745, "learning_rate": 9.430838403821831e-06, "loss": 0.2019, "step": 6128 }, { "epoch": 0.17880273061438823, "grad_norm": 0.9438674709566486, "learning_rate": 9.430619474734102e-06, "loss": 0.1769, "step": 6129 }, { "epoch": 0.1788319038450318, "grad_norm": 0.8042485643091488, "learning_rate": 9.43040050609086e-06, "loss": 0.2025, "step": 6130 }, { "epoch": 0.17886107707567536, "grad_norm": 1.0309599888690277, "learning_rate": 9.43018149789406e-06, "loss": 0.1685, "step": 6131 }, { "epoch": 0.17889025030631892, "grad_norm": 0.7531252493687638, "learning_rate": 9.429962450145657e-06, "loss": 0.1466, "step": 6132 }, { "epoch": 0.1789194235369625, "grad_norm": 0.8056546655740853, "learning_rate": 9.429743362847608e-06, "loss": 0.1838, "step": 6133 }, { "epoch": 0.17894859676760605, "grad_norm": 0.9787697867974474, "learning_rate": 9.429524236001866e-06, "loss": 0.2005, "step": 6134 }, { "epoch": 0.1789777699982496, "grad_norm": 0.7721888307938807, "learning_rate": 9.429305069610389e-06, "loss": 0.1482, "step": 6135 }, { "epoch": 0.17900694322889316, "grad_norm": 0.7173122780538939, "learning_rate": 9.429085863675135e-06, "loss": 0.1672, "step": 6136 }, { "epoch": 0.17903611645953674, "grad_norm": 0.7886093796379801, "learning_rate": 9.42886661819806e-06, "loss": 0.17, "step": 6137 }, { "epoch": 0.1790652896901803, "grad_norm": 0.6716444684628302, "learning_rate": 9.42864733318112e-06, "loss": 0.148, "step": 6138 }, { "epoch": 0.17909446292082384, "grad_norm": 0.7653181569944403, "learning_rate": 9.428428008626274e-06, "loss": 0.1501, "step": 6139 }, { "epoch": 0.17912363615146742, "grad_norm": 0.7644070134856724, "learning_rate": 9.42820864453548e-06, "loss": 0.1726, "step": 6140 }, { "epoch": 0.17915280938211098, "grad_norm": 0.9368679198269921, "learning_rate": 9.427989240910695e-06, "loss": 0.1565, "step": 6141 }, { "epoch": 0.17918198261275453, "grad_norm": 0.6316129360047353, "learning_rate": 9.42776979775388e-06, "loss": 0.1487, "step": 6142 }, { "epoch": 0.17921115584339808, "grad_norm": 0.8383230480926974, "learning_rate": 9.427550315066994e-06, "loss": 0.1758, "step": 6143 }, { "epoch": 0.17924032907404167, "grad_norm": 0.9890782465178287, "learning_rate": 9.427330792851996e-06, "loss": 0.1881, "step": 6144 }, { "epoch": 0.17926950230468522, "grad_norm": 0.7080945023913502, "learning_rate": 9.427111231110844e-06, "loss": 0.1745, "step": 6145 }, { "epoch": 0.17929867553532877, "grad_norm": 0.7666605079265063, "learning_rate": 9.4268916298455e-06, "loss": 0.1557, "step": 6146 }, { "epoch": 0.17932784876597235, "grad_norm": 0.8405628010532938, "learning_rate": 9.426671989057926e-06, "loss": 0.1538, "step": 6147 }, { "epoch": 0.1793570219966159, "grad_norm": 0.8001625757953447, "learning_rate": 9.42645230875008e-06, "loss": 0.1742, "step": 6148 }, { "epoch": 0.17938619522725946, "grad_norm": 0.9332369139694799, "learning_rate": 9.426232588923925e-06, "loss": 0.1515, "step": 6149 }, { "epoch": 0.17941536845790304, "grad_norm": 0.8351790934983703, "learning_rate": 9.426012829581421e-06, "loss": 0.1644, "step": 6150 }, { "epoch": 0.1794445416885466, "grad_norm": 0.7994829000612014, "learning_rate": 9.42579303072453e-06, "loss": 0.182, "step": 6151 }, { "epoch": 0.17947371491919015, "grad_norm": 0.959677623640591, "learning_rate": 9.425573192355219e-06, "loss": 0.1886, "step": 6152 }, { "epoch": 0.1795028881498337, "grad_norm": 0.9195168526872746, "learning_rate": 9.425353314475445e-06, "loss": 0.1647, "step": 6153 }, { "epoch": 0.17953206138047728, "grad_norm": 0.711177053303128, "learning_rate": 9.425133397087171e-06, "loss": 0.158, "step": 6154 }, { "epoch": 0.17956123461112083, "grad_norm": 0.6299993751647488, "learning_rate": 9.424913440192366e-06, "loss": 0.155, "step": 6155 }, { "epoch": 0.1795904078417644, "grad_norm": 0.7729505405505678, "learning_rate": 9.424693443792988e-06, "loss": 0.1545, "step": 6156 }, { "epoch": 0.17961958107240797, "grad_norm": 1.0723102235306599, "learning_rate": 9.424473407891003e-06, "loss": 0.1654, "step": 6157 }, { "epoch": 0.17964875430305152, "grad_norm": 0.7710784361163612, "learning_rate": 9.424253332488377e-06, "loss": 0.1494, "step": 6158 }, { "epoch": 0.17967792753369508, "grad_norm": 0.8657758464748335, "learning_rate": 9.424033217587072e-06, "loss": 0.1604, "step": 6159 }, { "epoch": 0.17970710076433866, "grad_norm": 0.9032855261951235, "learning_rate": 9.423813063189056e-06, "loss": 0.1773, "step": 6160 }, { "epoch": 0.1797362739949822, "grad_norm": 0.8345559494797617, "learning_rate": 9.423592869296292e-06, "loss": 0.1609, "step": 6161 }, { "epoch": 0.17976544722562576, "grad_norm": 0.8849683920236567, "learning_rate": 9.423372635910748e-06, "loss": 0.1889, "step": 6162 }, { "epoch": 0.17979462045626932, "grad_norm": 0.9609974502826502, "learning_rate": 9.42315236303439e-06, "loss": 0.174, "step": 6163 }, { "epoch": 0.1798237936869129, "grad_norm": 0.9045587451641919, "learning_rate": 9.42293205066918e-06, "loss": 0.1553, "step": 6164 }, { "epoch": 0.17985296691755645, "grad_norm": 0.9235731750614937, "learning_rate": 9.422711698817091e-06, "loss": 0.1562, "step": 6165 }, { "epoch": 0.1798821401482, "grad_norm": 0.9983722343381372, "learning_rate": 9.422491307480085e-06, "loss": 0.1538, "step": 6166 }, { "epoch": 0.17991131337884358, "grad_norm": 1.040014406683864, "learning_rate": 9.422270876660136e-06, "loss": 0.1516, "step": 6167 }, { "epoch": 0.17994048660948714, "grad_norm": 0.907915871955659, "learning_rate": 9.422050406359207e-06, "loss": 0.1742, "step": 6168 }, { "epoch": 0.1799696598401307, "grad_norm": 0.8433086264631569, "learning_rate": 9.421829896579267e-06, "loss": 0.1555, "step": 6169 }, { "epoch": 0.17999883307077424, "grad_norm": 1.1187874230437564, "learning_rate": 9.421609347322285e-06, "loss": 0.1832, "step": 6170 }, { "epoch": 0.18002800630141783, "grad_norm": 0.8590340789382764, "learning_rate": 9.42138875859023e-06, "loss": 0.1468, "step": 6171 }, { "epoch": 0.18005717953206138, "grad_norm": 0.7310620734230633, "learning_rate": 9.421168130385074e-06, "loss": 0.1675, "step": 6172 }, { "epoch": 0.18008635276270493, "grad_norm": 0.9963718411069702, "learning_rate": 9.420947462708783e-06, "loss": 0.171, "step": 6173 }, { "epoch": 0.1801155259933485, "grad_norm": 0.9020944829065396, "learning_rate": 9.420726755563327e-06, "loss": 0.1751, "step": 6174 }, { "epoch": 0.18014469922399207, "grad_norm": 0.7587580020082834, "learning_rate": 9.42050600895068e-06, "loss": 0.1562, "step": 6175 }, { "epoch": 0.18017387245463562, "grad_norm": 0.922642096759342, "learning_rate": 9.42028522287281e-06, "loss": 0.1786, "step": 6176 }, { "epoch": 0.1802030456852792, "grad_norm": 0.8950283400370009, "learning_rate": 9.420064397331688e-06, "loss": 0.1998, "step": 6177 }, { "epoch": 0.18023221891592275, "grad_norm": 0.7929709305026523, "learning_rate": 9.419843532329287e-06, "loss": 0.147, "step": 6178 }, { "epoch": 0.1802613921465663, "grad_norm": 1.2104020486551266, "learning_rate": 9.419622627867577e-06, "loss": 0.1641, "step": 6179 }, { "epoch": 0.18029056537720986, "grad_norm": 0.8553677422338132, "learning_rate": 9.419401683948533e-06, "loss": 0.1713, "step": 6180 }, { "epoch": 0.18031973860785344, "grad_norm": 0.8737367582203313, "learning_rate": 9.419180700574123e-06, "loss": 0.186, "step": 6181 }, { "epoch": 0.180348911838497, "grad_norm": 0.8067083291331099, "learning_rate": 9.418959677746325e-06, "loss": 0.1863, "step": 6182 }, { "epoch": 0.18037808506914055, "grad_norm": 0.7832841294836334, "learning_rate": 9.418738615467108e-06, "loss": 0.1524, "step": 6183 }, { "epoch": 0.18040725829978413, "grad_norm": 0.9379152920288453, "learning_rate": 9.41851751373845e-06, "loss": 0.1591, "step": 6184 }, { "epoch": 0.18043643153042768, "grad_norm": 0.8878302057374515, "learning_rate": 9.41829637256232e-06, "loss": 0.173, "step": 6185 }, { "epoch": 0.18046560476107124, "grad_norm": 0.9087940608673092, "learning_rate": 9.418075191940697e-06, "loss": 0.1816, "step": 6186 }, { "epoch": 0.1804947779917148, "grad_norm": 0.7582838323836925, "learning_rate": 9.417853971875553e-06, "loss": 0.1604, "step": 6187 }, { "epoch": 0.18052395122235837, "grad_norm": 1.3421748469345602, "learning_rate": 9.417632712368861e-06, "loss": 0.1652, "step": 6188 }, { "epoch": 0.18055312445300192, "grad_norm": 0.818452488353429, "learning_rate": 9.417411413422601e-06, "loss": 0.1735, "step": 6189 }, { "epoch": 0.18058229768364548, "grad_norm": 0.8319082879299589, "learning_rate": 9.417190075038745e-06, "loss": 0.1565, "step": 6190 }, { "epoch": 0.18061147091428906, "grad_norm": 0.9572358486376007, "learning_rate": 9.416968697219272e-06, "loss": 0.142, "step": 6191 }, { "epoch": 0.1806406441449326, "grad_norm": 1.0652700399568353, "learning_rate": 9.416747279966155e-06, "loss": 0.181, "step": 6192 }, { "epoch": 0.18066981737557616, "grad_norm": 0.8093986188290544, "learning_rate": 9.416525823281375e-06, "loss": 0.1587, "step": 6193 }, { "epoch": 0.18069899060621974, "grad_norm": 0.8793735767853701, "learning_rate": 9.416304327166905e-06, "loss": 0.1844, "step": 6194 }, { "epoch": 0.1807281638368633, "grad_norm": 0.9718214223440156, "learning_rate": 9.416082791624726e-06, "loss": 0.1934, "step": 6195 }, { "epoch": 0.18075733706750685, "grad_norm": 0.756008513886222, "learning_rate": 9.415861216656812e-06, "loss": 0.1361, "step": 6196 }, { "epoch": 0.1807865102981504, "grad_norm": 0.9071404958173924, "learning_rate": 9.415639602265144e-06, "loss": 0.1675, "step": 6197 }, { "epoch": 0.18081568352879399, "grad_norm": 0.8786545741210041, "learning_rate": 9.4154179484517e-06, "loss": 0.1699, "step": 6198 }, { "epoch": 0.18084485675943754, "grad_norm": 0.9263575442534925, "learning_rate": 9.415196255218457e-06, "loss": 0.1499, "step": 6199 }, { "epoch": 0.1808740299900811, "grad_norm": 0.7796762701779799, "learning_rate": 9.414974522567398e-06, "loss": 0.1437, "step": 6200 }, { "epoch": 0.18090320322072467, "grad_norm": 0.866025414208799, "learning_rate": 9.414752750500499e-06, "loss": 0.1977, "step": 6201 }, { "epoch": 0.18093237645136823, "grad_norm": 1.186550089311323, "learning_rate": 9.414530939019741e-06, "loss": 0.1613, "step": 6202 }, { "epoch": 0.18096154968201178, "grad_norm": 1.0800366163797837, "learning_rate": 9.414309088127105e-06, "loss": 0.1958, "step": 6203 }, { "epoch": 0.18099072291265536, "grad_norm": 0.9683499345321749, "learning_rate": 9.414087197824573e-06, "loss": 0.1769, "step": 6204 }, { "epoch": 0.18101989614329891, "grad_norm": 1.0385735295901457, "learning_rate": 9.413865268114123e-06, "loss": 0.165, "step": 6205 }, { "epoch": 0.18104906937394247, "grad_norm": 0.929342593065302, "learning_rate": 9.413643298997736e-06, "loss": 0.1531, "step": 6206 }, { "epoch": 0.18107824260458602, "grad_norm": 0.7853337979334546, "learning_rate": 9.413421290477397e-06, "loss": 0.1556, "step": 6207 }, { "epoch": 0.1811074158352296, "grad_norm": 0.8194819974039269, "learning_rate": 9.413199242555086e-06, "loss": 0.1745, "step": 6208 }, { "epoch": 0.18113658906587315, "grad_norm": 0.7724744253414053, "learning_rate": 9.412977155232787e-06, "loss": 0.154, "step": 6209 }, { "epoch": 0.1811657622965167, "grad_norm": 0.8542435209665531, "learning_rate": 9.412755028512478e-06, "loss": 0.174, "step": 6210 }, { "epoch": 0.1811949355271603, "grad_norm": 1.022489064681061, "learning_rate": 9.412532862396149e-06, "loss": 0.1657, "step": 6211 }, { "epoch": 0.18122410875780384, "grad_norm": 0.7578179979500707, "learning_rate": 9.412310656885779e-06, "loss": 0.2094, "step": 6212 }, { "epoch": 0.1812532819884474, "grad_norm": 0.8060204496253309, "learning_rate": 9.412088411983352e-06, "loss": 0.1478, "step": 6213 }, { "epoch": 0.18128245521909095, "grad_norm": 0.9909780923516098, "learning_rate": 9.411866127690855e-06, "loss": 0.1604, "step": 6214 }, { "epoch": 0.18131162844973453, "grad_norm": 0.7047233474518533, "learning_rate": 9.411643804010266e-06, "loss": 0.1631, "step": 6215 }, { "epoch": 0.18134080168037808, "grad_norm": 0.8575426263434166, "learning_rate": 9.411421440943577e-06, "loss": 0.1676, "step": 6216 }, { "epoch": 0.18136997491102164, "grad_norm": 1.0595142798092263, "learning_rate": 9.411199038492771e-06, "loss": 0.1601, "step": 6217 }, { "epoch": 0.18139914814166522, "grad_norm": 0.8622059520119328, "learning_rate": 9.410976596659833e-06, "loss": 0.1621, "step": 6218 }, { "epoch": 0.18142832137230877, "grad_norm": 0.9781827108687683, "learning_rate": 9.410754115446747e-06, "loss": 0.1605, "step": 6219 }, { "epoch": 0.18145749460295232, "grad_norm": 0.8949547567745179, "learning_rate": 9.410531594855503e-06, "loss": 0.1858, "step": 6220 }, { "epoch": 0.1814866678335959, "grad_norm": 0.9421490204150713, "learning_rate": 9.410309034888086e-06, "loss": 0.1556, "step": 6221 }, { "epoch": 0.18151584106423946, "grad_norm": 0.9222360795511418, "learning_rate": 9.410086435546481e-06, "loss": 0.1776, "step": 6222 }, { "epoch": 0.181545014294883, "grad_norm": 0.9042954423969205, "learning_rate": 9.409863796832679e-06, "loss": 0.1931, "step": 6223 }, { "epoch": 0.18157418752552656, "grad_norm": 0.87669143043865, "learning_rate": 9.409641118748665e-06, "loss": 0.1938, "step": 6224 }, { "epoch": 0.18160336075617015, "grad_norm": 0.8911654886162883, "learning_rate": 9.409418401296429e-06, "loss": 0.1572, "step": 6225 }, { "epoch": 0.1816325339868137, "grad_norm": 0.9490587471244761, "learning_rate": 9.409195644477955e-06, "loss": 0.1601, "step": 6226 }, { "epoch": 0.18166170721745725, "grad_norm": 0.7109438871780286, "learning_rate": 9.408972848295237e-06, "loss": 0.1904, "step": 6227 }, { "epoch": 0.18169088044810083, "grad_norm": 0.9835123319739566, "learning_rate": 9.408750012750262e-06, "loss": 0.1664, "step": 6228 }, { "epoch": 0.1817200536787444, "grad_norm": 0.9472216549479102, "learning_rate": 9.408527137845019e-06, "loss": 0.1545, "step": 6229 }, { "epoch": 0.18174922690938794, "grad_norm": 0.6824306815751001, "learning_rate": 9.408304223581497e-06, "loss": 0.1521, "step": 6230 }, { "epoch": 0.18177840014003152, "grad_norm": 0.8559321147756367, "learning_rate": 9.40808126996169e-06, "loss": 0.2012, "step": 6231 }, { "epoch": 0.18180757337067507, "grad_norm": 1.04243711595298, "learning_rate": 9.407858276987582e-06, "loss": 0.1629, "step": 6232 }, { "epoch": 0.18183674660131863, "grad_norm": 0.781091931137516, "learning_rate": 9.407635244661171e-06, "loss": 0.1714, "step": 6233 }, { "epoch": 0.18186591983196218, "grad_norm": 0.7351836000670849, "learning_rate": 9.407412172984443e-06, "loss": 0.1809, "step": 6234 }, { "epoch": 0.18189509306260576, "grad_norm": 0.8888704776542667, "learning_rate": 9.407189061959391e-06, "loss": 0.1832, "step": 6235 }, { "epoch": 0.18192426629324931, "grad_norm": 0.9210496492979364, "learning_rate": 9.406965911588009e-06, "loss": 0.1396, "step": 6236 }, { "epoch": 0.18195343952389287, "grad_norm": 0.7581127058051876, "learning_rate": 9.406742721872283e-06, "loss": 0.1838, "step": 6237 }, { "epoch": 0.18198261275453645, "grad_norm": 0.7802841499323447, "learning_rate": 9.406519492814215e-06, "loss": 0.1777, "step": 6238 }, { "epoch": 0.18201178598518, "grad_norm": 1.0567663023915306, "learning_rate": 9.406296224415791e-06, "loss": 0.1714, "step": 6239 }, { "epoch": 0.18204095921582356, "grad_norm": 1.0572082746664182, "learning_rate": 9.406072916679006e-06, "loss": 0.1707, "step": 6240 }, { "epoch": 0.1820701324464671, "grad_norm": 1.1517742435805653, "learning_rate": 9.405849569605853e-06, "loss": 0.1631, "step": 6241 }, { "epoch": 0.1820993056771107, "grad_norm": 0.9723575965767852, "learning_rate": 9.405626183198329e-06, "loss": 0.1626, "step": 6242 }, { "epoch": 0.18212847890775424, "grad_norm": 0.8955581516573967, "learning_rate": 9.405402757458424e-06, "loss": 0.1844, "step": 6243 }, { "epoch": 0.1821576521383978, "grad_norm": 0.8263967085292345, "learning_rate": 9.405179292388135e-06, "loss": 0.1528, "step": 6244 }, { "epoch": 0.18218682536904138, "grad_norm": 0.8216943605496201, "learning_rate": 9.404955787989458e-06, "loss": 0.1688, "step": 6245 }, { "epoch": 0.18221599859968493, "grad_norm": 1.2210662319131085, "learning_rate": 9.404732244264387e-06, "loss": 0.1562, "step": 6246 }, { "epoch": 0.18224517183032848, "grad_norm": 0.8952438572608528, "learning_rate": 9.404508661214918e-06, "loss": 0.1441, "step": 6247 }, { "epoch": 0.18227434506097207, "grad_norm": 0.809408536103548, "learning_rate": 9.404285038843047e-06, "loss": 0.1582, "step": 6248 }, { "epoch": 0.18230351829161562, "grad_norm": 0.6595809122634859, "learning_rate": 9.404061377150771e-06, "loss": 0.1508, "step": 6249 }, { "epoch": 0.18233269152225917, "grad_norm": 0.7862198516734406, "learning_rate": 9.403837676140084e-06, "loss": 0.1513, "step": 6250 }, { "epoch": 0.18236186475290272, "grad_norm": 0.6850742799073719, "learning_rate": 9.403613935812988e-06, "loss": 0.1689, "step": 6251 }, { "epoch": 0.1823910379835463, "grad_norm": 0.8220992034464922, "learning_rate": 9.403390156171477e-06, "loss": 0.1385, "step": 6252 }, { "epoch": 0.18242021121418986, "grad_norm": 0.9788715151395558, "learning_rate": 9.40316633721755e-06, "loss": 0.1675, "step": 6253 }, { "epoch": 0.1824493844448334, "grad_norm": 0.8508562582919345, "learning_rate": 9.402942478953205e-06, "loss": 0.207, "step": 6254 }, { "epoch": 0.182478557675477, "grad_norm": 0.970477424704942, "learning_rate": 9.402718581380442e-06, "loss": 0.1678, "step": 6255 }, { "epoch": 0.18250773090612055, "grad_norm": 0.8971265181992876, "learning_rate": 9.402494644501256e-06, "loss": 0.1542, "step": 6256 }, { "epoch": 0.1825369041367641, "grad_norm": 0.9824876826399018, "learning_rate": 9.402270668317651e-06, "loss": 0.1632, "step": 6257 }, { "epoch": 0.18256607736740765, "grad_norm": 0.6949440436484277, "learning_rate": 9.402046652831623e-06, "loss": 0.1619, "step": 6258 }, { "epoch": 0.18259525059805123, "grad_norm": 0.8770120753793275, "learning_rate": 9.401822598045173e-06, "loss": 0.1644, "step": 6259 }, { "epoch": 0.1826244238286948, "grad_norm": 0.9068548662885453, "learning_rate": 9.401598503960303e-06, "loss": 0.1695, "step": 6260 }, { "epoch": 0.18265359705933834, "grad_norm": 0.8946411605336688, "learning_rate": 9.401374370579013e-06, "loss": 0.1571, "step": 6261 }, { "epoch": 0.18268277028998192, "grad_norm": 0.8074587967392921, "learning_rate": 9.401150197903301e-06, "loss": 0.1602, "step": 6262 }, { "epoch": 0.18271194352062547, "grad_norm": 2.60264728155018, "learning_rate": 9.400925985935172e-06, "loss": 0.1817, "step": 6263 }, { "epoch": 0.18274111675126903, "grad_norm": 0.9913172170775648, "learning_rate": 9.400701734676628e-06, "loss": 0.172, "step": 6264 }, { "epoch": 0.1827702899819126, "grad_norm": 0.8779597607438039, "learning_rate": 9.400477444129667e-06, "loss": 0.1919, "step": 6265 }, { "epoch": 0.18279946321255616, "grad_norm": 0.8429548045429296, "learning_rate": 9.400253114296293e-06, "loss": 0.1685, "step": 6266 }, { "epoch": 0.18282863644319972, "grad_norm": 0.9852071087522531, "learning_rate": 9.400028745178512e-06, "loss": 0.1846, "step": 6267 }, { "epoch": 0.18285780967384327, "grad_norm": 1.0386223536856265, "learning_rate": 9.399804336778325e-06, "loss": 0.1751, "step": 6268 }, { "epoch": 0.18288698290448685, "grad_norm": 0.7428465970760484, "learning_rate": 9.399579889097733e-06, "loss": 0.1652, "step": 6269 }, { "epoch": 0.1829161561351304, "grad_norm": 0.8416234696604418, "learning_rate": 9.399355402138743e-06, "loss": 0.1747, "step": 6270 }, { "epoch": 0.18294532936577396, "grad_norm": 0.8880831660665703, "learning_rate": 9.399130875903357e-06, "loss": 0.1621, "step": 6271 }, { "epoch": 0.18297450259641754, "grad_norm": 0.8646320623649186, "learning_rate": 9.398906310393582e-06, "loss": 0.176, "step": 6272 }, { "epoch": 0.1830036758270611, "grad_norm": 0.8468516507308228, "learning_rate": 9.398681705611423e-06, "loss": 0.1634, "step": 6273 }, { "epoch": 0.18303284905770464, "grad_norm": 1.0238616595287175, "learning_rate": 9.39845706155888e-06, "loss": 0.1806, "step": 6274 }, { "epoch": 0.18306202228834823, "grad_norm": 0.9580535676433123, "learning_rate": 9.398232378237965e-06, "loss": 0.1695, "step": 6275 }, { "epoch": 0.18309119551899178, "grad_norm": 0.954693563100793, "learning_rate": 9.398007655650682e-06, "loss": 0.176, "step": 6276 }, { "epoch": 0.18312036874963533, "grad_norm": 0.7206433187720213, "learning_rate": 9.397782893799036e-06, "loss": 0.1505, "step": 6277 }, { "epoch": 0.18314954198027888, "grad_norm": 0.8757530542891719, "learning_rate": 9.397558092685033e-06, "loss": 0.1624, "step": 6278 }, { "epoch": 0.18317871521092247, "grad_norm": 1.0543168020175973, "learning_rate": 9.397333252310682e-06, "loss": 0.1636, "step": 6279 }, { "epoch": 0.18320788844156602, "grad_norm": 0.778576249760065, "learning_rate": 9.39710837267799e-06, "loss": 0.1827, "step": 6280 }, { "epoch": 0.18323706167220957, "grad_norm": 1.1641877106989917, "learning_rate": 9.396883453788964e-06, "loss": 0.2028, "step": 6281 }, { "epoch": 0.18326623490285315, "grad_norm": 1.1271654281740786, "learning_rate": 9.39665849564561e-06, "loss": 0.1523, "step": 6282 }, { "epoch": 0.1832954081334967, "grad_norm": 0.8299374316477975, "learning_rate": 9.396433498249939e-06, "loss": 0.168, "step": 6283 }, { "epoch": 0.18332458136414026, "grad_norm": 1.5415467716062783, "learning_rate": 9.396208461603962e-06, "loss": 0.1718, "step": 6284 }, { "epoch": 0.1833537545947838, "grad_norm": 1.0033100132751955, "learning_rate": 9.395983385709683e-06, "loss": 0.1458, "step": 6285 }, { "epoch": 0.1833829278254274, "grad_norm": 0.7665849418163303, "learning_rate": 9.395758270569114e-06, "loss": 0.165, "step": 6286 }, { "epoch": 0.18341210105607095, "grad_norm": 0.9567717284645186, "learning_rate": 9.395533116184266e-06, "loss": 0.1542, "step": 6287 }, { "epoch": 0.1834412742867145, "grad_norm": 0.8993418807931096, "learning_rate": 9.395307922557145e-06, "loss": 0.1763, "step": 6288 }, { "epoch": 0.18347044751735808, "grad_norm": 0.8079546539093717, "learning_rate": 9.395082689689765e-06, "loss": 0.1565, "step": 6289 }, { "epoch": 0.18349962074800164, "grad_norm": 0.9748613091499144, "learning_rate": 9.394857417584137e-06, "loss": 0.1899, "step": 6290 }, { "epoch": 0.1835287939786452, "grad_norm": 0.974800626920447, "learning_rate": 9.394632106242271e-06, "loss": 0.1431, "step": 6291 }, { "epoch": 0.18355796720928877, "grad_norm": 1.2020729522605325, "learning_rate": 9.394406755666177e-06, "loss": 0.1732, "step": 6292 }, { "epoch": 0.18358714043993232, "grad_norm": 1.1276256250163865, "learning_rate": 9.39418136585787e-06, "loss": 0.1771, "step": 6293 }, { "epoch": 0.18361631367057588, "grad_norm": 0.6975061214800776, "learning_rate": 9.393955936819362e-06, "loss": 0.172, "step": 6294 }, { "epoch": 0.18364548690121943, "grad_norm": 0.9135330478235544, "learning_rate": 9.393730468552661e-06, "loss": 0.1851, "step": 6295 }, { "epoch": 0.183674660131863, "grad_norm": 0.9041104249236712, "learning_rate": 9.393504961059786e-06, "loss": 0.1727, "step": 6296 }, { "epoch": 0.18370383336250656, "grad_norm": 0.8974073959120973, "learning_rate": 9.393279414342747e-06, "loss": 0.1659, "step": 6297 }, { "epoch": 0.18373300659315012, "grad_norm": 0.9090586042105718, "learning_rate": 9.393053828403558e-06, "loss": 0.1657, "step": 6298 }, { "epoch": 0.1837621798237937, "grad_norm": 1.0353462341734232, "learning_rate": 9.392828203244232e-06, "loss": 0.169, "step": 6299 }, { "epoch": 0.18379135305443725, "grad_norm": 0.8231975707209924, "learning_rate": 9.392602538866785e-06, "loss": 0.1686, "step": 6300 }, { "epoch": 0.1838205262850808, "grad_norm": 0.7398106825842896, "learning_rate": 9.39237683527323e-06, "loss": 0.1641, "step": 6301 }, { "epoch": 0.18384969951572439, "grad_norm": 0.9598872308409644, "learning_rate": 9.392151092465587e-06, "loss": 0.1572, "step": 6302 }, { "epoch": 0.18387887274636794, "grad_norm": 0.7734106660152992, "learning_rate": 9.391925310445863e-06, "loss": 0.1482, "step": 6303 }, { "epoch": 0.1839080459770115, "grad_norm": 0.7760284564417853, "learning_rate": 9.391699489216082e-06, "loss": 0.1516, "step": 6304 }, { "epoch": 0.18393721920765504, "grad_norm": 0.9199543724306547, "learning_rate": 9.391473628778253e-06, "loss": 0.1794, "step": 6305 }, { "epoch": 0.18396639243829863, "grad_norm": 0.8064664006424641, "learning_rate": 9.391247729134399e-06, "loss": 0.156, "step": 6306 }, { "epoch": 0.18399556566894218, "grad_norm": 0.9541207820094149, "learning_rate": 9.391021790286532e-06, "loss": 0.1597, "step": 6307 }, { "epoch": 0.18402473889958573, "grad_norm": 0.835873642125506, "learning_rate": 9.39079581223667e-06, "loss": 0.15, "step": 6308 }, { "epoch": 0.1840539121302293, "grad_norm": 0.913013134531752, "learning_rate": 9.390569794986833e-06, "loss": 0.1751, "step": 6309 }, { "epoch": 0.18408308536087287, "grad_norm": 0.8186151914454821, "learning_rate": 9.390343738539036e-06, "loss": 0.1839, "step": 6310 }, { "epoch": 0.18411225859151642, "grad_norm": 0.989796088579576, "learning_rate": 9.390117642895298e-06, "loss": 0.1607, "step": 6311 }, { "epoch": 0.18414143182215997, "grad_norm": 0.766298593376058, "learning_rate": 9.389891508057638e-06, "loss": 0.1621, "step": 6312 }, { "epoch": 0.18417060505280355, "grad_norm": 0.9280216493572327, "learning_rate": 9.389665334028073e-06, "loss": 0.1754, "step": 6313 }, { "epoch": 0.1841997782834471, "grad_norm": 0.8973743583496462, "learning_rate": 9.389439120808625e-06, "loss": 0.1873, "step": 6314 }, { "epoch": 0.18422895151409066, "grad_norm": 0.8473033153608642, "learning_rate": 9.389212868401313e-06, "loss": 0.1678, "step": 6315 }, { "epoch": 0.18425812474473424, "grad_norm": 0.9923850099865504, "learning_rate": 9.388986576808156e-06, "loss": 0.1688, "step": 6316 }, { "epoch": 0.1842872979753778, "grad_norm": 1.0566776003326883, "learning_rate": 9.388760246031175e-06, "loss": 0.1696, "step": 6317 }, { "epoch": 0.18431647120602135, "grad_norm": 1.0754251362690752, "learning_rate": 9.38853387607239e-06, "loss": 0.1494, "step": 6318 }, { "epoch": 0.18434564443666493, "grad_norm": 0.7831028392985439, "learning_rate": 9.388307466933821e-06, "loss": 0.1728, "step": 6319 }, { "epoch": 0.18437481766730848, "grad_norm": 0.8504901453125424, "learning_rate": 9.388081018617492e-06, "loss": 0.1511, "step": 6320 }, { "epoch": 0.18440399089795204, "grad_norm": 1.0756459690975153, "learning_rate": 9.387854531125421e-06, "loss": 0.164, "step": 6321 }, { "epoch": 0.1844331641285956, "grad_norm": 0.9622875988951577, "learning_rate": 9.387628004459633e-06, "loss": 0.1823, "step": 6322 }, { "epoch": 0.18446233735923917, "grad_norm": 0.8239741278722938, "learning_rate": 9.387401438622151e-06, "loss": 0.1958, "step": 6323 }, { "epoch": 0.18449151058988272, "grad_norm": 0.8432333835187112, "learning_rate": 9.387174833614996e-06, "loss": 0.1561, "step": 6324 }, { "epoch": 0.18452068382052628, "grad_norm": 0.9037546123012228, "learning_rate": 9.38694818944019e-06, "loss": 0.1761, "step": 6325 }, { "epoch": 0.18454985705116986, "grad_norm": 0.6479765240275243, "learning_rate": 9.386721506099759e-06, "loss": 0.1706, "step": 6326 }, { "epoch": 0.1845790302818134, "grad_norm": 0.8840684744698483, "learning_rate": 9.386494783595725e-06, "loss": 0.1498, "step": 6327 }, { "epoch": 0.18460820351245696, "grad_norm": 4.114698133905036, "learning_rate": 9.386268021930114e-06, "loss": 0.1699, "step": 6328 }, { "epoch": 0.18463737674310052, "grad_norm": 0.8557881661633471, "learning_rate": 9.386041221104947e-06, "loss": 0.1751, "step": 6329 }, { "epoch": 0.1846665499737441, "grad_norm": 0.6900398003115896, "learning_rate": 9.385814381122252e-06, "loss": 0.1611, "step": 6330 }, { "epoch": 0.18469572320438765, "grad_norm": 0.9162916362396664, "learning_rate": 9.385587501984056e-06, "loss": 0.1589, "step": 6331 }, { "epoch": 0.1847248964350312, "grad_norm": 0.8271291333589972, "learning_rate": 9.385360583692378e-06, "loss": 0.1482, "step": 6332 }, { "epoch": 0.18475406966567479, "grad_norm": 0.7268643631329941, "learning_rate": 9.385133626249247e-06, "loss": 0.1528, "step": 6333 }, { "epoch": 0.18478324289631834, "grad_norm": 1.0353747666604984, "learning_rate": 9.384906629656692e-06, "loss": 0.1775, "step": 6334 }, { "epoch": 0.1848124161269619, "grad_norm": 0.9373739253374651, "learning_rate": 9.384679593916737e-06, "loss": 0.171, "step": 6335 }, { "epoch": 0.18484158935760547, "grad_norm": 0.7342795308586916, "learning_rate": 9.384452519031409e-06, "loss": 0.1529, "step": 6336 }, { "epoch": 0.18487076258824903, "grad_norm": 0.9723050522783112, "learning_rate": 9.384225405002736e-06, "loss": 0.1691, "step": 6337 }, { "epoch": 0.18489993581889258, "grad_norm": 0.9092930360712453, "learning_rate": 9.383998251832744e-06, "loss": 0.1648, "step": 6338 }, { "epoch": 0.18492910904953613, "grad_norm": 0.8127884031882446, "learning_rate": 9.383771059523464e-06, "loss": 0.1938, "step": 6339 }, { "epoch": 0.18495828228017971, "grad_norm": 0.8100007733503917, "learning_rate": 9.383543828076923e-06, "loss": 0.1617, "step": 6340 }, { "epoch": 0.18498745551082327, "grad_norm": 1.0994614799445628, "learning_rate": 9.383316557495145e-06, "loss": 0.1598, "step": 6341 }, { "epoch": 0.18501662874146682, "grad_norm": 0.7678430777969951, "learning_rate": 9.383089247780168e-06, "loss": 0.1669, "step": 6342 }, { "epoch": 0.1850458019721104, "grad_norm": 0.8686584777523905, "learning_rate": 9.382861898934013e-06, "loss": 0.1539, "step": 6343 }, { "epoch": 0.18507497520275396, "grad_norm": 1.0690350375174442, "learning_rate": 9.382634510958714e-06, "loss": 0.1845, "step": 6344 }, { "epoch": 0.1851041484333975, "grad_norm": 0.8535316947589281, "learning_rate": 9.382407083856302e-06, "loss": 0.1728, "step": 6345 }, { "epoch": 0.1851333216640411, "grad_norm": 0.9349007679845841, "learning_rate": 9.382179617628804e-06, "loss": 0.1627, "step": 6346 }, { "epoch": 0.18516249489468464, "grad_norm": 0.9874725412467277, "learning_rate": 9.381952112278254e-06, "loss": 0.1641, "step": 6347 }, { "epoch": 0.1851916681253282, "grad_norm": 0.9545658165985447, "learning_rate": 9.38172456780668e-06, "loss": 0.2031, "step": 6348 }, { "epoch": 0.18522084135597175, "grad_norm": 0.859255529423847, "learning_rate": 9.381496984216117e-06, "loss": 0.1796, "step": 6349 }, { "epoch": 0.18525001458661533, "grad_norm": 0.8752842002519265, "learning_rate": 9.381269361508593e-06, "loss": 0.1616, "step": 6350 }, { "epoch": 0.18527918781725888, "grad_norm": 0.7403846602310657, "learning_rate": 9.381041699686143e-06, "loss": 0.1531, "step": 6351 }, { "epoch": 0.18530836104790244, "grad_norm": 0.9101251262560319, "learning_rate": 9.380813998750798e-06, "loss": 0.1728, "step": 6352 }, { "epoch": 0.18533753427854602, "grad_norm": 0.9226859299655494, "learning_rate": 9.380586258704592e-06, "loss": 0.1902, "step": 6353 }, { "epoch": 0.18536670750918957, "grad_norm": 0.735729210753132, "learning_rate": 9.380358479549556e-06, "loss": 0.1512, "step": 6354 }, { "epoch": 0.18539588073983312, "grad_norm": 0.7761692285022256, "learning_rate": 9.380130661287728e-06, "loss": 0.1516, "step": 6355 }, { "epoch": 0.18542505397047668, "grad_norm": 0.9170830195308721, "learning_rate": 9.379902803921135e-06, "loss": 0.1607, "step": 6356 }, { "epoch": 0.18545422720112026, "grad_norm": 0.8316013696108943, "learning_rate": 9.379674907451819e-06, "loss": 0.164, "step": 6357 }, { "epoch": 0.1854834004317638, "grad_norm": 0.8536503482264336, "learning_rate": 9.379446971881808e-06, "loss": 0.1611, "step": 6358 }, { "epoch": 0.18551257366240737, "grad_norm": 0.839788580451009, "learning_rate": 9.379218997213143e-06, "loss": 0.1661, "step": 6359 }, { "epoch": 0.18554174689305095, "grad_norm": 0.8795878200249163, "learning_rate": 9.378990983447855e-06, "loss": 0.1651, "step": 6360 }, { "epoch": 0.1855709201236945, "grad_norm": 0.9092622775474988, "learning_rate": 9.37876293058798e-06, "loss": 0.1843, "step": 6361 }, { "epoch": 0.18560009335433805, "grad_norm": 0.8255685089027337, "learning_rate": 9.378534838635556e-06, "loss": 0.1548, "step": 6362 }, { "epoch": 0.18562926658498163, "grad_norm": 0.9788167032986756, "learning_rate": 9.378306707592618e-06, "loss": 0.1846, "step": 6363 }, { "epoch": 0.1856584398156252, "grad_norm": 0.8886286713130412, "learning_rate": 9.378078537461203e-06, "loss": 0.1678, "step": 6364 }, { "epoch": 0.18568761304626874, "grad_norm": 0.888688783427601, "learning_rate": 9.377850328243348e-06, "loss": 0.1522, "step": 6365 }, { "epoch": 0.1857167862769123, "grad_norm": 0.6460136648863666, "learning_rate": 9.377622079941089e-06, "loss": 0.1448, "step": 6366 }, { "epoch": 0.18574595950755587, "grad_norm": 1.0355527099034294, "learning_rate": 9.377393792556466e-06, "loss": 0.1849, "step": 6367 }, { "epoch": 0.18577513273819943, "grad_norm": 0.9430101147387856, "learning_rate": 9.377165466091516e-06, "loss": 0.1641, "step": 6368 }, { "epoch": 0.18580430596884298, "grad_norm": 0.7912800017061814, "learning_rate": 9.376937100548277e-06, "loss": 0.1634, "step": 6369 }, { "epoch": 0.18583347919948656, "grad_norm": 1.2790306723717684, "learning_rate": 9.376708695928791e-06, "loss": 0.1537, "step": 6370 }, { "epoch": 0.18586265243013012, "grad_norm": 0.9569122567998494, "learning_rate": 9.376480252235091e-06, "loss": 0.1554, "step": 6371 }, { "epoch": 0.18589182566077367, "grad_norm": 0.8385870624766267, "learning_rate": 9.376251769469223e-06, "loss": 0.1696, "step": 6372 }, { "epoch": 0.18592099889141722, "grad_norm": 1.2551188792359957, "learning_rate": 9.376023247633224e-06, "loss": 0.1739, "step": 6373 }, { "epoch": 0.1859501721220608, "grad_norm": 0.9203606008213495, "learning_rate": 9.375794686729132e-06, "loss": 0.1553, "step": 6374 }, { "epoch": 0.18597934535270436, "grad_norm": 0.9664355287797862, "learning_rate": 9.37556608675899e-06, "loss": 0.1753, "step": 6375 }, { "epoch": 0.1860085185833479, "grad_norm": 0.8985595808477231, "learning_rate": 9.375337447724839e-06, "loss": 0.1992, "step": 6376 }, { "epoch": 0.1860376918139915, "grad_norm": 0.8896127544730188, "learning_rate": 9.37510876962872e-06, "loss": 0.1645, "step": 6377 }, { "epoch": 0.18606686504463504, "grad_norm": 0.9140614984575676, "learning_rate": 9.374880052472674e-06, "loss": 0.1495, "step": 6378 }, { "epoch": 0.1860960382752786, "grad_norm": 0.8551668307596201, "learning_rate": 9.374651296258743e-06, "loss": 0.154, "step": 6379 }, { "epoch": 0.18612521150592218, "grad_norm": 0.7345254800454083, "learning_rate": 9.374422500988971e-06, "loss": 0.1641, "step": 6380 }, { "epoch": 0.18615438473656573, "grad_norm": 0.8967007878719371, "learning_rate": 9.374193666665397e-06, "loss": 0.2001, "step": 6381 }, { "epoch": 0.18618355796720928, "grad_norm": 1.0206969373347077, "learning_rate": 9.373964793290067e-06, "loss": 0.1617, "step": 6382 }, { "epoch": 0.18621273119785284, "grad_norm": 0.9382054746335238, "learning_rate": 9.373735880865024e-06, "loss": 0.1779, "step": 6383 }, { "epoch": 0.18624190442849642, "grad_norm": 0.7812344511784418, "learning_rate": 9.373506929392311e-06, "loss": 0.1626, "step": 6384 }, { "epoch": 0.18627107765913997, "grad_norm": 0.9776708510589419, "learning_rate": 9.373277938873973e-06, "loss": 0.1775, "step": 6385 }, { "epoch": 0.18630025088978353, "grad_norm": 0.9224752017283132, "learning_rate": 9.373048909312052e-06, "loss": 0.1786, "step": 6386 }, { "epoch": 0.1863294241204271, "grad_norm": 0.6783865099924923, "learning_rate": 9.372819840708594e-06, "loss": 0.1732, "step": 6387 }, { "epoch": 0.18635859735107066, "grad_norm": 1.3051642832191346, "learning_rate": 9.372590733065645e-06, "loss": 0.1851, "step": 6388 }, { "epoch": 0.1863877705817142, "grad_norm": 1.0003059103132785, "learning_rate": 9.37236158638525e-06, "loss": 0.1774, "step": 6389 }, { "epoch": 0.1864169438123578, "grad_norm": 1.2418311183576725, "learning_rate": 9.372132400669456e-06, "loss": 0.1906, "step": 6390 }, { "epoch": 0.18644611704300135, "grad_norm": 1.1159368494090276, "learning_rate": 9.371903175920306e-06, "loss": 0.1789, "step": 6391 }, { "epoch": 0.1864752902736449, "grad_norm": 0.8045398393384725, "learning_rate": 9.371673912139847e-06, "loss": 0.1568, "step": 6392 }, { "epoch": 0.18650446350428845, "grad_norm": 0.8406425441848276, "learning_rate": 9.371444609330129e-06, "loss": 0.1633, "step": 6393 }, { "epoch": 0.18653363673493203, "grad_norm": 0.7074436767919758, "learning_rate": 9.371215267493195e-06, "loss": 0.1641, "step": 6394 }, { "epoch": 0.1865628099655756, "grad_norm": 0.8501370586374717, "learning_rate": 9.370985886631097e-06, "loss": 0.1834, "step": 6395 }, { "epoch": 0.18659198319621914, "grad_norm": 0.9463208895737822, "learning_rate": 9.370756466745879e-06, "loss": 0.1542, "step": 6396 }, { "epoch": 0.18662115642686272, "grad_norm": 0.8719077905712171, "learning_rate": 9.37052700783959e-06, "loss": 0.1657, "step": 6397 }, { "epoch": 0.18665032965750628, "grad_norm": 0.9821077085761606, "learning_rate": 9.37029750991428e-06, "loss": 0.1524, "step": 6398 }, { "epoch": 0.18667950288814983, "grad_norm": 0.8425957668888753, "learning_rate": 9.370067972971998e-06, "loss": 0.1602, "step": 6399 }, { "epoch": 0.18670867611879338, "grad_norm": 0.9156979950842294, "learning_rate": 9.369838397014792e-06, "loss": 0.1914, "step": 6400 }, { "epoch": 0.18673784934943696, "grad_norm": 0.9284921112275565, "learning_rate": 9.36960878204471e-06, "loss": 0.1723, "step": 6401 }, { "epoch": 0.18676702258008052, "grad_norm": 0.813841492557956, "learning_rate": 9.369379128063807e-06, "loss": 0.1695, "step": 6402 }, { "epoch": 0.18679619581072407, "grad_norm": 1.0530021092744695, "learning_rate": 9.369149435074127e-06, "loss": 0.1486, "step": 6403 }, { "epoch": 0.18682536904136765, "grad_norm": 0.8517192153127459, "learning_rate": 9.368919703077726e-06, "loss": 0.1685, "step": 6404 }, { "epoch": 0.1868545422720112, "grad_norm": 0.9136196497971955, "learning_rate": 9.368689932076651e-06, "loss": 0.1774, "step": 6405 }, { "epoch": 0.18688371550265476, "grad_norm": 0.6810571973131726, "learning_rate": 9.368460122072958e-06, "loss": 0.1422, "step": 6406 }, { "epoch": 0.18691288873329834, "grad_norm": 0.98660818876648, "learning_rate": 9.368230273068694e-06, "loss": 0.1691, "step": 6407 }, { "epoch": 0.1869420619639419, "grad_norm": 0.6836505378334928, "learning_rate": 9.368000385065914e-06, "loss": 0.1296, "step": 6408 }, { "epoch": 0.18697123519458544, "grad_norm": 1.1529083326425609, "learning_rate": 9.367770458066668e-06, "loss": 0.1753, "step": 6409 }, { "epoch": 0.187000408425229, "grad_norm": 0.9984352785617496, "learning_rate": 9.36754049207301e-06, "loss": 0.188, "step": 6410 }, { "epoch": 0.18702958165587258, "grad_norm": 0.8513446484147696, "learning_rate": 9.367310487086994e-06, "loss": 0.1552, "step": 6411 }, { "epoch": 0.18705875488651613, "grad_norm": 0.8994726050458148, "learning_rate": 9.367080443110672e-06, "loss": 0.1618, "step": 6412 }, { "epoch": 0.18708792811715969, "grad_norm": 0.9579227745596848, "learning_rate": 9.366850360146098e-06, "loss": 0.1932, "step": 6413 }, { "epoch": 0.18711710134780327, "grad_norm": 0.836438245202908, "learning_rate": 9.366620238195327e-06, "loss": 0.1573, "step": 6414 }, { "epoch": 0.18714627457844682, "grad_norm": 0.9597197293795949, "learning_rate": 9.366390077260413e-06, "loss": 0.1408, "step": 6415 }, { "epoch": 0.18717544780909037, "grad_norm": 0.878411391925992, "learning_rate": 9.366159877343411e-06, "loss": 0.1932, "step": 6416 }, { "epoch": 0.18720462103973395, "grad_norm": 0.8044571836578218, "learning_rate": 9.365929638446375e-06, "loss": 0.1526, "step": 6417 }, { "epoch": 0.1872337942703775, "grad_norm": 0.8660836589451121, "learning_rate": 9.365699360571361e-06, "loss": 0.1624, "step": 6418 }, { "epoch": 0.18726296750102106, "grad_norm": 0.7468442113023918, "learning_rate": 9.365469043720428e-06, "loss": 0.17, "step": 6419 }, { "epoch": 0.1872921407316646, "grad_norm": 0.9647577885839785, "learning_rate": 9.365238687895626e-06, "loss": 0.152, "step": 6420 }, { "epoch": 0.1873213139623082, "grad_norm": 0.8733550915431748, "learning_rate": 9.365008293099017e-06, "loss": 0.1507, "step": 6421 }, { "epoch": 0.18735048719295175, "grad_norm": 0.833192434914469, "learning_rate": 9.364777859332656e-06, "loss": 0.1737, "step": 6422 }, { "epoch": 0.1873796604235953, "grad_norm": 0.943764908474735, "learning_rate": 9.364547386598599e-06, "loss": 0.1855, "step": 6423 }, { "epoch": 0.18740883365423888, "grad_norm": 0.9674986000953253, "learning_rate": 9.364316874898906e-06, "loss": 0.1575, "step": 6424 }, { "epoch": 0.18743800688488244, "grad_norm": 0.8465644267118999, "learning_rate": 9.364086324235634e-06, "loss": 0.1818, "step": 6425 }, { "epoch": 0.187467180115526, "grad_norm": 3.682477680581137, "learning_rate": 9.36385573461084e-06, "loss": 0.158, "step": 6426 }, { "epoch": 0.18749635334616954, "grad_norm": 1.1389325051338943, "learning_rate": 9.363625106026585e-06, "loss": 0.1399, "step": 6427 }, { "epoch": 0.18752552657681312, "grad_norm": 0.9335206135751585, "learning_rate": 9.363394438484926e-06, "loss": 0.1548, "step": 6428 }, { "epoch": 0.18755469980745668, "grad_norm": 0.9756779180721152, "learning_rate": 9.363163731987924e-06, "loss": 0.1612, "step": 6429 }, { "epoch": 0.18758387303810023, "grad_norm": 0.9415783249737719, "learning_rate": 9.362932986537636e-06, "loss": 0.1745, "step": 6430 }, { "epoch": 0.1876130462687438, "grad_norm": 0.91126906562831, "learning_rate": 9.362702202136125e-06, "loss": 0.1721, "step": 6431 }, { "epoch": 0.18764221949938736, "grad_norm": 0.86616612377215, "learning_rate": 9.36247137878545e-06, "loss": 0.1499, "step": 6432 }, { "epoch": 0.18767139273003092, "grad_norm": 1.0357674865729583, "learning_rate": 9.362240516487672e-06, "loss": 0.1665, "step": 6433 }, { "epoch": 0.1877005659606745, "grad_norm": 0.82598036222361, "learning_rate": 9.362009615244852e-06, "loss": 0.1454, "step": 6434 }, { "epoch": 0.18772973919131805, "grad_norm": 0.8879978864420757, "learning_rate": 9.36177867505905e-06, "loss": 0.1768, "step": 6435 }, { "epoch": 0.1877589124219616, "grad_norm": 0.8781657754241868, "learning_rate": 9.36154769593233e-06, "loss": 0.1527, "step": 6436 }, { "epoch": 0.18778808565260516, "grad_norm": 0.9536049107109157, "learning_rate": 9.361316677866756e-06, "loss": 0.153, "step": 6437 }, { "epoch": 0.18781725888324874, "grad_norm": 0.8754771873434788, "learning_rate": 9.361085620864384e-06, "loss": 0.1847, "step": 6438 }, { "epoch": 0.1878464321138923, "grad_norm": 0.9788522664013113, "learning_rate": 9.360854524927283e-06, "loss": 0.1598, "step": 6439 }, { "epoch": 0.18787560534453585, "grad_norm": 0.8847500326674329, "learning_rate": 9.360623390057513e-06, "loss": 0.1575, "step": 6440 }, { "epoch": 0.18790477857517943, "grad_norm": 0.8954633065214586, "learning_rate": 9.36039221625714e-06, "loss": 0.1628, "step": 6441 }, { "epoch": 0.18793395180582298, "grad_norm": 1.0958594723145838, "learning_rate": 9.360161003528225e-06, "loss": 0.1855, "step": 6442 }, { "epoch": 0.18796312503646653, "grad_norm": 1.0493007576651754, "learning_rate": 9.359929751872832e-06, "loss": 0.1757, "step": 6443 }, { "epoch": 0.1879922982671101, "grad_norm": 0.7734016970896587, "learning_rate": 9.359698461293029e-06, "loss": 0.1831, "step": 6444 }, { "epoch": 0.18802147149775367, "grad_norm": 0.7670175090011172, "learning_rate": 9.359467131790878e-06, "loss": 0.1845, "step": 6445 }, { "epoch": 0.18805064472839722, "grad_norm": 0.837747572703878, "learning_rate": 9.359235763368444e-06, "loss": 0.1661, "step": 6446 }, { "epoch": 0.18807981795904077, "grad_norm": 0.9472609193995318, "learning_rate": 9.359004356027796e-06, "loss": 0.18, "step": 6447 }, { "epoch": 0.18810899118968435, "grad_norm": 0.7727561093733268, "learning_rate": 9.358772909770996e-06, "loss": 0.1691, "step": 6448 }, { "epoch": 0.1881381644203279, "grad_norm": 0.9422799353347984, "learning_rate": 9.358541424600112e-06, "loss": 0.1788, "step": 6449 }, { "epoch": 0.18816733765097146, "grad_norm": 1.0040165688649716, "learning_rate": 9.358309900517212e-06, "loss": 0.2013, "step": 6450 }, { "epoch": 0.18819651088161504, "grad_norm": 0.6921997078968785, "learning_rate": 9.358078337524362e-06, "loss": 0.1488, "step": 6451 }, { "epoch": 0.1882256841122586, "grad_norm": 0.8853309572382455, "learning_rate": 9.357846735623627e-06, "loss": 0.1971, "step": 6452 }, { "epoch": 0.18825485734290215, "grad_norm": 0.9426241570187159, "learning_rate": 9.357615094817076e-06, "loss": 0.1524, "step": 6453 }, { "epoch": 0.1882840305735457, "grad_norm": 0.876801031963035, "learning_rate": 9.35738341510678e-06, "loss": 0.1897, "step": 6454 }, { "epoch": 0.18831320380418928, "grad_norm": 0.8590444765798828, "learning_rate": 9.357151696494805e-06, "loss": 0.1811, "step": 6455 }, { "epoch": 0.18834237703483284, "grad_norm": 1.041073665471181, "learning_rate": 9.356919938983217e-06, "loss": 0.1648, "step": 6456 }, { "epoch": 0.1883715502654764, "grad_norm": 0.7810445562439615, "learning_rate": 9.35668814257409e-06, "loss": 0.1568, "step": 6457 }, { "epoch": 0.18840072349611997, "grad_norm": 0.9808872059364543, "learning_rate": 9.356456307269493e-06, "loss": 0.149, "step": 6458 }, { "epoch": 0.18842989672676352, "grad_norm": 0.7957062531699588, "learning_rate": 9.35622443307149e-06, "loss": 0.1566, "step": 6459 }, { "epoch": 0.18845906995740708, "grad_norm": 0.9969133059553652, "learning_rate": 9.355992519982159e-06, "loss": 0.1707, "step": 6460 }, { "epoch": 0.18848824318805066, "grad_norm": 0.9009970339068955, "learning_rate": 9.355760568003564e-06, "loss": 0.1634, "step": 6461 }, { "epoch": 0.1885174164186942, "grad_norm": 0.9034640625249046, "learning_rate": 9.35552857713778e-06, "loss": 0.158, "step": 6462 }, { "epoch": 0.18854658964933776, "grad_norm": 0.8307151872442313, "learning_rate": 9.355296547386876e-06, "loss": 0.1796, "step": 6463 }, { "epoch": 0.18857576287998132, "grad_norm": 0.7823639156560412, "learning_rate": 9.355064478752925e-06, "loss": 0.1559, "step": 6464 }, { "epoch": 0.1886049361106249, "grad_norm": 1.1364863653210286, "learning_rate": 9.354832371237996e-06, "loss": 0.1643, "step": 6465 }, { "epoch": 0.18863410934126845, "grad_norm": 0.819555976878176, "learning_rate": 9.354600224844166e-06, "loss": 0.1766, "step": 6466 }, { "epoch": 0.188663282571912, "grad_norm": 0.8021540114947207, "learning_rate": 9.354368039573502e-06, "loss": 0.1578, "step": 6467 }, { "epoch": 0.1886924558025556, "grad_norm": 1.0294397190067515, "learning_rate": 9.354135815428081e-06, "loss": 0.1749, "step": 6468 }, { "epoch": 0.18872162903319914, "grad_norm": 0.8596814036866786, "learning_rate": 9.353903552409975e-06, "loss": 0.1583, "step": 6469 }, { "epoch": 0.1887508022638427, "grad_norm": 0.9579756939625307, "learning_rate": 9.353671250521257e-06, "loss": 0.1912, "step": 6470 }, { "epoch": 0.18877997549448625, "grad_norm": 0.8536816187458158, "learning_rate": 9.353438909764e-06, "loss": 0.1682, "step": 6471 }, { "epoch": 0.18880914872512983, "grad_norm": 0.8471358060067807, "learning_rate": 9.353206530140282e-06, "loss": 0.1575, "step": 6472 }, { "epoch": 0.18883832195577338, "grad_norm": 0.7423011039671563, "learning_rate": 9.352974111652174e-06, "loss": 0.1705, "step": 6473 }, { "epoch": 0.18886749518641693, "grad_norm": 0.872510793966477, "learning_rate": 9.352741654301752e-06, "loss": 0.1919, "step": 6474 }, { "epoch": 0.18889666841706051, "grad_norm": 0.8843322687907549, "learning_rate": 9.352509158091092e-06, "loss": 0.176, "step": 6475 }, { "epoch": 0.18892584164770407, "grad_norm": 0.9153013391100571, "learning_rate": 9.35227662302227e-06, "loss": 0.1737, "step": 6476 }, { "epoch": 0.18895501487834762, "grad_norm": 0.8340567504416462, "learning_rate": 9.35204404909736e-06, "loss": 0.1556, "step": 6477 }, { "epoch": 0.1889841881089912, "grad_norm": 0.7850765747011559, "learning_rate": 9.35181143631844e-06, "loss": 0.147, "step": 6478 }, { "epoch": 0.18901336133963476, "grad_norm": 1.04531413559154, "learning_rate": 9.351578784687589e-06, "loss": 0.1667, "step": 6479 }, { "epoch": 0.1890425345702783, "grad_norm": 0.8802548522615019, "learning_rate": 9.351346094206878e-06, "loss": 0.1461, "step": 6480 }, { "epoch": 0.18907170780092186, "grad_norm": 0.8610718168542294, "learning_rate": 9.351113364878388e-06, "loss": 0.161, "step": 6481 }, { "epoch": 0.18910088103156544, "grad_norm": 0.9291714448392332, "learning_rate": 9.350880596704199e-06, "loss": 0.1791, "step": 6482 }, { "epoch": 0.189130054262209, "grad_norm": 1.0332328719886352, "learning_rate": 9.350647789686384e-06, "loss": 0.1772, "step": 6483 }, { "epoch": 0.18915922749285255, "grad_norm": 0.7525244706407437, "learning_rate": 9.350414943827027e-06, "loss": 0.1466, "step": 6484 }, { "epoch": 0.18918840072349613, "grad_norm": 1.052568781939131, "learning_rate": 9.350182059128202e-06, "loss": 0.1645, "step": 6485 }, { "epoch": 0.18921757395413968, "grad_norm": 0.8692515713275414, "learning_rate": 9.34994913559199e-06, "loss": 0.1478, "step": 6486 }, { "epoch": 0.18924674718478324, "grad_norm": 0.7641666212824467, "learning_rate": 9.34971617322047e-06, "loss": 0.1699, "step": 6487 }, { "epoch": 0.18927592041542682, "grad_norm": 1.012211156978612, "learning_rate": 9.349483172015723e-06, "loss": 0.1503, "step": 6488 }, { "epoch": 0.18930509364607037, "grad_norm": 0.8705933276035973, "learning_rate": 9.349250131979829e-06, "loss": 0.1756, "step": 6489 }, { "epoch": 0.18933426687671392, "grad_norm": 0.8260686852987795, "learning_rate": 9.349017053114868e-06, "loss": 0.1706, "step": 6490 }, { "epoch": 0.18936344010735748, "grad_norm": 1.0823426214485259, "learning_rate": 9.34878393542292e-06, "loss": 0.1625, "step": 6491 }, { "epoch": 0.18939261333800106, "grad_norm": 0.838019795588882, "learning_rate": 9.348550778906069e-06, "loss": 0.1488, "step": 6492 }, { "epoch": 0.1894217865686446, "grad_norm": 0.8252509549584727, "learning_rate": 9.348317583566393e-06, "loss": 0.161, "step": 6493 }, { "epoch": 0.18945095979928817, "grad_norm": 0.8667037898451724, "learning_rate": 9.348084349405977e-06, "loss": 0.1573, "step": 6494 }, { "epoch": 0.18948013302993175, "grad_norm": 0.9632097702897997, "learning_rate": 9.347851076426902e-06, "loss": 0.1616, "step": 6495 }, { "epoch": 0.1895093062605753, "grad_norm": 0.9263513470808169, "learning_rate": 9.347617764631248e-06, "loss": 0.1696, "step": 6496 }, { "epoch": 0.18953847949121885, "grad_norm": 0.9603998868666372, "learning_rate": 9.347384414021103e-06, "loss": 0.1867, "step": 6497 }, { "epoch": 0.1895676527218624, "grad_norm": 0.9443097850177389, "learning_rate": 9.347151024598547e-06, "loss": 0.1646, "step": 6498 }, { "epoch": 0.189596825952506, "grad_norm": 0.9662622519989555, "learning_rate": 9.346917596365663e-06, "loss": 0.191, "step": 6499 }, { "epoch": 0.18962599918314954, "grad_norm": 0.7954713778166729, "learning_rate": 9.346684129324539e-06, "loss": 0.1623, "step": 6500 }, { "epoch": 0.1896551724137931, "grad_norm": 0.8878743061399259, "learning_rate": 9.346450623477255e-06, "loss": 0.1905, "step": 6501 }, { "epoch": 0.18968434564443667, "grad_norm": 1.0539982976897557, "learning_rate": 9.346217078825898e-06, "loss": 0.1586, "step": 6502 }, { "epoch": 0.18971351887508023, "grad_norm": 0.796267607966624, "learning_rate": 9.345983495372552e-06, "loss": 0.1772, "step": 6503 }, { "epoch": 0.18974269210572378, "grad_norm": 0.9537548330363622, "learning_rate": 9.345749873119304e-06, "loss": 0.1952, "step": 6504 }, { "epoch": 0.18977186533636736, "grad_norm": 1.1008248733668338, "learning_rate": 9.345516212068237e-06, "loss": 0.1787, "step": 6505 }, { "epoch": 0.18980103856701092, "grad_norm": 0.9523573448915854, "learning_rate": 9.34528251222144e-06, "loss": 0.1526, "step": 6506 }, { "epoch": 0.18983021179765447, "grad_norm": 0.9023001720464379, "learning_rate": 9.345048773580995e-06, "loss": 0.1711, "step": 6507 }, { "epoch": 0.18985938502829802, "grad_norm": 0.9134559758736757, "learning_rate": 9.344814996148995e-06, "loss": 0.1614, "step": 6508 }, { "epoch": 0.1898885582589416, "grad_norm": 0.9209440705402696, "learning_rate": 9.344581179927523e-06, "loss": 0.1853, "step": 6509 }, { "epoch": 0.18991773148958516, "grad_norm": 1.0425855002494953, "learning_rate": 9.344347324918667e-06, "loss": 0.203, "step": 6510 }, { "epoch": 0.1899469047202287, "grad_norm": 0.9322617122236974, "learning_rate": 9.344113431124517e-06, "loss": 0.1594, "step": 6511 }, { "epoch": 0.1899760779508723, "grad_norm": 0.7838747514885989, "learning_rate": 9.343879498547157e-06, "loss": 0.1669, "step": 6512 }, { "epoch": 0.19000525118151584, "grad_norm": 1.0160814798959792, "learning_rate": 9.343645527188678e-06, "loss": 0.1763, "step": 6513 }, { "epoch": 0.1900344244121594, "grad_norm": 0.9135837272844288, "learning_rate": 9.34341151705117e-06, "loss": 0.159, "step": 6514 }, { "epoch": 0.19006359764280295, "grad_norm": 0.7831207396365385, "learning_rate": 9.34317746813672e-06, "loss": 0.1617, "step": 6515 }, { "epoch": 0.19009277087344653, "grad_norm": 0.8770832195483749, "learning_rate": 9.342943380447417e-06, "loss": 0.1857, "step": 6516 }, { "epoch": 0.19012194410409008, "grad_norm": 0.8567349049776124, "learning_rate": 9.342709253985356e-06, "loss": 0.1663, "step": 6517 }, { "epoch": 0.19015111733473364, "grad_norm": 0.6983197993077767, "learning_rate": 9.342475088752621e-06, "loss": 0.1398, "step": 6518 }, { "epoch": 0.19018029056537722, "grad_norm": 0.8021887784195425, "learning_rate": 9.342240884751305e-06, "loss": 0.1837, "step": 6519 }, { "epoch": 0.19020946379602077, "grad_norm": 0.8892876153441276, "learning_rate": 9.342006641983499e-06, "loss": 0.1656, "step": 6520 }, { "epoch": 0.19023863702666433, "grad_norm": 0.8278555770237782, "learning_rate": 9.341772360451294e-06, "loss": 0.1438, "step": 6521 }, { "epoch": 0.1902678102573079, "grad_norm": 0.9707716915155568, "learning_rate": 9.341538040156783e-06, "loss": 0.1524, "step": 6522 }, { "epoch": 0.19029698348795146, "grad_norm": 0.9330398926275478, "learning_rate": 9.341303681102056e-06, "loss": 0.178, "step": 6523 }, { "epoch": 0.190326156718595, "grad_norm": 0.7281296627655914, "learning_rate": 9.341069283289207e-06, "loss": 0.1646, "step": 6524 }, { "epoch": 0.19035532994923857, "grad_norm": 1.0189311532604206, "learning_rate": 9.340834846720326e-06, "loss": 0.1605, "step": 6525 }, { "epoch": 0.19038450317988215, "grad_norm": 0.7388679711664593, "learning_rate": 9.340600371397508e-06, "loss": 0.1555, "step": 6526 }, { "epoch": 0.1904136764105257, "grad_norm": 0.9170058175685646, "learning_rate": 9.340365857322846e-06, "loss": 0.1565, "step": 6527 }, { "epoch": 0.19044284964116925, "grad_norm": 0.7596638777324773, "learning_rate": 9.340131304498435e-06, "loss": 0.1416, "step": 6528 }, { "epoch": 0.19047202287181283, "grad_norm": 0.8463017137810688, "learning_rate": 9.339896712926367e-06, "loss": 0.187, "step": 6529 }, { "epoch": 0.1905011961024564, "grad_norm": 0.7594154273046081, "learning_rate": 9.339662082608739e-06, "loss": 0.1685, "step": 6530 }, { "epoch": 0.19053036933309994, "grad_norm": 0.7411891091664682, "learning_rate": 9.33942741354764e-06, "loss": 0.16, "step": 6531 }, { "epoch": 0.19055954256374352, "grad_norm": 0.9121398575667056, "learning_rate": 9.339192705745172e-06, "loss": 0.1768, "step": 6532 }, { "epoch": 0.19058871579438708, "grad_norm": 0.8243088908698623, "learning_rate": 9.338957959203427e-06, "loss": 0.1651, "step": 6533 }, { "epoch": 0.19061788902503063, "grad_norm": 0.8719208184465156, "learning_rate": 9.3387231739245e-06, "loss": 0.1877, "step": 6534 }, { "epoch": 0.19064706225567418, "grad_norm": 0.752749898025691, "learning_rate": 9.338488349910489e-06, "loss": 0.1681, "step": 6535 }, { "epoch": 0.19067623548631776, "grad_norm": 1.0676027126913896, "learning_rate": 9.33825348716349e-06, "loss": 0.1636, "step": 6536 }, { "epoch": 0.19070540871696132, "grad_norm": 0.745378376347966, "learning_rate": 9.338018585685599e-06, "loss": 0.1743, "step": 6537 }, { "epoch": 0.19073458194760487, "grad_norm": 0.867800714762912, "learning_rate": 9.337783645478912e-06, "loss": 0.172, "step": 6538 }, { "epoch": 0.19076375517824845, "grad_norm": 1.0383533865490961, "learning_rate": 9.337548666545532e-06, "loss": 0.2088, "step": 6539 }, { "epoch": 0.190792928408892, "grad_norm": 0.8019896158769209, "learning_rate": 9.33731364888755e-06, "loss": 0.1437, "step": 6540 }, { "epoch": 0.19082210163953556, "grad_norm": 0.7264352087170607, "learning_rate": 9.337078592507069e-06, "loss": 0.1537, "step": 6541 }, { "epoch": 0.1908512748701791, "grad_norm": 0.9476453646977872, "learning_rate": 9.336843497406184e-06, "loss": 0.1805, "step": 6542 }, { "epoch": 0.1908804481008227, "grad_norm": 0.8229482466231757, "learning_rate": 9.336608363586997e-06, "loss": 0.1565, "step": 6543 }, { "epoch": 0.19090962133146624, "grad_norm": 0.8411750734928479, "learning_rate": 9.336373191051604e-06, "loss": 0.1693, "step": 6544 }, { "epoch": 0.1909387945621098, "grad_norm": 0.752537013957642, "learning_rate": 9.336137979802107e-06, "loss": 0.1423, "step": 6545 }, { "epoch": 0.19096796779275338, "grad_norm": 0.813754702244634, "learning_rate": 9.335902729840606e-06, "loss": 0.1862, "step": 6546 }, { "epoch": 0.19099714102339693, "grad_norm": 0.7726436207337717, "learning_rate": 9.3356674411692e-06, "loss": 0.1607, "step": 6547 }, { "epoch": 0.19102631425404049, "grad_norm": 0.8806303552398813, "learning_rate": 9.33543211378999e-06, "loss": 0.1869, "step": 6548 }, { "epoch": 0.19105548748468407, "grad_norm": 0.7724633385704062, "learning_rate": 9.335196747705077e-06, "loss": 0.1593, "step": 6549 }, { "epoch": 0.19108466071532762, "grad_norm": 1.007481687845384, "learning_rate": 9.334961342916563e-06, "loss": 0.1327, "step": 6550 }, { "epoch": 0.19111383394597117, "grad_norm": 0.7255188714256837, "learning_rate": 9.334725899426549e-06, "loss": 0.1467, "step": 6551 }, { "epoch": 0.19114300717661473, "grad_norm": 0.8636454349993008, "learning_rate": 9.334490417237137e-06, "loss": 0.1678, "step": 6552 }, { "epoch": 0.1911721804072583, "grad_norm": 0.8242913205173432, "learning_rate": 9.334254896350428e-06, "loss": 0.1508, "step": 6553 }, { "epoch": 0.19120135363790186, "grad_norm": 1.1292800022059526, "learning_rate": 9.334019336768525e-06, "loss": 0.1515, "step": 6554 }, { "epoch": 0.19123052686854541, "grad_norm": 0.884273781257435, "learning_rate": 9.333783738493534e-06, "loss": 0.1525, "step": 6555 }, { "epoch": 0.191259700099189, "grad_norm": 0.9425489409246627, "learning_rate": 9.333548101527557e-06, "loss": 0.1616, "step": 6556 }, { "epoch": 0.19128887332983255, "grad_norm": 0.9218937211022523, "learning_rate": 9.333312425872696e-06, "loss": 0.1548, "step": 6557 }, { "epoch": 0.1913180465604761, "grad_norm": 0.8745561746227835, "learning_rate": 9.333076711531055e-06, "loss": 0.1552, "step": 6558 }, { "epoch": 0.19134721979111965, "grad_norm": 0.971277358473962, "learning_rate": 9.33284095850474e-06, "loss": 0.1627, "step": 6559 }, { "epoch": 0.19137639302176324, "grad_norm": 0.729642279874078, "learning_rate": 9.332605166795857e-06, "loss": 0.1605, "step": 6560 }, { "epoch": 0.1914055662524068, "grad_norm": 0.9982954152299268, "learning_rate": 9.332369336406508e-06, "loss": 0.1486, "step": 6561 }, { "epoch": 0.19143473948305034, "grad_norm": 1.000662881706075, "learning_rate": 9.332133467338799e-06, "loss": 0.1635, "step": 6562 }, { "epoch": 0.19146391271369392, "grad_norm": 0.7443616070218757, "learning_rate": 9.331897559594839e-06, "loss": 0.1439, "step": 6563 }, { "epoch": 0.19149308594433748, "grad_norm": 1.0920462291255546, "learning_rate": 9.33166161317673e-06, "loss": 0.1745, "step": 6564 }, { "epoch": 0.19152225917498103, "grad_norm": 0.8604818796068482, "learning_rate": 9.33142562808658e-06, "loss": 0.1842, "step": 6565 }, { "epoch": 0.1915514324056246, "grad_norm": 0.9014934031732198, "learning_rate": 9.331189604326498e-06, "loss": 0.1855, "step": 6566 }, { "epoch": 0.19158060563626816, "grad_norm": 1.158240541171249, "learning_rate": 9.330953541898587e-06, "loss": 0.1642, "step": 6567 }, { "epoch": 0.19160977886691172, "grad_norm": 1.0939223699238565, "learning_rate": 9.330717440804957e-06, "loss": 0.165, "step": 6568 }, { "epoch": 0.19163895209755527, "grad_norm": 0.7959937577769631, "learning_rate": 9.330481301047716e-06, "loss": 0.1647, "step": 6569 }, { "epoch": 0.19166812532819885, "grad_norm": 0.8363215560981384, "learning_rate": 9.330245122628972e-06, "loss": 0.1833, "step": 6570 }, { "epoch": 0.1916972985588424, "grad_norm": 1.0241992590915194, "learning_rate": 9.33000890555083e-06, "loss": 0.1692, "step": 6571 }, { "epoch": 0.19172647178948596, "grad_norm": 0.8207735379074976, "learning_rate": 9.329772649815407e-06, "loss": 0.1904, "step": 6572 }, { "epoch": 0.19175564502012954, "grad_norm": 0.9699830671805612, "learning_rate": 9.329536355424804e-06, "loss": 0.1896, "step": 6573 }, { "epoch": 0.1917848182507731, "grad_norm": 0.8962596024080848, "learning_rate": 9.329300022381135e-06, "loss": 0.171, "step": 6574 }, { "epoch": 0.19181399148141665, "grad_norm": 0.8742934378036707, "learning_rate": 9.329063650686511e-06, "loss": 0.1658, "step": 6575 }, { "epoch": 0.19184316471206023, "grad_norm": 0.8093306750612865, "learning_rate": 9.328827240343037e-06, "loss": 0.1735, "step": 6576 }, { "epoch": 0.19187233794270378, "grad_norm": 0.7606877618978442, "learning_rate": 9.328590791352828e-06, "loss": 0.1673, "step": 6577 }, { "epoch": 0.19190151117334733, "grad_norm": 0.973003170760142, "learning_rate": 9.328354303717995e-06, "loss": 0.1843, "step": 6578 }, { "epoch": 0.1919306844039909, "grad_norm": 0.7516757091467464, "learning_rate": 9.328117777440647e-06, "loss": 0.1704, "step": 6579 }, { "epoch": 0.19195985763463447, "grad_norm": 0.8252779180987707, "learning_rate": 9.327881212522896e-06, "loss": 0.1511, "step": 6580 }, { "epoch": 0.19198903086527802, "grad_norm": 0.898972360986517, "learning_rate": 9.327644608966855e-06, "loss": 0.1692, "step": 6581 }, { "epoch": 0.19201820409592157, "grad_norm": 0.9227271030833828, "learning_rate": 9.327407966774635e-06, "loss": 0.1602, "step": 6582 }, { "epoch": 0.19204737732656516, "grad_norm": 0.9116754764672668, "learning_rate": 9.327171285948352e-06, "loss": 0.1936, "step": 6583 }, { "epoch": 0.1920765505572087, "grad_norm": 0.9654934836715419, "learning_rate": 9.326934566490116e-06, "loss": 0.1517, "step": 6584 }, { "epoch": 0.19210572378785226, "grad_norm": 0.8335776057812551, "learning_rate": 9.326697808402041e-06, "loss": 0.1479, "step": 6585 }, { "epoch": 0.19213489701849581, "grad_norm": 0.8741283696287376, "learning_rate": 9.32646101168624e-06, "loss": 0.1683, "step": 6586 }, { "epoch": 0.1921640702491394, "grad_norm": 0.8851147245285534, "learning_rate": 9.326224176344829e-06, "loss": 0.1514, "step": 6587 }, { "epoch": 0.19219324347978295, "grad_norm": 1.0776747378112042, "learning_rate": 9.32598730237992e-06, "loss": 0.1776, "step": 6588 }, { "epoch": 0.1922224167104265, "grad_norm": 0.9799316250456251, "learning_rate": 9.32575038979363e-06, "loss": 0.174, "step": 6589 }, { "epoch": 0.19225158994107008, "grad_norm": 0.8183463022592024, "learning_rate": 9.325513438588073e-06, "loss": 0.1701, "step": 6590 }, { "epoch": 0.19228076317171364, "grad_norm": 0.8361733681857653, "learning_rate": 9.325276448765365e-06, "loss": 0.16, "step": 6591 }, { "epoch": 0.1923099364023572, "grad_norm": 0.7967788134473966, "learning_rate": 9.325039420327621e-06, "loss": 0.1581, "step": 6592 }, { "epoch": 0.19233910963300077, "grad_norm": 0.894301695688426, "learning_rate": 9.324802353276957e-06, "loss": 0.1748, "step": 6593 }, { "epoch": 0.19236828286364432, "grad_norm": 0.6669028858586441, "learning_rate": 9.324565247615491e-06, "loss": 0.1728, "step": 6594 }, { "epoch": 0.19239745609428788, "grad_norm": 0.8436065735195872, "learning_rate": 9.324328103345338e-06, "loss": 0.1743, "step": 6595 }, { "epoch": 0.19242662932493143, "grad_norm": 0.8692035951921453, "learning_rate": 9.324090920468615e-06, "loss": 0.184, "step": 6596 }, { "epoch": 0.192455802555575, "grad_norm": 0.9045271630083936, "learning_rate": 9.323853698987443e-06, "loss": 0.1774, "step": 6597 }, { "epoch": 0.19248497578621857, "grad_norm": 0.6962124244886452, "learning_rate": 9.323616438903937e-06, "loss": 0.152, "step": 6598 }, { "epoch": 0.19251414901686212, "grad_norm": 0.8647424905362106, "learning_rate": 9.323379140220215e-06, "loss": 0.1851, "step": 6599 }, { "epoch": 0.1925433222475057, "grad_norm": 0.8542025098778888, "learning_rate": 9.323141802938395e-06, "loss": 0.1748, "step": 6600 }, { "epoch": 0.19257249547814925, "grad_norm": 0.8022133668432814, "learning_rate": 9.322904427060598e-06, "loss": 0.1704, "step": 6601 }, { "epoch": 0.1926016687087928, "grad_norm": 0.9583753138003067, "learning_rate": 9.322667012588942e-06, "loss": 0.1938, "step": 6602 }, { "epoch": 0.1926308419394364, "grad_norm": 0.7933999801245497, "learning_rate": 9.322429559525548e-06, "loss": 0.1499, "step": 6603 }, { "epoch": 0.19266001517007994, "grad_norm": 0.8192991203228889, "learning_rate": 9.322192067872533e-06, "loss": 0.1401, "step": 6604 }, { "epoch": 0.1926891884007235, "grad_norm": 0.7750423447305869, "learning_rate": 9.321954537632019e-06, "loss": 0.167, "step": 6605 }, { "epoch": 0.19271836163136705, "grad_norm": 0.8832348182547236, "learning_rate": 9.321716968806127e-06, "loss": 0.1652, "step": 6606 }, { "epoch": 0.19274753486201063, "grad_norm": 0.9731916544546582, "learning_rate": 9.32147936139698e-06, "loss": 0.1885, "step": 6607 }, { "epoch": 0.19277670809265418, "grad_norm": 0.9056726077735234, "learning_rate": 9.321241715406694e-06, "loss": 0.1893, "step": 6608 }, { "epoch": 0.19280588132329773, "grad_norm": 0.9771268289081955, "learning_rate": 9.321004030837394e-06, "loss": 0.1966, "step": 6609 }, { "epoch": 0.19283505455394132, "grad_norm": 0.9250638256199035, "learning_rate": 9.320766307691202e-06, "loss": 0.1908, "step": 6610 }, { "epoch": 0.19286422778458487, "grad_norm": 1.1064866746245714, "learning_rate": 9.32052854597024e-06, "loss": 0.1897, "step": 6611 }, { "epoch": 0.19289340101522842, "grad_norm": 1.2970316922890326, "learning_rate": 9.32029074567663e-06, "loss": 0.1275, "step": 6612 }, { "epoch": 0.19292257424587198, "grad_norm": 0.9121026402023582, "learning_rate": 9.320052906812495e-06, "loss": 0.1836, "step": 6613 }, { "epoch": 0.19295174747651556, "grad_norm": 0.9468523757153974, "learning_rate": 9.31981502937996e-06, "loss": 0.1575, "step": 6614 }, { "epoch": 0.1929809207071591, "grad_norm": 0.8629423786920889, "learning_rate": 9.319577113381147e-06, "loss": 0.18, "step": 6615 }, { "epoch": 0.19301009393780266, "grad_norm": 0.7974688953391694, "learning_rate": 9.319339158818182e-06, "loss": 0.1722, "step": 6616 }, { "epoch": 0.19303926716844624, "grad_norm": 0.8848472276388092, "learning_rate": 9.319101165693187e-06, "loss": 0.1878, "step": 6617 }, { "epoch": 0.1930684403990898, "grad_norm": 0.7751275570666104, "learning_rate": 9.318863134008288e-06, "loss": 0.1773, "step": 6618 }, { "epoch": 0.19309761362973335, "grad_norm": 0.8620195648849519, "learning_rate": 9.31862506376561e-06, "loss": 0.1602, "step": 6619 }, { "epoch": 0.19312678686037693, "grad_norm": 0.6632416890148962, "learning_rate": 9.318386954967278e-06, "loss": 0.1572, "step": 6620 }, { "epoch": 0.19315596009102048, "grad_norm": 0.9913204114730793, "learning_rate": 9.318148807615418e-06, "loss": 0.1557, "step": 6621 }, { "epoch": 0.19318513332166404, "grad_norm": 0.8709380792413741, "learning_rate": 9.317910621712156e-06, "loss": 0.1736, "step": 6622 }, { "epoch": 0.1932143065523076, "grad_norm": 0.8025506641134886, "learning_rate": 9.31767239725962e-06, "loss": 0.172, "step": 6623 }, { "epoch": 0.19324347978295117, "grad_norm": 0.6836281445296057, "learning_rate": 9.317434134259934e-06, "loss": 0.1639, "step": 6624 }, { "epoch": 0.19327265301359473, "grad_norm": 1.0082377785568835, "learning_rate": 9.317195832715228e-06, "loss": 0.1513, "step": 6625 }, { "epoch": 0.19330182624423828, "grad_norm": 0.9049552447867992, "learning_rate": 9.31695749262763e-06, "loss": 0.1743, "step": 6626 }, { "epoch": 0.19333099947488186, "grad_norm": 0.6619899587092459, "learning_rate": 9.316719113999263e-06, "loss": 0.1335, "step": 6627 }, { "epoch": 0.1933601727055254, "grad_norm": 0.7845890178964112, "learning_rate": 9.316480696832259e-06, "loss": 0.1442, "step": 6628 }, { "epoch": 0.19338934593616897, "grad_norm": 0.8581096565647223, "learning_rate": 9.316242241128746e-06, "loss": 0.16, "step": 6629 }, { "epoch": 0.19341851916681252, "grad_norm": 0.8871473185494342, "learning_rate": 9.316003746890854e-06, "loss": 0.1667, "step": 6630 }, { "epoch": 0.1934476923974561, "grad_norm": 0.72978329018481, "learning_rate": 9.315765214120709e-06, "loss": 0.171, "step": 6631 }, { "epoch": 0.19347686562809965, "grad_norm": 0.9565276574491248, "learning_rate": 9.315526642820443e-06, "loss": 0.162, "step": 6632 }, { "epoch": 0.1935060388587432, "grad_norm": 0.8492242129049904, "learning_rate": 9.315288032992185e-06, "loss": 0.1533, "step": 6633 }, { "epoch": 0.1935352120893868, "grad_norm": 0.8202558106143849, "learning_rate": 9.315049384638065e-06, "loss": 0.1306, "step": 6634 }, { "epoch": 0.19356438532003034, "grad_norm": 0.9556762645442757, "learning_rate": 9.314810697760214e-06, "loss": 0.164, "step": 6635 }, { "epoch": 0.1935935585506739, "grad_norm": 0.970958578848858, "learning_rate": 9.314571972360765e-06, "loss": 0.2004, "step": 6636 }, { "epoch": 0.19362273178131748, "grad_norm": 1.076134050832174, "learning_rate": 9.314333208441847e-06, "loss": 0.1925, "step": 6637 }, { "epoch": 0.19365190501196103, "grad_norm": 0.7572545491179042, "learning_rate": 9.314094406005592e-06, "loss": 0.1581, "step": 6638 }, { "epoch": 0.19368107824260458, "grad_norm": 0.7839825173434335, "learning_rate": 9.31385556505413e-06, "loss": 0.1512, "step": 6639 }, { "epoch": 0.19371025147324814, "grad_norm": 0.8591181566493377, "learning_rate": 9.313616685589596e-06, "loss": 0.18, "step": 6640 }, { "epoch": 0.19373942470389172, "grad_norm": 0.956879252265813, "learning_rate": 9.313377767614125e-06, "loss": 0.1809, "step": 6641 }, { "epoch": 0.19376859793453527, "grad_norm": 0.9351799785382513, "learning_rate": 9.313138811129844e-06, "loss": 0.1644, "step": 6642 }, { "epoch": 0.19379777116517882, "grad_norm": 0.9192455379752127, "learning_rate": 9.31289981613889e-06, "loss": 0.1649, "step": 6643 }, { "epoch": 0.1938269443958224, "grad_norm": 0.931037002354566, "learning_rate": 9.312660782643397e-06, "loss": 0.1714, "step": 6644 }, { "epoch": 0.19385611762646596, "grad_norm": 0.9645718792541462, "learning_rate": 9.312421710645496e-06, "loss": 0.1797, "step": 6645 }, { "epoch": 0.1938852908571095, "grad_norm": 0.891172809150048, "learning_rate": 9.312182600147325e-06, "loss": 0.1747, "step": 6646 }, { "epoch": 0.1939144640877531, "grad_norm": 0.912154300639407, "learning_rate": 9.311943451151017e-06, "loss": 0.171, "step": 6647 }, { "epoch": 0.19394363731839664, "grad_norm": 0.965505005244721, "learning_rate": 9.311704263658707e-06, "loss": 0.1668, "step": 6648 }, { "epoch": 0.1939728105490402, "grad_norm": 0.8875713134156251, "learning_rate": 9.311465037672532e-06, "loss": 0.1679, "step": 6649 }, { "epoch": 0.19400198377968375, "grad_norm": 1.018908109861898, "learning_rate": 9.311225773194624e-06, "loss": 0.165, "step": 6650 }, { "epoch": 0.19403115701032733, "grad_norm": 1.0440928186899154, "learning_rate": 9.310986470227121e-06, "loss": 0.1462, "step": 6651 }, { "epoch": 0.19406033024097089, "grad_norm": 0.9394903747176593, "learning_rate": 9.310747128772162e-06, "loss": 0.1764, "step": 6652 }, { "epoch": 0.19408950347161444, "grad_norm": 0.8371496225635469, "learning_rate": 9.31050774883188e-06, "loss": 0.1659, "step": 6653 }, { "epoch": 0.19411867670225802, "grad_norm": 0.8341934815383878, "learning_rate": 9.310268330408417e-06, "loss": 0.1525, "step": 6654 }, { "epoch": 0.19414784993290157, "grad_norm": 1.1010564884598677, "learning_rate": 9.310028873503905e-06, "loss": 0.1597, "step": 6655 }, { "epoch": 0.19417702316354513, "grad_norm": 0.7642353216518087, "learning_rate": 9.309789378120483e-06, "loss": 0.1514, "step": 6656 }, { "epoch": 0.19420619639418868, "grad_norm": 0.9972370208399856, "learning_rate": 9.309549844260292e-06, "loss": 0.1936, "step": 6657 }, { "epoch": 0.19423536962483226, "grad_norm": 1.077879262495552, "learning_rate": 9.309310271925469e-06, "loss": 0.1709, "step": 6658 }, { "epoch": 0.1942645428554758, "grad_norm": 0.9146424859094537, "learning_rate": 9.309070661118151e-06, "loss": 0.1722, "step": 6659 }, { "epoch": 0.19429371608611937, "grad_norm": 0.959592205810473, "learning_rate": 9.30883101184048e-06, "loss": 0.1599, "step": 6660 }, { "epoch": 0.19432288931676295, "grad_norm": 0.7038250088223772, "learning_rate": 9.308591324094594e-06, "loss": 0.1405, "step": 6661 }, { "epoch": 0.1943520625474065, "grad_norm": 0.9424024148519394, "learning_rate": 9.308351597882632e-06, "loss": 0.152, "step": 6662 }, { "epoch": 0.19438123577805005, "grad_norm": 0.8046587631286805, "learning_rate": 9.308111833206737e-06, "loss": 0.1676, "step": 6663 }, { "epoch": 0.19441040900869364, "grad_norm": 0.9187185870845379, "learning_rate": 9.307872030069049e-06, "loss": 0.1734, "step": 6664 }, { "epoch": 0.1944395822393372, "grad_norm": 0.779844975332377, "learning_rate": 9.307632188471707e-06, "loss": 0.1574, "step": 6665 }, { "epoch": 0.19446875546998074, "grad_norm": 0.945219663686819, "learning_rate": 9.30739230841685e-06, "loss": 0.1611, "step": 6666 }, { "epoch": 0.1944979287006243, "grad_norm": 0.8842010436067574, "learning_rate": 9.307152389906626e-06, "loss": 0.1833, "step": 6667 }, { "epoch": 0.19452710193126788, "grad_norm": 0.9778749000964558, "learning_rate": 9.306912432943173e-06, "loss": 0.1665, "step": 6668 }, { "epoch": 0.19455627516191143, "grad_norm": 0.793338859736364, "learning_rate": 9.306672437528635e-06, "loss": 0.169, "step": 6669 }, { "epoch": 0.19458544839255498, "grad_norm": 0.7503759724553432, "learning_rate": 9.306432403665152e-06, "loss": 0.154, "step": 6670 }, { "epoch": 0.19461462162319856, "grad_norm": 0.7981291064907011, "learning_rate": 9.30619233135487e-06, "loss": 0.1485, "step": 6671 }, { "epoch": 0.19464379485384212, "grad_norm": 0.8824631369306233, "learning_rate": 9.30595222059993e-06, "loss": 0.1587, "step": 6672 }, { "epoch": 0.19467296808448567, "grad_norm": 0.9097674315717833, "learning_rate": 9.305712071402474e-06, "loss": 0.1897, "step": 6673 }, { "epoch": 0.19470214131512925, "grad_norm": 0.9764717255707349, "learning_rate": 9.305471883764651e-06, "loss": 0.1516, "step": 6674 }, { "epoch": 0.1947313145457728, "grad_norm": 0.857674852944612, "learning_rate": 9.305231657688605e-06, "loss": 0.1533, "step": 6675 }, { "epoch": 0.19476048777641636, "grad_norm": 0.7699677407374467, "learning_rate": 9.304991393176475e-06, "loss": 0.1458, "step": 6676 }, { "epoch": 0.1947896610070599, "grad_norm": 0.8300309033541458, "learning_rate": 9.30475109023041e-06, "loss": 0.1924, "step": 6677 }, { "epoch": 0.1948188342377035, "grad_norm": 0.8280787865320262, "learning_rate": 9.304510748852558e-06, "loss": 0.1483, "step": 6678 }, { "epoch": 0.19484800746834705, "grad_norm": 0.8621778447224153, "learning_rate": 9.304270369045058e-06, "loss": 0.1768, "step": 6679 }, { "epoch": 0.1948771806989906, "grad_norm": 0.8403947736527188, "learning_rate": 9.30402995081006e-06, "loss": 0.1647, "step": 6680 }, { "epoch": 0.19490635392963418, "grad_norm": 1.1243189869466546, "learning_rate": 9.303789494149711e-06, "loss": 0.173, "step": 6681 }, { "epoch": 0.19493552716027773, "grad_norm": 0.7889869801713401, "learning_rate": 9.303548999066157e-06, "loss": 0.1583, "step": 6682 }, { "epoch": 0.19496470039092129, "grad_norm": 0.9309794684973345, "learning_rate": 9.303308465561544e-06, "loss": 0.1637, "step": 6683 }, { "epoch": 0.19499387362156484, "grad_norm": 0.877032678167129, "learning_rate": 9.303067893638022e-06, "loss": 0.1983, "step": 6684 }, { "epoch": 0.19502304685220842, "grad_norm": 0.7429751270285164, "learning_rate": 9.302827283297736e-06, "loss": 0.1406, "step": 6685 }, { "epoch": 0.19505222008285197, "grad_norm": 1.1250560779172676, "learning_rate": 9.302586634542835e-06, "loss": 0.1559, "step": 6686 }, { "epoch": 0.19508139331349553, "grad_norm": 0.6895416128858841, "learning_rate": 9.302345947375469e-06, "loss": 0.1703, "step": 6687 }, { "epoch": 0.1951105665441391, "grad_norm": 0.7863778257084978, "learning_rate": 9.302105221797784e-06, "loss": 0.1504, "step": 6688 }, { "epoch": 0.19513973977478266, "grad_norm": 1.0091930647466942, "learning_rate": 9.30186445781193e-06, "loss": 0.1656, "step": 6689 }, { "epoch": 0.19516891300542621, "grad_norm": 0.7497217945700104, "learning_rate": 9.301623655420058e-06, "loss": 0.1483, "step": 6690 }, { "epoch": 0.1951980862360698, "grad_norm": 0.7333323914970696, "learning_rate": 9.301382814624318e-06, "loss": 0.1734, "step": 6691 }, { "epoch": 0.19522725946671335, "grad_norm": 1.0619694790705123, "learning_rate": 9.301141935426856e-06, "loss": 0.1787, "step": 6692 }, { "epoch": 0.1952564326973569, "grad_norm": 1.1363839954284976, "learning_rate": 9.300901017829827e-06, "loss": 0.1586, "step": 6693 }, { "epoch": 0.19528560592800046, "grad_norm": 0.9425364829644454, "learning_rate": 9.300660061835382e-06, "loss": 0.1578, "step": 6694 }, { "epoch": 0.19531477915864404, "grad_norm": 0.8252125484495676, "learning_rate": 9.30041906744567e-06, "loss": 0.1702, "step": 6695 }, { "epoch": 0.1953439523892876, "grad_norm": 0.9493848757992757, "learning_rate": 9.30017803466284e-06, "loss": 0.1566, "step": 6696 }, { "epoch": 0.19537312561993114, "grad_norm": 1.0631928542107913, "learning_rate": 9.299936963489051e-06, "loss": 0.1722, "step": 6697 }, { "epoch": 0.19540229885057472, "grad_norm": 0.9191586911053998, "learning_rate": 9.29969585392645e-06, "loss": 0.1579, "step": 6698 }, { "epoch": 0.19543147208121828, "grad_norm": 0.9179787981754665, "learning_rate": 9.299454705977191e-06, "loss": 0.172, "step": 6699 }, { "epoch": 0.19546064531186183, "grad_norm": 0.8691153998650513, "learning_rate": 9.299213519643427e-06, "loss": 0.157, "step": 6700 }, { "epoch": 0.19548981854250538, "grad_norm": 0.9333121570377566, "learning_rate": 9.298972294927308e-06, "loss": 0.1592, "step": 6701 }, { "epoch": 0.19551899177314896, "grad_norm": 0.736950193272381, "learning_rate": 9.298731031830994e-06, "loss": 0.1698, "step": 6702 }, { "epoch": 0.19554816500379252, "grad_norm": 0.9342355920214414, "learning_rate": 9.298489730356635e-06, "loss": 0.1426, "step": 6703 }, { "epoch": 0.19557733823443607, "grad_norm": 0.8999560289479994, "learning_rate": 9.298248390506387e-06, "loss": 0.1654, "step": 6704 }, { "epoch": 0.19560651146507965, "grad_norm": 0.7436024642026525, "learning_rate": 9.2980070122824e-06, "loss": 0.158, "step": 6705 }, { "epoch": 0.1956356846957232, "grad_norm": 0.836181561382301, "learning_rate": 9.297765595686834e-06, "loss": 0.1698, "step": 6706 }, { "epoch": 0.19566485792636676, "grad_norm": 0.8837979522452214, "learning_rate": 9.297524140721843e-06, "loss": 0.1443, "step": 6707 }, { "epoch": 0.19569403115701034, "grad_norm": 0.8967727658671764, "learning_rate": 9.297282647389583e-06, "loss": 0.1788, "step": 6708 }, { "epoch": 0.1957232043876539, "grad_norm": 0.8503853960943315, "learning_rate": 9.297041115692208e-06, "loss": 0.1827, "step": 6709 }, { "epoch": 0.19575237761829745, "grad_norm": 1.06943952060511, "learning_rate": 9.296799545631876e-06, "loss": 0.1689, "step": 6710 }, { "epoch": 0.195781550848941, "grad_norm": 0.7823124754458123, "learning_rate": 9.296557937210745e-06, "loss": 0.1471, "step": 6711 }, { "epoch": 0.19581072407958458, "grad_norm": 0.8053578511032365, "learning_rate": 9.296316290430969e-06, "loss": 0.1509, "step": 6712 }, { "epoch": 0.19583989731022813, "grad_norm": 0.8349161213608354, "learning_rate": 9.296074605294707e-06, "loss": 0.1734, "step": 6713 }, { "epoch": 0.1958690705408717, "grad_norm": 0.8981238701740619, "learning_rate": 9.295832881804116e-06, "loss": 0.1694, "step": 6714 }, { "epoch": 0.19589824377151527, "grad_norm": 1.0839142407035054, "learning_rate": 9.295591119961356e-06, "loss": 0.1537, "step": 6715 }, { "epoch": 0.19592741700215882, "grad_norm": 0.8004759947306443, "learning_rate": 9.295349319768583e-06, "loss": 0.1895, "step": 6716 }, { "epoch": 0.19595659023280237, "grad_norm": 1.0750397339890871, "learning_rate": 9.295107481227957e-06, "loss": 0.1678, "step": 6717 }, { "epoch": 0.19598576346344596, "grad_norm": 1.0142931560315016, "learning_rate": 9.294865604341635e-06, "loss": 0.1782, "step": 6718 }, { "epoch": 0.1960149366940895, "grad_norm": 0.7769502369432942, "learning_rate": 9.29462368911178e-06, "loss": 0.1819, "step": 6719 }, { "epoch": 0.19604410992473306, "grad_norm": 0.8453196871331251, "learning_rate": 9.29438173554055e-06, "loss": 0.1722, "step": 6720 }, { "epoch": 0.19607328315537662, "grad_norm": 0.8773114967078133, "learning_rate": 9.294139743630104e-06, "loss": 0.1702, "step": 6721 }, { "epoch": 0.1961024563860202, "grad_norm": 0.8901471248144578, "learning_rate": 9.293897713382603e-06, "loss": 0.1367, "step": 6722 }, { "epoch": 0.19613162961666375, "grad_norm": 0.7341032379100687, "learning_rate": 9.29365564480021e-06, "loss": 0.1786, "step": 6723 }, { "epoch": 0.1961608028473073, "grad_norm": 0.8538138627001078, "learning_rate": 9.293413537885083e-06, "loss": 0.1861, "step": 6724 }, { "epoch": 0.19618997607795088, "grad_norm": 0.9065472952693894, "learning_rate": 9.293171392639385e-06, "loss": 0.1492, "step": 6725 }, { "epoch": 0.19621914930859444, "grad_norm": 0.9563289558009515, "learning_rate": 9.292929209065278e-06, "loss": 0.1601, "step": 6726 }, { "epoch": 0.196248322539238, "grad_norm": 0.8153129262051362, "learning_rate": 9.292686987164924e-06, "loss": 0.19, "step": 6727 }, { "epoch": 0.19627749576988154, "grad_norm": 0.9845470112599826, "learning_rate": 9.292444726940485e-06, "loss": 0.1473, "step": 6728 }, { "epoch": 0.19630666900052512, "grad_norm": 0.8607922807974953, "learning_rate": 9.292202428394124e-06, "loss": 0.1435, "step": 6729 }, { "epoch": 0.19633584223116868, "grad_norm": 0.7267509290603898, "learning_rate": 9.291960091528004e-06, "loss": 0.1621, "step": 6730 }, { "epoch": 0.19636501546181223, "grad_norm": 0.8054969037959844, "learning_rate": 9.29171771634429e-06, "loss": 0.1583, "step": 6731 }, { "epoch": 0.1963941886924558, "grad_norm": 1.136024857434458, "learning_rate": 9.291475302845145e-06, "loss": 0.185, "step": 6732 }, { "epoch": 0.19642336192309937, "grad_norm": 0.8106671548634049, "learning_rate": 9.291232851032733e-06, "loss": 0.1788, "step": 6733 }, { "epoch": 0.19645253515374292, "grad_norm": 1.0138559362243085, "learning_rate": 9.290990360909218e-06, "loss": 0.1515, "step": 6734 }, { "epoch": 0.1964817083843865, "grad_norm": 0.9685956393538463, "learning_rate": 9.290747832476765e-06, "loss": 0.1487, "step": 6735 }, { "epoch": 0.19651088161503005, "grad_norm": 1.0979140692203073, "learning_rate": 9.29050526573754e-06, "loss": 0.1553, "step": 6736 }, { "epoch": 0.1965400548456736, "grad_norm": 0.9914402686672533, "learning_rate": 9.290262660693708e-06, "loss": 0.176, "step": 6737 }, { "epoch": 0.19656922807631716, "grad_norm": 0.8714868605471512, "learning_rate": 9.290020017347434e-06, "loss": 0.2007, "step": 6738 }, { "epoch": 0.19659840130696074, "grad_norm": 0.962420922455051, "learning_rate": 9.289777335700888e-06, "loss": 0.1886, "step": 6739 }, { "epoch": 0.1966275745376043, "grad_norm": 0.8631583971632991, "learning_rate": 9.289534615756231e-06, "loss": 0.1545, "step": 6740 }, { "epoch": 0.19665674776824785, "grad_norm": 0.7641851756402492, "learning_rate": 9.289291857515634e-06, "loss": 0.1394, "step": 6741 }, { "epoch": 0.19668592099889143, "grad_norm": 0.8290259161847129, "learning_rate": 9.289049060981264e-06, "loss": 0.1712, "step": 6742 }, { "epoch": 0.19671509422953498, "grad_norm": 0.7554586638487255, "learning_rate": 9.288806226155288e-06, "loss": 0.1477, "step": 6743 }, { "epoch": 0.19674426746017853, "grad_norm": 0.811628315137839, "learning_rate": 9.288563353039873e-06, "loss": 0.1939, "step": 6744 }, { "epoch": 0.1967734406908221, "grad_norm": 0.8483973942457574, "learning_rate": 9.288320441637189e-06, "loss": 0.1664, "step": 6745 }, { "epoch": 0.19680261392146567, "grad_norm": 0.8667848777043733, "learning_rate": 9.288077491949403e-06, "loss": 0.1835, "step": 6746 }, { "epoch": 0.19683178715210922, "grad_norm": 0.871493082736332, "learning_rate": 9.287834503978685e-06, "loss": 0.191, "step": 6747 }, { "epoch": 0.19686096038275278, "grad_norm": 0.7598035949504574, "learning_rate": 9.287591477727205e-06, "loss": 0.1839, "step": 6748 }, { "epoch": 0.19689013361339636, "grad_norm": 0.8706855653558321, "learning_rate": 9.28734841319713e-06, "loss": 0.1614, "step": 6749 }, { "epoch": 0.1969193068440399, "grad_norm": 0.7810263676089523, "learning_rate": 9.287105310390634e-06, "loss": 0.1579, "step": 6750 }, { "epoch": 0.19694848007468346, "grad_norm": 0.9410537912137862, "learning_rate": 9.286862169309886e-06, "loss": 0.1645, "step": 6751 }, { "epoch": 0.19697765330532704, "grad_norm": 1.301653554428064, "learning_rate": 9.286618989957053e-06, "loss": 0.1611, "step": 6752 }, { "epoch": 0.1970068265359706, "grad_norm": 1.0884767791477747, "learning_rate": 9.286375772334309e-06, "loss": 0.1556, "step": 6753 }, { "epoch": 0.19703599976661415, "grad_norm": 0.6899878328774506, "learning_rate": 9.286132516443826e-06, "loss": 0.1581, "step": 6754 }, { "epoch": 0.1970651729972577, "grad_norm": 0.8429315060403528, "learning_rate": 9.285889222287776e-06, "loss": 0.1603, "step": 6755 }, { "epoch": 0.19709434622790128, "grad_norm": 0.9048267404546941, "learning_rate": 9.28564588986833e-06, "loss": 0.2059, "step": 6756 }, { "epoch": 0.19712351945854484, "grad_norm": 0.7372171872163484, "learning_rate": 9.285402519187659e-06, "loss": 0.1671, "step": 6757 }, { "epoch": 0.1971526926891884, "grad_norm": 0.7491177044705533, "learning_rate": 9.285159110247938e-06, "loss": 0.1521, "step": 6758 }, { "epoch": 0.19718186591983197, "grad_norm": 0.9188720580649349, "learning_rate": 9.28491566305134e-06, "loss": 0.1657, "step": 6759 }, { "epoch": 0.19721103915047553, "grad_norm": 0.766881910798833, "learning_rate": 9.284672177600039e-06, "loss": 0.1511, "step": 6760 }, { "epoch": 0.19724021238111908, "grad_norm": 0.8821957120476288, "learning_rate": 9.284428653896207e-06, "loss": 0.149, "step": 6761 }, { "epoch": 0.19726938561176266, "grad_norm": 0.8651784938194986, "learning_rate": 9.284185091942017e-06, "loss": 0.1632, "step": 6762 }, { "epoch": 0.1972985588424062, "grad_norm": 0.7624688335799735, "learning_rate": 9.283941491739648e-06, "loss": 0.1706, "step": 6763 }, { "epoch": 0.19732773207304977, "grad_norm": 1.0473505300517565, "learning_rate": 9.28369785329127e-06, "loss": 0.1788, "step": 6764 }, { "epoch": 0.19735690530369332, "grad_norm": 0.8783393953173931, "learning_rate": 9.283454176599059e-06, "loss": 0.1759, "step": 6765 }, { "epoch": 0.1973860785343369, "grad_norm": 1.090863210412366, "learning_rate": 9.283210461665195e-06, "loss": 0.1809, "step": 6766 }, { "epoch": 0.19741525176498045, "grad_norm": 0.8816239928570382, "learning_rate": 9.282966708491848e-06, "loss": 0.1513, "step": 6767 }, { "epoch": 0.197444424995624, "grad_norm": 0.906288892074761, "learning_rate": 9.282722917081197e-06, "loss": 0.1764, "step": 6768 }, { "epoch": 0.1974735982262676, "grad_norm": 0.8922413127003459, "learning_rate": 9.282479087435419e-06, "loss": 0.1577, "step": 6769 }, { "epoch": 0.19750277145691114, "grad_norm": 0.8980575146322631, "learning_rate": 9.28223521955669e-06, "loss": 0.176, "step": 6770 }, { "epoch": 0.1975319446875547, "grad_norm": 0.8317359960658007, "learning_rate": 9.281991313447185e-06, "loss": 0.1475, "step": 6771 }, { "epoch": 0.19756111791819825, "grad_norm": 0.9007402706647583, "learning_rate": 9.281747369109086e-06, "loss": 0.1707, "step": 6772 }, { "epoch": 0.19759029114884183, "grad_norm": 1.072492056741432, "learning_rate": 9.281503386544569e-06, "loss": 0.1502, "step": 6773 }, { "epoch": 0.19761946437948538, "grad_norm": 0.8813739309462332, "learning_rate": 9.281259365755811e-06, "loss": 0.1737, "step": 6774 }, { "epoch": 0.19764863761012894, "grad_norm": 0.8453346845434963, "learning_rate": 9.28101530674499e-06, "loss": 0.1608, "step": 6775 }, { "epoch": 0.19767781084077252, "grad_norm": 0.8030093121854524, "learning_rate": 9.280771209514287e-06, "loss": 0.1457, "step": 6776 }, { "epoch": 0.19770698407141607, "grad_norm": 0.9315923295272862, "learning_rate": 9.280527074065881e-06, "loss": 0.174, "step": 6777 }, { "epoch": 0.19773615730205962, "grad_norm": 1.0099377696711123, "learning_rate": 9.280282900401953e-06, "loss": 0.155, "step": 6778 }, { "epoch": 0.1977653305327032, "grad_norm": 0.876679878840367, "learning_rate": 9.280038688524678e-06, "loss": 0.1823, "step": 6779 }, { "epoch": 0.19779450376334676, "grad_norm": 1.1406207717128962, "learning_rate": 9.279794438436241e-06, "loss": 0.175, "step": 6780 }, { "epoch": 0.1978236769939903, "grad_norm": 0.8593591024214584, "learning_rate": 9.279550150138821e-06, "loss": 0.1669, "step": 6781 }, { "epoch": 0.19785285022463386, "grad_norm": 1.119386990221924, "learning_rate": 9.279305823634599e-06, "loss": 0.1493, "step": 6782 }, { "epoch": 0.19788202345527744, "grad_norm": 0.8571760555090274, "learning_rate": 9.279061458925755e-06, "loss": 0.1497, "step": 6783 }, { "epoch": 0.197911196685921, "grad_norm": 1.1002761353387693, "learning_rate": 9.278817056014473e-06, "loss": 0.1583, "step": 6784 }, { "epoch": 0.19794036991656455, "grad_norm": 1.0908781638352976, "learning_rate": 9.278572614902932e-06, "loss": 0.2065, "step": 6785 }, { "epoch": 0.19796954314720813, "grad_norm": 0.851372087478325, "learning_rate": 9.278328135593318e-06, "loss": 0.1693, "step": 6786 }, { "epoch": 0.19799871637785169, "grad_norm": 0.8643394174725874, "learning_rate": 9.278083618087811e-06, "loss": 0.1552, "step": 6787 }, { "epoch": 0.19802788960849524, "grad_norm": 0.7977493439466831, "learning_rate": 9.277839062388594e-06, "loss": 0.1732, "step": 6788 }, { "epoch": 0.19805706283913882, "grad_norm": 0.7624833243387342, "learning_rate": 9.277594468497853e-06, "loss": 0.1775, "step": 6789 }, { "epoch": 0.19808623606978237, "grad_norm": 0.8444387072539988, "learning_rate": 9.277349836417769e-06, "loss": 0.1724, "step": 6790 }, { "epoch": 0.19811540930042593, "grad_norm": 0.8496209699680165, "learning_rate": 9.277105166150525e-06, "loss": 0.1716, "step": 6791 }, { "epoch": 0.19814458253106948, "grad_norm": 0.9393124415834895, "learning_rate": 9.276860457698308e-06, "loss": 0.1821, "step": 6792 }, { "epoch": 0.19817375576171306, "grad_norm": 0.8848031449719974, "learning_rate": 9.276615711063303e-06, "loss": 0.1688, "step": 6793 }, { "epoch": 0.19820292899235661, "grad_norm": 0.9007435531593639, "learning_rate": 9.276370926247693e-06, "loss": 0.1739, "step": 6794 }, { "epoch": 0.19823210222300017, "grad_norm": 0.9604942915988172, "learning_rate": 9.276126103253664e-06, "loss": 0.1599, "step": 6795 }, { "epoch": 0.19826127545364375, "grad_norm": 0.8461491811556062, "learning_rate": 9.275881242083402e-06, "loss": 0.1715, "step": 6796 }, { "epoch": 0.1982904486842873, "grad_norm": 1.0300730779632639, "learning_rate": 9.275636342739094e-06, "loss": 0.1992, "step": 6797 }, { "epoch": 0.19831962191493085, "grad_norm": 0.8445314481632383, "learning_rate": 9.275391405222923e-06, "loss": 0.1727, "step": 6798 }, { "epoch": 0.1983487951455744, "grad_norm": 0.7359376197202013, "learning_rate": 9.27514642953708e-06, "loss": 0.1654, "step": 6799 }, { "epoch": 0.198377968376218, "grad_norm": 0.8443801360351817, "learning_rate": 9.274901415683751e-06, "loss": 0.158, "step": 6800 }, { "epoch": 0.19840714160686154, "grad_norm": 0.8307639478949574, "learning_rate": 9.27465636366512e-06, "loss": 0.1787, "step": 6801 }, { "epoch": 0.1984363148375051, "grad_norm": 0.9858139091472421, "learning_rate": 9.27441127348338e-06, "loss": 0.1918, "step": 6802 }, { "epoch": 0.19846548806814868, "grad_norm": 0.8464730211325557, "learning_rate": 9.274166145140715e-06, "loss": 0.1699, "step": 6803 }, { "epoch": 0.19849466129879223, "grad_norm": 0.7180746493841713, "learning_rate": 9.273920978639315e-06, "loss": 0.1559, "step": 6804 }, { "epoch": 0.19852383452943578, "grad_norm": 0.799114963785012, "learning_rate": 9.27367577398137e-06, "loss": 0.1622, "step": 6805 }, { "epoch": 0.19855300776007936, "grad_norm": 0.6447383763126631, "learning_rate": 9.273430531169068e-06, "loss": 0.1439, "step": 6806 }, { "epoch": 0.19858218099072292, "grad_norm": 0.7725223467058463, "learning_rate": 9.273185250204597e-06, "loss": 0.1633, "step": 6807 }, { "epoch": 0.19861135422136647, "grad_norm": 0.7575513821876597, "learning_rate": 9.272939931090148e-06, "loss": 0.1544, "step": 6808 }, { "epoch": 0.19864052745201002, "grad_norm": 0.7586924842351225, "learning_rate": 9.272694573827914e-06, "loss": 0.1687, "step": 6809 }, { "epoch": 0.1986697006826536, "grad_norm": 0.9272205458155205, "learning_rate": 9.272449178420079e-06, "loss": 0.1749, "step": 6810 }, { "epoch": 0.19869887391329716, "grad_norm": 0.7176069199236317, "learning_rate": 9.27220374486884e-06, "loss": 0.1564, "step": 6811 }, { "epoch": 0.1987280471439407, "grad_norm": 0.7930359692764781, "learning_rate": 9.271958273176385e-06, "loss": 0.1696, "step": 6812 }, { "epoch": 0.1987572203745843, "grad_norm": 0.9127337094725932, "learning_rate": 9.271712763344907e-06, "loss": 0.1754, "step": 6813 }, { "epoch": 0.19878639360522785, "grad_norm": 0.8029291901645138, "learning_rate": 9.271467215376598e-06, "loss": 0.1817, "step": 6814 }, { "epoch": 0.1988155668358714, "grad_norm": 2.336362348610908, "learning_rate": 9.271221629273647e-06, "loss": 0.1618, "step": 6815 }, { "epoch": 0.19884474006651495, "grad_norm": 0.986410040723693, "learning_rate": 9.27097600503825e-06, "loss": 0.1604, "step": 6816 }, { "epoch": 0.19887391329715853, "grad_norm": 0.9145616410938826, "learning_rate": 9.2707303426726e-06, "loss": 0.1528, "step": 6817 }, { "epoch": 0.1989030865278021, "grad_norm": 0.8020578464674823, "learning_rate": 9.270484642178888e-06, "loss": 0.1506, "step": 6818 }, { "epoch": 0.19893225975844564, "grad_norm": 0.8017613401330342, "learning_rate": 9.270238903559307e-06, "loss": 0.1644, "step": 6819 }, { "epoch": 0.19896143298908922, "grad_norm": 0.9630723985747422, "learning_rate": 9.269993126816055e-06, "loss": 0.1661, "step": 6820 }, { "epoch": 0.19899060621973277, "grad_norm": 1.071793350005467, "learning_rate": 9.269747311951322e-06, "loss": 0.1656, "step": 6821 }, { "epoch": 0.19901977945037633, "grad_norm": 0.7592639567566531, "learning_rate": 9.269501458967306e-06, "loss": 0.167, "step": 6822 }, { "epoch": 0.1990489526810199, "grad_norm": 0.9551099958261103, "learning_rate": 9.269255567866199e-06, "loss": 0.1683, "step": 6823 }, { "epoch": 0.19907812591166346, "grad_norm": 0.8111207539993681, "learning_rate": 9.269009638650198e-06, "loss": 0.1573, "step": 6824 }, { "epoch": 0.19910729914230701, "grad_norm": 0.9348714473789744, "learning_rate": 9.268763671321497e-06, "loss": 0.1663, "step": 6825 }, { "epoch": 0.19913647237295057, "grad_norm": 0.9722626260327435, "learning_rate": 9.268517665882294e-06, "loss": 0.1808, "step": 6826 }, { "epoch": 0.19916564560359415, "grad_norm": 0.9170527580958073, "learning_rate": 9.268271622334784e-06, "loss": 0.1769, "step": 6827 }, { "epoch": 0.1991948188342377, "grad_norm": 1.014970958407438, "learning_rate": 9.268025540681163e-06, "loss": 0.1879, "step": 6828 }, { "epoch": 0.19922399206488126, "grad_norm": 0.8491689365116207, "learning_rate": 9.26777942092363e-06, "loss": 0.1487, "step": 6829 }, { "epoch": 0.19925316529552484, "grad_norm": 0.8266477970103245, "learning_rate": 9.26753326306438e-06, "loss": 0.1828, "step": 6830 }, { "epoch": 0.1992823385261684, "grad_norm": 0.9012240304171691, "learning_rate": 9.267287067105612e-06, "loss": 0.1635, "step": 6831 }, { "epoch": 0.19931151175681194, "grad_norm": 0.8504390585172055, "learning_rate": 9.267040833049525e-06, "loss": 0.1647, "step": 6832 }, { "epoch": 0.19934068498745552, "grad_norm": 0.7854031137197991, "learning_rate": 9.266794560898315e-06, "loss": 0.1581, "step": 6833 }, { "epoch": 0.19936985821809908, "grad_norm": 0.74083613402986, "learning_rate": 9.266548250654183e-06, "loss": 0.1446, "step": 6834 }, { "epoch": 0.19939903144874263, "grad_norm": 0.8604736649988246, "learning_rate": 9.266301902319326e-06, "loss": 0.1766, "step": 6835 }, { "epoch": 0.19942820467938618, "grad_norm": 0.7926394245167371, "learning_rate": 9.266055515895945e-06, "loss": 0.1608, "step": 6836 }, { "epoch": 0.19945737791002977, "grad_norm": 0.616829938563071, "learning_rate": 9.265809091386236e-06, "loss": 0.1459, "step": 6837 }, { "epoch": 0.19948655114067332, "grad_norm": 0.8577998647176115, "learning_rate": 9.265562628792402e-06, "loss": 0.1735, "step": 6838 }, { "epoch": 0.19951572437131687, "grad_norm": 0.9264624005572271, "learning_rate": 9.265316128116647e-06, "loss": 0.18, "step": 6839 }, { "epoch": 0.19954489760196045, "grad_norm": 0.829671413022701, "learning_rate": 9.265069589361165e-06, "loss": 0.1875, "step": 6840 }, { "epoch": 0.199574070832604, "grad_norm": 0.7875201566196365, "learning_rate": 9.264823012528159e-06, "loss": 0.1673, "step": 6841 }, { "epoch": 0.19960324406324756, "grad_norm": 0.7874558435542366, "learning_rate": 9.264576397619832e-06, "loss": 0.1629, "step": 6842 }, { "epoch": 0.1996324172938911, "grad_norm": 0.7724462576193868, "learning_rate": 9.264329744638385e-06, "loss": 0.1466, "step": 6843 }, { "epoch": 0.1996615905245347, "grad_norm": 0.7668766098109513, "learning_rate": 9.264083053586022e-06, "loss": 0.1862, "step": 6844 }, { "epoch": 0.19969076375517825, "grad_norm": 0.773502420006225, "learning_rate": 9.263836324464942e-06, "loss": 0.1803, "step": 6845 }, { "epoch": 0.1997199369858218, "grad_norm": 0.7835549402281338, "learning_rate": 9.263589557277349e-06, "loss": 0.1441, "step": 6846 }, { "epoch": 0.19974911021646538, "grad_norm": 0.8700013437396111, "learning_rate": 9.263342752025446e-06, "loss": 0.1921, "step": 6847 }, { "epoch": 0.19977828344710893, "grad_norm": 0.8383673342033546, "learning_rate": 9.263095908711436e-06, "loss": 0.1758, "step": 6848 }, { "epoch": 0.1998074566777525, "grad_norm": 0.9235433147483293, "learning_rate": 9.262849027337524e-06, "loss": 0.1747, "step": 6849 }, { "epoch": 0.19983662990839607, "grad_norm": 0.8038146197528632, "learning_rate": 9.262602107905913e-06, "loss": 0.1463, "step": 6850 }, { "epoch": 0.19986580313903962, "grad_norm": 0.752886841622253, "learning_rate": 9.26235515041881e-06, "loss": 0.1313, "step": 6851 }, { "epoch": 0.19989497636968317, "grad_norm": 0.9008267139088517, "learning_rate": 9.262108154878415e-06, "loss": 0.1682, "step": 6852 }, { "epoch": 0.19992414960032673, "grad_norm": 0.9045358933870375, "learning_rate": 9.261861121286938e-06, "loss": 0.1679, "step": 6853 }, { "epoch": 0.1999533228309703, "grad_norm": 1.1205794896514105, "learning_rate": 9.261614049646581e-06, "loss": 0.158, "step": 6854 }, { "epoch": 0.19998249606161386, "grad_norm": 0.9444648051004103, "learning_rate": 9.261366939959552e-06, "loss": 0.1611, "step": 6855 }, { "epoch": 0.20001166929225742, "grad_norm": 0.8900281994126368, "learning_rate": 9.261119792228056e-06, "loss": 0.1622, "step": 6856 }, { "epoch": 0.200040842522901, "grad_norm": 0.9793048129383088, "learning_rate": 9.260872606454299e-06, "loss": 0.1554, "step": 6857 }, { "epoch": 0.20007001575354455, "grad_norm": 0.7866144396388639, "learning_rate": 9.260625382640489e-06, "loss": 0.1402, "step": 6858 }, { "epoch": 0.2000991889841881, "grad_norm": 1.1020344557217077, "learning_rate": 9.260378120788833e-06, "loss": 0.1975, "step": 6859 }, { "epoch": 0.20012836221483168, "grad_norm": 0.9935447255693488, "learning_rate": 9.260130820901539e-06, "loss": 0.1812, "step": 6860 }, { "epoch": 0.20015753544547524, "grad_norm": 0.92842363614548, "learning_rate": 9.259883482980812e-06, "loss": 0.1681, "step": 6861 }, { "epoch": 0.2001867086761188, "grad_norm": 0.8056007956260339, "learning_rate": 9.259636107028863e-06, "loss": 0.1464, "step": 6862 }, { "epoch": 0.20021588190676234, "grad_norm": 0.8201984804203223, "learning_rate": 9.2593886930479e-06, "loss": 0.1628, "step": 6863 }, { "epoch": 0.20024505513740593, "grad_norm": 0.9561164466426443, "learning_rate": 9.259141241040132e-06, "loss": 0.1631, "step": 6864 }, { "epoch": 0.20027422836804948, "grad_norm": 0.7606171656600601, "learning_rate": 9.258893751007768e-06, "loss": 0.1916, "step": 6865 }, { "epoch": 0.20030340159869303, "grad_norm": 0.9893724394233364, "learning_rate": 9.258646222953014e-06, "loss": 0.1785, "step": 6866 }, { "epoch": 0.2003325748293366, "grad_norm": 1.0492910901475423, "learning_rate": 9.258398656878086e-06, "loss": 0.1832, "step": 6867 }, { "epoch": 0.20036174805998017, "grad_norm": 0.8239922216935243, "learning_rate": 9.25815105278519e-06, "loss": 0.1447, "step": 6868 }, { "epoch": 0.20039092129062372, "grad_norm": 0.9857818821904483, "learning_rate": 9.257903410676542e-06, "loss": 0.156, "step": 6869 }, { "epoch": 0.20042009452126727, "grad_norm": 1.1030047783364014, "learning_rate": 9.257655730554343e-06, "loss": 0.1626, "step": 6870 }, { "epoch": 0.20044926775191085, "grad_norm": 0.7547724311474497, "learning_rate": 9.257408012420814e-06, "loss": 0.1441, "step": 6871 }, { "epoch": 0.2004784409825544, "grad_norm": 0.8284604181580133, "learning_rate": 9.25716025627816e-06, "loss": 0.1588, "step": 6872 }, { "epoch": 0.20050761421319796, "grad_norm": 0.8664436845875464, "learning_rate": 9.256912462128598e-06, "loss": 0.1478, "step": 6873 }, { "epoch": 0.20053678744384154, "grad_norm": 0.7916567029228911, "learning_rate": 9.256664629974336e-06, "loss": 0.1738, "step": 6874 }, { "epoch": 0.2005659606744851, "grad_norm": 0.876323632801491, "learning_rate": 9.256416759817589e-06, "loss": 0.1574, "step": 6875 }, { "epoch": 0.20059513390512865, "grad_norm": 0.8757684449739846, "learning_rate": 9.256168851660568e-06, "loss": 0.1627, "step": 6876 }, { "epoch": 0.20062430713577223, "grad_norm": 0.7331586042635607, "learning_rate": 9.255920905505489e-06, "loss": 0.166, "step": 6877 }, { "epoch": 0.20065348036641578, "grad_norm": 0.884088409347574, "learning_rate": 9.255672921354564e-06, "loss": 0.1572, "step": 6878 }, { "epoch": 0.20068265359705934, "grad_norm": 0.9423527980206594, "learning_rate": 9.255424899210006e-06, "loss": 0.1738, "step": 6879 }, { "epoch": 0.2007118268277029, "grad_norm": 0.7542517853422843, "learning_rate": 9.255176839074031e-06, "loss": 0.1736, "step": 6880 }, { "epoch": 0.20074100005834647, "grad_norm": 0.9210894675280078, "learning_rate": 9.254928740948854e-06, "loss": 0.1571, "step": 6881 }, { "epoch": 0.20077017328899002, "grad_norm": 0.9075573167169664, "learning_rate": 9.254680604836688e-06, "loss": 0.1654, "step": 6882 }, { "epoch": 0.20079934651963358, "grad_norm": 0.7357175761969743, "learning_rate": 9.254432430739749e-06, "loss": 0.1576, "step": 6883 }, { "epoch": 0.20082851975027716, "grad_norm": 0.9293557786696476, "learning_rate": 9.254184218660252e-06, "loss": 0.1757, "step": 6884 }, { "epoch": 0.2008576929809207, "grad_norm": 1.2702381815754837, "learning_rate": 9.253935968600416e-06, "loss": 0.1973, "step": 6885 }, { "epoch": 0.20088686621156426, "grad_norm": 0.8137709259145418, "learning_rate": 9.253687680562454e-06, "loss": 0.1494, "step": 6886 }, { "epoch": 0.20091603944220782, "grad_norm": 0.7591368911767536, "learning_rate": 9.253439354548583e-06, "loss": 0.1455, "step": 6887 }, { "epoch": 0.2009452126728514, "grad_norm": 0.8431311808072077, "learning_rate": 9.253190990561022e-06, "loss": 0.177, "step": 6888 }, { "epoch": 0.20097438590349495, "grad_norm": 0.7439925566693707, "learning_rate": 9.252942588601988e-06, "loss": 0.1629, "step": 6889 }, { "epoch": 0.2010035591341385, "grad_norm": 0.8463428058909843, "learning_rate": 9.252694148673695e-06, "loss": 0.1783, "step": 6890 }, { "epoch": 0.20103273236478209, "grad_norm": 0.8698538347710973, "learning_rate": 9.252445670778367e-06, "loss": 0.17, "step": 6891 }, { "epoch": 0.20106190559542564, "grad_norm": 1.0189241222151606, "learning_rate": 9.252197154918217e-06, "loss": 0.1683, "step": 6892 }, { "epoch": 0.2010910788260692, "grad_norm": 0.7984973411853468, "learning_rate": 9.251948601095466e-06, "loss": 0.1963, "step": 6893 }, { "epoch": 0.20112025205671277, "grad_norm": 1.1090263605234885, "learning_rate": 9.251700009312334e-06, "loss": 0.1655, "step": 6894 }, { "epoch": 0.20114942528735633, "grad_norm": 0.7521774243577772, "learning_rate": 9.25145137957104e-06, "loss": 0.166, "step": 6895 }, { "epoch": 0.20117859851799988, "grad_norm": 0.8901929590357675, "learning_rate": 9.251202711873802e-06, "loss": 0.163, "step": 6896 }, { "epoch": 0.20120777174864343, "grad_norm": 0.8387379421239792, "learning_rate": 9.25095400622284e-06, "loss": 0.1647, "step": 6897 }, { "epoch": 0.201236944979287, "grad_norm": 0.7271202499149118, "learning_rate": 9.250705262620376e-06, "loss": 0.169, "step": 6898 }, { "epoch": 0.20126611820993057, "grad_norm": 0.7917441056416124, "learning_rate": 9.25045648106863e-06, "loss": 0.1608, "step": 6899 }, { "epoch": 0.20129529144057412, "grad_norm": 1.0166959242851203, "learning_rate": 9.250207661569824e-06, "loss": 0.1534, "step": 6900 }, { "epoch": 0.2013244646712177, "grad_norm": 0.7654424818295686, "learning_rate": 9.249958804126178e-06, "loss": 0.1824, "step": 6901 }, { "epoch": 0.20135363790186125, "grad_norm": 0.9651451193769214, "learning_rate": 9.249709908739914e-06, "loss": 0.1843, "step": 6902 }, { "epoch": 0.2013828111325048, "grad_norm": 0.8894528565331724, "learning_rate": 9.249460975413256e-06, "loss": 0.1569, "step": 6903 }, { "epoch": 0.2014119843631484, "grad_norm": 0.6423539728835258, "learning_rate": 9.249212004148424e-06, "loss": 0.16, "step": 6904 }, { "epoch": 0.20144115759379194, "grad_norm": 0.8063827987347563, "learning_rate": 9.248962994947641e-06, "loss": 0.1559, "step": 6905 }, { "epoch": 0.2014703308244355, "grad_norm": 0.8624478514379726, "learning_rate": 9.248713947813131e-06, "loss": 0.1608, "step": 6906 }, { "epoch": 0.20149950405507905, "grad_norm": 0.8950892712865031, "learning_rate": 9.248464862747117e-06, "loss": 0.1734, "step": 6907 }, { "epoch": 0.20152867728572263, "grad_norm": 0.8905745182797603, "learning_rate": 9.248215739751825e-06, "loss": 0.1794, "step": 6908 }, { "epoch": 0.20155785051636618, "grad_norm": 0.9989720835810473, "learning_rate": 9.247966578829476e-06, "loss": 0.1573, "step": 6909 }, { "epoch": 0.20158702374700974, "grad_norm": 0.8291663064917253, "learning_rate": 9.247717379982293e-06, "loss": 0.1749, "step": 6910 }, { "epoch": 0.20161619697765332, "grad_norm": 0.9065232306006418, "learning_rate": 9.247468143212505e-06, "loss": 0.1698, "step": 6911 }, { "epoch": 0.20164537020829687, "grad_norm": 0.7636604866786924, "learning_rate": 9.247218868522335e-06, "loss": 0.1598, "step": 6912 }, { "epoch": 0.20167454343894042, "grad_norm": 0.7669056755864361, "learning_rate": 9.24696955591401e-06, "loss": 0.1608, "step": 6913 }, { "epoch": 0.20170371666958398, "grad_norm": 0.8165590403692065, "learning_rate": 9.246720205389752e-06, "loss": 0.164, "step": 6914 }, { "epoch": 0.20173288990022756, "grad_norm": 0.6512869425367765, "learning_rate": 9.246470816951792e-06, "loss": 0.169, "step": 6915 }, { "epoch": 0.2017620631308711, "grad_norm": 0.851582519614119, "learning_rate": 9.246221390602353e-06, "loss": 0.1714, "step": 6916 }, { "epoch": 0.20179123636151466, "grad_norm": 0.8819487941044123, "learning_rate": 9.245971926343664e-06, "loss": 0.1591, "step": 6917 }, { "epoch": 0.20182040959215825, "grad_norm": 0.9313966169414754, "learning_rate": 9.245722424177953e-06, "loss": 0.1553, "step": 6918 }, { "epoch": 0.2018495828228018, "grad_norm": 0.691431593294132, "learning_rate": 9.245472884107442e-06, "loss": 0.157, "step": 6919 }, { "epoch": 0.20187875605344535, "grad_norm": 1.0897206048393633, "learning_rate": 9.245223306134364e-06, "loss": 0.1683, "step": 6920 }, { "epoch": 0.20190792928408893, "grad_norm": 0.7985922287025494, "learning_rate": 9.244973690260947e-06, "loss": 0.1312, "step": 6921 }, { "epoch": 0.20193710251473249, "grad_norm": 1.1762938233012288, "learning_rate": 9.244724036489416e-06, "loss": 0.1776, "step": 6922 }, { "epoch": 0.20196627574537604, "grad_norm": 0.6709814809299784, "learning_rate": 9.244474344822003e-06, "loss": 0.1612, "step": 6923 }, { "epoch": 0.2019954489760196, "grad_norm": 0.8355430771473132, "learning_rate": 9.244224615260939e-06, "loss": 0.1623, "step": 6924 }, { "epoch": 0.20202462220666317, "grad_norm": 0.841972104666194, "learning_rate": 9.243974847808447e-06, "loss": 0.1451, "step": 6925 }, { "epoch": 0.20205379543730673, "grad_norm": 0.9060411147309869, "learning_rate": 9.243725042466762e-06, "loss": 0.1629, "step": 6926 }, { "epoch": 0.20208296866795028, "grad_norm": 1.0164509121189214, "learning_rate": 9.243475199238115e-06, "loss": 0.1508, "step": 6927 }, { "epoch": 0.20211214189859386, "grad_norm": 1.2165006713459436, "learning_rate": 9.243225318124731e-06, "loss": 0.1521, "step": 6928 }, { "epoch": 0.20214131512923741, "grad_norm": 0.9375090116468068, "learning_rate": 9.242975399128846e-06, "loss": 0.1364, "step": 6929 }, { "epoch": 0.20217048835988097, "grad_norm": 0.7444053922033216, "learning_rate": 9.242725442252689e-06, "loss": 0.1709, "step": 6930 }, { "epoch": 0.20219966159052452, "grad_norm": 0.8938477042452997, "learning_rate": 9.242475447498494e-06, "loss": 0.1975, "step": 6931 }, { "epoch": 0.2022288348211681, "grad_norm": 0.9444066284799371, "learning_rate": 9.242225414868489e-06, "loss": 0.175, "step": 6932 }, { "epoch": 0.20225800805181166, "grad_norm": 0.7975877128495011, "learning_rate": 9.241975344364908e-06, "loss": 0.1668, "step": 6933 }, { "epoch": 0.2022871812824552, "grad_norm": 0.9344549468260859, "learning_rate": 9.241725235989984e-06, "loss": 0.1775, "step": 6934 }, { "epoch": 0.2023163545130988, "grad_norm": 0.9183123396344244, "learning_rate": 9.24147508974595e-06, "loss": 0.2053, "step": 6935 }, { "epoch": 0.20234552774374234, "grad_norm": 0.812772993081817, "learning_rate": 9.24122490563504e-06, "loss": 0.1748, "step": 6936 }, { "epoch": 0.2023747009743859, "grad_norm": 1.2402670599204466, "learning_rate": 9.240974683659484e-06, "loss": 0.1757, "step": 6937 }, { "epoch": 0.20240387420502948, "grad_norm": 0.9170751278495316, "learning_rate": 9.24072442382152e-06, "loss": 0.1974, "step": 6938 }, { "epoch": 0.20243304743567303, "grad_norm": 1.0772371356975892, "learning_rate": 9.240474126123382e-06, "loss": 0.1603, "step": 6939 }, { "epoch": 0.20246222066631658, "grad_norm": 0.8619979857339938, "learning_rate": 9.240223790567301e-06, "loss": 0.1687, "step": 6940 }, { "epoch": 0.20249139389696014, "grad_norm": 0.7450528065605044, "learning_rate": 9.239973417155514e-06, "loss": 0.1663, "step": 6941 }, { "epoch": 0.20252056712760372, "grad_norm": 0.929227600853273, "learning_rate": 9.239723005890259e-06, "loss": 0.1615, "step": 6942 }, { "epoch": 0.20254974035824727, "grad_norm": 0.612880897375697, "learning_rate": 9.239472556773767e-06, "loss": 0.1539, "step": 6943 }, { "epoch": 0.20257891358889082, "grad_norm": 0.797816385622781, "learning_rate": 9.239222069808278e-06, "loss": 0.1586, "step": 6944 }, { "epoch": 0.2026080868195344, "grad_norm": 0.8512931410273533, "learning_rate": 9.238971544996024e-06, "loss": 0.1911, "step": 6945 }, { "epoch": 0.20263726005017796, "grad_norm": 0.7940492984333843, "learning_rate": 9.238720982339244e-06, "loss": 0.1887, "step": 6946 }, { "epoch": 0.2026664332808215, "grad_norm": 0.9662901286894028, "learning_rate": 9.238470381840177e-06, "loss": 0.1605, "step": 6947 }, { "epoch": 0.2026956065114651, "grad_norm": 0.7812194229483999, "learning_rate": 9.238219743501056e-06, "loss": 0.1469, "step": 6948 }, { "epoch": 0.20272477974210865, "grad_norm": 0.9627478070169841, "learning_rate": 9.237969067324122e-06, "loss": 0.1756, "step": 6949 }, { "epoch": 0.2027539529727522, "grad_norm": 0.9180610650279988, "learning_rate": 9.237718353311614e-06, "loss": 0.1743, "step": 6950 }, { "epoch": 0.20278312620339575, "grad_norm": 0.8145939137932461, "learning_rate": 9.237467601465765e-06, "loss": 0.1642, "step": 6951 }, { "epoch": 0.20281229943403933, "grad_norm": 0.9541162411630298, "learning_rate": 9.237216811788818e-06, "loss": 0.1877, "step": 6952 }, { "epoch": 0.2028414726646829, "grad_norm": 0.7818593113436227, "learning_rate": 9.23696598428301e-06, "loss": 0.1525, "step": 6953 }, { "epoch": 0.20287064589532644, "grad_norm": 0.8353926491125307, "learning_rate": 9.236715118950584e-06, "loss": 0.1879, "step": 6954 }, { "epoch": 0.20289981912597002, "grad_norm": 0.8569013518121356, "learning_rate": 9.236464215793773e-06, "loss": 0.1641, "step": 6955 }, { "epoch": 0.20292899235661357, "grad_norm": 0.7441027716221941, "learning_rate": 9.236213274814822e-06, "loss": 0.1595, "step": 6956 }, { "epoch": 0.20295816558725713, "grad_norm": 0.6849361560051208, "learning_rate": 9.23596229601597e-06, "loss": 0.1535, "step": 6957 }, { "epoch": 0.20298733881790068, "grad_norm": 0.7500331412109894, "learning_rate": 9.23571127939946e-06, "loss": 0.1598, "step": 6958 }, { "epoch": 0.20301651204854426, "grad_norm": 0.8118918095758154, "learning_rate": 9.23546022496753e-06, "loss": 0.1946, "step": 6959 }, { "epoch": 0.20304568527918782, "grad_norm": 0.8545793980969125, "learning_rate": 9.23520913272242e-06, "loss": 0.1733, "step": 6960 }, { "epoch": 0.20307485850983137, "grad_norm": 0.816879846775412, "learning_rate": 9.234958002666377e-06, "loss": 0.1637, "step": 6961 }, { "epoch": 0.20310403174047495, "grad_norm": 0.8082137698355487, "learning_rate": 9.234706834801637e-06, "loss": 0.1909, "step": 6962 }, { "epoch": 0.2031332049711185, "grad_norm": 0.886717342632263, "learning_rate": 9.234455629130447e-06, "loss": 0.1437, "step": 6963 }, { "epoch": 0.20316237820176206, "grad_norm": 0.7627804041244418, "learning_rate": 9.234204385655048e-06, "loss": 0.1669, "step": 6964 }, { "epoch": 0.20319155143240564, "grad_norm": 0.9082853257968745, "learning_rate": 9.233953104377683e-06, "loss": 0.155, "step": 6965 }, { "epoch": 0.2032207246630492, "grad_norm": 0.7534748533281638, "learning_rate": 9.233701785300594e-06, "loss": 0.1533, "step": 6966 }, { "epoch": 0.20324989789369274, "grad_norm": 0.8623488199709958, "learning_rate": 9.233450428426028e-06, "loss": 0.1481, "step": 6967 }, { "epoch": 0.2032790711243363, "grad_norm": 0.9881855600799769, "learning_rate": 9.233199033756225e-06, "loss": 0.1712, "step": 6968 }, { "epoch": 0.20330824435497988, "grad_norm": 0.844767954923981, "learning_rate": 9.232947601293434e-06, "loss": 0.1582, "step": 6969 }, { "epoch": 0.20333741758562343, "grad_norm": 0.8276002682189507, "learning_rate": 9.232696131039896e-06, "loss": 0.1791, "step": 6970 }, { "epoch": 0.20336659081626698, "grad_norm": 0.8617800403501159, "learning_rate": 9.232444622997856e-06, "loss": 0.1616, "step": 6971 }, { "epoch": 0.20339576404691057, "grad_norm": 0.8659663059040512, "learning_rate": 9.232193077169564e-06, "loss": 0.1654, "step": 6972 }, { "epoch": 0.20342493727755412, "grad_norm": 0.779554960350323, "learning_rate": 9.23194149355726e-06, "loss": 0.151, "step": 6973 }, { "epoch": 0.20345411050819767, "grad_norm": 0.8464240910273081, "learning_rate": 9.231689872163193e-06, "loss": 0.1754, "step": 6974 }, { "epoch": 0.20348328373884125, "grad_norm": 0.8346599537505763, "learning_rate": 9.23143821298961e-06, "loss": 0.1564, "step": 6975 }, { "epoch": 0.2035124569694848, "grad_norm": 0.7887283247170853, "learning_rate": 9.231186516038756e-06, "loss": 0.169, "step": 6976 }, { "epoch": 0.20354163020012836, "grad_norm": 0.8313811211179252, "learning_rate": 9.230934781312879e-06, "loss": 0.1657, "step": 6977 }, { "epoch": 0.2035708034307719, "grad_norm": 0.6988999318097441, "learning_rate": 9.230683008814226e-06, "loss": 0.1501, "step": 6978 }, { "epoch": 0.2035999766614155, "grad_norm": 0.9094972559651215, "learning_rate": 9.230431198545045e-06, "loss": 0.1589, "step": 6979 }, { "epoch": 0.20362914989205905, "grad_norm": 0.8904694721866652, "learning_rate": 9.230179350507584e-06, "loss": 0.1537, "step": 6980 }, { "epoch": 0.2036583231227026, "grad_norm": 0.9547194753711763, "learning_rate": 9.229927464704094e-06, "loss": 0.1606, "step": 6981 }, { "epoch": 0.20368749635334618, "grad_norm": 0.9622437302424728, "learning_rate": 9.22967554113682e-06, "loss": 0.1509, "step": 6982 }, { "epoch": 0.20371666958398973, "grad_norm": 0.8020621072325536, "learning_rate": 9.22942357980801e-06, "loss": 0.1726, "step": 6983 }, { "epoch": 0.2037458428146333, "grad_norm": 0.8312982184894929, "learning_rate": 9.229171580719917e-06, "loss": 0.1567, "step": 6984 }, { "epoch": 0.20377501604527684, "grad_norm": 0.9668106852012995, "learning_rate": 9.228919543874793e-06, "loss": 0.1985, "step": 6985 }, { "epoch": 0.20380418927592042, "grad_norm": 0.8657568338629704, "learning_rate": 9.22866746927488e-06, "loss": 0.1732, "step": 6986 }, { "epoch": 0.20383336250656398, "grad_norm": 0.8623396974875376, "learning_rate": 9.228415356922437e-06, "loss": 0.1663, "step": 6987 }, { "epoch": 0.20386253573720753, "grad_norm": 0.8453525214965825, "learning_rate": 9.228163206819709e-06, "loss": 0.198, "step": 6988 }, { "epoch": 0.2038917089678511, "grad_norm": 0.9125294274115585, "learning_rate": 9.22791101896895e-06, "loss": 0.1661, "step": 6989 }, { "epoch": 0.20392088219849466, "grad_norm": 0.8576353726170916, "learning_rate": 9.227658793372412e-06, "loss": 0.1639, "step": 6990 }, { "epoch": 0.20395005542913822, "grad_norm": 0.8394250192109743, "learning_rate": 9.227406530032343e-06, "loss": 0.2013, "step": 6991 }, { "epoch": 0.2039792286597818, "grad_norm": 0.8698217646400507, "learning_rate": 9.227154228951e-06, "loss": 0.1581, "step": 6992 }, { "epoch": 0.20400840189042535, "grad_norm": 0.7342443622768361, "learning_rate": 9.226901890130632e-06, "loss": 0.1646, "step": 6993 }, { "epoch": 0.2040375751210689, "grad_norm": 0.7905139694958258, "learning_rate": 9.226649513573494e-06, "loss": 0.1632, "step": 6994 }, { "epoch": 0.20406674835171246, "grad_norm": 0.9604884114024702, "learning_rate": 9.226397099281837e-06, "loss": 0.1966, "step": 6995 }, { "epoch": 0.20409592158235604, "grad_norm": 0.8188683052092606, "learning_rate": 9.226144647257916e-06, "loss": 0.1489, "step": 6996 }, { "epoch": 0.2041250948129996, "grad_norm": 1.0209871621250723, "learning_rate": 9.225892157503983e-06, "loss": 0.16, "step": 6997 }, { "epoch": 0.20415426804364314, "grad_norm": 0.9416078696474081, "learning_rate": 9.225639630022295e-06, "loss": 0.1827, "step": 6998 }, { "epoch": 0.20418344127428673, "grad_norm": 0.9224229764513623, "learning_rate": 9.225387064815106e-06, "loss": 0.175, "step": 6999 }, { "epoch": 0.20421261450493028, "grad_norm": 1.0303395643999491, "learning_rate": 9.225134461884668e-06, "loss": 0.1511, "step": 7000 }, { "epoch": 0.20424178773557383, "grad_norm": 0.82603145282564, "learning_rate": 9.224881821233239e-06, "loss": 0.1639, "step": 7001 }, { "epoch": 0.20427096096621739, "grad_norm": 0.9880245574139768, "learning_rate": 9.224629142863075e-06, "loss": 0.1844, "step": 7002 }, { "epoch": 0.20430013419686097, "grad_norm": 1.2395735111046278, "learning_rate": 9.224376426776428e-06, "loss": 0.1764, "step": 7003 }, { "epoch": 0.20432930742750452, "grad_norm": 0.7913835154235777, "learning_rate": 9.224123672975557e-06, "loss": 0.1789, "step": 7004 }, { "epoch": 0.20435848065814807, "grad_norm": 1.1300233207007448, "learning_rate": 9.22387088146272e-06, "loss": 0.1538, "step": 7005 }, { "epoch": 0.20438765388879165, "grad_norm": 1.1307792259323688, "learning_rate": 9.223618052240171e-06, "loss": 0.1743, "step": 7006 }, { "epoch": 0.2044168271194352, "grad_norm": 0.8735797992845247, "learning_rate": 9.22336518531017e-06, "loss": 0.1587, "step": 7007 }, { "epoch": 0.20444600035007876, "grad_norm": 0.796213194462923, "learning_rate": 9.223112280674971e-06, "loss": 0.1574, "step": 7008 }, { "epoch": 0.20447517358072234, "grad_norm": 0.9041885368326816, "learning_rate": 9.222859338336834e-06, "loss": 0.171, "step": 7009 }, { "epoch": 0.2045043468113659, "grad_norm": 0.7192505392225091, "learning_rate": 9.222606358298017e-06, "loss": 0.1529, "step": 7010 }, { "epoch": 0.20453352004200945, "grad_norm": 0.8520647621195595, "learning_rate": 9.222353340560779e-06, "loss": 0.1694, "step": 7011 }, { "epoch": 0.204562693272653, "grad_norm": 0.7528888103249979, "learning_rate": 9.222100285127376e-06, "loss": 0.1527, "step": 7012 }, { "epoch": 0.20459186650329658, "grad_norm": 0.8277757246979951, "learning_rate": 9.221847192000072e-06, "loss": 0.1966, "step": 7013 }, { "epoch": 0.20462103973394014, "grad_norm": 0.762915048514747, "learning_rate": 9.221594061181122e-06, "loss": 0.1638, "step": 7014 }, { "epoch": 0.2046502129645837, "grad_norm": 0.9475856252908226, "learning_rate": 9.22134089267279e-06, "loss": 0.182, "step": 7015 }, { "epoch": 0.20467938619522727, "grad_norm": 0.8304613080203108, "learning_rate": 9.221087686477335e-06, "loss": 0.144, "step": 7016 }, { "epoch": 0.20470855942587082, "grad_norm": 0.9407708063085051, "learning_rate": 9.220834442597015e-06, "loss": 0.1406, "step": 7017 }, { "epoch": 0.20473773265651438, "grad_norm": 0.9594131857982843, "learning_rate": 9.220581161034093e-06, "loss": 0.1919, "step": 7018 }, { "epoch": 0.20476690588715796, "grad_norm": 0.8012598194062235, "learning_rate": 9.22032784179083e-06, "loss": 0.1613, "step": 7019 }, { "epoch": 0.2047960791178015, "grad_norm": 0.783222446437833, "learning_rate": 9.220074484869488e-06, "loss": 0.1524, "step": 7020 }, { "epoch": 0.20482525234844506, "grad_norm": 1.0735242632621904, "learning_rate": 9.219821090272326e-06, "loss": 0.1649, "step": 7021 }, { "epoch": 0.20485442557908862, "grad_norm": 0.8837234931799185, "learning_rate": 9.219567658001613e-06, "loss": 0.1657, "step": 7022 }, { "epoch": 0.2048835988097322, "grad_norm": 0.8152130609318874, "learning_rate": 9.219314188059605e-06, "loss": 0.1556, "step": 7023 }, { "epoch": 0.20491277204037575, "grad_norm": 0.8369414123535003, "learning_rate": 9.219060680448567e-06, "loss": 0.1939, "step": 7024 }, { "epoch": 0.2049419452710193, "grad_norm": 1.0024558097070586, "learning_rate": 9.218807135170763e-06, "loss": 0.1925, "step": 7025 }, { "epoch": 0.20497111850166289, "grad_norm": 0.7609945916463682, "learning_rate": 9.218553552228454e-06, "loss": 0.1798, "step": 7026 }, { "epoch": 0.20500029173230644, "grad_norm": 0.8195889686366729, "learning_rate": 9.218299931623907e-06, "loss": 0.1609, "step": 7027 }, { "epoch": 0.20502946496295, "grad_norm": 0.8855074580997848, "learning_rate": 9.218046273359385e-06, "loss": 0.1663, "step": 7028 }, { "epoch": 0.20505863819359355, "grad_norm": 0.8058363475589431, "learning_rate": 9.217792577437154e-06, "loss": 0.1615, "step": 7029 }, { "epoch": 0.20508781142423713, "grad_norm": 0.7965888061691665, "learning_rate": 9.217538843859477e-06, "loss": 0.184, "step": 7030 }, { "epoch": 0.20511698465488068, "grad_norm": 0.9233542755276904, "learning_rate": 9.217285072628621e-06, "loss": 0.1978, "step": 7031 }, { "epoch": 0.20514615788552423, "grad_norm": 0.7417683680594019, "learning_rate": 9.217031263746849e-06, "loss": 0.1549, "step": 7032 }, { "epoch": 0.20517533111616781, "grad_norm": 0.7508384502098701, "learning_rate": 9.216777417216429e-06, "loss": 0.1561, "step": 7033 }, { "epoch": 0.20520450434681137, "grad_norm": 0.7734620855738245, "learning_rate": 9.216523533039628e-06, "loss": 0.1619, "step": 7034 }, { "epoch": 0.20523367757745492, "grad_norm": 0.8909058400944175, "learning_rate": 9.21626961121871e-06, "loss": 0.1733, "step": 7035 }, { "epoch": 0.2052628508080985, "grad_norm": 0.8887479590938959, "learning_rate": 9.216015651755944e-06, "loss": 0.1708, "step": 7036 }, { "epoch": 0.20529202403874205, "grad_norm": 0.7810672670879556, "learning_rate": 9.215761654653597e-06, "loss": 0.1761, "step": 7037 }, { "epoch": 0.2053211972693856, "grad_norm": 0.825915603229344, "learning_rate": 9.215507619913937e-06, "loss": 0.1416, "step": 7038 }, { "epoch": 0.20535037050002916, "grad_norm": 0.9494067374597331, "learning_rate": 9.215253547539229e-06, "loss": 0.1604, "step": 7039 }, { "epoch": 0.20537954373067274, "grad_norm": 0.849334261808857, "learning_rate": 9.214999437531746e-06, "loss": 0.1665, "step": 7040 }, { "epoch": 0.2054087169613163, "grad_norm": 0.9562283780863475, "learning_rate": 9.214745289893753e-06, "loss": 0.168, "step": 7041 }, { "epoch": 0.20543789019195985, "grad_norm": 0.8395458214040364, "learning_rate": 9.21449110462752e-06, "loss": 0.1501, "step": 7042 }, { "epoch": 0.20546706342260343, "grad_norm": 0.8971796442230011, "learning_rate": 9.214236881735317e-06, "loss": 0.134, "step": 7043 }, { "epoch": 0.20549623665324698, "grad_norm": 1.0300220366056503, "learning_rate": 9.213982621219413e-06, "loss": 0.1744, "step": 7044 }, { "epoch": 0.20552540988389054, "grad_norm": 1.1743529491957736, "learning_rate": 9.213728323082079e-06, "loss": 0.1748, "step": 7045 }, { "epoch": 0.20555458311453412, "grad_norm": 1.0193126074031889, "learning_rate": 9.213473987325583e-06, "loss": 0.1632, "step": 7046 }, { "epoch": 0.20558375634517767, "grad_norm": 0.8004089414962124, "learning_rate": 9.213219613952198e-06, "loss": 0.1524, "step": 7047 }, { "epoch": 0.20561292957582122, "grad_norm": 1.0382022058331408, "learning_rate": 9.212965202964192e-06, "loss": 0.172, "step": 7048 }, { "epoch": 0.20564210280646478, "grad_norm": 0.8307365765560831, "learning_rate": 9.212710754363841e-06, "loss": 0.1494, "step": 7049 }, { "epoch": 0.20567127603710836, "grad_norm": 0.89444786051651, "learning_rate": 9.212456268153414e-06, "loss": 0.1748, "step": 7050 }, { "epoch": 0.2057004492677519, "grad_norm": 0.7110872416229941, "learning_rate": 9.212201744335182e-06, "loss": 0.16, "step": 7051 }, { "epoch": 0.20572962249839546, "grad_norm": 0.9553424011884314, "learning_rate": 9.211947182911418e-06, "loss": 0.1777, "step": 7052 }, { "epoch": 0.20575879572903905, "grad_norm": 0.8427944379759794, "learning_rate": 9.211692583884395e-06, "loss": 0.1692, "step": 7053 }, { "epoch": 0.2057879689596826, "grad_norm": 0.8912967009010678, "learning_rate": 9.211437947256387e-06, "loss": 0.1734, "step": 7054 }, { "epoch": 0.20581714219032615, "grad_norm": 1.0440606064314335, "learning_rate": 9.211183273029667e-06, "loss": 0.1605, "step": 7055 }, { "epoch": 0.2058463154209697, "grad_norm": 0.8608823932440356, "learning_rate": 9.210928561206507e-06, "loss": 0.174, "step": 7056 }, { "epoch": 0.2058754886516133, "grad_norm": 0.8798085720541676, "learning_rate": 9.210673811789181e-06, "loss": 0.1549, "step": 7057 }, { "epoch": 0.20590466188225684, "grad_norm": 1.0416204542312937, "learning_rate": 9.210419024779967e-06, "loss": 0.1979, "step": 7058 }, { "epoch": 0.2059338351129004, "grad_norm": 0.8238960482510895, "learning_rate": 9.210164200181133e-06, "loss": 0.164, "step": 7059 }, { "epoch": 0.20596300834354397, "grad_norm": 0.9591263439518445, "learning_rate": 9.209909337994963e-06, "loss": 0.1715, "step": 7060 }, { "epoch": 0.20599218157418753, "grad_norm": 0.8637896079605812, "learning_rate": 9.209654438223724e-06, "loss": 0.1736, "step": 7061 }, { "epoch": 0.20602135480483108, "grad_norm": 0.8204201986572738, "learning_rate": 9.209399500869695e-06, "loss": 0.1771, "step": 7062 }, { "epoch": 0.20605052803547466, "grad_norm": 0.8331051108982461, "learning_rate": 9.209144525935154e-06, "loss": 0.1635, "step": 7063 }, { "epoch": 0.20607970126611821, "grad_norm": 0.8698195907117675, "learning_rate": 9.208889513422374e-06, "loss": 0.1653, "step": 7064 }, { "epoch": 0.20610887449676177, "grad_norm": 0.7944483701447789, "learning_rate": 9.208634463333634e-06, "loss": 0.1462, "step": 7065 }, { "epoch": 0.20613804772740532, "grad_norm": 0.9980133372433863, "learning_rate": 9.20837937567121e-06, "loss": 0.1849, "step": 7066 }, { "epoch": 0.2061672209580489, "grad_norm": 0.8137617243493083, "learning_rate": 9.20812425043738e-06, "loss": 0.1949, "step": 7067 }, { "epoch": 0.20619639418869246, "grad_norm": 0.9273455101518907, "learning_rate": 9.20786908763442e-06, "loss": 0.1379, "step": 7068 }, { "epoch": 0.206225567419336, "grad_norm": 0.8629397425055924, "learning_rate": 9.20761388726461e-06, "loss": 0.1676, "step": 7069 }, { "epoch": 0.2062547406499796, "grad_norm": 0.6833022223183531, "learning_rate": 9.207358649330229e-06, "loss": 0.1484, "step": 7070 }, { "epoch": 0.20628391388062314, "grad_norm": 0.8137189089894815, "learning_rate": 9.207103373833553e-06, "loss": 0.1598, "step": 7071 }, { "epoch": 0.2063130871112667, "grad_norm": 0.9811256991406814, "learning_rate": 9.206848060776861e-06, "loss": 0.1588, "step": 7072 }, { "epoch": 0.20634226034191025, "grad_norm": 0.8532945645803387, "learning_rate": 9.206592710162436e-06, "loss": 0.1549, "step": 7073 }, { "epoch": 0.20637143357255383, "grad_norm": 0.8748392553037043, "learning_rate": 9.206337321992554e-06, "loss": 0.1729, "step": 7074 }, { "epoch": 0.20640060680319738, "grad_norm": 0.7947230153235141, "learning_rate": 9.206081896269498e-06, "loss": 0.1427, "step": 7075 }, { "epoch": 0.20642978003384094, "grad_norm": 0.8735131842080504, "learning_rate": 9.205826432995547e-06, "loss": 0.1874, "step": 7076 }, { "epoch": 0.20645895326448452, "grad_norm": 0.904135571112561, "learning_rate": 9.20557093217298e-06, "loss": 0.1574, "step": 7077 }, { "epoch": 0.20648812649512807, "grad_norm": 1.1024578813330324, "learning_rate": 9.20531539380408e-06, "loss": 0.152, "step": 7078 }, { "epoch": 0.20651729972577162, "grad_norm": 0.8570271618694294, "learning_rate": 9.205059817891128e-06, "loss": 0.1478, "step": 7079 }, { "epoch": 0.2065464729564152, "grad_norm": 0.7449770153439637, "learning_rate": 9.204804204436406e-06, "loss": 0.1632, "step": 7080 }, { "epoch": 0.20657564618705876, "grad_norm": 0.9955024876594831, "learning_rate": 9.204548553442196e-06, "loss": 0.2087, "step": 7081 }, { "epoch": 0.2066048194177023, "grad_norm": 0.8777825750110577, "learning_rate": 9.204292864910781e-06, "loss": 0.1488, "step": 7082 }, { "epoch": 0.20663399264834587, "grad_norm": 0.8349816278410988, "learning_rate": 9.204037138844441e-06, "loss": 0.1598, "step": 7083 }, { "epoch": 0.20666316587898945, "grad_norm": 1.2555677252554858, "learning_rate": 9.203781375245465e-06, "loss": 0.182, "step": 7084 }, { "epoch": 0.206692339109633, "grad_norm": 0.8331678704155605, "learning_rate": 9.203525574116127e-06, "loss": 0.1719, "step": 7085 }, { "epoch": 0.20672151234027655, "grad_norm": 0.7213427739702032, "learning_rate": 9.20326973545872e-06, "loss": 0.1465, "step": 7086 }, { "epoch": 0.20675068557092013, "grad_norm": 0.7682660254920037, "learning_rate": 9.203013859275523e-06, "loss": 0.1567, "step": 7087 }, { "epoch": 0.2067798588015637, "grad_norm": 0.8731578594256073, "learning_rate": 9.202757945568822e-06, "loss": 0.1708, "step": 7088 }, { "epoch": 0.20680903203220724, "grad_norm": 0.8333449796435296, "learning_rate": 9.2025019943409e-06, "loss": 0.145, "step": 7089 }, { "epoch": 0.20683820526285082, "grad_norm": 0.8760915723100529, "learning_rate": 9.202246005594045e-06, "loss": 0.1705, "step": 7090 }, { "epoch": 0.20686737849349437, "grad_norm": 1.0430655009673637, "learning_rate": 9.20198997933054e-06, "loss": 0.1764, "step": 7091 }, { "epoch": 0.20689655172413793, "grad_norm": 0.8727107465988335, "learning_rate": 9.201733915552672e-06, "loss": 0.1528, "step": 7092 }, { "epoch": 0.20692572495478148, "grad_norm": 0.9070862177074983, "learning_rate": 9.201477814262727e-06, "loss": 0.1685, "step": 7093 }, { "epoch": 0.20695489818542506, "grad_norm": 0.9758127645458515, "learning_rate": 9.20122167546299e-06, "loss": 0.1574, "step": 7094 }, { "epoch": 0.20698407141606862, "grad_norm": 0.8160619992259059, "learning_rate": 9.20096549915575e-06, "loss": 0.1737, "step": 7095 }, { "epoch": 0.20701324464671217, "grad_norm": 0.7248935104136316, "learning_rate": 9.200709285343292e-06, "loss": 0.1451, "step": 7096 }, { "epoch": 0.20704241787735575, "grad_norm": 0.7895891594620774, "learning_rate": 9.200453034027903e-06, "loss": 0.1441, "step": 7097 }, { "epoch": 0.2070715911079993, "grad_norm": 0.9970660714634246, "learning_rate": 9.200196745211873e-06, "loss": 0.1796, "step": 7098 }, { "epoch": 0.20710076433864286, "grad_norm": 0.7827229750158712, "learning_rate": 9.19994041889749e-06, "loss": 0.1676, "step": 7099 }, { "epoch": 0.2071299375692864, "grad_norm": 0.7112071647046179, "learning_rate": 9.19968405508704e-06, "loss": 0.1454, "step": 7100 }, { "epoch": 0.20715911079993, "grad_norm": 0.6660471009508432, "learning_rate": 9.199427653782815e-06, "loss": 0.168, "step": 7101 }, { "epoch": 0.20718828403057354, "grad_norm": 0.8299462389483362, "learning_rate": 9.199171214987103e-06, "loss": 0.1601, "step": 7102 }, { "epoch": 0.2072174572612171, "grad_norm": 0.8344605365563602, "learning_rate": 9.198914738702191e-06, "loss": 0.148, "step": 7103 }, { "epoch": 0.20724663049186068, "grad_norm": 0.7109615234153235, "learning_rate": 9.19865822493037e-06, "loss": 0.1748, "step": 7104 }, { "epoch": 0.20727580372250423, "grad_norm": 0.9412419045345006, "learning_rate": 9.198401673673934e-06, "loss": 0.1813, "step": 7105 }, { "epoch": 0.20730497695314778, "grad_norm": 1.1276383291887195, "learning_rate": 9.198145084935167e-06, "loss": 0.1892, "step": 7106 }, { "epoch": 0.20733415018379137, "grad_norm": 0.9710707860135948, "learning_rate": 9.197888458716364e-06, "loss": 0.1624, "step": 7107 }, { "epoch": 0.20736332341443492, "grad_norm": 1.019701360818089, "learning_rate": 9.197631795019815e-06, "loss": 0.1566, "step": 7108 }, { "epoch": 0.20739249664507847, "grad_norm": 0.74507133497317, "learning_rate": 9.197375093847811e-06, "loss": 0.1758, "step": 7109 }, { "epoch": 0.20742166987572203, "grad_norm": 1.0144998879685472, "learning_rate": 9.197118355202644e-06, "loss": 0.1707, "step": 7110 }, { "epoch": 0.2074508431063656, "grad_norm": 0.8741559351263685, "learning_rate": 9.196861579086607e-06, "loss": 0.1722, "step": 7111 }, { "epoch": 0.20748001633700916, "grad_norm": 0.6842181467468504, "learning_rate": 9.196604765501991e-06, "loss": 0.1478, "step": 7112 }, { "epoch": 0.2075091895676527, "grad_norm": 0.9035682230126743, "learning_rate": 9.196347914451089e-06, "loss": 0.1753, "step": 7113 }, { "epoch": 0.2075383627982963, "grad_norm": 0.802764908227016, "learning_rate": 9.196091025936195e-06, "loss": 0.1662, "step": 7114 }, { "epoch": 0.20756753602893985, "grad_norm": 0.8312720985285272, "learning_rate": 9.195834099959604e-06, "loss": 0.155, "step": 7115 }, { "epoch": 0.2075967092595834, "grad_norm": 0.7328392867043356, "learning_rate": 9.195577136523606e-06, "loss": 0.171, "step": 7116 }, { "epoch": 0.20762588249022695, "grad_norm": 0.7986201166657328, "learning_rate": 9.195320135630496e-06, "loss": 0.1784, "step": 7117 }, { "epoch": 0.20765505572087053, "grad_norm": 0.8296984793649945, "learning_rate": 9.19506309728257e-06, "loss": 0.1426, "step": 7118 }, { "epoch": 0.2076842289515141, "grad_norm": 0.6494697354782675, "learning_rate": 9.194806021482123e-06, "loss": 0.1379, "step": 7119 }, { "epoch": 0.20771340218215764, "grad_norm": 1.2721754865246282, "learning_rate": 9.194548908231448e-06, "loss": 0.1491, "step": 7120 }, { "epoch": 0.20774257541280122, "grad_norm": 1.114672137703868, "learning_rate": 9.194291757532842e-06, "loss": 0.1481, "step": 7121 }, { "epoch": 0.20777174864344478, "grad_norm": 0.8637457423044791, "learning_rate": 9.194034569388602e-06, "loss": 0.1589, "step": 7122 }, { "epoch": 0.20780092187408833, "grad_norm": 0.7333022679873099, "learning_rate": 9.193777343801021e-06, "loss": 0.1765, "step": 7123 }, { "epoch": 0.2078300951047319, "grad_norm": 0.9906453369889608, "learning_rate": 9.193520080772398e-06, "loss": 0.1891, "step": 7124 }, { "epoch": 0.20785926833537546, "grad_norm": 0.9500924370608755, "learning_rate": 9.193262780305028e-06, "loss": 0.2019, "step": 7125 }, { "epoch": 0.20788844156601902, "grad_norm": 1.052800775250883, "learning_rate": 9.193005442401209e-06, "loss": 0.1505, "step": 7126 }, { "epoch": 0.20791761479666257, "grad_norm": 0.7881281559280178, "learning_rate": 9.192748067063238e-06, "loss": 0.1727, "step": 7127 }, { "epoch": 0.20794678802730615, "grad_norm": 0.9240133278018553, "learning_rate": 9.192490654293414e-06, "loss": 0.1557, "step": 7128 }, { "epoch": 0.2079759612579497, "grad_norm": 0.9479760475904528, "learning_rate": 9.192233204094034e-06, "loss": 0.1691, "step": 7129 }, { "epoch": 0.20800513448859326, "grad_norm": 0.9927279076327339, "learning_rate": 9.191975716467397e-06, "loss": 0.1541, "step": 7130 }, { "epoch": 0.20803430771923684, "grad_norm": 0.9893259101543747, "learning_rate": 9.1917181914158e-06, "loss": 0.1408, "step": 7131 }, { "epoch": 0.2080634809498804, "grad_norm": 0.895924776365021, "learning_rate": 9.191460628941544e-06, "loss": 0.1874, "step": 7132 }, { "epoch": 0.20809265418052394, "grad_norm": 0.9570681155974422, "learning_rate": 9.191203029046929e-06, "loss": 0.1894, "step": 7133 }, { "epoch": 0.20812182741116753, "grad_norm": 1.0782237602145626, "learning_rate": 9.190945391734254e-06, "loss": 0.1959, "step": 7134 }, { "epoch": 0.20815100064181108, "grad_norm": 0.879667010937371, "learning_rate": 9.190687717005818e-06, "loss": 0.1554, "step": 7135 }, { "epoch": 0.20818017387245463, "grad_norm": 1.7162003841975093, "learning_rate": 9.190430004863924e-06, "loss": 0.173, "step": 7136 }, { "epoch": 0.20820934710309819, "grad_norm": 1.114298507950001, "learning_rate": 9.190172255310869e-06, "loss": 0.1772, "step": 7137 }, { "epoch": 0.20823852033374177, "grad_norm": 0.9187882525918382, "learning_rate": 9.18991446834896e-06, "loss": 0.1714, "step": 7138 }, { "epoch": 0.20826769356438532, "grad_norm": 0.8934628611463499, "learning_rate": 9.189656643980492e-06, "loss": 0.1503, "step": 7139 }, { "epoch": 0.20829686679502887, "grad_norm": 0.8729740494879913, "learning_rate": 9.189398782207771e-06, "loss": 0.1561, "step": 7140 }, { "epoch": 0.20832604002567245, "grad_norm": 0.8532934030012483, "learning_rate": 9.189140883033097e-06, "loss": 0.1615, "step": 7141 }, { "epoch": 0.208355213256316, "grad_norm": 0.8884195549436464, "learning_rate": 9.188882946458773e-06, "loss": 0.175, "step": 7142 }, { "epoch": 0.20838438648695956, "grad_norm": 0.9929578120059851, "learning_rate": 9.188624972487101e-06, "loss": 0.1552, "step": 7143 }, { "epoch": 0.20841355971760311, "grad_norm": 0.8440531149573288, "learning_rate": 9.188366961120386e-06, "loss": 0.1798, "step": 7144 }, { "epoch": 0.2084427329482467, "grad_norm": 0.7885896114796663, "learning_rate": 9.188108912360932e-06, "loss": 0.1861, "step": 7145 }, { "epoch": 0.20847190617889025, "grad_norm": 1.0741397692528705, "learning_rate": 9.18785082621104e-06, "loss": 0.177, "step": 7146 }, { "epoch": 0.2085010794095338, "grad_norm": 0.7888149571309065, "learning_rate": 9.187592702673017e-06, "loss": 0.1751, "step": 7147 }, { "epoch": 0.20853025264017738, "grad_norm": 0.8882939061613188, "learning_rate": 9.187334541749165e-06, "loss": 0.167, "step": 7148 }, { "epoch": 0.20855942587082094, "grad_norm": 1.1154958535430273, "learning_rate": 9.187076343441787e-06, "loss": 0.1871, "step": 7149 }, { "epoch": 0.2085885991014645, "grad_norm": 0.8528943675779154, "learning_rate": 9.186818107753195e-06, "loss": 0.1558, "step": 7150 }, { "epoch": 0.20861777233210807, "grad_norm": 0.9105786245507598, "learning_rate": 9.18655983468569e-06, "loss": 0.1457, "step": 7151 }, { "epoch": 0.20864694556275162, "grad_norm": 0.8758734781424876, "learning_rate": 9.186301524241576e-06, "loss": 0.2017, "step": 7152 }, { "epoch": 0.20867611879339518, "grad_norm": 0.9515679452598051, "learning_rate": 9.186043176423162e-06, "loss": 0.1895, "step": 7153 }, { "epoch": 0.20870529202403873, "grad_norm": 0.9761232825843177, "learning_rate": 9.185784791232755e-06, "loss": 0.1914, "step": 7154 }, { "epoch": 0.2087344652546823, "grad_norm": 0.7893318345979233, "learning_rate": 9.185526368672662e-06, "loss": 0.1627, "step": 7155 }, { "epoch": 0.20876363848532586, "grad_norm": 0.9538821863437764, "learning_rate": 9.185267908745186e-06, "loss": 0.1758, "step": 7156 }, { "epoch": 0.20879281171596942, "grad_norm": 0.9263212016878108, "learning_rate": 9.185009411452638e-06, "loss": 0.1539, "step": 7157 }, { "epoch": 0.208821984946613, "grad_norm": 0.7255824406567902, "learning_rate": 9.184750876797325e-06, "loss": 0.1606, "step": 7158 }, { "epoch": 0.20885115817725655, "grad_norm": 0.8900029933857889, "learning_rate": 9.184492304781555e-06, "loss": 0.153, "step": 7159 }, { "epoch": 0.2088803314079001, "grad_norm": 0.8014889704446233, "learning_rate": 9.184233695407635e-06, "loss": 0.1592, "step": 7160 }, { "epoch": 0.20890950463854369, "grad_norm": 0.8585024131393774, "learning_rate": 9.18397504867788e-06, "loss": 0.1887, "step": 7161 }, { "epoch": 0.20893867786918724, "grad_norm": 0.6927086297948429, "learning_rate": 9.18371636459459e-06, "loss": 0.1509, "step": 7162 }, { "epoch": 0.2089678510998308, "grad_norm": 0.8285743828056754, "learning_rate": 9.183457643160082e-06, "loss": 0.1887, "step": 7163 }, { "epoch": 0.20899702433047435, "grad_norm": 0.861001703495805, "learning_rate": 9.183198884376661e-06, "loss": 0.1712, "step": 7164 }, { "epoch": 0.20902619756111793, "grad_norm": 0.7323659507296036, "learning_rate": 9.18294008824664e-06, "loss": 0.1697, "step": 7165 }, { "epoch": 0.20905537079176148, "grad_norm": 0.7159030178356711, "learning_rate": 9.182681254772327e-06, "loss": 0.168, "step": 7166 }, { "epoch": 0.20908454402240503, "grad_norm": 0.7549284021070887, "learning_rate": 9.182422383956036e-06, "loss": 0.1709, "step": 7167 }, { "epoch": 0.20911371725304861, "grad_norm": 0.94434557018474, "learning_rate": 9.182163475800077e-06, "loss": 0.1554, "step": 7168 }, { "epoch": 0.20914289048369217, "grad_norm": 0.8347875207314545, "learning_rate": 9.181904530306757e-06, "loss": 0.1729, "step": 7169 }, { "epoch": 0.20917206371433572, "grad_norm": 0.6783003229252659, "learning_rate": 9.181645547478395e-06, "loss": 0.1366, "step": 7170 }, { "epoch": 0.20920123694497927, "grad_norm": 1.1050779346558968, "learning_rate": 9.1813865273173e-06, "loss": 0.165, "step": 7171 }, { "epoch": 0.20923041017562286, "grad_norm": 0.8185826575869648, "learning_rate": 9.181127469825784e-06, "loss": 0.1742, "step": 7172 }, { "epoch": 0.2092595834062664, "grad_norm": 1.0439897072743844, "learning_rate": 9.180868375006158e-06, "loss": 0.1655, "step": 7173 }, { "epoch": 0.20928875663690996, "grad_norm": 1.0412342674908153, "learning_rate": 9.180609242860739e-06, "loss": 0.1684, "step": 7174 }, { "epoch": 0.20931792986755354, "grad_norm": 0.8897105234040097, "learning_rate": 9.180350073391838e-06, "loss": 0.1593, "step": 7175 }, { "epoch": 0.2093471030981971, "grad_norm": 0.8714788467791885, "learning_rate": 9.18009086660177e-06, "loss": 0.1723, "step": 7176 }, { "epoch": 0.20937627632884065, "grad_norm": 0.7005511708820562, "learning_rate": 9.179831622492847e-06, "loss": 0.135, "step": 7177 }, { "epoch": 0.20940544955948423, "grad_norm": 1.2540627711913974, "learning_rate": 9.179572341067387e-06, "loss": 0.149, "step": 7178 }, { "epoch": 0.20943462279012778, "grad_norm": 0.7243322612171238, "learning_rate": 9.179313022327703e-06, "loss": 0.1518, "step": 7179 }, { "epoch": 0.20946379602077134, "grad_norm": 0.8735805655729467, "learning_rate": 9.17905366627611e-06, "loss": 0.1772, "step": 7180 }, { "epoch": 0.2094929692514149, "grad_norm": 1.2367216747115835, "learning_rate": 9.178794272914921e-06, "loss": 0.16, "step": 7181 }, { "epoch": 0.20952214248205847, "grad_norm": 0.7919243540922308, "learning_rate": 9.178534842246457e-06, "loss": 0.1558, "step": 7182 }, { "epoch": 0.20955131571270202, "grad_norm": 0.6764322676637853, "learning_rate": 9.17827537427303e-06, "loss": 0.1633, "step": 7183 }, { "epoch": 0.20958048894334558, "grad_norm": 0.8948139087747934, "learning_rate": 9.178015868996959e-06, "loss": 0.169, "step": 7184 }, { "epoch": 0.20960966217398916, "grad_norm": 0.9265613587274324, "learning_rate": 9.17775632642056e-06, "loss": 0.1812, "step": 7185 }, { "epoch": 0.2096388354046327, "grad_norm": 0.8292242281181695, "learning_rate": 9.177496746546148e-06, "loss": 0.1568, "step": 7186 }, { "epoch": 0.20966800863527627, "grad_norm": 0.7472986987443127, "learning_rate": 9.177237129376043e-06, "loss": 0.1551, "step": 7187 }, { "epoch": 0.20969718186591982, "grad_norm": 0.9179470707908803, "learning_rate": 9.176977474912563e-06, "loss": 0.181, "step": 7188 }, { "epoch": 0.2097263550965634, "grad_norm": 0.9507366317154728, "learning_rate": 9.176717783158023e-06, "loss": 0.1752, "step": 7189 }, { "epoch": 0.20975552832720695, "grad_norm": 0.7908172868930405, "learning_rate": 9.176458054114746e-06, "loss": 0.1425, "step": 7190 }, { "epoch": 0.2097847015578505, "grad_norm": 0.9594138180980587, "learning_rate": 9.176198287785048e-06, "loss": 0.1714, "step": 7191 }, { "epoch": 0.2098138747884941, "grad_norm": 0.868901899928297, "learning_rate": 9.175938484171248e-06, "loss": 0.1654, "step": 7192 }, { "epoch": 0.20984304801913764, "grad_norm": 0.8398311088406083, "learning_rate": 9.175678643275668e-06, "loss": 0.1818, "step": 7193 }, { "epoch": 0.2098722212497812, "grad_norm": 0.8050506810692823, "learning_rate": 9.175418765100624e-06, "loss": 0.1681, "step": 7194 }, { "epoch": 0.20990139448042477, "grad_norm": 0.8963428993314191, "learning_rate": 9.175158849648438e-06, "loss": 0.1586, "step": 7195 }, { "epoch": 0.20993056771106833, "grad_norm": 0.8480606598967553, "learning_rate": 9.17489889692143e-06, "loss": 0.1618, "step": 7196 }, { "epoch": 0.20995974094171188, "grad_norm": 0.7181614838963151, "learning_rate": 9.174638906921921e-06, "loss": 0.1765, "step": 7197 }, { "epoch": 0.20998891417235543, "grad_norm": 0.850916230145168, "learning_rate": 9.174378879652235e-06, "loss": 0.1534, "step": 7198 }, { "epoch": 0.21001808740299902, "grad_norm": 0.9370392528764342, "learning_rate": 9.17411881511469e-06, "loss": 0.1849, "step": 7199 }, { "epoch": 0.21004726063364257, "grad_norm": 0.9624771284835438, "learning_rate": 9.173858713311606e-06, "loss": 0.1609, "step": 7200 }, { "epoch": 0.21007643386428612, "grad_norm": 0.7370014896462503, "learning_rate": 9.17359857424531e-06, "loss": 0.1477, "step": 7201 }, { "epoch": 0.2101056070949297, "grad_norm": 1.030148428794689, "learning_rate": 9.173338397918123e-06, "loss": 0.1738, "step": 7202 }, { "epoch": 0.21013478032557326, "grad_norm": 0.9207997100627249, "learning_rate": 9.173078184332366e-06, "loss": 0.1721, "step": 7203 }, { "epoch": 0.2101639535562168, "grad_norm": 0.7433046817468101, "learning_rate": 9.172817933490364e-06, "loss": 0.1439, "step": 7204 }, { "epoch": 0.2101931267868604, "grad_norm": 1.0273490729952162, "learning_rate": 9.172557645394438e-06, "loss": 0.1634, "step": 7205 }, { "epoch": 0.21022230001750394, "grad_norm": 0.9397161896137732, "learning_rate": 9.172297320046915e-06, "loss": 0.1654, "step": 7206 }, { "epoch": 0.2102514732481475, "grad_norm": 0.8099268542749183, "learning_rate": 9.172036957450116e-06, "loss": 0.1482, "step": 7207 }, { "epoch": 0.21028064647879105, "grad_norm": 0.8430220199734931, "learning_rate": 9.171776557606368e-06, "loss": 0.1539, "step": 7208 }, { "epoch": 0.21030981970943463, "grad_norm": 1.2139852278920045, "learning_rate": 9.171516120517993e-06, "loss": 0.2128, "step": 7209 }, { "epoch": 0.21033899294007818, "grad_norm": 0.9566087572947806, "learning_rate": 9.17125564618732e-06, "loss": 0.1805, "step": 7210 }, { "epoch": 0.21036816617072174, "grad_norm": 0.7991018451759162, "learning_rate": 9.170995134616673e-06, "loss": 0.1394, "step": 7211 }, { "epoch": 0.21039733940136532, "grad_norm": 0.9455057662208112, "learning_rate": 9.170734585808376e-06, "loss": 0.1541, "step": 7212 }, { "epoch": 0.21042651263200887, "grad_norm": 0.8655907379066675, "learning_rate": 9.170473999764755e-06, "loss": 0.184, "step": 7213 }, { "epoch": 0.21045568586265243, "grad_norm": 0.8199372371606958, "learning_rate": 9.17021337648814e-06, "loss": 0.1639, "step": 7214 }, { "epoch": 0.21048485909329598, "grad_norm": 0.6649741533327053, "learning_rate": 9.169952715980854e-06, "loss": 0.143, "step": 7215 }, { "epoch": 0.21051403232393956, "grad_norm": 0.9405125419912356, "learning_rate": 9.169692018245226e-06, "loss": 0.191, "step": 7216 }, { "epoch": 0.2105432055545831, "grad_norm": 0.8292947727476876, "learning_rate": 9.169431283283583e-06, "loss": 0.1738, "step": 7217 }, { "epoch": 0.21057237878522667, "grad_norm": 0.7121011638614227, "learning_rate": 9.169170511098254e-06, "loss": 0.1563, "step": 7218 }, { "epoch": 0.21060155201587025, "grad_norm": 0.846206531882245, "learning_rate": 9.168909701691564e-06, "loss": 0.1494, "step": 7219 }, { "epoch": 0.2106307252465138, "grad_norm": 0.9946974153910175, "learning_rate": 9.168648855065844e-06, "loss": 0.1695, "step": 7220 }, { "epoch": 0.21065989847715735, "grad_norm": 0.7830084892918774, "learning_rate": 9.168387971223422e-06, "loss": 0.1745, "step": 7221 }, { "epoch": 0.21068907170780093, "grad_norm": 0.8729232446653769, "learning_rate": 9.16812705016663e-06, "loss": 0.1511, "step": 7222 }, { "epoch": 0.2107182449384445, "grad_norm": 1.106811527660954, "learning_rate": 9.16786609189779e-06, "loss": 0.1821, "step": 7223 }, { "epoch": 0.21074741816908804, "grad_norm": 0.8419729335873963, "learning_rate": 9.167605096419238e-06, "loss": 0.1542, "step": 7224 }, { "epoch": 0.2107765913997316, "grad_norm": 0.9634351687740887, "learning_rate": 9.167344063733305e-06, "loss": 0.1516, "step": 7225 }, { "epoch": 0.21080576463037518, "grad_norm": 0.7983317844393885, "learning_rate": 9.167082993842317e-06, "loss": 0.1505, "step": 7226 }, { "epoch": 0.21083493786101873, "grad_norm": 0.7538471682521021, "learning_rate": 9.166821886748607e-06, "loss": 0.1335, "step": 7227 }, { "epoch": 0.21086411109166228, "grad_norm": 0.9316829504194767, "learning_rate": 9.166560742454507e-06, "loss": 0.1605, "step": 7228 }, { "epoch": 0.21089328432230586, "grad_norm": 0.8741634697097257, "learning_rate": 9.166299560962346e-06, "loss": 0.19, "step": 7229 }, { "epoch": 0.21092245755294942, "grad_norm": 0.8083318365286651, "learning_rate": 9.166038342274458e-06, "loss": 0.1578, "step": 7230 }, { "epoch": 0.21095163078359297, "grad_norm": 0.8987026413554663, "learning_rate": 9.165777086393173e-06, "loss": 0.1791, "step": 7231 }, { "epoch": 0.21098080401423655, "grad_norm": 0.9635897845510202, "learning_rate": 9.165515793320824e-06, "loss": 0.1622, "step": 7232 }, { "epoch": 0.2110099772448801, "grad_norm": 0.9937506673765454, "learning_rate": 9.165254463059747e-06, "loss": 0.1745, "step": 7233 }, { "epoch": 0.21103915047552366, "grad_norm": 0.8573155118927874, "learning_rate": 9.164993095612271e-06, "loss": 0.17, "step": 7234 }, { "epoch": 0.2110683237061672, "grad_norm": 1.0406524258434822, "learning_rate": 9.164731690980732e-06, "loss": 0.1857, "step": 7235 }, { "epoch": 0.2110974969368108, "grad_norm": 0.8987827523359867, "learning_rate": 9.16447024916746e-06, "loss": 0.1587, "step": 7236 }, { "epoch": 0.21112667016745434, "grad_norm": 0.6673609456296926, "learning_rate": 9.164208770174795e-06, "loss": 0.1444, "step": 7237 }, { "epoch": 0.2111558433980979, "grad_norm": 0.859594804297381, "learning_rate": 9.163947254005066e-06, "loss": 0.1545, "step": 7238 }, { "epoch": 0.21118501662874148, "grad_norm": 0.7358956790046004, "learning_rate": 9.163685700660611e-06, "loss": 0.15, "step": 7239 }, { "epoch": 0.21121418985938503, "grad_norm": 0.7973373779002741, "learning_rate": 9.163424110143763e-06, "loss": 0.1729, "step": 7240 }, { "epoch": 0.21124336309002859, "grad_norm": 0.7203175182166265, "learning_rate": 9.16316248245686e-06, "loss": 0.1332, "step": 7241 }, { "epoch": 0.21127253632067214, "grad_norm": 0.8740975631117702, "learning_rate": 9.162900817602235e-06, "loss": 0.1525, "step": 7242 }, { "epoch": 0.21130170955131572, "grad_norm": 0.7848806097179016, "learning_rate": 9.162639115582226e-06, "loss": 0.1545, "step": 7243 }, { "epoch": 0.21133088278195927, "grad_norm": 0.8666497284228482, "learning_rate": 9.16237737639917e-06, "loss": 0.1618, "step": 7244 }, { "epoch": 0.21136005601260283, "grad_norm": 0.7492361144512795, "learning_rate": 9.162115600055398e-06, "loss": 0.1736, "step": 7245 }, { "epoch": 0.2113892292432464, "grad_norm": 0.7576215749565726, "learning_rate": 9.161853786553256e-06, "loss": 0.1597, "step": 7246 }, { "epoch": 0.21141840247388996, "grad_norm": 0.8438264160498878, "learning_rate": 9.161591935895073e-06, "loss": 0.1486, "step": 7247 }, { "epoch": 0.2114475757045335, "grad_norm": 0.7609494232054379, "learning_rate": 9.161330048083194e-06, "loss": 0.1666, "step": 7248 }, { "epoch": 0.2114767489351771, "grad_norm": 0.8909393589085635, "learning_rate": 9.161068123119953e-06, "loss": 0.1615, "step": 7249 }, { "epoch": 0.21150592216582065, "grad_norm": 0.8393390508080685, "learning_rate": 9.160806161007687e-06, "loss": 0.1446, "step": 7250 }, { "epoch": 0.2115350953964642, "grad_norm": 0.7310636219773928, "learning_rate": 9.16054416174874e-06, "loss": 0.1617, "step": 7251 }, { "epoch": 0.21156426862710775, "grad_norm": 0.8136839895612361, "learning_rate": 9.160282125345445e-06, "loss": 0.1734, "step": 7252 }, { "epoch": 0.21159344185775134, "grad_norm": 0.8147171789853841, "learning_rate": 9.160020051800148e-06, "loss": 0.1512, "step": 7253 }, { "epoch": 0.2116226150883949, "grad_norm": 0.9635672195950745, "learning_rate": 9.159757941115181e-06, "loss": 0.1528, "step": 7254 }, { "epoch": 0.21165178831903844, "grad_norm": 0.7038737130414471, "learning_rate": 9.15949579329289e-06, "loss": 0.1742, "step": 7255 }, { "epoch": 0.21168096154968202, "grad_norm": 0.8915309327020401, "learning_rate": 9.159233608335614e-06, "loss": 0.1661, "step": 7256 }, { "epoch": 0.21171013478032558, "grad_norm": 0.7964819668145793, "learning_rate": 9.158971386245691e-06, "loss": 0.1285, "step": 7257 }, { "epoch": 0.21173930801096913, "grad_norm": 0.8556872958705937, "learning_rate": 9.158709127025468e-06, "loss": 0.1775, "step": 7258 }, { "epoch": 0.21176848124161268, "grad_norm": 0.7919850483184171, "learning_rate": 9.15844683067728e-06, "loss": 0.1612, "step": 7259 }, { "epoch": 0.21179765447225626, "grad_norm": 0.8689093484287964, "learning_rate": 9.15818449720347e-06, "loss": 0.1723, "step": 7260 }, { "epoch": 0.21182682770289982, "grad_norm": 1.0878408856516633, "learning_rate": 9.157922126606385e-06, "loss": 0.1561, "step": 7261 }, { "epoch": 0.21185600093354337, "grad_norm": 0.7054973680891888, "learning_rate": 9.157659718888362e-06, "loss": 0.1507, "step": 7262 }, { "epoch": 0.21188517416418695, "grad_norm": 0.7655988458983077, "learning_rate": 9.157397274051745e-06, "loss": 0.1776, "step": 7263 }, { "epoch": 0.2119143473948305, "grad_norm": 0.8851712451319848, "learning_rate": 9.157134792098878e-06, "loss": 0.2037, "step": 7264 }, { "epoch": 0.21194352062547406, "grad_norm": 0.9438857408147793, "learning_rate": 9.156872273032104e-06, "loss": 0.1795, "step": 7265 }, { "epoch": 0.21197269385611764, "grad_norm": 0.9911627622933965, "learning_rate": 9.156609716853768e-06, "loss": 0.1696, "step": 7266 }, { "epoch": 0.2120018670867612, "grad_norm": 0.8586388532504238, "learning_rate": 9.156347123566211e-06, "loss": 0.1657, "step": 7267 }, { "epoch": 0.21203104031740475, "grad_norm": 0.9447965772581178, "learning_rate": 9.15608449317178e-06, "loss": 0.1542, "step": 7268 }, { "epoch": 0.2120602135480483, "grad_norm": 1.0570953122206987, "learning_rate": 9.15582182567282e-06, "loss": 0.179, "step": 7269 }, { "epoch": 0.21208938677869188, "grad_norm": 0.9138467528532017, "learning_rate": 9.155559121071673e-06, "loss": 0.1824, "step": 7270 }, { "epoch": 0.21211856000933543, "grad_norm": 0.7993348248640859, "learning_rate": 9.155296379370686e-06, "loss": 0.1544, "step": 7271 }, { "epoch": 0.212147733239979, "grad_norm": 0.9384783663292866, "learning_rate": 9.155033600572206e-06, "loss": 0.1529, "step": 7272 }, { "epoch": 0.21217690647062257, "grad_norm": 0.9040254271351434, "learning_rate": 9.154770784678577e-06, "loss": 0.1603, "step": 7273 }, { "epoch": 0.21220607970126612, "grad_norm": 0.8428159003755334, "learning_rate": 9.154507931692146e-06, "loss": 0.1622, "step": 7274 }, { "epoch": 0.21223525293190967, "grad_norm": 0.932883922513483, "learning_rate": 9.154245041615262e-06, "loss": 0.1672, "step": 7275 }, { "epoch": 0.21226442616255325, "grad_norm": 0.6227746897426399, "learning_rate": 9.153982114450268e-06, "loss": 0.1557, "step": 7276 }, { "epoch": 0.2122935993931968, "grad_norm": 0.9073164938194024, "learning_rate": 9.153719150199513e-06, "loss": 0.1764, "step": 7277 }, { "epoch": 0.21232277262384036, "grad_norm": 1.0442867199960852, "learning_rate": 9.153456148865347e-06, "loss": 0.1545, "step": 7278 }, { "epoch": 0.21235194585448391, "grad_norm": 0.6690553638554411, "learning_rate": 9.153193110450115e-06, "loss": 0.1575, "step": 7279 }, { "epoch": 0.2123811190851275, "grad_norm": 0.8853732465828301, "learning_rate": 9.152930034956166e-06, "loss": 0.1572, "step": 7280 }, { "epoch": 0.21241029231577105, "grad_norm": 0.8426211020092063, "learning_rate": 9.152666922385849e-06, "loss": 0.1563, "step": 7281 }, { "epoch": 0.2124394655464146, "grad_norm": 0.7788186702083829, "learning_rate": 9.152403772741514e-06, "loss": 0.1511, "step": 7282 }, { "epoch": 0.21246863877705818, "grad_norm": 0.822945340131927, "learning_rate": 9.152140586025509e-06, "loss": 0.1744, "step": 7283 }, { "epoch": 0.21249781200770174, "grad_norm": 0.9471169841068209, "learning_rate": 9.151877362240182e-06, "loss": 0.1634, "step": 7284 }, { "epoch": 0.2125269852383453, "grad_norm": 0.8465634649866716, "learning_rate": 9.151614101387886e-06, "loss": 0.1814, "step": 7285 }, { "epoch": 0.21255615846898884, "grad_norm": 1.2962259187476264, "learning_rate": 9.151350803470971e-06, "loss": 0.1372, "step": 7286 }, { "epoch": 0.21258533169963242, "grad_norm": 0.9542451308170714, "learning_rate": 9.151087468491788e-06, "loss": 0.1836, "step": 7287 }, { "epoch": 0.21261450493027598, "grad_norm": 1.0098810004640373, "learning_rate": 9.150824096452686e-06, "loss": 0.1786, "step": 7288 }, { "epoch": 0.21264367816091953, "grad_norm": 1.0573907358946364, "learning_rate": 9.150560687356018e-06, "loss": 0.1796, "step": 7289 }, { "epoch": 0.2126728513915631, "grad_norm": 1.3555653612269074, "learning_rate": 9.150297241204134e-06, "loss": 0.1736, "step": 7290 }, { "epoch": 0.21270202462220666, "grad_norm": 0.9332835706812773, "learning_rate": 9.150033757999389e-06, "loss": 0.1935, "step": 7291 }, { "epoch": 0.21273119785285022, "grad_norm": 0.9551991891457929, "learning_rate": 9.149770237744132e-06, "loss": 0.1537, "step": 7292 }, { "epoch": 0.2127603710834938, "grad_norm": 1.1512656443502813, "learning_rate": 9.149506680440715e-06, "loss": 0.1569, "step": 7293 }, { "epoch": 0.21278954431413735, "grad_norm": 0.893945134974164, "learning_rate": 9.149243086091495e-06, "loss": 0.1679, "step": 7294 }, { "epoch": 0.2128187175447809, "grad_norm": 0.9385221012246392, "learning_rate": 9.148979454698824e-06, "loss": 0.1753, "step": 7295 }, { "epoch": 0.21284789077542446, "grad_norm": 0.8300356824294304, "learning_rate": 9.148715786265054e-06, "loss": 0.1542, "step": 7296 }, { "epoch": 0.21287706400606804, "grad_norm": 0.857791008708655, "learning_rate": 9.148452080792538e-06, "loss": 0.1686, "step": 7297 }, { "epoch": 0.2129062372367116, "grad_norm": 0.7634129287782235, "learning_rate": 9.148188338283635e-06, "loss": 0.1893, "step": 7298 }, { "epoch": 0.21293541046735515, "grad_norm": 0.7691221227717636, "learning_rate": 9.147924558740694e-06, "loss": 0.1527, "step": 7299 }, { "epoch": 0.21296458369799873, "grad_norm": 0.8425605026794849, "learning_rate": 9.147660742166075e-06, "loss": 0.1605, "step": 7300 }, { "epoch": 0.21299375692864228, "grad_norm": 0.9963815151871954, "learning_rate": 9.14739688856213e-06, "loss": 0.1768, "step": 7301 }, { "epoch": 0.21302293015928583, "grad_norm": 0.8846975435104927, "learning_rate": 9.147132997931216e-06, "loss": 0.1563, "step": 7302 }, { "epoch": 0.2130521033899294, "grad_norm": 0.8220305453751654, "learning_rate": 9.146869070275688e-06, "loss": 0.169, "step": 7303 }, { "epoch": 0.21308127662057297, "grad_norm": 0.7534101736131481, "learning_rate": 9.146605105597904e-06, "loss": 0.1514, "step": 7304 }, { "epoch": 0.21311044985121652, "grad_norm": 1.0260436057186861, "learning_rate": 9.146341103900219e-06, "loss": 0.1675, "step": 7305 }, { "epoch": 0.21313962308186007, "grad_norm": 0.7428550466098834, "learning_rate": 9.14607706518499e-06, "loss": 0.1613, "step": 7306 }, { "epoch": 0.21316879631250366, "grad_norm": 0.8292004776562788, "learning_rate": 9.145812989454576e-06, "loss": 0.169, "step": 7307 }, { "epoch": 0.2131979695431472, "grad_norm": 0.8611035287697096, "learning_rate": 9.145548876711332e-06, "loss": 0.1321, "step": 7308 }, { "epoch": 0.21322714277379076, "grad_norm": 0.7735265669200234, "learning_rate": 9.145284726957618e-06, "loss": 0.1811, "step": 7309 }, { "epoch": 0.21325631600443434, "grad_norm": 0.757016431699202, "learning_rate": 9.14502054019579e-06, "loss": 0.1535, "step": 7310 }, { "epoch": 0.2132854892350779, "grad_norm": 0.7863331618232147, "learning_rate": 9.14475631642821e-06, "loss": 0.167, "step": 7311 }, { "epoch": 0.21331466246572145, "grad_norm": 0.8302171844504787, "learning_rate": 9.144492055657234e-06, "loss": 0.1575, "step": 7312 }, { "epoch": 0.213343835696365, "grad_norm": 0.9145860360312988, "learning_rate": 9.144227757885222e-06, "loss": 0.1636, "step": 7313 }, { "epoch": 0.21337300892700858, "grad_norm": 1.0497092239261154, "learning_rate": 9.143963423114534e-06, "loss": 0.1355, "step": 7314 }, { "epoch": 0.21340218215765214, "grad_norm": 0.924946311542116, "learning_rate": 9.143699051347533e-06, "loss": 0.1707, "step": 7315 }, { "epoch": 0.2134313553882957, "grad_norm": 0.8193654544468313, "learning_rate": 9.14343464258657e-06, "loss": 0.1656, "step": 7316 }, { "epoch": 0.21346052861893927, "grad_norm": 0.8506878470010961, "learning_rate": 9.143170196834016e-06, "loss": 0.1508, "step": 7317 }, { "epoch": 0.21348970184958282, "grad_norm": 0.7402661330754645, "learning_rate": 9.142905714092228e-06, "loss": 0.1316, "step": 7318 }, { "epoch": 0.21351887508022638, "grad_norm": 0.8419823467258659, "learning_rate": 9.142641194363565e-06, "loss": 0.17, "step": 7319 }, { "epoch": 0.21354804831086996, "grad_norm": 0.9193798138031342, "learning_rate": 9.142376637650389e-06, "loss": 0.1707, "step": 7320 }, { "epoch": 0.2135772215415135, "grad_norm": 0.7789488088692962, "learning_rate": 9.142112043955065e-06, "loss": 0.1492, "step": 7321 }, { "epoch": 0.21360639477215707, "grad_norm": 0.8795139713047154, "learning_rate": 9.141847413279955e-06, "loss": 0.1623, "step": 7322 }, { "epoch": 0.21363556800280062, "grad_norm": 0.848520747485592, "learning_rate": 9.141582745627418e-06, "loss": 0.1824, "step": 7323 }, { "epoch": 0.2136647412334442, "grad_norm": 0.8085062744672866, "learning_rate": 9.141318040999818e-06, "loss": 0.2026, "step": 7324 }, { "epoch": 0.21369391446408775, "grad_norm": 1.1710800232932104, "learning_rate": 9.14105329939952e-06, "loss": 0.1367, "step": 7325 }, { "epoch": 0.2137230876947313, "grad_norm": 0.8742825967810263, "learning_rate": 9.140788520828887e-06, "loss": 0.1695, "step": 7326 }, { "epoch": 0.2137522609253749, "grad_norm": 0.9654315392518187, "learning_rate": 9.140523705290284e-06, "loss": 0.159, "step": 7327 }, { "epoch": 0.21378143415601844, "grad_norm": 0.756589814868092, "learning_rate": 9.140258852786073e-06, "loss": 0.1342, "step": 7328 }, { "epoch": 0.213810607386662, "grad_norm": 0.838268804707351, "learning_rate": 9.139993963318619e-06, "loss": 0.1719, "step": 7329 }, { "epoch": 0.21383978061730555, "grad_norm": 0.7408867817376356, "learning_rate": 9.139729036890286e-06, "loss": 0.1564, "step": 7330 }, { "epoch": 0.21386895384794913, "grad_norm": 0.7836876194083487, "learning_rate": 9.139464073503442e-06, "loss": 0.1464, "step": 7331 }, { "epoch": 0.21389812707859268, "grad_norm": 0.8026603789293204, "learning_rate": 9.13919907316045e-06, "loss": 0.168, "step": 7332 }, { "epoch": 0.21392730030923623, "grad_norm": 1.1129692994111364, "learning_rate": 9.138934035863676e-06, "loss": 0.1767, "step": 7333 }, { "epoch": 0.21395647353987982, "grad_norm": 0.9043989670577581, "learning_rate": 9.138668961615489e-06, "loss": 0.1605, "step": 7334 }, { "epoch": 0.21398564677052337, "grad_norm": 0.761803105172563, "learning_rate": 9.138403850418252e-06, "loss": 0.1579, "step": 7335 }, { "epoch": 0.21401482000116692, "grad_norm": 0.9006980494791728, "learning_rate": 9.138138702274334e-06, "loss": 0.1919, "step": 7336 }, { "epoch": 0.2140439932318105, "grad_norm": 1.0927969594980038, "learning_rate": 9.137873517186102e-06, "loss": 0.1497, "step": 7337 }, { "epoch": 0.21407316646245406, "grad_norm": 0.7630838897546133, "learning_rate": 9.137608295155922e-06, "loss": 0.1471, "step": 7338 }, { "epoch": 0.2141023396930976, "grad_norm": 0.7360159065532706, "learning_rate": 9.137343036186163e-06, "loss": 0.1565, "step": 7339 }, { "epoch": 0.21413151292374116, "grad_norm": 0.8045461629263935, "learning_rate": 9.137077740279193e-06, "loss": 0.1567, "step": 7340 }, { "epoch": 0.21416068615438474, "grad_norm": 0.8868403361881234, "learning_rate": 9.13681240743738e-06, "loss": 0.2215, "step": 7341 }, { "epoch": 0.2141898593850283, "grad_norm": 0.7991777175910662, "learning_rate": 9.136547037663095e-06, "loss": 0.1478, "step": 7342 }, { "epoch": 0.21421903261567185, "grad_norm": 0.8704840435846436, "learning_rate": 9.136281630958706e-06, "loss": 0.1701, "step": 7343 }, { "epoch": 0.21424820584631543, "grad_norm": 0.7646456628845033, "learning_rate": 9.13601618732658e-06, "loss": 0.161, "step": 7344 }, { "epoch": 0.21427737907695898, "grad_norm": 0.8046625131504545, "learning_rate": 9.135750706769089e-06, "loss": 0.1771, "step": 7345 }, { "epoch": 0.21430655230760254, "grad_norm": 0.8329365527287422, "learning_rate": 9.135485189288604e-06, "loss": 0.1893, "step": 7346 }, { "epoch": 0.21433572553824612, "grad_norm": 0.8606383080312506, "learning_rate": 9.135219634887493e-06, "loss": 0.1723, "step": 7347 }, { "epoch": 0.21436489876888967, "grad_norm": 0.8321246251854761, "learning_rate": 9.134954043568131e-06, "loss": 0.1775, "step": 7348 }, { "epoch": 0.21439407199953323, "grad_norm": 0.9277947646560082, "learning_rate": 9.134688415332885e-06, "loss": 0.156, "step": 7349 }, { "epoch": 0.21442324523017678, "grad_norm": 0.8517468199609406, "learning_rate": 9.134422750184127e-06, "loss": 0.1367, "step": 7350 }, { "epoch": 0.21445241846082036, "grad_norm": 1.2604531790201514, "learning_rate": 9.13415704812423e-06, "loss": 0.1661, "step": 7351 }, { "epoch": 0.2144815916914639, "grad_norm": 0.787396494273231, "learning_rate": 9.133891309155565e-06, "loss": 0.1515, "step": 7352 }, { "epoch": 0.21451076492210747, "grad_norm": 1.0771416045998476, "learning_rate": 9.133625533280505e-06, "loss": 0.1761, "step": 7353 }, { "epoch": 0.21453993815275105, "grad_norm": 0.8482116276404471, "learning_rate": 9.133359720501425e-06, "loss": 0.1564, "step": 7354 }, { "epoch": 0.2145691113833946, "grad_norm": 0.9357043607950191, "learning_rate": 9.133093870820695e-06, "loss": 0.1711, "step": 7355 }, { "epoch": 0.21459828461403815, "grad_norm": 0.8300983990280097, "learning_rate": 9.132827984240691e-06, "loss": 0.1593, "step": 7356 }, { "epoch": 0.2146274578446817, "grad_norm": 0.725622974870402, "learning_rate": 9.132562060763784e-06, "loss": 0.1616, "step": 7357 }, { "epoch": 0.2146566310753253, "grad_norm": 0.9195976435212265, "learning_rate": 9.13229610039235e-06, "loss": 0.184, "step": 7358 }, { "epoch": 0.21468580430596884, "grad_norm": 0.8605882375424254, "learning_rate": 9.132030103128762e-06, "loss": 0.2024, "step": 7359 }, { "epoch": 0.2147149775366124, "grad_norm": 0.907525777273798, "learning_rate": 9.131764068975397e-06, "loss": 0.1897, "step": 7360 }, { "epoch": 0.21474415076725598, "grad_norm": 0.9433606626723489, "learning_rate": 9.131497997934627e-06, "loss": 0.1466, "step": 7361 }, { "epoch": 0.21477332399789953, "grad_norm": 0.8210241521435729, "learning_rate": 9.13123189000883e-06, "loss": 0.1554, "step": 7362 }, { "epoch": 0.21480249722854308, "grad_norm": 0.8116618100076691, "learning_rate": 9.130965745200382e-06, "loss": 0.1662, "step": 7363 }, { "epoch": 0.21483167045918666, "grad_norm": 1.2190663889972422, "learning_rate": 9.130699563511656e-06, "loss": 0.1887, "step": 7364 }, { "epoch": 0.21486084368983022, "grad_norm": 0.8565025067762257, "learning_rate": 9.130433344945032e-06, "loss": 0.1604, "step": 7365 }, { "epoch": 0.21489001692047377, "grad_norm": 0.8834188656468229, "learning_rate": 9.130167089502884e-06, "loss": 0.1591, "step": 7366 }, { "epoch": 0.21491919015111732, "grad_norm": 1.143879753765153, "learning_rate": 9.12990079718759e-06, "loss": 0.1381, "step": 7367 }, { "epoch": 0.2149483633817609, "grad_norm": 0.8172458099029768, "learning_rate": 9.129634468001529e-06, "loss": 0.155, "step": 7368 }, { "epoch": 0.21497753661240446, "grad_norm": 0.8025807980795531, "learning_rate": 9.129368101947076e-06, "loss": 0.154, "step": 7369 }, { "epoch": 0.215006709843048, "grad_norm": 0.974295012210877, "learning_rate": 9.12910169902661e-06, "loss": 0.1945, "step": 7370 }, { "epoch": 0.2150358830736916, "grad_norm": 0.8587266874156988, "learning_rate": 9.128835259242511e-06, "loss": 0.1682, "step": 7371 }, { "epoch": 0.21506505630433514, "grad_norm": 0.8151983296692307, "learning_rate": 9.128568782597155e-06, "loss": 0.1767, "step": 7372 }, { "epoch": 0.2150942295349787, "grad_norm": 0.7577545340507429, "learning_rate": 9.128302269092925e-06, "loss": 0.1704, "step": 7373 }, { "epoch": 0.21512340276562225, "grad_norm": 0.8399926028136064, "learning_rate": 9.128035718732196e-06, "loss": 0.1669, "step": 7374 }, { "epoch": 0.21515257599626583, "grad_norm": 0.9750137504931491, "learning_rate": 9.12776913151735e-06, "loss": 0.1634, "step": 7375 }, { "epoch": 0.21518174922690939, "grad_norm": 0.8003817450825019, "learning_rate": 9.127502507450765e-06, "loss": 0.157, "step": 7376 }, { "epoch": 0.21521092245755294, "grad_norm": 0.8293215185825179, "learning_rate": 9.127235846534826e-06, "loss": 0.1671, "step": 7377 }, { "epoch": 0.21524009568819652, "grad_norm": 1.05160228048288, "learning_rate": 9.126969148771907e-06, "loss": 0.1662, "step": 7378 }, { "epoch": 0.21526926891884007, "grad_norm": 0.7691951627365682, "learning_rate": 9.126702414164395e-06, "loss": 0.1695, "step": 7379 }, { "epoch": 0.21529844214948363, "grad_norm": 0.8939838121579913, "learning_rate": 9.126435642714669e-06, "loss": 0.1517, "step": 7380 }, { "epoch": 0.2153276153801272, "grad_norm": 0.8513543627829114, "learning_rate": 9.12616883442511e-06, "loss": 0.1707, "step": 7381 }, { "epoch": 0.21535678861077076, "grad_norm": 0.9092042406280318, "learning_rate": 9.1259019892981e-06, "loss": 0.1994, "step": 7382 }, { "epoch": 0.21538596184141431, "grad_norm": 0.9000988792296255, "learning_rate": 9.125635107336024e-06, "loss": 0.1679, "step": 7383 }, { "epoch": 0.21541513507205787, "grad_norm": 0.908123743882174, "learning_rate": 9.125368188541262e-06, "loss": 0.1758, "step": 7384 }, { "epoch": 0.21544430830270145, "grad_norm": 0.8767753044865089, "learning_rate": 9.125101232916196e-06, "loss": 0.176, "step": 7385 }, { "epoch": 0.215473481533345, "grad_norm": 0.9149316587210717, "learning_rate": 9.124834240463212e-06, "loss": 0.1538, "step": 7386 }, { "epoch": 0.21550265476398855, "grad_norm": 0.8840923678120068, "learning_rate": 9.124567211184693e-06, "loss": 0.1675, "step": 7387 }, { "epoch": 0.21553182799463214, "grad_norm": 0.8313796305740393, "learning_rate": 9.124300145083022e-06, "loss": 0.2106, "step": 7388 }, { "epoch": 0.2155610012252757, "grad_norm": 1.081647456332479, "learning_rate": 9.124033042160583e-06, "loss": 0.1649, "step": 7389 }, { "epoch": 0.21559017445591924, "grad_norm": 0.7099284823286249, "learning_rate": 9.123765902419764e-06, "loss": 0.1427, "step": 7390 }, { "epoch": 0.21561934768656282, "grad_norm": 0.8659710311845469, "learning_rate": 9.123498725862946e-06, "loss": 0.1486, "step": 7391 }, { "epoch": 0.21564852091720638, "grad_norm": 0.8675062325411816, "learning_rate": 9.123231512492513e-06, "loss": 0.1584, "step": 7392 }, { "epoch": 0.21567769414784993, "grad_norm": 0.8166116693541626, "learning_rate": 9.122964262310858e-06, "loss": 0.1762, "step": 7393 }, { "epoch": 0.21570686737849348, "grad_norm": 0.8307253049564872, "learning_rate": 9.12269697532036e-06, "loss": 0.1694, "step": 7394 }, { "epoch": 0.21573604060913706, "grad_norm": 0.835149841674723, "learning_rate": 9.122429651523408e-06, "loss": 0.1825, "step": 7395 }, { "epoch": 0.21576521383978062, "grad_norm": 1.039023270171752, "learning_rate": 9.122162290922387e-06, "loss": 0.1541, "step": 7396 }, { "epoch": 0.21579438707042417, "grad_norm": 1.2286048092311068, "learning_rate": 9.121894893519688e-06, "loss": 0.1706, "step": 7397 }, { "epoch": 0.21582356030106775, "grad_norm": 0.8429100222118212, "learning_rate": 9.121627459317693e-06, "loss": 0.1796, "step": 7398 }, { "epoch": 0.2158527335317113, "grad_norm": 0.911287718055003, "learning_rate": 9.121359988318792e-06, "loss": 0.1771, "step": 7399 }, { "epoch": 0.21588190676235486, "grad_norm": 0.9175234071784473, "learning_rate": 9.121092480525374e-06, "loss": 0.1553, "step": 7400 }, { "epoch": 0.2159110799929984, "grad_norm": 0.9441039377984193, "learning_rate": 9.120824935939824e-06, "loss": 0.1682, "step": 7401 }, { "epoch": 0.215940253223642, "grad_norm": 0.7742096126268675, "learning_rate": 9.120557354564534e-06, "loss": 0.1871, "step": 7402 }, { "epoch": 0.21596942645428555, "grad_norm": 0.8067408667463365, "learning_rate": 9.120289736401892e-06, "loss": 0.1873, "step": 7403 }, { "epoch": 0.2159985996849291, "grad_norm": 0.7300165533170915, "learning_rate": 9.120022081454286e-06, "loss": 0.1776, "step": 7404 }, { "epoch": 0.21602777291557268, "grad_norm": 0.7454749963610728, "learning_rate": 9.119754389724107e-06, "loss": 0.1571, "step": 7405 }, { "epoch": 0.21605694614621623, "grad_norm": 0.7374077050525888, "learning_rate": 9.119486661213744e-06, "loss": 0.1824, "step": 7406 }, { "epoch": 0.2160861193768598, "grad_norm": 0.8220961724986665, "learning_rate": 9.119218895925588e-06, "loss": 0.1446, "step": 7407 }, { "epoch": 0.21611529260750337, "grad_norm": 0.7783355121923526, "learning_rate": 9.118951093862028e-06, "loss": 0.195, "step": 7408 }, { "epoch": 0.21614446583814692, "grad_norm": 0.7202058289041149, "learning_rate": 9.118683255025457e-06, "loss": 0.1704, "step": 7409 }, { "epoch": 0.21617363906879047, "grad_norm": 0.8610059331317936, "learning_rate": 9.118415379418265e-06, "loss": 0.1622, "step": 7410 }, { "epoch": 0.21620281229943403, "grad_norm": 0.7001027600457649, "learning_rate": 9.118147467042844e-06, "loss": 0.1421, "step": 7411 }, { "epoch": 0.2162319855300776, "grad_norm": 0.8022121923333063, "learning_rate": 9.117879517901584e-06, "loss": 0.1784, "step": 7412 }, { "epoch": 0.21626115876072116, "grad_norm": 0.9036733519026036, "learning_rate": 9.11761153199688e-06, "loss": 0.1584, "step": 7413 }, { "epoch": 0.21629033199136471, "grad_norm": 0.7732901311394017, "learning_rate": 9.117343509331122e-06, "loss": 0.1569, "step": 7414 }, { "epoch": 0.2163195052220083, "grad_norm": 0.8897167987673777, "learning_rate": 9.117075449906704e-06, "loss": 0.1618, "step": 7415 }, { "epoch": 0.21634867845265185, "grad_norm": 0.9203266233694063, "learning_rate": 9.11680735372602e-06, "loss": 0.1841, "step": 7416 }, { "epoch": 0.2163778516832954, "grad_norm": 0.9771782649620638, "learning_rate": 9.116539220791464e-06, "loss": 0.167, "step": 7417 }, { "epoch": 0.21640702491393898, "grad_norm": 0.7923600665984015, "learning_rate": 9.116271051105428e-06, "loss": 0.1596, "step": 7418 }, { "epoch": 0.21643619814458254, "grad_norm": 0.9232057168625178, "learning_rate": 9.116002844670304e-06, "loss": 0.1747, "step": 7419 }, { "epoch": 0.2164653713752261, "grad_norm": 0.7897568343124842, "learning_rate": 9.115734601488492e-06, "loss": 0.1449, "step": 7420 }, { "epoch": 0.21649454460586964, "grad_norm": 0.9717566598530014, "learning_rate": 9.115466321562384e-06, "loss": 0.1414, "step": 7421 }, { "epoch": 0.21652371783651322, "grad_norm": 0.7384852427970718, "learning_rate": 9.115198004894371e-06, "loss": 0.1689, "step": 7422 }, { "epoch": 0.21655289106715678, "grad_norm": 0.9349622934838513, "learning_rate": 9.114929651486857e-06, "loss": 0.1844, "step": 7423 }, { "epoch": 0.21658206429780033, "grad_norm": 1.1331587925570314, "learning_rate": 9.114661261342232e-06, "loss": 0.153, "step": 7424 }, { "epoch": 0.2166112375284439, "grad_norm": 0.8569517521357359, "learning_rate": 9.114392834462895e-06, "loss": 0.1398, "step": 7425 }, { "epoch": 0.21664041075908747, "grad_norm": 1.0594142407397262, "learning_rate": 9.114124370851238e-06, "loss": 0.1609, "step": 7426 }, { "epoch": 0.21666958398973102, "grad_norm": 0.9872499379799294, "learning_rate": 9.113855870509664e-06, "loss": 0.1944, "step": 7427 }, { "epoch": 0.21669875722037457, "grad_norm": 0.9348282425197145, "learning_rate": 9.113587333440566e-06, "loss": 0.16, "step": 7428 }, { "epoch": 0.21672793045101815, "grad_norm": 0.8385280054899792, "learning_rate": 9.11331875964634e-06, "loss": 0.1529, "step": 7429 }, { "epoch": 0.2167571036816617, "grad_norm": 0.8559787857518748, "learning_rate": 9.113050149129387e-06, "loss": 0.1605, "step": 7430 }, { "epoch": 0.21678627691230526, "grad_norm": 0.8517007332578688, "learning_rate": 9.112781501892105e-06, "loss": 0.161, "step": 7431 }, { "epoch": 0.21681545014294884, "grad_norm": 1.1456108258528437, "learning_rate": 9.112512817936892e-06, "loss": 0.1422, "step": 7432 }, { "epoch": 0.2168446233735924, "grad_norm": 0.9161175606348492, "learning_rate": 9.112244097266144e-06, "loss": 0.1636, "step": 7433 }, { "epoch": 0.21687379660423595, "grad_norm": 1.0118412085103796, "learning_rate": 9.111975339882265e-06, "loss": 0.1416, "step": 7434 }, { "epoch": 0.21690296983487953, "grad_norm": 0.7363456828612303, "learning_rate": 9.11170654578765e-06, "loss": 0.156, "step": 7435 }, { "epoch": 0.21693214306552308, "grad_norm": 0.7820229076664653, "learning_rate": 9.1114377149847e-06, "loss": 0.1583, "step": 7436 }, { "epoch": 0.21696131629616663, "grad_norm": 1.0942678168691158, "learning_rate": 9.11116884747582e-06, "loss": 0.1769, "step": 7437 }, { "epoch": 0.2169904895268102, "grad_norm": 0.9447698383398426, "learning_rate": 9.1108999432634e-06, "loss": 0.1871, "step": 7438 }, { "epoch": 0.21701966275745377, "grad_norm": 0.8157624572363903, "learning_rate": 9.11063100234985e-06, "loss": 0.1936, "step": 7439 }, { "epoch": 0.21704883598809732, "grad_norm": 0.9421469363701053, "learning_rate": 9.110362024737566e-06, "loss": 0.1633, "step": 7440 }, { "epoch": 0.21707800921874087, "grad_norm": 0.9607965558693335, "learning_rate": 9.110093010428953e-06, "loss": 0.1627, "step": 7441 }, { "epoch": 0.21710718244938446, "grad_norm": 0.8354674897137242, "learning_rate": 9.10982395942641e-06, "loss": 0.1966, "step": 7442 }, { "epoch": 0.217136355680028, "grad_norm": 0.906809921024923, "learning_rate": 9.10955487173234e-06, "loss": 0.1484, "step": 7443 }, { "epoch": 0.21716552891067156, "grad_norm": 0.980781509983675, "learning_rate": 9.109285747349145e-06, "loss": 0.1425, "step": 7444 }, { "epoch": 0.21719470214131512, "grad_norm": 0.8914872052533028, "learning_rate": 9.109016586279227e-06, "loss": 0.1559, "step": 7445 }, { "epoch": 0.2172238753719587, "grad_norm": 0.7714414266749681, "learning_rate": 9.10874738852499e-06, "loss": 0.1585, "step": 7446 }, { "epoch": 0.21725304860260225, "grad_norm": 1.0786465918729728, "learning_rate": 9.108478154088838e-06, "loss": 0.1649, "step": 7447 }, { "epoch": 0.2172822218332458, "grad_norm": 0.8838986337349987, "learning_rate": 9.108208882973172e-06, "loss": 0.1651, "step": 7448 }, { "epoch": 0.21731139506388938, "grad_norm": 0.7998682490861097, "learning_rate": 9.1079395751804e-06, "loss": 0.1799, "step": 7449 }, { "epoch": 0.21734056829453294, "grad_norm": 1.121077534441, "learning_rate": 9.107670230712924e-06, "loss": 0.1488, "step": 7450 }, { "epoch": 0.2173697415251765, "grad_norm": 0.8781274845047359, "learning_rate": 9.107400849573148e-06, "loss": 0.1773, "step": 7451 }, { "epoch": 0.21739891475582007, "grad_norm": 0.9108741953398324, "learning_rate": 9.107131431763479e-06, "loss": 0.1399, "step": 7452 }, { "epoch": 0.21742808798646363, "grad_norm": 1.4747423636955517, "learning_rate": 9.106861977286319e-06, "loss": 0.1823, "step": 7453 }, { "epoch": 0.21745726121710718, "grad_norm": 0.9139258343487516, "learning_rate": 9.106592486144077e-06, "loss": 0.1478, "step": 7454 }, { "epoch": 0.21748643444775073, "grad_norm": 0.9833107826241818, "learning_rate": 9.106322958339156e-06, "loss": 0.175, "step": 7455 }, { "epoch": 0.2175156076783943, "grad_norm": 0.9696490842829171, "learning_rate": 9.106053393873965e-06, "loss": 0.1738, "step": 7456 }, { "epoch": 0.21754478090903787, "grad_norm": 1.2310078909985334, "learning_rate": 9.105783792750909e-06, "loss": 0.1756, "step": 7457 }, { "epoch": 0.21757395413968142, "grad_norm": 0.7618525861111929, "learning_rate": 9.105514154972397e-06, "loss": 0.1577, "step": 7458 }, { "epoch": 0.217603127370325, "grad_norm": 0.9952546015463827, "learning_rate": 9.105244480540833e-06, "loss": 0.1793, "step": 7459 }, { "epoch": 0.21763230060096855, "grad_norm": 0.9626963435260152, "learning_rate": 9.104974769458626e-06, "loss": 0.1626, "step": 7460 }, { "epoch": 0.2176614738316121, "grad_norm": 0.9606094758968395, "learning_rate": 9.104705021728185e-06, "loss": 0.1722, "step": 7461 }, { "epoch": 0.2176906470622557, "grad_norm": 0.9611724485693799, "learning_rate": 9.104435237351918e-06, "loss": 0.1693, "step": 7462 }, { "epoch": 0.21771982029289924, "grad_norm": 0.994146211050947, "learning_rate": 9.104165416332232e-06, "loss": 0.1714, "step": 7463 }, { "epoch": 0.2177489935235428, "grad_norm": 0.7237656428923633, "learning_rate": 9.103895558671538e-06, "loss": 0.1474, "step": 7464 }, { "epoch": 0.21777816675418635, "grad_norm": 0.853741497086034, "learning_rate": 9.103625664372244e-06, "loss": 0.1638, "step": 7465 }, { "epoch": 0.21780733998482993, "grad_norm": 0.9604956953178158, "learning_rate": 9.10335573343676e-06, "loss": 0.1592, "step": 7466 }, { "epoch": 0.21783651321547348, "grad_norm": 0.7211730382503233, "learning_rate": 9.103085765867494e-06, "loss": 0.1483, "step": 7467 }, { "epoch": 0.21786568644611704, "grad_norm": 0.9003289263920882, "learning_rate": 9.102815761666857e-06, "loss": 0.1737, "step": 7468 }, { "epoch": 0.21789485967676062, "grad_norm": 1.063103587280332, "learning_rate": 9.102545720837264e-06, "loss": 0.1767, "step": 7469 }, { "epoch": 0.21792403290740417, "grad_norm": 0.9651305024078388, "learning_rate": 9.102275643381118e-06, "loss": 0.1749, "step": 7470 }, { "epoch": 0.21795320613804772, "grad_norm": 0.7664733929527952, "learning_rate": 9.102005529300837e-06, "loss": 0.1778, "step": 7471 }, { "epoch": 0.21798237936869128, "grad_norm": 0.7476178142610409, "learning_rate": 9.10173537859883e-06, "loss": 0.1598, "step": 7472 }, { "epoch": 0.21801155259933486, "grad_norm": 0.788871726907985, "learning_rate": 9.101465191277507e-06, "loss": 0.1597, "step": 7473 }, { "epoch": 0.2180407258299784, "grad_norm": 0.8464570107689131, "learning_rate": 9.101194967339284e-06, "loss": 0.1542, "step": 7474 }, { "epoch": 0.21806989906062196, "grad_norm": 0.9205452933907281, "learning_rate": 9.100924706786568e-06, "loss": 0.1511, "step": 7475 }, { "epoch": 0.21809907229126554, "grad_norm": 0.7740632248814412, "learning_rate": 9.100654409621779e-06, "loss": 0.1639, "step": 7476 }, { "epoch": 0.2181282455219091, "grad_norm": 0.6725205946707393, "learning_rate": 9.100384075847324e-06, "loss": 0.1458, "step": 7477 }, { "epoch": 0.21815741875255265, "grad_norm": 0.8691746230253524, "learning_rate": 9.10011370546562e-06, "loss": 0.1436, "step": 7478 }, { "epoch": 0.21818659198319623, "grad_norm": 0.8966021726089877, "learning_rate": 9.099843298479079e-06, "loss": 0.2026, "step": 7479 }, { "epoch": 0.21821576521383979, "grad_norm": 1.525537760543073, "learning_rate": 9.099572854890115e-06, "loss": 0.1721, "step": 7480 }, { "epoch": 0.21824493844448334, "grad_norm": 1.0278026389305943, "learning_rate": 9.099302374701145e-06, "loss": 0.1698, "step": 7481 }, { "epoch": 0.2182741116751269, "grad_norm": 0.810566816600142, "learning_rate": 9.09903185791458e-06, "loss": 0.1575, "step": 7482 }, { "epoch": 0.21830328490577047, "grad_norm": 0.9454530453508962, "learning_rate": 9.098761304532839e-06, "loss": 0.1806, "step": 7483 }, { "epoch": 0.21833245813641403, "grad_norm": 0.6411644748981236, "learning_rate": 9.098490714558335e-06, "loss": 0.136, "step": 7484 }, { "epoch": 0.21836163136705758, "grad_norm": 0.9555150607957652, "learning_rate": 9.098220087993484e-06, "loss": 0.1502, "step": 7485 }, { "epoch": 0.21839080459770116, "grad_norm": 0.7725450469261882, "learning_rate": 9.0979494248407e-06, "loss": 0.1755, "step": 7486 }, { "epoch": 0.2184199778283447, "grad_norm": 0.7376261609439652, "learning_rate": 9.097678725102406e-06, "loss": 0.1487, "step": 7487 }, { "epoch": 0.21844915105898827, "grad_norm": 0.9009890967373166, "learning_rate": 9.097407988781012e-06, "loss": 0.158, "step": 7488 }, { "epoch": 0.21847832428963182, "grad_norm": 0.9162151433377764, "learning_rate": 9.097137215878938e-06, "loss": 0.1578, "step": 7489 }, { "epoch": 0.2185074975202754, "grad_norm": 0.8071439964557588, "learning_rate": 9.096866406398601e-06, "loss": 0.1824, "step": 7490 }, { "epoch": 0.21853667075091895, "grad_norm": 0.8452596310026971, "learning_rate": 9.096595560342418e-06, "loss": 0.175, "step": 7491 }, { "epoch": 0.2185658439815625, "grad_norm": 0.9880919817564738, "learning_rate": 9.09632467771281e-06, "loss": 0.1785, "step": 7492 }, { "epoch": 0.2185950172122061, "grad_norm": 0.8639885120516009, "learning_rate": 9.096053758512193e-06, "loss": 0.1407, "step": 7493 }, { "epoch": 0.21862419044284964, "grad_norm": 0.9298884048629106, "learning_rate": 9.095782802742983e-06, "loss": 0.1597, "step": 7494 }, { "epoch": 0.2186533636734932, "grad_norm": 0.8963426486967087, "learning_rate": 9.095511810407605e-06, "loss": 0.1693, "step": 7495 }, { "epoch": 0.21868253690413678, "grad_norm": 0.779138913716008, "learning_rate": 9.095240781508472e-06, "loss": 0.152, "step": 7496 }, { "epoch": 0.21871171013478033, "grad_norm": 0.98885334805735, "learning_rate": 9.09496971604801e-06, "loss": 0.1658, "step": 7497 }, { "epoch": 0.21874088336542388, "grad_norm": 1.0373383670255278, "learning_rate": 9.094698614028635e-06, "loss": 0.171, "step": 7498 }, { "epoch": 0.21877005659606744, "grad_norm": 0.9342057049525488, "learning_rate": 9.094427475452767e-06, "loss": 0.1718, "step": 7499 }, { "epoch": 0.21879922982671102, "grad_norm": 0.9690013882216534, "learning_rate": 9.09415630032283e-06, "loss": 0.1728, "step": 7500 }, { "epoch": 0.21882840305735457, "grad_norm": 0.8017953998969738, "learning_rate": 9.09388508864124e-06, "loss": 0.1509, "step": 7501 }, { "epoch": 0.21885757628799812, "grad_norm": 1.2085718131544307, "learning_rate": 9.093613840410423e-06, "loss": 0.1743, "step": 7502 }, { "epoch": 0.2188867495186417, "grad_norm": 0.9052288263657564, "learning_rate": 9.0933425556328e-06, "loss": 0.1228, "step": 7503 }, { "epoch": 0.21891592274928526, "grad_norm": 0.7426645505106202, "learning_rate": 9.09307123431079e-06, "loss": 0.1555, "step": 7504 }, { "epoch": 0.2189450959799288, "grad_norm": 0.8284735612852144, "learning_rate": 9.092799876446818e-06, "loss": 0.1653, "step": 7505 }, { "epoch": 0.2189742692105724, "grad_norm": 1.1483666041753051, "learning_rate": 9.092528482043306e-06, "loss": 0.1621, "step": 7506 }, { "epoch": 0.21900344244121595, "grad_norm": 0.8718060940875548, "learning_rate": 9.092257051102675e-06, "loss": 0.156, "step": 7507 }, { "epoch": 0.2190326156718595, "grad_norm": 0.9244857952426913, "learning_rate": 9.091985583627352e-06, "loss": 0.1579, "step": 7508 }, { "epoch": 0.21906178890250305, "grad_norm": 0.9375404157081058, "learning_rate": 9.091714079619758e-06, "loss": 0.159, "step": 7509 }, { "epoch": 0.21909096213314663, "grad_norm": 0.9444245221277218, "learning_rate": 9.091442539082317e-06, "loss": 0.1669, "step": 7510 }, { "epoch": 0.21912013536379019, "grad_norm": 0.8695707828850077, "learning_rate": 9.091170962017453e-06, "loss": 0.1575, "step": 7511 }, { "epoch": 0.21914930859443374, "grad_norm": 0.9718117313019489, "learning_rate": 9.090899348427593e-06, "loss": 0.1587, "step": 7512 }, { "epoch": 0.21917848182507732, "grad_norm": 0.9249537069936109, "learning_rate": 9.090627698315159e-06, "loss": 0.1736, "step": 7513 }, { "epoch": 0.21920765505572087, "grad_norm": 1.0613644382339964, "learning_rate": 9.090356011682578e-06, "loss": 0.1854, "step": 7514 }, { "epoch": 0.21923682828636443, "grad_norm": 0.7595003707986981, "learning_rate": 9.090084288532276e-06, "loss": 0.1618, "step": 7515 }, { "epoch": 0.21926600151700798, "grad_norm": 0.8861136072560637, "learning_rate": 9.089812528866674e-06, "loss": 0.1818, "step": 7516 }, { "epoch": 0.21929517474765156, "grad_norm": 1.1073248286947113, "learning_rate": 9.089540732688205e-06, "loss": 0.1688, "step": 7517 }, { "epoch": 0.21932434797829511, "grad_norm": 0.8579089096518323, "learning_rate": 9.089268899999293e-06, "loss": 0.1597, "step": 7518 }, { "epoch": 0.21935352120893867, "grad_norm": 0.8120764675150153, "learning_rate": 9.088997030802364e-06, "loss": 0.1766, "step": 7519 }, { "epoch": 0.21938269443958225, "grad_norm": 1.160647013834858, "learning_rate": 9.088725125099844e-06, "loss": 0.1622, "step": 7520 }, { "epoch": 0.2194118676702258, "grad_norm": 0.9285213051213117, "learning_rate": 9.088453182894165e-06, "loss": 0.1598, "step": 7521 }, { "epoch": 0.21944104090086936, "grad_norm": 0.8613692267922717, "learning_rate": 9.08818120418775e-06, "loss": 0.1688, "step": 7522 }, { "epoch": 0.21947021413151294, "grad_norm": 0.8943126674686942, "learning_rate": 9.08790918898303e-06, "loss": 0.1662, "step": 7523 }, { "epoch": 0.2194993873621565, "grad_norm": 0.7885484357016502, "learning_rate": 9.087637137282432e-06, "loss": 0.1621, "step": 7524 }, { "epoch": 0.21952856059280004, "grad_norm": 0.7457779815187887, "learning_rate": 9.087365049088386e-06, "loss": 0.1645, "step": 7525 }, { "epoch": 0.2195577338234436, "grad_norm": 0.9917753671200845, "learning_rate": 9.08709292440332e-06, "loss": 0.1432, "step": 7526 }, { "epoch": 0.21958690705408718, "grad_norm": 1.0063350040424661, "learning_rate": 9.086820763229665e-06, "loss": 0.1587, "step": 7527 }, { "epoch": 0.21961608028473073, "grad_norm": 1.1737765979425956, "learning_rate": 9.086548565569848e-06, "loss": 0.1676, "step": 7528 }, { "epoch": 0.21964525351537428, "grad_norm": 0.9962712694939887, "learning_rate": 9.086276331426302e-06, "loss": 0.1493, "step": 7529 }, { "epoch": 0.21967442674601786, "grad_norm": 1.0197100473107363, "learning_rate": 9.086004060801456e-06, "loss": 0.1781, "step": 7530 }, { "epoch": 0.21970359997666142, "grad_norm": 0.8887979084517902, "learning_rate": 9.085731753697741e-06, "loss": 0.1709, "step": 7531 }, { "epoch": 0.21973277320730497, "grad_norm": 1.0573885465579034, "learning_rate": 9.085459410117589e-06, "loss": 0.1477, "step": 7532 }, { "epoch": 0.21976194643794855, "grad_norm": 0.8718632363111908, "learning_rate": 9.085187030063432e-06, "loss": 0.1606, "step": 7533 }, { "epoch": 0.2197911196685921, "grad_norm": 0.7976871165360285, "learning_rate": 9.084914613537699e-06, "loss": 0.1665, "step": 7534 }, { "epoch": 0.21982029289923566, "grad_norm": 0.8654040256073582, "learning_rate": 9.084642160542823e-06, "loss": 0.1699, "step": 7535 }, { "epoch": 0.2198494661298792, "grad_norm": 0.9982965024961783, "learning_rate": 9.084369671081237e-06, "loss": 0.1834, "step": 7536 }, { "epoch": 0.2198786393605228, "grad_norm": 0.7991190223835759, "learning_rate": 9.084097145155372e-06, "loss": 0.168, "step": 7537 }, { "epoch": 0.21990781259116635, "grad_norm": 0.8384227842961963, "learning_rate": 9.083824582767667e-06, "loss": 0.1568, "step": 7538 }, { "epoch": 0.2199369858218099, "grad_norm": 0.8639148551820021, "learning_rate": 9.083551983920546e-06, "loss": 0.1895, "step": 7539 }, { "epoch": 0.21996615905245348, "grad_norm": 0.8776760204043199, "learning_rate": 9.083279348616451e-06, "loss": 0.16, "step": 7540 }, { "epoch": 0.21999533228309703, "grad_norm": 0.8397931986688365, "learning_rate": 9.083006676857813e-06, "loss": 0.1624, "step": 7541 }, { "epoch": 0.2200245055137406, "grad_norm": 0.9393977801830081, "learning_rate": 9.082733968647064e-06, "loss": 0.1431, "step": 7542 }, { "epoch": 0.22005367874438414, "grad_norm": 1.2585098990210004, "learning_rate": 9.082461223986643e-06, "loss": 0.1519, "step": 7543 }, { "epoch": 0.22008285197502772, "grad_norm": 0.8099130895397219, "learning_rate": 9.08218844287898e-06, "loss": 0.1834, "step": 7544 }, { "epoch": 0.22011202520567127, "grad_norm": 0.8935366257897372, "learning_rate": 9.081915625326516e-06, "loss": 0.1707, "step": 7545 }, { "epoch": 0.22014119843631483, "grad_norm": 0.9456191841995787, "learning_rate": 9.081642771331681e-06, "loss": 0.1765, "step": 7546 }, { "epoch": 0.2201703716669584, "grad_norm": 1.0163265319686992, "learning_rate": 9.081369880896916e-06, "loss": 0.1672, "step": 7547 }, { "epoch": 0.22019954489760196, "grad_norm": 1.0068121967005164, "learning_rate": 9.081096954024653e-06, "loss": 0.167, "step": 7548 }, { "epoch": 0.22022871812824552, "grad_norm": 0.8631234713501891, "learning_rate": 9.080823990717332e-06, "loss": 0.1774, "step": 7549 }, { "epoch": 0.2202578913588891, "grad_norm": 0.938971277102497, "learning_rate": 9.080550990977388e-06, "loss": 0.1621, "step": 7550 }, { "epoch": 0.22028706458953265, "grad_norm": 0.9393416019374851, "learning_rate": 9.08027795480726e-06, "loss": 0.1563, "step": 7551 }, { "epoch": 0.2203162378201762, "grad_norm": 0.9137967490648851, "learning_rate": 9.080004882209384e-06, "loss": 0.1699, "step": 7552 }, { "epoch": 0.22034541105081976, "grad_norm": 0.7496392812503637, "learning_rate": 9.079731773186196e-06, "loss": 0.1408, "step": 7553 }, { "epoch": 0.22037458428146334, "grad_norm": 0.9376169250918007, "learning_rate": 9.079458627740139e-06, "loss": 0.1679, "step": 7554 }, { "epoch": 0.2204037575121069, "grad_norm": 0.7795422893143354, "learning_rate": 9.079185445873649e-06, "loss": 0.1712, "step": 7555 }, { "epoch": 0.22043293074275044, "grad_norm": 0.8337107539120889, "learning_rate": 9.078912227589166e-06, "loss": 0.1687, "step": 7556 }, { "epoch": 0.22046210397339402, "grad_norm": 0.8084468214276382, "learning_rate": 9.078638972889126e-06, "loss": 0.1454, "step": 7557 }, { "epoch": 0.22049127720403758, "grad_norm": 0.7236410382389844, "learning_rate": 9.078365681775974e-06, "loss": 0.1607, "step": 7558 }, { "epoch": 0.22052045043468113, "grad_norm": 0.8600214654896388, "learning_rate": 9.078092354252143e-06, "loss": 0.1388, "step": 7559 }, { "epoch": 0.22054962366532468, "grad_norm": 1.0482883562274232, "learning_rate": 9.07781899032008e-06, "loss": 0.175, "step": 7560 }, { "epoch": 0.22057879689596827, "grad_norm": 0.7623423869872636, "learning_rate": 9.077545589982221e-06, "loss": 0.1719, "step": 7561 }, { "epoch": 0.22060797012661182, "grad_norm": 0.8428118785734672, "learning_rate": 9.077272153241008e-06, "loss": 0.1391, "step": 7562 }, { "epoch": 0.22063714335725537, "grad_norm": 0.8976367068085978, "learning_rate": 9.076998680098883e-06, "loss": 0.1574, "step": 7563 }, { "epoch": 0.22066631658789895, "grad_norm": 0.7699393351335566, "learning_rate": 9.076725170558289e-06, "loss": 0.1437, "step": 7564 }, { "epoch": 0.2206954898185425, "grad_norm": 0.8454398342400627, "learning_rate": 9.076451624621665e-06, "loss": 0.1804, "step": 7565 }, { "epoch": 0.22072466304918606, "grad_norm": 0.838918798589249, "learning_rate": 9.076178042291453e-06, "loss": 0.1415, "step": 7566 }, { "epoch": 0.22075383627982964, "grad_norm": 0.8760292330861484, "learning_rate": 9.075904423570096e-06, "loss": 0.1506, "step": 7567 }, { "epoch": 0.2207830095104732, "grad_norm": 0.8392243222768452, "learning_rate": 9.075630768460037e-06, "loss": 0.1394, "step": 7568 }, { "epoch": 0.22081218274111675, "grad_norm": 0.7150461773229624, "learning_rate": 9.075357076963723e-06, "loss": 0.1623, "step": 7569 }, { "epoch": 0.2208413559717603, "grad_norm": 1.021782821192606, "learning_rate": 9.07508334908359e-06, "loss": 0.1661, "step": 7570 }, { "epoch": 0.22087052920240388, "grad_norm": 0.7182875658508563, "learning_rate": 9.074809584822087e-06, "loss": 0.1565, "step": 7571 }, { "epoch": 0.22089970243304743, "grad_norm": 0.7924182059793997, "learning_rate": 9.074535784181658e-06, "loss": 0.1697, "step": 7572 }, { "epoch": 0.220928875663691, "grad_norm": 0.7716242050220606, "learning_rate": 9.074261947164744e-06, "loss": 0.1507, "step": 7573 }, { "epoch": 0.22095804889433457, "grad_norm": 0.8161076577769065, "learning_rate": 9.073988073773792e-06, "loss": 0.1559, "step": 7574 }, { "epoch": 0.22098722212497812, "grad_norm": 0.9744163088086125, "learning_rate": 9.07371416401125e-06, "loss": 0.1756, "step": 7575 }, { "epoch": 0.22101639535562168, "grad_norm": 0.9554848713519216, "learning_rate": 9.073440217879557e-06, "loss": 0.1575, "step": 7576 }, { "epoch": 0.22104556858626526, "grad_norm": 0.7954637106628107, "learning_rate": 9.073166235381163e-06, "loss": 0.141, "step": 7577 }, { "epoch": 0.2210747418169088, "grad_norm": 0.977347338980649, "learning_rate": 9.072892216518513e-06, "loss": 0.1734, "step": 7578 }, { "epoch": 0.22110391504755236, "grad_norm": 0.8007874726654638, "learning_rate": 9.072618161294056e-06, "loss": 0.1746, "step": 7579 }, { "epoch": 0.22113308827819592, "grad_norm": 0.8751881298397141, "learning_rate": 9.072344069710234e-06, "loss": 0.1724, "step": 7580 }, { "epoch": 0.2211622615088395, "grad_norm": 0.8967593436275193, "learning_rate": 9.072069941769497e-06, "loss": 0.1496, "step": 7581 }, { "epoch": 0.22119143473948305, "grad_norm": 0.8527490070747805, "learning_rate": 9.071795777474291e-06, "loss": 0.1673, "step": 7582 }, { "epoch": 0.2212206079701266, "grad_norm": 0.7119002806549058, "learning_rate": 9.071521576827066e-06, "loss": 0.1588, "step": 7583 }, { "epoch": 0.22124978120077018, "grad_norm": 0.7191548340724161, "learning_rate": 9.071247339830266e-06, "loss": 0.1735, "step": 7584 }, { "epoch": 0.22127895443141374, "grad_norm": 0.7932341849874444, "learning_rate": 9.070973066486343e-06, "loss": 0.1472, "step": 7585 }, { "epoch": 0.2213081276620573, "grad_norm": 0.9788378489903121, "learning_rate": 9.070698756797744e-06, "loss": 0.1782, "step": 7586 }, { "epoch": 0.22133730089270084, "grad_norm": 0.670224463952022, "learning_rate": 9.070424410766918e-06, "loss": 0.1493, "step": 7587 }, { "epoch": 0.22136647412334443, "grad_norm": 1.100490778659628, "learning_rate": 9.070150028396315e-06, "loss": 0.1666, "step": 7588 }, { "epoch": 0.22139564735398798, "grad_norm": 0.9397402986306163, "learning_rate": 9.069875609688384e-06, "loss": 0.1372, "step": 7589 }, { "epoch": 0.22142482058463153, "grad_norm": 0.8819686288122192, "learning_rate": 9.069601154645575e-06, "loss": 0.1703, "step": 7590 }, { "epoch": 0.2214539938152751, "grad_norm": 1.0019783126541315, "learning_rate": 9.06932666327034e-06, "loss": 0.1793, "step": 7591 }, { "epoch": 0.22148316704591867, "grad_norm": 0.8283424701988963, "learning_rate": 9.069052135565126e-06, "loss": 0.1506, "step": 7592 }, { "epoch": 0.22151234027656222, "grad_norm": 1.0498749053308172, "learning_rate": 9.068777571532385e-06, "loss": 0.1609, "step": 7593 }, { "epoch": 0.2215415135072058, "grad_norm": 0.845997786865884, "learning_rate": 9.06850297117457e-06, "loss": 0.1591, "step": 7594 }, { "epoch": 0.22157068673784935, "grad_norm": 0.9371159908291806, "learning_rate": 9.068228334494133e-06, "loss": 0.1713, "step": 7595 }, { "epoch": 0.2215998599684929, "grad_norm": 1.0625213095938626, "learning_rate": 9.067953661493524e-06, "loss": 0.1835, "step": 7596 }, { "epoch": 0.22162903319913646, "grad_norm": 0.9265621786408575, "learning_rate": 9.067678952175196e-06, "loss": 0.1658, "step": 7597 }, { "epoch": 0.22165820642978004, "grad_norm": 0.8570825544727586, "learning_rate": 9.067404206541601e-06, "loss": 0.1519, "step": 7598 }, { "epoch": 0.2216873796604236, "grad_norm": 0.8283111861076646, "learning_rate": 9.067129424595191e-06, "loss": 0.1553, "step": 7599 }, { "epoch": 0.22171655289106715, "grad_norm": 0.7113587454962451, "learning_rate": 9.066854606338422e-06, "loss": 0.162, "step": 7600 }, { "epoch": 0.22174572612171073, "grad_norm": 0.7252270028494985, "learning_rate": 9.066579751773745e-06, "loss": 0.1607, "step": 7601 }, { "epoch": 0.22177489935235428, "grad_norm": 0.734893412520523, "learning_rate": 9.066304860903616e-06, "loss": 0.1618, "step": 7602 }, { "epoch": 0.22180407258299784, "grad_norm": 0.887711150723681, "learning_rate": 9.066029933730486e-06, "loss": 0.1661, "step": 7603 }, { "epoch": 0.22183324581364142, "grad_norm": 0.9686223705673456, "learning_rate": 9.065754970256813e-06, "loss": 0.1698, "step": 7604 }, { "epoch": 0.22186241904428497, "grad_norm": 0.6550977232812051, "learning_rate": 9.06547997048505e-06, "loss": 0.1545, "step": 7605 }, { "epoch": 0.22189159227492852, "grad_norm": 0.8286044566302697, "learning_rate": 9.065204934417654e-06, "loss": 0.1617, "step": 7606 }, { "epoch": 0.22192076550557208, "grad_norm": 0.9547923364302132, "learning_rate": 9.064929862057075e-06, "loss": 0.17, "step": 7607 }, { "epoch": 0.22194993873621566, "grad_norm": 0.7569291356378777, "learning_rate": 9.064654753405775e-06, "loss": 0.1646, "step": 7608 }, { "epoch": 0.2219791119668592, "grad_norm": 1.008866085330164, "learning_rate": 9.064379608466207e-06, "loss": 0.1888, "step": 7609 }, { "epoch": 0.22200828519750276, "grad_norm": 0.8024048623313427, "learning_rate": 9.064104427240828e-06, "loss": 0.1932, "step": 7610 }, { "epoch": 0.22203745842814634, "grad_norm": 0.796585074083653, "learning_rate": 9.063829209732096e-06, "loss": 0.1537, "step": 7611 }, { "epoch": 0.2220666316587899, "grad_norm": 0.7401358079354325, "learning_rate": 9.063553955942465e-06, "loss": 0.1483, "step": 7612 }, { "epoch": 0.22209580488943345, "grad_norm": 0.7607351143856173, "learning_rate": 9.063278665874396e-06, "loss": 0.1658, "step": 7613 }, { "epoch": 0.222124978120077, "grad_norm": 0.7940261184520345, "learning_rate": 9.063003339530342e-06, "loss": 0.1508, "step": 7614 }, { "epoch": 0.22215415135072059, "grad_norm": 0.71268964953337, "learning_rate": 9.062727976912769e-06, "loss": 0.1631, "step": 7615 }, { "epoch": 0.22218332458136414, "grad_norm": 0.9682479161327682, "learning_rate": 9.062452578024128e-06, "loss": 0.1882, "step": 7616 }, { "epoch": 0.2222124978120077, "grad_norm": 0.7615084534557112, "learning_rate": 9.062177142866879e-06, "loss": 0.1651, "step": 7617 }, { "epoch": 0.22224167104265127, "grad_norm": 0.9499185985324785, "learning_rate": 9.061901671443483e-06, "loss": 0.1679, "step": 7618 }, { "epoch": 0.22227084427329483, "grad_norm": 0.7525132228183636, "learning_rate": 9.061626163756398e-06, "loss": 0.1599, "step": 7619 }, { "epoch": 0.22230001750393838, "grad_norm": 0.7786034564604447, "learning_rate": 9.061350619808086e-06, "loss": 0.1937, "step": 7620 }, { "epoch": 0.22232919073458196, "grad_norm": 0.9028832343859958, "learning_rate": 9.061075039601003e-06, "loss": 0.1712, "step": 7621 }, { "epoch": 0.22235836396522551, "grad_norm": 0.7954470304362796, "learning_rate": 9.060799423137615e-06, "loss": 0.1555, "step": 7622 }, { "epoch": 0.22238753719586907, "grad_norm": 0.9866484570970352, "learning_rate": 9.060523770420376e-06, "loss": 0.1547, "step": 7623 }, { "epoch": 0.22241671042651262, "grad_norm": 0.7820834776528715, "learning_rate": 9.060248081451752e-06, "loss": 0.1512, "step": 7624 }, { "epoch": 0.2224458836571562, "grad_norm": 0.8186999136404977, "learning_rate": 9.059972356234202e-06, "loss": 0.1533, "step": 7625 }, { "epoch": 0.22247505688779975, "grad_norm": 0.991945352238341, "learning_rate": 9.059696594770186e-06, "loss": 0.1582, "step": 7626 }, { "epoch": 0.2225042301184433, "grad_norm": 0.8035245190630039, "learning_rate": 9.059420797062169e-06, "loss": 0.154, "step": 7627 }, { "epoch": 0.2225334033490869, "grad_norm": 1.0123527087147737, "learning_rate": 9.059144963112612e-06, "loss": 0.1683, "step": 7628 }, { "epoch": 0.22256257657973044, "grad_norm": 0.7261878728564278, "learning_rate": 9.058869092923979e-06, "loss": 0.1624, "step": 7629 }, { "epoch": 0.222591749810374, "grad_norm": 0.8661584585519259, "learning_rate": 9.058593186498731e-06, "loss": 0.1308, "step": 7630 }, { "epoch": 0.22262092304101755, "grad_norm": 0.8825084494799464, "learning_rate": 9.058317243839333e-06, "loss": 0.1872, "step": 7631 }, { "epoch": 0.22265009627166113, "grad_norm": 0.782826852717805, "learning_rate": 9.058041264948244e-06, "loss": 0.1744, "step": 7632 }, { "epoch": 0.22267926950230468, "grad_norm": 0.7472818214781765, "learning_rate": 9.057765249827935e-06, "loss": 0.165, "step": 7633 }, { "epoch": 0.22270844273294824, "grad_norm": 0.8827334418567251, "learning_rate": 9.057489198480864e-06, "loss": 0.1589, "step": 7634 }, { "epoch": 0.22273761596359182, "grad_norm": 0.7843369468588055, "learning_rate": 9.057213110909499e-06, "loss": 0.1557, "step": 7635 }, { "epoch": 0.22276678919423537, "grad_norm": 0.8039448951636196, "learning_rate": 9.056936987116304e-06, "loss": 0.1531, "step": 7636 }, { "epoch": 0.22279596242487892, "grad_norm": 0.9018143498707185, "learning_rate": 9.056660827103744e-06, "loss": 0.1699, "step": 7637 }, { "epoch": 0.2228251356555225, "grad_norm": 0.8453876767618507, "learning_rate": 9.056384630874284e-06, "loss": 0.1643, "step": 7638 }, { "epoch": 0.22285430888616606, "grad_norm": 1.112609241081302, "learning_rate": 9.056108398430392e-06, "loss": 0.1428, "step": 7639 }, { "epoch": 0.2228834821168096, "grad_norm": 1.1699841161145523, "learning_rate": 9.055832129774531e-06, "loss": 0.1922, "step": 7640 }, { "epoch": 0.22291265534745316, "grad_norm": 0.7491678728972175, "learning_rate": 9.05555582490917e-06, "loss": 0.1632, "step": 7641 }, { "epoch": 0.22294182857809675, "grad_norm": 1.2193349843145231, "learning_rate": 9.055279483836773e-06, "loss": 0.1593, "step": 7642 }, { "epoch": 0.2229710018087403, "grad_norm": 0.9511142377842252, "learning_rate": 9.05500310655981e-06, "loss": 0.1545, "step": 7643 }, { "epoch": 0.22300017503938385, "grad_norm": 0.8744041500940533, "learning_rate": 9.054726693080748e-06, "loss": 0.1463, "step": 7644 }, { "epoch": 0.22302934827002743, "grad_norm": 1.0206027985283834, "learning_rate": 9.054450243402054e-06, "loss": 0.1832, "step": 7645 }, { "epoch": 0.223058521500671, "grad_norm": 0.9566518601858032, "learning_rate": 9.054173757526195e-06, "loss": 0.1564, "step": 7646 }, { "epoch": 0.22308769473131454, "grad_norm": 0.7813565535186373, "learning_rate": 9.05389723545564e-06, "loss": 0.2013, "step": 7647 }, { "epoch": 0.22311686796195812, "grad_norm": 0.7598699186360195, "learning_rate": 9.053620677192859e-06, "loss": 0.1319, "step": 7648 }, { "epoch": 0.22314604119260167, "grad_norm": 0.9025666236147157, "learning_rate": 9.05334408274032e-06, "loss": 0.173, "step": 7649 }, { "epoch": 0.22317521442324523, "grad_norm": 0.984611023588771, "learning_rate": 9.053067452100493e-06, "loss": 0.1639, "step": 7650 }, { "epoch": 0.22320438765388878, "grad_norm": 0.6207124157241103, "learning_rate": 9.052790785275848e-06, "loss": 0.1552, "step": 7651 }, { "epoch": 0.22323356088453236, "grad_norm": 0.7937699128727909, "learning_rate": 9.052514082268853e-06, "loss": 0.1614, "step": 7652 }, { "epoch": 0.22326273411517591, "grad_norm": 0.9311623697082175, "learning_rate": 9.052237343081982e-06, "loss": 0.1566, "step": 7653 }, { "epoch": 0.22329190734581947, "grad_norm": 0.779768384518718, "learning_rate": 9.051960567717702e-06, "loss": 0.1512, "step": 7654 }, { "epoch": 0.22332108057646305, "grad_norm": 0.783404962765794, "learning_rate": 9.051683756178484e-06, "loss": 0.1609, "step": 7655 }, { "epoch": 0.2233502538071066, "grad_norm": 1.0170932451725594, "learning_rate": 9.051406908466803e-06, "loss": 0.186, "step": 7656 }, { "epoch": 0.22337942703775016, "grad_norm": 0.9983335483662694, "learning_rate": 9.051130024585125e-06, "loss": 0.1659, "step": 7657 }, { "epoch": 0.2234086002683937, "grad_norm": 0.9680024818605943, "learning_rate": 9.050853104535927e-06, "loss": 0.1702, "step": 7658 }, { "epoch": 0.2234377734990373, "grad_norm": 0.9655365420641593, "learning_rate": 9.05057614832168e-06, "loss": 0.1733, "step": 7659 }, { "epoch": 0.22346694672968084, "grad_norm": 0.8928412249761869, "learning_rate": 9.050299155944857e-06, "loss": 0.1479, "step": 7660 }, { "epoch": 0.2234961199603244, "grad_norm": 0.7972993805394739, "learning_rate": 9.050022127407928e-06, "loss": 0.1679, "step": 7661 }, { "epoch": 0.22352529319096798, "grad_norm": 0.9849039289849973, "learning_rate": 9.049745062713368e-06, "loss": 0.1443, "step": 7662 }, { "epoch": 0.22355446642161153, "grad_norm": 0.9891067194514748, "learning_rate": 9.049467961863652e-06, "loss": 0.1489, "step": 7663 }, { "epoch": 0.22358363965225508, "grad_norm": 0.8719737764767784, "learning_rate": 9.049190824861254e-06, "loss": 0.1379, "step": 7664 }, { "epoch": 0.22361281288289866, "grad_norm": 1.1786497385711123, "learning_rate": 9.048913651708643e-06, "loss": 0.1486, "step": 7665 }, { "epoch": 0.22364198611354222, "grad_norm": 0.9145057571417332, "learning_rate": 9.048636442408302e-06, "loss": 0.1503, "step": 7666 }, { "epoch": 0.22367115934418577, "grad_norm": 0.8945697892923155, "learning_rate": 9.0483591969627e-06, "loss": 0.1748, "step": 7667 }, { "epoch": 0.22370033257482932, "grad_norm": 0.9387624666252707, "learning_rate": 9.048081915374312e-06, "loss": 0.168, "step": 7668 }, { "epoch": 0.2237295058054729, "grad_norm": 1.0523629310613862, "learning_rate": 9.047804597645615e-06, "loss": 0.1904, "step": 7669 }, { "epoch": 0.22375867903611646, "grad_norm": 0.9639725962769458, "learning_rate": 9.047527243779086e-06, "loss": 0.1773, "step": 7670 }, { "epoch": 0.22378785226676, "grad_norm": 0.7872886284236622, "learning_rate": 9.047249853777201e-06, "loss": 0.1709, "step": 7671 }, { "epoch": 0.2238170254974036, "grad_norm": 0.9870747744907474, "learning_rate": 9.046972427642434e-06, "loss": 0.1828, "step": 7672 }, { "epoch": 0.22384619872804715, "grad_norm": 0.8699741891418625, "learning_rate": 9.046694965377263e-06, "loss": 0.1641, "step": 7673 }, { "epoch": 0.2238753719586907, "grad_norm": 0.9056957844811336, "learning_rate": 9.046417466984165e-06, "loss": 0.1732, "step": 7674 }, { "epoch": 0.22390454518933425, "grad_norm": 0.943514829476528, "learning_rate": 9.046139932465618e-06, "loss": 0.1731, "step": 7675 }, { "epoch": 0.22393371841997783, "grad_norm": 0.8878351010150347, "learning_rate": 9.045862361824101e-06, "loss": 0.1441, "step": 7676 }, { "epoch": 0.2239628916506214, "grad_norm": 1.0017549404017092, "learning_rate": 9.04558475506209e-06, "loss": 0.1479, "step": 7677 }, { "epoch": 0.22399206488126494, "grad_norm": 0.7349079912287529, "learning_rate": 9.045307112182064e-06, "loss": 0.1555, "step": 7678 }, { "epoch": 0.22402123811190852, "grad_norm": 0.8843178662615621, "learning_rate": 9.045029433186502e-06, "loss": 0.1641, "step": 7679 }, { "epoch": 0.22405041134255207, "grad_norm": 1.0304141828281947, "learning_rate": 9.044751718077883e-06, "loss": 0.162, "step": 7680 }, { "epoch": 0.22407958457319563, "grad_norm": 0.7978965128000045, "learning_rate": 9.044473966858684e-06, "loss": 0.1535, "step": 7681 }, { "epoch": 0.2241087578038392, "grad_norm": 0.8422392322066434, "learning_rate": 9.044196179531389e-06, "loss": 0.1747, "step": 7682 }, { "epoch": 0.22413793103448276, "grad_norm": 0.9792872894157268, "learning_rate": 9.043918356098476e-06, "loss": 0.1619, "step": 7683 }, { "epoch": 0.22416710426512632, "grad_norm": 0.8599377481402988, "learning_rate": 9.043640496562425e-06, "loss": 0.1542, "step": 7684 }, { "epoch": 0.22419627749576987, "grad_norm": 0.884419926074693, "learning_rate": 9.043362600925717e-06, "loss": 0.153, "step": 7685 }, { "epoch": 0.22422545072641345, "grad_norm": 0.947556215129273, "learning_rate": 9.043084669190832e-06, "loss": 0.1569, "step": 7686 }, { "epoch": 0.224254623957057, "grad_norm": 0.7913255157996661, "learning_rate": 9.042806701360254e-06, "loss": 0.1658, "step": 7687 }, { "epoch": 0.22428379718770056, "grad_norm": 0.7619208615017694, "learning_rate": 9.042528697436461e-06, "loss": 0.1452, "step": 7688 }, { "epoch": 0.22431297041834414, "grad_norm": 0.8566179978048031, "learning_rate": 9.042250657421938e-06, "loss": 0.1651, "step": 7689 }, { "epoch": 0.2243421436489877, "grad_norm": 0.8134848864736269, "learning_rate": 9.041972581319165e-06, "loss": 0.1654, "step": 7690 }, { "epoch": 0.22437131687963124, "grad_norm": 0.7432956393861663, "learning_rate": 9.041694469130628e-06, "loss": 0.1654, "step": 7691 }, { "epoch": 0.22440049011027483, "grad_norm": 0.7811840870532569, "learning_rate": 9.041416320858804e-06, "loss": 0.143, "step": 7692 }, { "epoch": 0.22442966334091838, "grad_norm": 0.8061525347883657, "learning_rate": 9.041138136506183e-06, "loss": 0.1653, "step": 7693 }, { "epoch": 0.22445883657156193, "grad_norm": 0.688503899823051, "learning_rate": 9.040859916075244e-06, "loss": 0.1478, "step": 7694 }, { "epoch": 0.22448800980220548, "grad_norm": 0.9075490749612564, "learning_rate": 9.040581659568472e-06, "loss": 0.139, "step": 7695 }, { "epoch": 0.22451718303284907, "grad_norm": 0.6928728556934488, "learning_rate": 9.040303366988353e-06, "loss": 0.1364, "step": 7696 }, { "epoch": 0.22454635626349262, "grad_norm": 0.6761159505016567, "learning_rate": 9.04002503833737e-06, "loss": 0.1437, "step": 7697 }, { "epoch": 0.22457552949413617, "grad_norm": 0.8425281803151635, "learning_rate": 9.039746673618007e-06, "loss": 0.1497, "step": 7698 }, { "epoch": 0.22460470272477975, "grad_norm": 0.9503518187886093, "learning_rate": 9.039468272832749e-06, "loss": 0.1809, "step": 7699 }, { "epoch": 0.2246338759554233, "grad_norm": 0.9150172811661464, "learning_rate": 9.039189835984085e-06, "loss": 0.1531, "step": 7700 }, { "epoch": 0.22466304918606686, "grad_norm": 0.9210411178398046, "learning_rate": 9.038911363074495e-06, "loss": 0.1634, "step": 7701 }, { "epoch": 0.2246922224167104, "grad_norm": 0.783002610336514, "learning_rate": 9.038632854106473e-06, "loss": 0.1898, "step": 7702 }, { "epoch": 0.224721395647354, "grad_norm": 0.7402063151144914, "learning_rate": 9.038354309082498e-06, "loss": 0.1468, "step": 7703 }, { "epoch": 0.22475056887799755, "grad_norm": 0.7865639516467352, "learning_rate": 9.038075728005061e-06, "loss": 0.1436, "step": 7704 }, { "epoch": 0.2247797421086411, "grad_norm": 0.8337676546211781, "learning_rate": 9.037797110876645e-06, "loss": 0.1687, "step": 7705 }, { "epoch": 0.22480891533928468, "grad_norm": 0.7662961401587532, "learning_rate": 9.037518457699744e-06, "loss": 0.145, "step": 7706 }, { "epoch": 0.22483808856992823, "grad_norm": 1.0198900699113442, "learning_rate": 9.03723976847684e-06, "loss": 0.1467, "step": 7707 }, { "epoch": 0.2248672618005718, "grad_norm": 0.6969708413849717, "learning_rate": 9.036961043210424e-06, "loss": 0.1569, "step": 7708 }, { "epoch": 0.22489643503121537, "grad_norm": 0.7766844709857786, "learning_rate": 9.036682281902984e-06, "loss": 0.1604, "step": 7709 }, { "epoch": 0.22492560826185892, "grad_norm": 0.8059386561736319, "learning_rate": 9.036403484557005e-06, "loss": 0.1539, "step": 7710 }, { "epoch": 0.22495478149250248, "grad_norm": 0.865283687640427, "learning_rate": 9.036124651174983e-06, "loss": 0.1607, "step": 7711 }, { "epoch": 0.22498395472314603, "grad_norm": 0.7670100636936602, "learning_rate": 9.035845781759403e-06, "loss": 0.1407, "step": 7712 }, { "epoch": 0.2250131279537896, "grad_norm": 1.0289788523648595, "learning_rate": 9.035566876312754e-06, "loss": 0.1828, "step": 7713 }, { "epoch": 0.22504230118443316, "grad_norm": 0.8626691997769976, "learning_rate": 9.035287934837529e-06, "loss": 0.158, "step": 7714 }, { "epoch": 0.22507147441507672, "grad_norm": 0.9646817622704134, "learning_rate": 9.035008957336215e-06, "loss": 0.1708, "step": 7715 }, { "epoch": 0.2251006476457203, "grad_norm": 0.8368005802837244, "learning_rate": 9.034729943811304e-06, "loss": 0.1681, "step": 7716 }, { "epoch": 0.22512982087636385, "grad_norm": 0.8280718769062629, "learning_rate": 9.034450894265288e-06, "loss": 0.1519, "step": 7717 }, { "epoch": 0.2251589941070074, "grad_norm": 1.0617486656027086, "learning_rate": 9.034171808700657e-06, "loss": 0.1667, "step": 7718 }, { "epoch": 0.22518816733765099, "grad_norm": 0.912307137057485, "learning_rate": 9.033892687119905e-06, "loss": 0.1751, "step": 7719 }, { "epoch": 0.22521734056829454, "grad_norm": 0.9032676593843462, "learning_rate": 9.03361352952552e-06, "loss": 0.1835, "step": 7720 }, { "epoch": 0.2252465137989381, "grad_norm": 1.1243956755698408, "learning_rate": 9.033334335919997e-06, "loss": 0.1802, "step": 7721 }, { "epoch": 0.22527568702958164, "grad_norm": 0.8308440720513559, "learning_rate": 9.033055106305828e-06, "loss": 0.1652, "step": 7722 }, { "epoch": 0.22530486026022523, "grad_norm": 0.7229023191329865, "learning_rate": 9.032775840685505e-06, "loss": 0.1764, "step": 7723 }, { "epoch": 0.22533403349086878, "grad_norm": 0.8759747801209383, "learning_rate": 9.032496539061523e-06, "loss": 0.1576, "step": 7724 }, { "epoch": 0.22536320672151233, "grad_norm": 0.8923359849564969, "learning_rate": 9.032217201436374e-06, "loss": 0.1818, "step": 7725 }, { "epoch": 0.2253923799521559, "grad_norm": 0.7070430046591983, "learning_rate": 9.031937827812552e-06, "loss": 0.1587, "step": 7726 }, { "epoch": 0.22542155318279947, "grad_norm": 0.7723221493766538, "learning_rate": 9.031658418192553e-06, "loss": 0.1787, "step": 7727 }, { "epoch": 0.22545072641344302, "grad_norm": 0.9411068171141572, "learning_rate": 9.031378972578867e-06, "loss": 0.1611, "step": 7728 }, { "epoch": 0.22547989964408657, "grad_norm": 0.7716916797419265, "learning_rate": 9.031099490973996e-06, "loss": 0.1493, "step": 7729 }, { "epoch": 0.22550907287473015, "grad_norm": 0.7524008958908079, "learning_rate": 9.030819973380429e-06, "loss": 0.1593, "step": 7730 }, { "epoch": 0.2255382461053737, "grad_norm": 0.9948539072114604, "learning_rate": 9.030540419800664e-06, "loss": 0.1501, "step": 7731 }, { "epoch": 0.22556741933601726, "grad_norm": 0.7505041602274153, "learning_rate": 9.030260830237195e-06, "loss": 0.1596, "step": 7732 }, { "epoch": 0.22559659256666084, "grad_norm": 0.7834396248817043, "learning_rate": 9.029981204692521e-06, "loss": 0.15, "step": 7733 }, { "epoch": 0.2256257657973044, "grad_norm": 0.9118209380807698, "learning_rate": 9.029701543169136e-06, "loss": 0.155, "step": 7734 }, { "epoch": 0.22565493902794795, "grad_norm": 0.7907698722510623, "learning_rate": 9.029421845669537e-06, "loss": 0.157, "step": 7735 }, { "epoch": 0.22568411225859153, "grad_norm": 0.9267610415957518, "learning_rate": 9.029142112196224e-06, "loss": 0.1624, "step": 7736 }, { "epoch": 0.22571328548923508, "grad_norm": 0.7296141636833037, "learning_rate": 9.02886234275169e-06, "loss": 0.1484, "step": 7737 }, { "epoch": 0.22574245871987864, "grad_norm": 0.7530974192188826, "learning_rate": 9.028582537338434e-06, "loss": 0.1591, "step": 7738 }, { "epoch": 0.2257716319505222, "grad_norm": 0.854773736858789, "learning_rate": 9.028302695958956e-06, "loss": 0.1734, "step": 7739 }, { "epoch": 0.22580080518116577, "grad_norm": 0.8437420622238747, "learning_rate": 9.028022818615753e-06, "loss": 0.1708, "step": 7740 }, { "epoch": 0.22582997841180932, "grad_norm": 0.8625507996665871, "learning_rate": 9.027742905311324e-06, "loss": 0.1676, "step": 7741 }, { "epoch": 0.22585915164245288, "grad_norm": 0.8691371780488519, "learning_rate": 9.02746295604817e-06, "loss": 0.1985, "step": 7742 }, { "epoch": 0.22588832487309646, "grad_norm": 0.9278230265563486, "learning_rate": 9.027182970828786e-06, "loss": 0.1826, "step": 7743 }, { "epoch": 0.22591749810374, "grad_norm": 0.7856522119252362, "learning_rate": 9.026902949655673e-06, "loss": 0.1448, "step": 7744 }, { "epoch": 0.22594667133438356, "grad_norm": 0.9002523993168426, "learning_rate": 9.026622892531333e-06, "loss": 0.1626, "step": 7745 }, { "epoch": 0.22597584456502712, "grad_norm": 0.7387181122476415, "learning_rate": 9.026342799458265e-06, "loss": 0.1374, "step": 7746 }, { "epoch": 0.2260050177956707, "grad_norm": 1.1592372854476964, "learning_rate": 9.026062670438969e-06, "loss": 0.1786, "step": 7747 }, { "epoch": 0.22603419102631425, "grad_norm": 0.8886203635134633, "learning_rate": 9.025782505475947e-06, "loss": 0.1643, "step": 7748 }, { "epoch": 0.2260633642569578, "grad_norm": 0.8658568934398153, "learning_rate": 9.0255023045717e-06, "loss": 0.1575, "step": 7749 }, { "epoch": 0.22609253748760139, "grad_norm": 0.8613802901972075, "learning_rate": 9.025222067728729e-06, "loss": 0.1712, "step": 7750 }, { "epoch": 0.22612171071824494, "grad_norm": 0.8466326761128169, "learning_rate": 9.024941794949536e-06, "loss": 0.1639, "step": 7751 }, { "epoch": 0.2261508839488885, "grad_norm": 0.9454126268379517, "learning_rate": 9.024661486236624e-06, "loss": 0.1487, "step": 7752 }, { "epoch": 0.22618005717953207, "grad_norm": 1.190906457612595, "learning_rate": 9.024381141592495e-06, "loss": 0.1684, "step": 7753 }, { "epoch": 0.22620923041017563, "grad_norm": 0.8217592094029972, "learning_rate": 9.024100761019652e-06, "loss": 0.159, "step": 7754 }, { "epoch": 0.22623840364081918, "grad_norm": 0.736328682964108, "learning_rate": 9.023820344520597e-06, "loss": 0.1481, "step": 7755 }, { "epoch": 0.22626757687146273, "grad_norm": 1.043382123408031, "learning_rate": 9.023539892097837e-06, "loss": 0.161, "step": 7756 }, { "epoch": 0.22629675010210631, "grad_norm": 0.7643653720367733, "learning_rate": 9.02325940375387e-06, "loss": 0.1741, "step": 7757 }, { "epoch": 0.22632592333274987, "grad_norm": 0.8874120344038124, "learning_rate": 9.022978879491207e-06, "loss": 0.1941, "step": 7758 }, { "epoch": 0.22635509656339342, "grad_norm": 0.831561926407437, "learning_rate": 9.022698319312346e-06, "loss": 0.1883, "step": 7759 }, { "epoch": 0.226384269794037, "grad_norm": 0.9981630667292514, "learning_rate": 9.022417723219797e-06, "loss": 0.1562, "step": 7760 }, { "epoch": 0.22641344302468056, "grad_norm": 1.485055949321117, "learning_rate": 9.02213709121606e-06, "loss": 0.1651, "step": 7761 }, { "epoch": 0.2264426162553241, "grad_norm": 1.0950257317704974, "learning_rate": 9.021856423303645e-06, "loss": 0.1634, "step": 7762 }, { "epoch": 0.2264717894859677, "grad_norm": 1.3654749199431029, "learning_rate": 9.021575719485056e-06, "loss": 0.1706, "step": 7763 }, { "epoch": 0.22650096271661124, "grad_norm": 1.133425702358128, "learning_rate": 9.0212949797628e-06, "loss": 0.1572, "step": 7764 }, { "epoch": 0.2265301359472548, "grad_norm": 0.8283512805983546, "learning_rate": 9.02101420413938e-06, "loss": 0.1636, "step": 7765 }, { "epoch": 0.22655930917789835, "grad_norm": 1.1206211136306208, "learning_rate": 9.020733392617306e-06, "loss": 0.1703, "step": 7766 }, { "epoch": 0.22658848240854193, "grad_norm": 0.9820767132312471, "learning_rate": 9.020452545199084e-06, "loss": 0.1867, "step": 7767 }, { "epoch": 0.22661765563918548, "grad_norm": 0.7701110439822542, "learning_rate": 9.020171661887223e-06, "loss": 0.1444, "step": 7768 }, { "epoch": 0.22664682886982904, "grad_norm": 0.8374307375015951, "learning_rate": 9.019890742684227e-06, "loss": 0.1785, "step": 7769 }, { "epoch": 0.22667600210047262, "grad_norm": 1.0855926879485878, "learning_rate": 9.019609787592607e-06, "loss": 0.183, "step": 7770 }, { "epoch": 0.22670517533111617, "grad_norm": 0.881521948980321, "learning_rate": 9.01932879661487e-06, "loss": 0.1556, "step": 7771 }, { "epoch": 0.22673434856175972, "grad_norm": 0.8624881531492817, "learning_rate": 9.019047769753527e-06, "loss": 0.1902, "step": 7772 }, { "epoch": 0.22676352179240328, "grad_norm": 0.765844828474478, "learning_rate": 9.018766707011082e-06, "loss": 0.1273, "step": 7773 }, { "epoch": 0.22679269502304686, "grad_norm": 0.9658090031841545, "learning_rate": 9.018485608390048e-06, "loss": 0.1399, "step": 7774 }, { "epoch": 0.2268218682536904, "grad_norm": 0.7638279854341149, "learning_rate": 9.018204473892935e-06, "loss": 0.1321, "step": 7775 }, { "epoch": 0.22685104148433397, "grad_norm": 0.7406486751853761, "learning_rate": 9.017923303522251e-06, "loss": 0.1457, "step": 7776 }, { "epoch": 0.22688021471497755, "grad_norm": 0.7685794357804425, "learning_rate": 9.017642097280506e-06, "loss": 0.1568, "step": 7777 }, { "epoch": 0.2269093879456211, "grad_norm": 0.7109188143174116, "learning_rate": 9.017360855170212e-06, "loss": 0.1365, "step": 7778 }, { "epoch": 0.22693856117626465, "grad_norm": 0.8266958434797916, "learning_rate": 9.01707957719388e-06, "loss": 0.1551, "step": 7779 }, { "epoch": 0.22696773440690823, "grad_norm": 0.8098626300968494, "learning_rate": 9.01679826335402e-06, "loss": 0.1662, "step": 7780 }, { "epoch": 0.2269969076375518, "grad_norm": 0.987600792423462, "learning_rate": 9.016516913653144e-06, "loss": 0.1809, "step": 7781 }, { "epoch": 0.22702608086819534, "grad_norm": 0.7391288497416076, "learning_rate": 9.016235528093764e-06, "loss": 0.1485, "step": 7782 }, { "epoch": 0.2270552540988389, "grad_norm": 0.7452053286443837, "learning_rate": 9.015954106678391e-06, "loss": 0.1455, "step": 7783 }, { "epoch": 0.22708442732948247, "grad_norm": 0.8935608228872053, "learning_rate": 9.01567264940954e-06, "loss": 0.179, "step": 7784 }, { "epoch": 0.22711360056012603, "grad_norm": 0.6328135737272962, "learning_rate": 9.01539115628972e-06, "loss": 0.1481, "step": 7785 }, { "epoch": 0.22714277379076958, "grad_norm": 0.8840860145091277, "learning_rate": 9.01510962732145e-06, "loss": 0.1723, "step": 7786 }, { "epoch": 0.22717194702141316, "grad_norm": 0.867258923074141, "learning_rate": 9.014828062507237e-06, "loss": 0.1682, "step": 7787 }, { "epoch": 0.22720112025205672, "grad_norm": 0.7557198857562272, "learning_rate": 9.014546461849597e-06, "loss": 0.1663, "step": 7788 }, { "epoch": 0.22723029348270027, "grad_norm": 0.9233793516447816, "learning_rate": 9.014264825351046e-06, "loss": 0.1603, "step": 7789 }, { "epoch": 0.22725946671334385, "grad_norm": 0.8904760694334602, "learning_rate": 9.013983153014097e-06, "loss": 0.1673, "step": 7790 }, { "epoch": 0.2272886399439874, "grad_norm": 0.9100790280131235, "learning_rate": 9.013701444841262e-06, "loss": 0.1546, "step": 7791 }, { "epoch": 0.22731781317463096, "grad_norm": 0.825827433111236, "learning_rate": 9.013419700835062e-06, "loss": 0.1516, "step": 7792 }, { "epoch": 0.2273469864052745, "grad_norm": 0.714287197207715, "learning_rate": 9.013137920998007e-06, "loss": 0.1378, "step": 7793 }, { "epoch": 0.2273761596359181, "grad_norm": 1.1462027908735675, "learning_rate": 9.012856105332615e-06, "loss": 0.1691, "step": 7794 }, { "epoch": 0.22740533286656164, "grad_norm": 1.0480933585120638, "learning_rate": 9.012574253841401e-06, "loss": 0.1668, "step": 7795 }, { "epoch": 0.2274345060972052, "grad_norm": 0.8537650131964035, "learning_rate": 9.012292366526884e-06, "loss": 0.1717, "step": 7796 }, { "epoch": 0.22746367932784878, "grad_norm": 0.9896746025215346, "learning_rate": 9.012010443391578e-06, "loss": 0.1705, "step": 7797 }, { "epoch": 0.22749285255849233, "grad_norm": 0.9716721601903818, "learning_rate": 9.011728484438e-06, "loss": 0.1754, "step": 7798 }, { "epoch": 0.22752202578913588, "grad_norm": 0.840118546389893, "learning_rate": 9.011446489668667e-06, "loss": 0.1485, "step": 7799 }, { "epoch": 0.22755119901977944, "grad_norm": 0.7677962194515048, "learning_rate": 9.011164459086099e-06, "loss": 0.1449, "step": 7800 }, { "epoch": 0.22758037225042302, "grad_norm": 0.9554203066954745, "learning_rate": 9.010882392692812e-06, "loss": 0.1818, "step": 7801 }, { "epoch": 0.22760954548106657, "grad_norm": 1.0770749099152734, "learning_rate": 9.010600290491323e-06, "loss": 0.1599, "step": 7802 }, { "epoch": 0.22763871871171013, "grad_norm": 0.7743675527840735, "learning_rate": 9.010318152484152e-06, "loss": 0.1906, "step": 7803 }, { "epoch": 0.2276678919423537, "grad_norm": 1.1281114450625802, "learning_rate": 9.01003597867382e-06, "loss": 0.1739, "step": 7804 }, { "epoch": 0.22769706517299726, "grad_norm": 1.1674191060660375, "learning_rate": 9.00975376906284e-06, "loss": 0.1616, "step": 7805 }, { "epoch": 0.2277262384036408, "grad_norm": 0.9705020625119505, "learning_rate": 9.009471523653742e-06, "loss": 0.1633, "step": 7806 }, { "epoch": 0.2277554116342844, "grad_norm": 0.7453195533992221, "learning_rate": 9.009189242449034e-06, "loss": 0.1524, "step": 7807 }, { "epoch": 0.22778458486492795, "grad_norm": 0.8832157465935807, "learning_rate": 9.008906925451243e-06, "loss": 0.1605, "step": 7808 }, { "epoch": 0.2278137580955715, "grad_norm": 1.1004881089961562, "learning_rate": 9.008624572662888e-06, "loss": 0.181, "step": 7809 }, { "epoch": 0.22784293132621505, "grad_norm": 0.7527910908048386, "learning_rate": 9.00834218408649e-06, "loss": 0.1729, "step": 7810 }, { "epoch": 0.22787210455685863, "grad_norm": 0.7625762769082549, "learning_rate": 9.00805975972457e-06, "loss": 0.1585, "step": 7811 }, { "epoch": 0.2279012777875022, "grad_norm": 0.9127263062150434, "learning_rate": 9.007777299579649e-06, "loss": 0.1595, "step": 7812 }, { "epoch": 0.22793045101814574, "grad_norm": 0.8264397871512847, "learning_rate": 9.007494803654249e-06, "loss": 0.1672, "step": 7813 }, { "epoch": 0.22795962424878932, "grad_norm": 0.9024746727207784, "learning_rate": 9.007212271950892e-06, "loss": 0.1555, "step": 7814 }, { "epoch": 0.22798879747943288, "grad_norm": 0.7173904426501031, "learning_rate": 9.006929704472101e-06, "loss": 0.1411, "step": 7815 }, { "epoch": 0.22801797071007643, "grad_norm": 0.7899023405750852, "learning_rate": 9.006647101220398e-06, "loss": 0.1432, "step": 7816 }, { "epoch": 0.22804714394071998, "grad_norm": 1.022143416772875, "learning_rate": 9.006364462198306e-06, "loss": 0.1841, "step": 7817 }, { "epoch": 0.22807631717136356, "grad_norm": 0.9567311761695345, "learning_rate": 9.006081787408348e-06, "loss": 0.1545, "step": 7818 }, { "epoch": 0.22810549040200712, "grad_norm": 0.9918120469913401, "learning_rate": 9.005799076853048e-06, "loss": 0.1541, "step": 7819 }, { "epoch": 0.22813466363265067, "grad_norm": 0.9430094332135609, "learning_rate": 9.00551633053493e-06, "loss": 0.1706, "step": 7820 }, { "epoch": 0.22816383686329425, "grad_norm": 0.8282651104457783, "learning_rate": 9.005233548456518e-06, "loss": 0.1478, "step": 7821 }, { "epoch": 0.2281930100939378, "grad_norm": 1.0210830831906763, "learning_rate": 9.004950730620338e-06, "loss": 0.1705, "step": 7822 }, { "epoch": 0.22822218332458136, "grad_norm": 0.9474757314337756, "learning_rate": 9.004667877028915e-06, "loss": 0.1779, "step": 7823 }, { "epoch": 0.22825135655522494, "grad_norm": 0.8493551949810291, "learning_rate": 9.004384987684771e-06, "loss": 0.1598, "step": 7824 }, { "epoch": 0.2282805297858685, "grad_norm": 0.9577551249206481, "learning_rate": 9.004102062590437e-06, "loss": 0.1329, "step": 7825 }, { "epoch": 0.22830970301651204, "grad_norm": 0.9244488280811329, "learning_rate": 9.003819101748432e-06, "loss": 0.1712, "step": 7826 }, { "epoch": 0.2283388762471556, "grad_norm": 0.9143973487115579, "learning_rate": 9.003536105161288e-06, "loss": 0.1776, "step": 7827 }, { "epoch": 0.22836804947779918, "grad_norm": 0.9323695028120131, "learning_rate": 9.003253072831529e-06, "loss": 0.1707, "step": 7828 }, { "epoch": 0.22839722270844273, "grad_norm": 1.087380405820929, "learning_rate": 9.00297000476168e-06, "loss": 0.1712, "step": 7829 }, { "epoch": 0.22842639593908629, "grad_norm": 0.7641626718912142, "learning_rate": 9.002686900954275e-06, "loss": 0.1462, "step": 7830 }, { "epoch": 0.22845556916972987, "grad_norm": 0.8528325098822728, "learning_rate": 9.002403761411832e-06, "loss": 0.1595, "step": 7831 }, { "epoch": 0.22848474240037342, "grad_norm": 0.9642626444010138, "learning_rate": 9.002120586136887e-06, "loss": 0.1678, "step": 7832 }, { "epoch": 0.22851391563101697, "grad_norm": 0.9639017949911085, "learning_rate": 9.001837375131963e-06, "loss": 0.1442, "step": 7833 }, { "epoch": 0.22854308886166055, "grad_norm": 0.8683937244315321, "learning_rate": 9.00155412839959e-06, "loss": 0.1438, "step": 7834 }, { "epoch": 0.2285722620923041, "grad_norm": 0.7520997942617577, "learning_rate": 9.001270845942298e-06, "loss": 0.1417, "step": 7835 }, { "epoch": 0.22860143532294766, "grad_norm": 0.9781807377410655, "learning_rate": 9.000987527762614e-06, "loss": 0.1924, "step": 7836 }, { "epoch": 0.2286306085535912, "grad_norm": 0.839190644885023, "learning_rate": 9.000704173863071e-06, "loss": 0.1753, "step": 7837 }, { "epoch": 0.2286597817842348, "grad_norm": 0.6885528433710029, "learning_rate": 9.000420784246194e-06, "loss": 0.1635, "step": 7838 }, { "epoch": 0.22868895501487835, "grad_norm": 0.7794359405780938, "learning_rate": 9.000137358914516e-06, "loss": 0.1565, "step": 7839 }, { "epoch": 0.2287181282455219, "grad_norm": 0.7247611656491247, "learning_rate": 8.999853897870565e-06, "loss": 0.1522, "step": 7840 }, { "epoch": 0.22874730147616548, "grad_norm": 1.0571012770404538, "learning_rate": 8.999570401116874e-06, "loss": 0.1601, "step": 7841 }, { "epoch": 0.22877647470680904, "grad_norm": 0.9208776416346971, "learning_rate": 8.999286868655974e-06, "loss": 0.1723, "step": 7842 }, { "epoch": 0.2288056479374526, "grad_norm": 0.8248710677339088, "learning_rate": 8.999003300490396e-06, "loss": 0.15, "step": 7843 }, { "epoch": 0.22883482116809614, "grad_norm": 0.7639701251864959, "learning_rate": 8.99871969662267e-06, "loss": 0.1621, "step": 7844 }, { "epoch": 0.22886399439873972, "grad_norm": 1.0376494613981613, "learning_rate": 8.998436057055332e-06, "loss": 0.1783, "step": 7845 }, { "epoch": 0.22889316762938328, "grad_norm": 0.7757586736623177, "learning_rate": 8.998152381790907e-06, "loss": 0.1491, "step": 7846 }, { "epoch": 0.22892234086002683, "grad_norm": 0.8881310885196493, "learning_rate": 8.997868670831935e-06, "loss": 0.1548, "step": 7847 }, { "epoch": 0.2289515140906704, "grad_norm": 0.7430788008706929, "learning_rate": 8.997584924180945e-06, "loss": 0.1647, "step": 7848 }, { "epoch": 0.22898068732131396, "grad_norm": 0.8635293715316253, "learning_rate": 8.99730114184047e-06, "loss": 0.1621, "step": 7849 }, { "epoch": 0.22900986055195752, "grad_norm": 0.78795988793354, "learning_rate": 8.997017323813046e-06, "loss": 0.1434, "step": 7850 }, { "epoch": 0.2290390337826011, "grad_norm": 0.7522582486832059, "learning_rate": 8.996733470101204e-06, "loss": 0.1809, "step": 7851 }, { "epoch": 0.22906820701324465, "grad_norm": 0.731333929964769, "learning_rate": 8.99644958070748e-06, "loss": 0.1564, "step": 7852 }, { "epoch": 0.2290973802438882, "grad_norm": 0.8006664639301642, "learning_rate": 8.99616565563441e-06, "loss": 0.1587, "step": 7853 }, { "epoch": 0.22912655347453176, "grad_norm": 0.7840244405115172, "learning_rate": 8.995881694884526e-06, "loss": 0.1699, "step": 7854 }, { "epoch": 0.22915572670517534, "grad_norm": 0.7834716981915015, "learning_rate": 8.995597698460364e-06, "loss": 0.144, "step": 7855 }, { "epoch": 0.2291848999358189, "grad_norm": 0.7539945566652915, "learning_rate": 8.99531366636446e-06, "loss": 0.1868, "step": 7856 }, { "epoch": 0.22921407316646245, "grad_norm": 0.8070913680316629, "learning_rate": 8.99502959859935e-06, "loss": 0.1511, "step": 7857 }, { "epoch": 0.22924324639710603, "grad_norm": 0.7146344547838658, "learning_rate": 8.994745495167567e-06, "loss": 0.1549, "step": 7858 }, { "epoch": 0.22927241962774958, "grad_norm": 0.8230625572382745, "learning_rate": 8.994461356071651e-06, "loss": 0.1788, "step": 7859 }, { "epoch": 0.22930159285839313, "grad_norm": 0.9757606580104359, "learning_rate": 8.99417718131414e-06, "loss": 0.1405, "step": 7860 }, { "epoch": 0.2293307660890367, "grad_norm": 0.7860021861901404, "learning_rate": 8.993892970897564e-06, "loss": 0.1302, "step": 7861 }, { "epoch": 0.22935993931968027, "grad_norm": 0.8665656633073543, "learning_rate": 8.993608724824467e-06, "loss": 0.1536, "step": 7862 }, { "epoch": 0.22938911255032382, "grad_norm": 0.7053428463513204, "learning_rate": 8.993324443097387e-06, "loss": 0.1569, "step": 7863 }, { "epoch": 0.22941828578096737, "grad_norm": 0.8591552881685272, "learning_rate": 8.993040125718857e-06, "loss": 0.1651, "step": 7864 }, { "epoch": 0.22944745901161095, "grad_norm": 0.8655036985855517, "learning_rate": 8.992755772691418e-06, "loss": 0.1622, "step": 7865 }, { "epoch": 0.2294766322422545, "grad_norm": 0.8914369730508105, "learning_rate": 8.99247138401761e-06, "loss": 0.1664, "step": 7866 }, { "epoch": 0.22950580547289806, "grad_norm": 0.941520779873375, "learning_rate": 8.99218695969997e-06, "loss": 0.1658, "step": 7867 }, { "epoch": 0.22953497870354164, "grad_norm": 0.9019841936882262, "learning_rate": 8.991902499741036e-06, "loss": 0.1598, "step": 7868 }, { "epoch": 0.2295641519341852, "grad_norm": 1.0984679069219512, "learning_rate": 8.991618004143353e-06, "loss": 0.1542, "step": 7869 }, { "epoch": 0.22959332516482875, "grad_norm": 30.538307535441056, "learning_rate": 8.991333472909455e-06, "loss": 0.2469, "step": 7870 }, { "epoch": 0.2296224983954723, "grad_norm": 0.9871575245444739, "learning_rate": 8.991048906041884e-06, "loss": 0.1521, "step": 7871 }, { "epoch": 0.22965167162611588, "grad_norm": 7.0030216908220515, "learning_rate": 8.990764303543183e-06, "loss": 0.1661, "step": 7872 }, { "epoch": 0.22968084485675944, "grad_norm": 0.8611120724498316, "learning_rate": 8.99047966541589e-06, "loss": 0.2029, "step": 7873 }, { "epoch": 0.229710018087403, "grad_norm": 1.0826007546623617, "learning_rate": 8.990194991662547e-06, "loss": 0.1964, "step": 7874 }, { "epoch": 0.22973919131804657, "grad_norm": 0.9013871563643052, "learning_rate": 8.989910282285696e-06, "loss": 0.1797, "step": 7875 }, { "epoch": 0.22976836454869012, "grad_norm": 0.6976822409689709, "learning_rate": 8.989625537287879e-06, "loss": 0.1487, "step": 7876 }, { "epoch": 0.22979753777933368, "grad_norm": 0.8141668230560807, "learning_rate": 8.989340756671637e-06, "loss": 0.1594, "step": 7877 }, { "epoch": 0.22982671100997726, "grad_norm": 0.797237042164959, "learning_rate": 8.989055940439513e-06, "loss": 0.1853, "step": 7878 }, { "epoch": 0.2298558842406208, "grad_norm": 0.8427973492284777, "learning_rate": 8.98877108859405e-06, "loss": 0.1854, "step": 7879 }, { "epoch": 0.22988505747126436, "grad_norm": 0.7827418496927713, "learning_rate": 8.98848620113779e-06, "loss": 0.1588, "step": 7880 }, { "epoch": 0.22991423070190792, "grad_norm": 0.8137145924122848, "learning_rate": 8.988201278073279e-06, "loss": 0.1877, "step": 7881 }, { "epoch": 0.2299434039325515, "grad_norm": 0.7185630838548984, "learning_rate": 8.987916319403058e-06, "loss": 0.145, "step": 7882 }, { "epoch": 0.22997257716319505, "grad_norm": 0.6945072273188773, "learning_rate": 8.987631325129672e-06, "loss": 0.1573, "step": 7883 }, { "epoch": 0.2300017503938386, "grad_norm": 0.6783951637049451, "learning_rate": 8.987346295255665e-06, "loss": 0.1542, "step": 7884 }, { "epoch": 0.2300309236244822, "grad_norm": 0.8143489706937942, "learning_rate": 8.987061229783583e-06, "loss": 0.1544, "step": 7885 }, { "epoch": 0.23006009685512574, "grad_norm": 0.9324773063743967, "learning_rate": 8.98677612871597e-06, "loss": 0.1507, "step": 7886 }, { "epoch": 0.2300892700857693, "grad_norm": 0.8604850345630668, "learning_rate": 8.986490992055371e-06, "loss": 0.1712, "step": 7887 }, { "epoch": 0.23011844331641285, "grad_norm": 0.9366105297218565, "learning_rate": 8.986205819804332e-06, "loss": 0.1654, "step": 7888 }, { "epoch": 0.23014761654705643, "grad_norm": 0.7698870467790884, "learning_rate": 8.9859206119654e-06, "loss": 0.1563, "step": 7889 }, { "epoch": 0.23017678977769998, "grad_norm": 0.8892805451272776, "learning_rate": 8.98563536854112e-06, "loss": 0.1482, "step": 7890 }, { "epoch": 0.23020596300834353, "grad_norm": 0.831349679074709, "learning_rate": 8.985350089534039e-06, "loss": 0.1876, "step": 7891 }, { "epoch": 0.23023513623898711, "grad_norm": 0.9095725014546474, "learning_rate": 8.985064774946704e-06, "loss": 0.1659, "step": 7892 }, { "epoch": 0.23026430946963067, "grad_norm": 0.6884617720193952, "learning_rate": 8.98477942478166e-06, "loss": 0.1585, "step": 7893 }, { "epoch": 0.23029348270027422, "grad_norm": 0.7678957873083491, "learning_rate": 8.984494039041458e-06, "loss": 0.156, "step": 7894 }, { "epoch": 0.2303226559309178, "grad_norm": 0.8958766686510108, "learning_rate": 8.984208617728645e-06, "loss": 0.1651, "step": 7895 }, { "epoch": 0.23035182916156136, "grad_norm": 0.7220839846605178, "learning_rate": 8.983923160845766e-06, "loss": 0.1646, "step": 7896 }, { "epoch": 0.2303810023922049, "grad_norm": 0.7298317309624487, "learning_rate": 8.983637668395375e-06, "loss": 0.1296, "step": 7897 }, { "epoch": 0.23041017562284846, "grad_norm": 0.9141541064688687, "learning_rate": 8.983352140380017e-06, "loss": 0.1598, "step": 7898 }, { "epoch": 0.23043934885349204, "grad_norm": 0.867744659092262, "learning_rate": 8.983066576802241e-06, "loss": 0.1861, "step": 7899 }, { "epoch": 0.2304685220841356, "grad_norm": 0.9990059552793454, "learning_rate": 8.9827809776646e-06, "loss": 0.1694, "step": 7900 }, { "epoch": 0.23049769531477915, "grad_norm": 0.8735981646512109, "learning_rate": 8.98249534296964e-06, "loss": 0.1724, "step": 7901 }, { "epoch": 0.23052686854542273, "grad_norm": 0.8882672213679386, "learning_rate": 8.98220967271991e-06, "loss": 0.1522, "step": 7902 }, { "epoch": 0.23055604177606628, "grad_norm": 0.9497935493498831, "learning_rate": 8.981923966917965e-06, "loss": 0.1771, "step": 7903 }, { "epoch": 0.23058521500670984, "grad_norm": 0.8874610411280186, "learning_rate": 8.981638225566352e-06, "loss": 0.1736, "step": 7904 }, { "epoch": 0.23061438823735342, "grad_norm": 0.9846342420793052, "learning_rate": 8.981352448667625e-06, "loss": 0.159, "step": 7905 }, { "epoch": 0.23064356146799697, "grad_norm": 0.9571905763581517, "learning_rate": 8.981066636224334e-06, "loss": 0.1644, "step": 7906 }, { "epoch": 0.23067273469864052, "grad_norm": 1.000066371424318, "learning_rate": 8.980780788239029e-06, "loss": 0.1565, "step": 7907 }, { "epoch": 0.23070190792928408, "grad_norm": 0.8388845856794662, "learning_rate": 8.980494904714263e-06, "loss": 0.1396, "step": 7908 }, { "epoch": 0.23073108115992766, "grad_norm": 1.1958278189996134, "learning_rate": 8.98020898565259e-06, "loss": 0.1526, "step": 7909 }, { "epoch": 0.2307602543905712, "grad_norm": 0.8659713240342617, "learning_rate": 8.979923031056561e-06, "loss": 0.1575, "step": 7910 }, { "epoch": 0.23078942762121477, "grad_norm": 0.8888592808462227, "learning_rate": 8.979637040928728e-06, "loss": 0.1713, "step": 7911 }, { "epoch": 0.23081860085185835, "grad_norm": 1.0438234571076548, "learning_rate": 8.979351015271648e-06, "loss": 0.1779, "step": 7912 }, { "epoch": 0.2308477740825019, "grad_norm": 0.9340396543834354, "learning_rate": 8.979064954087871e-06, "loss": 0.1784, "step": 7913 }, { "epoch": 0.23087694731314545, "grad_norm": 0.8961219508440874, "learning_rate": 8.97877885737995e-06, "loss": 0.1523, "step": 7914 }, { "epoch": 0.230906120543789, "grad_norm": 0.9385047700926765, "learning_rate": 8.978492725150444e-06, "loss": 0.1603, "step": 7915 }, { "epoch": 0.2309352937744326, "grad_norm": 1.1180766056341944, "learning_rate": 8.978206557401903e-06, "loss": 0.1799, "step": 7916 }, { "epoch": 0.23096446700507614, "grad_norm": 0.8924194484470929, "learning_rate": 8.977920354136885e-06, "loss": 0.1895, "step": 7917 }, { "epoch": 0.2309936402357197, "grad_norm": 0.7494284756242796, "learning_rate": 8.977634115357942e-06, "loss": 0.1388, "step": 7918 }, { "epoch": 0.23102281346636327, "grad_norm": 0.7633926722825112, "learning_rate": 8.977347841067631e-06, "loss": 0.1608, "step": 7919 }, { "epoch": 0.23105198669700683, "grad_norm": 0.8638611396990575, "learning_rate": 8.97706153126851e-06, "loss": 0.1641, "step": 7920 }, { "epoch": 0.23108115992765038, "grad_norm": 0.780354734871524, "learning_rate": 8.976775185963131e-06, "loss": 0.1378, "step": 7921 }, { "epoch": 0.23111033315829396, "grad_norm": 0.8743108969033693, "learning_rate": 8.976488805154054e-06, "loss": 0.193, "step": 7922 }, { "epoch": 0.23113950638893752, "grad_norm": 0.8404789222654602, "learning_rate": 8.976202388843833e-06, "loss": 0.1547, "step": 7923 }, { "epoch": 0.23116867961958107, "grad_norm": 0.858721831932792, "learning_rate": 8.975915937035029e-06, "loss": 0.1795, "step": 7924 }, { "epoch": 0.23119785285022462, "grad_norm": 0.8571207713848548, "learning_rate": 8.975629449730194e-06, "loss": 0.1759, "step": 7925 }, { "epoch": 0.2312270260808682, "grad_norm": 0.9479269031813717, "learning_rate": 8.975342926931888e-06, "loss": 0.16, "step": 7926 }, { "epoch": 0.23125619931151176, "grad_norm": 0.8689659096373238, "learning_rate": 8.97505636864267e-06, "loss": 0.1819, "step": 7927 }, { "epoch": 0.2312853725421553, "grad_norm": 0.7629244776263481, "learning_rate": 8.974769774865097e-06, "loss": 0.167, "step": 7928 }, { "epoch": 0.2313145457727989, "grad_norm": 0.8319382999924249, "learning_rate": 8.97448314560173e-06, "loss": 0.1928, "step": 7929 }, { "epoch": 0.23134371900344244, "grad_norm": 0.7636448846716699, "learning_rate": 8.974196480855126e-06, "loss": 0.1476, "step": 7930 }, { "epoch": 0.231372892234086, "grad_norm": 0.7868278619232064, "learning_rate": 8.973909780627845e-06, "loss": 0.1553, "step": 7931 }, { "epoch": 0.23140206546472955, "grad_norm": 1.783456156342513, "learning_rate": 8.973623044922444e-06, "loss": 0.1468, "step": 7932 }, { "epoch": 0.23143123869537313, "grad_norm": 0.7361392006785855, "learning_rate": 8.973336273741487e-06, "loss": 0.1935, "step": 7933 }, { "epoch": 0.23146041192601668, "grad_norm": 0.6799103841962983, "learning_rate": 8.973049467087531e-06, "loss": 0.1595, "step": 7934 }, { "epoch": 0.23148958515666024, "grad_norm": 0.8197974864462231, "learning_rate": 8.972762624963139e-06, "loss": 0.1556, "step": 7935 }, { "epoch": 0.23151875838730382, "grad_norm": 0.9292101613609204, "learning_rate": 8.972475747370869e-06, "loss": 0.1486, "step": 7936 }, { "epoch": 0.23154793161794737, "grad_norm": 0.8351032308641211, "learning_rate": 8.972188834313285e-06, "loss": 0.1508, "step": 7937 }, { "epoch": 0.23157710484859093, "grad_norm": 0.8236472852016963, "learning_rate": 8.971901885792947e-06, "loss": 0.1729, "step": 7938 }, { "epoch": 0.2316062780792345, "grad_norm": 0.754782621058893, "learning_rate": 8.971614901812417e-06, "loss": 0.1546, "step": 7939 }, { "epoch": 0.23163545130987806, "grad_norm": 0.8184346728606533, "learning_rate": 8.971327882374257e-06, "loss": 0.1498, "step": 7940 }, { "epoch": 0.2316646245405216, "grad_norm": 0.7842135080527304, "learning_rate": 8.97104082748103e-06, "loss": 0.1587, "step": 7941 }, { "epoch": 0.23169379777116517, "grad_norm": 0.8003555749338849, "learning_rate": 8.970753737135298e-06, "loss": 0.1545, "step": 7942 }, { "epoch": 0.23172297100180875, "grad_norm": 0.9322813589322437, "learning_rate": 8.970466611339625e-06, "loss": 0.1682, "step": 7943 }, { "epoch": 0.2317521442324523, "grad_norm": 0.7959947942319586, "learning_rate": 8.970179450096574e-06, "loss": 0.1509, "step": 7944 }, { "epoch": 0.23178131746309585, "grad_norm": 0.7854743779984855, "learning_rate": 8.96989225340871e-06, "loss": 0.1536, "step": 7945 }, { "epoch": 0.23181049069373943, "grad_norm": 1.0302643387878918, "learning_rate": 8.969605021278594e-06, "loss": 0.1548, "step": 7946 }, { "epoch": 0.231839663924383, "grad_norm": 0.8798981554529599, "learning_rate": 8.969317753708792e-06, "loss": 0.1501, "step": 7947 }, { "epoch": 0.23186883715502654, "grad_norm": 0.8036381869669064, "learning_rate": 8.96903045070187e-06, "loss": 0.1587, "step": 7948 }, { "epoch": 0.23189801038567012, "grad_norm": 0.981191173155209, "learning_rate": 8.968743112260389e-06, "loss": 0.1757, "step": 7949 }, { "epoch": 0.23192718361631368, "grad_norm": 0.9158301363845617, "learning_rate": 8.968455738386919e-06, "loss": 0.1604, "step": 7950 }, { "epoch": 0.23195635684695723, "grad_norm": 1.0966531889290108, "learning_rate": 8.968168329084022e-06, "loss": 0.1801, "step": 7951 }, { "epoch": 0.23198553007760078, "grad_norm": 0.9422267898448748, "learning_rate": 8.967880884354267e-06, "loss": 0.1644, "step": 7952 }, { "epoch": 0.23201470330824436, "grad_norm": 0.7561762332547298, "learning_rate": 8.967593404200219e-06, "loss": 0.1493, "step": 7953 }, { "epoch": 0.23204387653888792, "grad_norm": 1.1758815200025448, "learning_rate": 8.967305888624442e-06, "loss": 0.1709, "step": 7954 }, { "epoch": 0.23207304976953147, "grad_norm": 0.9566726405483177, "learning_rate": 8.967018337629508e-06, "loss": 0.1836, "step": 7955 }, { "epoch": 0.23210222300017505, "grad_norm": 0.7946192356490421, "learning_rate": 8.966730751217978e-06, "loss": 0.1681, "step": 7956 }, { "epoch": 0.2321313962308186, "grad_norm": 0.9160009772815291, "learning_rate": 8.966443129392426e-06, "loss": 0.1519, "step": 7957 }, { "epoch": 0.23216056946146216, "grad_norm": 1.0918327574228717, "learning_rate": 8.966155472155414e-06, "loss": 0.1702, "step": 7958 }, { "epoch": 0.2321897426921057, "grad_norm": 0.7626379469549405, "learning_rate": 8.965867779509513e-06, "loss": 0.155, "step": 7959 }, { "epoch": 0.2322189159227493, "grad_norm": 0.8618726573903422, "learning_rate": 8.965580051457292e-06, "loss": 0.1588, "step": 7960 }, { "epoch": 0.23224808915339284, "grad_norm": 0.9285347313650374, "learning_rate": 8.96529228800132e-06, "loss": 0.1753, "step": 7961 }, { "epoch": 0.2322772623840364, "grad_norm": 0.8710843645255506, "learning_rate": 8.965004489144165e-06, "loss": 0.1694, "step": 7962 }, { "epoch": 0.23230643561467998, "grad_norm": 0.7944912694806492, "learning_rate": 8.964716654888395e-06, "loss": 0.1671, "step": 7963 }, { "epoch": 0.23233560884532353, "grad_norm": 0.805655434799871, "learning_rate": 8.964428785236581e-06, "loss": 0.1552, "step": 7964 }, { "epoch": 0.23236478207596709, "grad_norm": 0.8562198493417296, "learning_rate": 8.964140880191294e-06, "loss": 0.1476, "step": 7965 }, { "epoch": 0.23239395530661067, "grad_norm": 0.7119304754090852, "learning_rate": 8.963852939755104e-06, "loss": 0.1662, "step": 7966 }, { "epoch": 0.23242312853725422, "grad_norm": 0.9471470810961862, "learning_rate": 8.96356496393058e-06, "loss": 0.1595, "step": 7967 }, { "epoch": 0.23245230176789777, "grad_norm": 0.802465025689359, "learning_rate": 8.963276952720294e-06, "loss": 0.1747, "step": 7968 }, { "epoch": 0.23248147499854133, "grad_norm": 0.653049839552109, "learning_rate": 8.96298890612682e-06, "loss": 0.1743, "step": 7969 }, { "epoch": 0.2325106482291849, "grad_norm": 0.7449545503708133, "learning_rate": 8.962700824152724e-06, "loss": 0.1519, "step": 7970 }, { "epoch": 0.23253982145982846, "grad_norm": 0.859759788278501, "learning_rate": 8.962412706800583e-06, "loss": 0.1643, "step": 7971 }, { "epoch": 0.23256899469047201, "grad_norm": 0.7912390424457756, "learning_rate": 8.962124554072966e-06, "loss": 0.1623, "step": 7972 }, { "epoch": 0.2325981679211156, "grad_norm": 0.96985934893583, "learning_rate": 8.961836365972448e-06, "loss": 0.1567, "step": 7973 }, { "epoch": 0.23262734115175915, "grad_norm": 0.8698070988113134, "learning_rate": 8.9615481425016e-06, "loss": 0.1413, "step": 7974 }, { "epoch": 0.2326565143824027, "grad_norm": 1.1161478727879717, "learning_rate": 8.961259883662997e-06, "loss": 0.1888, "step": 7975 }, { "epoch": 0.23268568761304628, "grad_norm": 0.9863245588204235, "learning_rate": 8.960971589459208e-06, "loss": 0.1671, "step": 7976 }, { "epoch": 0.23271486084368984, "grad_norm": 1.340980192673474, "learning_rate": 8.960683259892813e-06, "loss": 0.1938, "step": 7977 }, { "epoch": 0.2327440340743334, "grad_norm": 1.0935669382378899, "learning_rate": 8.960394894966383e-06, "loss": 0.1596, "step": 7978 }, { "epoch": 0.23277320730497694, "grad_norm": 0.7201980386984678, "learning_rate": 8.960106494682492e-06, "loss": 0.155, "step": 7979 }, { "epoch": 0.23280238053562052, "grad_norm": 0.9488219835295263, "learning_rate": 8.959818059043717e-06, "loss": 0.1618, "step": 7980 }, { "epoch": 0.23283155376626408, "grad_norm": 1.0172603853180795, "learning_rate": 8.959529588052631e-06, "loss": 0.1788, "step": 7981 }, { "epoch": 0.23286072699690763, "grad_norm": 0.7545652274753679, "learning_rate": 8.959241081711811e-06, "loss": 0.1527, "step": 7982 }, { "epoch": 0.2328899002275512, "grad_norm": 0.9252378447180483, "learning_rate": 8.95895254002383e-06, "loss": 0.1716, "step": 7983 }, { "epoch": 0.23291907345819476, "grad_norm": 0.7860081958102194, "learning_rate": 8.958663962991265e-06, "loss": 0.157, "step": 7984 }, { "epoch": 0.23294824668883832, "grad_norm": 1.022959670385792, "learning_rate": 8.958375350616695e-06, "loss": 0.1515, "step": 7985 }, { "epoch": 0.23297741991948187, "grad_norm": 0.8943316343739773, "learning_rate": 8.958086702902695e-06, "loss": 0.1801, "step": 7986 }, { "epoch": 0.23300659315012545, "grad_norm": 1.0773939516632338, "learning_rate": 8.957798019851842e-06, "loss": 0.1685, "step": 7987 }, { "epoch": 0.233035766380769, "grad_norm": 0.9049791383744568, "learning_rate": 8.957509301466712e-06, "loss": 0.1691, "step": 7988 }, { "epoch": 0.23306493961141256, "grad_norm": 0.8968414027056072, "learning_rate": 8.957220547749884e-06, "loss": 0.1465, "step": 7989 }, { "epoch": 0.23309411284205614, "grad_norm": 0.9636828632979421, "learning_rate": 8.956931758703935e-06, "loss": 0.1811, "step": 7990 }, { "epoch": 0.2331232860726997, "grad_norm": 0.9914752249113326, "learning_rate": 8.956642934331446e-06, "loss": 0.1709, "step": 7991 }, { "epoch": 0.23315245930334325, "grad_norm": 1.088550261313748, "learning_rate": 8.956354074634992e-06, "loss": 0.1863, "step": 7992 }, { "epoch": 0.23318163253398683, "grad_norm": 1.1213752797523344, "learning_rate": 8.956065179617153e-06, "loss": 0.1504, "step": 7993 }, { "epoch": 0.23321080576463038, "grad_norm": 1.5068470923028956, "learning_rate": 8.955776249280508e-06, "loss": 0.1528, "step": 7994 }, { "epoch": 0.23323997899527393, "grad_norm": 1.0904897897433061, "learning_rate": 8.955487283627638e-06, "loss": 0.2054, "step": 7995 }, { "epoch": 0.2332691522259175, "grad_norm": 1.0699673947294661, "learning_rate": 8.955198282661122e-06, "loss": 0.1348, "step": 7996 }, { "epoch": 0.23329832545656107, "grad_norm": 0.7775833248102009, "learning_rate": 8.954909246383539e-06, "loss": 0.1811, "step": 7997 }, { "epoch": 0.23332749868720462, "grad_norm": 0.9471724998179966, "learning_rate": 8.95462017479747e-06, "loss": 0.1642, "step": 7998 }, { "epoch": 0.23335667191784817, "grad_norm": 0.8766472407340916, "learning_rate": 8.954331067905498e-06, "loss": 0.1598, "step": 7999 }, { "epoch": 0.23338584514849176, "grad_norm": 0.7779864407460095, "learning_rate": 8.9540419257102e-06, "loss": 0.1601, "step": 8000 }, { "epoch": 0.2334150183791353, "grad_norm": 0.8068738332383827, "learning_rate": 8.953752748214161e-06, "loss": 0.1677, "step": 8001 }, { "epoch": 0.23344419160977886, "grad_norm": 0.9702547364467246, "learning_rate": 8.953463535419962e-06, "loss": 0.1731, "step": 8002 }, { "epoch": 0.23347336484042241, "grad_norm": 0.8231585920754951, "learning_rate": 8.953174287330182e-06, "loss": 0.1867, "step": 8003 }, { "epoch": 0.233502538071066, "grad_norm": 0.7479409714255466, "learning_rate": 8.952885003947407e-06, "loss": 0.1697, "step": 8004 }, { "epoch": 0.23353171130170955, "grad_norm": 0.7061660965729089, "learning_rate": 8.95259568527422e-06, "loss": 0.1417, "step": 8005 }, { "epoch": 0.2335608845323531, "grad_norm": 1.0359937169217408, "learning_rate": 8.952306331313199e-06, "loss": 0.2055, "step": 8006 }, { "epoch": 0.23359005776299668, "grad_norm": 0.8176126070362488, "learning_rate": 8.952016942066932e-06, "loss": 0.1684, "step": 8007 }, { "epoch": 0.23361923099364024, "grad_norm": 1.0809415238229225, "learning_rate": 8.951727517538001e-06, "loss": 0.1641, "step": 8008 }, { "epoch": 0.2336484042242838, "grad_norm": 0.8048304969169773, "learning_rate": 8.951438057728991e-06, "loss": 0.1588, "step": 8009 }, { "epoch": 0.23367757745492737, "grad_norm": 0.9538286575976014, "learning_rate": 8.951148562642485e-06, "loss": 0.1657, "step": 8010 }, { "epoch": 0.23370675068557092, "grad_norm": 1.1010655732932972, "learning_rate": 8.950859032281068e-06, "loss": 0.1975, "step": 8011 }, { "epoch": 0.23373592391621448, "grad_norm": 0.8423336991321413, "learning_rate": 8.950569466647322e-06, "loss": 0.1913, "step": 8012 }, { "epoch": 0.23376509714685803, "grad_norm": 0.9953928472588061, "learning_rate": 8.950279865743838e-06, "loss": 0.1685, "step": 8013 }, { "epoch": 0.2337942703775016, "grad_norm": 0.7296554387113371, "learning_rate": 8.949990229573198e-06, "loss": 0.1487, "step": 8014 }, { "epoch": 0.23382344360814517, "grad_norm": 0.7998938992446264, "learning_rate": 8.949700558137986e-06, "loss": 0.145, "step": 8015 }, { "epoch": 0.23385261683878872, "grad_norm": 0.8282895543551169, "learning_rate": 8.949410851440793e-06, "loss": 0.1876, "step": 8016 }, { "epoch": 0.2338817900694323, "grad_norm": 0.7432614557343605, "learning_rate": 8.949121109484202e-06, "loss": 0.1445, "step": 8017 }, { "epoch": 0.23391096330007585, "grad_norm": 0.827381807714969, "learning_rate": 8.9488313322708e-06, "loss": 0.1824, "step": 8018 }, { "epoch": 0.2339401365307194, "grad_norm": 0.7444319638690382, "learning_rate": 8.948541519803174e-06, "loss": 0.1404, "step": 8019 }, { "epoch": 0.233969309761363, "grad_norm": 0.7989216545674781, "learning_rate": 8.948251672083913e-06, "loss": 0.1606, "step": 8020 }, { "epoch": 0.23399848299200654, "grad_norm": 0.8655703677849157, "learning_rate": 8.947961789115602e-06, "loss": 0.1628, "step": 8021 }, { "epoch": 0.2340276562226501, "grad_norm": 0.878427661366001, "learning_rate": 8.947671870900833e-06, "loss": 0.1626, "step": 8022 }, { "epoch": 0.23405682945329365, "grad_norm": 0.7137478691908138, "learning_rate": 8.94738191744219e-06, "loss": 0.1424, "step": 8023 }, { "epoch": 0.23408600268393723, "grad_norm": 0.9131009998198611, "learning_rate": 8.947091928742265e-06, "loss": 0.174, "step": 8024 }, { "epoch": 0.23411517591458078, "grad_norm": 0.8295889966807191, "learning_rate": 8.946801904803643e-06, "loss": 0.157, "step": 8025 }, { "epoch": 0.23414434914522433, "grad_norm": 0.679940778230621, "learning_rate": 8.946511845628917e-06, "loss": 0.1654, "step": 8026 }, { "epoch": 0.23417352237586792, "grad_norm": 0.6610192945988974, "learning_rate": 8.946221751220676e-06, "loss": 0.1355, "step": 8027 }, { "epoch": 0.23420269560651147, "grad_norm": 3.442276636815862, "learning_rate": 8.945931621581511e-06, "loss": 0.1647, "step": 8028 }, { "epoch": 0.23423186883715502, "grad_norm": 0.8839468671166052, "learning_rate": 8.945641456714007e-06, "loss": 0.1424, "step": 8029 }, { "epoch": 0.23426104206779857, "grad_norm": 0.8889270339695862, "learning_rate": 8.94535125662076e-06, "loss": 0.1647, "step": 8030 }, { "epoch": 0.23429021529844216, "grad_norm": 1.0277466748950153, "learning_rate": 8.94506102130436e-06, "loss": 0.1728, "step": 8031 }, { "epoch": 0.2343193885290857, "grad_norm": 0.8233028025233522, "learning_rate": 8.944770750767393e-06, "loss": 0.1326, "step": 8032 }, { "epoch": 0.23434856175972926, "grad_norm": 0.7082904575322121, "learning_rate": 8.944480445012458e-06, "loss": 0.1366, "step": 8033 }, { "epoch": 0.23437773499037284, "grad_norm": 0.7853682305585855, "learning_rate": 8.94419010404214e-06, "loss": 0.1695, "step": 8034 }, { "epoch": 0.2344069082210164, "grad_norm": 0.805043308464724, "learning_rate": 8.943899727859038e-06, "loss": 0.1645, "step": 8035 }, { "epoch": 0.23443608145165995, "grad_norm": 0.8223772365652124, "learning_rate": 8.943609316465739e-06, "loss": 0.155, "step": 8036 }, { "epoch": 0.23446525468230353, "grad_norm": 0.8983760952745956, "learning_rate": 8.943318869864836e-06, "loss": 0.1343, "step": 8037 }, { "epoch": 0.23449442791294708, "grad_norm": 0.7157276833187896, "learning_rate": 8.943028388058925e-06, "loss": 0.1403, "step": 8038 }, { "epoch": 0.23452360114359064, "grad_norm": 0.7825010396865133, "learning_rate": 8.942737871050598e-06, "loss": 0.1321, "step": 8039 }, { "epoch": 0.2345527743742342, "grad_norm": 0.8076007038951147, "learning_rate": 8.942447318842449e-06, "loss": 0.1929, "step": 8040 }, { "epoch": 0.23458194760487777, "grad_norm": 0.8279545523036859, "learning_rate": 8.94215673143707e-06, "loss": 0.1736, "step": 8041 }, { "epoch": 0.23461112083552133, "grad_norm": 0.7908824169183024, "learning_rate": 8.941866108837058e-06, "loss": 0.1557, "step": 8042 }, { "epoch": 0.23464029406616488, "grad_norm": 0.8955458930943784, "learning_rate": 8.941575451045006e-06, "loss": 0.1511, "step": 8043 }, { "epoch": 0.23466946729680846, "grad_norm": 0.8377022011248544, "learning_rate": 8.941284758063508e-06, "loss": 0.1823, "step": 8044 }, { "epoch": 0.234698640527452, "grad_norm": 2.101691490561202, "learning_rate": 8.940994029895162e-06, "loss": 0.1541, "step": 8045 }, { "epoch": 0.23472781375809557, "grad_norm": 0.8641975686700145, "learning_rate": 8.940703266542561e-06, "loss": 0.1395, "step": 8046 }, { "epoch": 0.23475698698873912, "grad_norm": 0.7837571588736078, "learning_rate": 8.940412468008303e-06, "loss": 0.1741, "step": 8047 }, { "epoch": 0.2347861602193827, "grad_norm": 0.8798153568066842, "learning_rate": 8.940121634294983e-06, "loss": 0.1627, "step": 8048 }, { "epoch": 0.23481533345002625, "grad_norm": 0.8248862615696155, "learning_rate": 8.939830765405198e-06, "loss": 0.1569, "step": 8049 }, { "epoch": 0.2348445066806698, "grad_norm": 1.2675451855792133, "learning_rate": 8.939539861341544e-06, "loss": 0.1523, "step": 8050 }, { "epoch": 0.2348736799113134, "grad_norm": 0.8146104364588431, "learning_rate": 8.939248922106618e-06, "loss": 0.1699, "step": 8051 }, { "epoch": 0.23490285314195694, "grad_norm": 0.8045537304185456, "learning_rate": 8.938957947703019e-06, "loss": 0.1512, "step": 8052 }, { "epoch": 0.2349320263726005, "grad_norm": 0.9183869283521147, "learning_rate": 8.938666938133343e-06, "loss": 0.1859, "step": 8053 }, { "epoch": 0.23496119960324408, "grad_norm": 0.8897729225953223, "learning_rate": 8.938375893400189e-06, "loss": 0.1604, "step": 8054 }, { "epoch": 0.23499037283388763, "grad_norm": 0.7725779472047589, "learning_rate": 8.938084813506155e-06, "loss": 0.1525, "step": 8055 }, { "epoch": 0.23501954606453118, "grad_norm": 0.7989758230798651, "learning_rate": 8.937793698453841e-06, "loss": 0.1627, "step": 8056 }, { "epoch": 0.23504871929517474, "grad_norm": 0.8581415588098958, "learning_rate": 8.937502548245844e-06, "loss": 0.1553, "step": 8057 }, { "epoch": 0.23507789252581832, "grad_norm": 0.9810036306537716, "learning_rate": 8.937211362884764e-06, "loss": 0.1514, "step": 8058 }, { "epoch": 0.23510706575646187, "grad_norm": 0.702512980123612, "learning_rate": 8.9369201423732e-06, "loss": 0.1743, "step": 8059 }, { "epoch": 0.23513623898710542, "grad_norm": 0.9888176350519338, "learning_rate": 8.936628886713754e-06, "loss": 0.1582, "step": 8060 }, { "epoch": 0.235165412217749, "grad_norm": 0.9156496686672432, "learning_rate": 8.936337595909024e-06, "loss": 0.1596, "step": 8061 }, { "epoch": 0.23519458544839256, "grad_norm": 0.7957213009908428, "learning_rate": 8.936046269961614e-06, "loss": 0.152, "step": 8062 }, { "epoch": 0.2352237586790361, "grad_norm": 0.9959649863010526, "learning_rate": 8.93575490887412e-06, "loss": 0.2111, "step": 8063 }, { "epoch": 0.2352529319096797, "grad_norm": 1.0618771730079335, "learning_rate": 8.935463512649147e-06, "loss": 0.1481, "step": 8064 }, { "epoch": 0.23528210514032324, "grad_norm": 0.8232608343299364, "learning_rate": 8.935172081289293e-06, "loss": 0.1599, "step": 8065 }, { "epoch": 0.2353112783709668, "grad_norm": 0.9889686791904171, "learning_rate": 8.934880614797166e-06, "loss": 0.154, "step": 8066 }, { "epoch": 0.23534045160161035, "grad_norm": 0.8758211257441239, "learning_rate": 8.934589113175363e-06, "loss": 0.1548, "step": 8067 }, { "epoch": 0.23536962483225393, "grad_norm": 0.861350412594102, "learning_rate": 8.934297576426487e-06, "loss": 0.1443, "step": 8068 }, { "epoch": 0.23539879806289749, "grad_norm": 0.8082345622066743, "learning_rate": 8.93400600455314e-06, "loss": 0.154, "step": 8069 }, { "epoch": 0.23542797129354104, "grad_norm": 1.0046091692315662, "learning_rate": 8.933714397557928e-06, "loss": 0.1865, "step": 8070 }, { "epoch": 0.23545714452418462, "grad_norm": 0.9800636252704941, "learning_rate": 8.933422755443453e-06, "loss": 0.1387, "step": 8071 }, { "epoch": 0.23548631775482817, "grad_norm": 6.8535591367862825, "learning_rate": 8.933131078212318e-06, "loss": 0.1666, "step": 8072 }, { "epoch": 0.23551549098547173, "grad_norm": 1.024408699368027, "learning_rate": 8.932839365867127e-06, "loss": 0.1554, "step": 8073 }, { "epoch": 0.23554466421611528, "grad_norm": 0.8963925660044842, "learning_rate": 8.932547618410486e-06, "loss": 0.1703, "step": 8074 }, { "epoch": 0.23557383744675886, "grad_norm": 0.7748710339714768, "learning_rate": 8.932255835845e-06, "loss": 0.1505, "step": 8075 }, { "epoch": 0.2356030106774024, "grad_norm": 3.705315847541083, "learning_rate": 8.931964018173272e-06, "loss": 0.1912, "step": 8076 }, { "epoch": 0.23563218390804597, "grad_norm": 0.830467918815383, "learning_rate": 8.931672165397907e-06, "loss": 0.1723, "step": 8077 }, { "epoch": 0.23566135713868955, "grad_norm": 0.784473307751724, "learning_rate": 8.931380277521511e-06, "loss": 0.1519, "step": 8078 }, { "epoch": 0.2356905303693331, "grad_norm": 0.9859581011216128, "learning_rate": 8.931088354546691e-06, "loss": 0.1603, "step": 8079 }, { "epoch": 0.23571970359997665, "grad_norm": 0.8925307079247121, "learning_rate": 8.930796396476051e-06, "loss": 0.176, "step": 8080 }, { "epoch": 0.23574887683062024, "grad_norm": 0.7635710131029193, "learning_rate": 8.930504403312201e-06, "loss": 0.1947, "step": 8081 }, { "epoch": 0.2357780500612638, "grad_norm": 0.8758109918925939, "learning_rate": 8.930212375057747e-06, "loss": 0.1627, "step": 8082 }, { "epoch": 0.23580722329190734, "grad_norm": 0.9129551107737623, "learning_rate": 8.929920311715293e-06, "loss": 0.1575, "step": 8083 }, { "epoch": 0.2358363965225509, "grad_norm": 0.6878454110352461, "learning_rate": 8.92962821328745e-06, "loss": 0.1373, "step": 8084 }, { "epoch": 0.23586556975319448, "grad_norm": 0.8301824701868347, "learning_rate": 8.929336079776822e-06, "loss": 0.1652, "step": 8085 }, { "epoch": 0.23589474298383803, "grad_norm": 0.8809332968442258, "learning_rate": 8.929043911186021e-06, "loss": 0.1528, "step": 8086 }, { "epoch": 0.23592391621448158, "grad_norm": 0.8597079340281912, "learning_rate": 8.928751707517655e-06, "loss": 0.1596, "step": 8087 }, { "epoch": 0.23595308944512516, "grad_norm": 0.8951525859435204, "learning_rate": 8.92845946877433e-06, "loss": 0.1846, "step": 8088 }, { "epoch": 0.23598226267576872, "grad_norm": 0.8740482636392535, "learning_rate": 8.92816719495866e-06, "loss": 0.1852, "step": 8089 }, { "epoch": 0.23601143590641227, "grad_norm": 0.7349794775328545, "learning_rate": 8.927874886073247e-06, "loss": 0.166, "step": 8090 }, { "epoch": 0.23604060913705585, "grad_norm": 0.9817526374427119, "learning_rate": 8.927582542120707e-06, "loss": 0.1689, "step": 8091 }, { "epoch": 0.2360697823676994, "grad_norm": 0.9985307535530884, "learning_rate": 8.927290163103646e-06, "loss": 0.1799, "step": 8092 }, { "epoch": 0.23609895559834296, "grad_norm": 0.7443694265539429, "learning_rate": 8.926997749024677e-06, "loss": 0.1252, "step": 8093 }, { "epoch": 0.2361281288289865, "grad_norm": 1.0655059388918109, "learning_rate": 8.926705299886408e-06, "loss": 0.1545, "step": 8094 }, { "epoch": 0.2361573020596301, "grad_norm": 0.7822390842209348, "learning_rate": 8.926412815691454e-06, "loss": 0.1596, "step": 8095 }, { "epoch": 0.23618647529027365, "grad_norm": 0.6411630374731159, "learning_rate": 8.926120296442421e-06, "loss": 0.1466, "step": 8096 }, { "epoch": 0.2362156485209172, "grad_norm": 0.7433286019275946, "learning_rate": 8.925827742141926e-06, "loss": 0.1542, "step": 8097 }, { "epoch": 0.23624482175156078, "grad_norm": 0.8266822077356416, "learning_rate": 8.925535152792577e-06, "loss": 0.1556, "step": 8098 }, { "epoch": 0.23627399498220433, "grad_norm": 0.8447328919697852, "learning_rate": 8.925242528396986e-06, "loss": 0.1962, "step": 8099 }, { "epoch": 0.23630316821284789, "grad_norm": 0.867242488085671, "learning_rate": 8.924949868957769e-06, "loss": 0.1705, "step": 8100 }, { "epoch": 0.23633234144349144, "grad_norm": 0.861202562787116, "learning_rate": 8.924657174477535e-06, "loss": 0.176, "step": 8101 }, { "epoch": 0.23636151467413502, "grad_norm": 0.8235656420001373, "learning_rate": 8.924364444958898e-06, "loss": 0.1737, "step": 8102 }, { "epoch": 0.23639068790477857, "grad_norm": 0.7579535439089422, "learning_rate": 8.924071680404474e-06, "loss": 0.177, "step": 8103 }, { "epoch": 0.23641986113542213, "grad_norm": 0.8908424358661887, "learning_rate": 8.923778880816874e-06, "loss": 0.1625, "step": 8104 }, { "epoch": 0.2364490343660657, "grad_norm": 0.9356466203247811, "learning_rate": 8.923486046198712e-06, "loss": 0.1823, "step": 8105 }, { "epoch": 0.23647820759670926, "grad_norm": 0.7974110500492371, "learning_rate": 8.923193176552604e-06, "loss": 0.1761, "step": 8106 }, { "epoch": 0.23650738082735281, "grad_norm": 0.9337107920765044, "learning_rate": 8.922900271881163e-06, "loss": 0.1369, "step": 8107 }, { "epoch": 0.2365365540579964, "grad_norm": 0.7518255024182023, "learning_rate": 8.922607332187005e-06, "loss": 0.1502, "step": 8108 }, { "epoch": 0.23656572728863995, "grad_norm": 0.8621728466275934, "learning_rate": 8.922314357472745e-06, "loss": 0.1581, "step": 8109 }, { "epoch": 0.2365949005192835, "grad_norm": 1.0851661029928277, "learning_rate": 8.922021347741e-06, "loss": 0.1499, "step": 8110 }, { "epoch": 0.23662407374992706, "grad_norm": 0.79383978632219, "learning_rate": 8.921728302994385e-06, "loss": 0.141, "step": 8111 }, { "epoch": 0.23665324698057064, "grad_norm": 0.8758827526338778, "learning_rate": 8.921435223235514e-06, "loss": 0.1548, "step": 8112 }, { "epoch": 0.2366824202112142, "grad_norm": 1.003926531533097, "learning_rate": 8.921142108467007e-06, "loss": 0.1923, "step": 8113 }, { "epoch": 0.23671159344185774, "grad_norm": 0.7985070966603274, "learning_rate": 8.920848958691479e-06, "loss": 0.1885, "step": 8114 }, { "epoch": 0.23674076667250132, "grad_norm": 0.8518616953368908, "learning_rate": 8.920555773911547e-06, "loss": 0.1591, "step": 8115 }, { "epoch": 0.23676993990314488, "grad_norm": 0.9892504056410393, "learning_rate": 8.920262554129828e-06, "loss": 0.1658, "step": 8116 }, { "epoch": 0.23679911313378843, "grad_norm": 0.9199013954920274, "learning_rate": 8.919969299348943e-06, "loss": 0.1509, "step": 8117 }, { "epoch": 0.23682828636443198, "grad_norm": 1.0071317843558194, "learning_rate": 8.919676009571508e-06, "loss": 0.1915, "step": 8118 }, { "epoch": 0.23685745959507556, "grad_norm": 0.8415660230966282, "learning_rate": 8.919382684800138e-06, "loss": 0.1676, "step": 8119 }, { "epoch": 0.23688663282571912, "grad_norm": 1.0731547107912802, "learning_rate": 8.919089325037457e-06, "loss": 0.1498, "step": 8120 }, { "epoch": 0.23691580605636267, "grad_norm": 0.9496190979236668, "learning_rate": 8.918795930286084e-06, "loss": 0.1551, "step": 8121 }, { "epoch": 0.23694497928700625, "grad_norm": 0.731065271012625, "learning_rate": 8.918502500548633e-06, "loss": 0.1679, "step": 8122 }, { "epoch": 0.2369741525176498, "grad_norm": 1.0046200625359758, "learning_rate": 8.91820903582773e-06, "loss": 0.1899, "step": 8123 }, { "epoch": 0.23700332574829336, "grad_norm": 1.064180628499699, "learning_rate": 8.91791553612599e-06, "loss": 0.1892, "step": 8124 }, { "epoch": 0.23703249897893694, "grad_norm": 0.7097016029175054, "learning_rate": 8.917622001446035e-06, "loss": 0.174, "step": 8125 }, { "epoch": 0.2370616722095805, "grad_norm": 0.9185933572329031, "learning_rate": 8.917328431790488e-06, "loss": 0.1612, "step": 8126 }, { "epoch": 0.23709084544022405, "grad_norm": 0.9006503602478779, "learning_rate": 8.917034827161969e-06, "loss": 0.1731, "step": 8127 }, { "epoch": 0.2371200186708676, "grad_norm": 0.8489097841051876, "learning_rate": 8.916741187563094e-06, "loss": 0.1305, "step": 8128 }, { "epoch": 0.23714919190151118, "grad_norm": 0.9279074904873551, "learning_rate": 8.91644751299649e-06, "loss": 0.1641, "step": 8129 }, { "epoch": 0.23717836513215473, "grad_norm": 0.6544628540452366, "learning_rate": 8.91615380346478e-06, "loss": 0.1335, "step": 8130 }, { "epoch": 0.2372075383627983, "grad_norm": 0.7925203958586841, "learning_rate": 8.915860058970582e-06, "loss": 0.1345, "step": 8131 }, { "epoch": 0.23723671159344187, "grad_norm": 0.7910895955053553, "learning_rate": 8.91556627951652e-06, "loss": 0.1621, "step": 8132 }, { "epoch": 0.23726588482408542, "grad_norm": 0.7104863015512473, "learning_rate": 8.915272465105218e-06, "loss": 0.1854, "step": 8133 }, { "epoch": 0.23729505805472897, "grad_norm": 1.0036378804966084, "learning_rate": 8.914978615739297e-06, "loss": 0.1868, "step": 8134 }, { "epoch": 0.23732423128537256, "grad_norm": 0.8674590168823036, "learning_rate": 8.914684731421382e-06, "loss": 0.1628, "step": 8135 }, { "epoch": 0.2373534045160161, "grad_norm": 0.8672015430406163, "learning_rate": 8.914390812154094e-06, "loss": 0.1769, "step": 8136 }, { "epoch": 0.23738257774665966, "grad_norm": 0.6706117907279768, "learning_rate": 8.914096857940062e-06, "loss": 0.161, "step": 8137 }, { "epoch": 0.23741175097730322, "grad_norm": 0.8967270688761205, "learning_rate": 8.913802868781907e-06, "loss": 0.1873, "step": 8138 }, { "epoch": 0.2374409242079468, "grad_norm": 0.7705670336362179, "learning_rate": 8.913508844682255e-06, "loss": 0.1524, "step": 8139 }, { "epoch": 0.23747009743859035, "grad_norm": 0.822662568821872, "learning_rate": 8.91321478564373e-06, "loss": 0.1529, "step": 8140 }, { "epoch": 0.2374992706692339, "grad_norm": 0.8236205950678361, "learning_rate": 8.912920691668957e-06, "loss": 0.1762, "step": 8141 }, { "epoch": 0.23752844389987748, "grad_norm": 0.8853210785081229, "learning_rate": 8.912626562760563e-06, "loss": 0.1395, "step": 8142 }, { "epoch": 0.23755761713052104, "grad_norm": 0.7586904563452705, "learning_rate": 8.912332398921171e-06, "loss": 0.1518, "step": 8143 }, { "epoch": 0.2375867903611646, "grad_norm": 0.8651132750866162, "learning_rate": 8.91203820015341e-06, "loss": 0.1772, "step": 8144 }, { "epoch": 0.23761596359180814, "grad_norm": 0.9343750341887734, "learning_rate": 8.911743966459908e-06, "loss": 0.1534, "step": 8145 }, { "epoch": 0.23764513682245172, "grad_norm": 0.8400181663770149, "learning_rate": 8.911449697843286e-06, "loss": 0.1513, "step": 8146 }, { "epoch": 0.23767431005309528, "grad_norm": 0.8103376553564225, "learning_rate": 8.911155394306177e-06, "loss": 0.1513, "step": 8147 }, { "epoch": 0.23770348328373883, "grad_norm": 1.040231236289833, "learning_rate": 8.910861055851208e-06, "loss": 0.1707, "step": 8148 }, { "epoch": 0.2377326565143824, "grad_norm": 0.7340572763322634, "learning_rate": 8.910566682481001e-06, "loss": 0.1475, "step": 8149 }, { "epoch": 0.23776182974502597, "grad_norm": 0.8822008953144147, "learning_rate": 8.91027227419819e-06, "loss": 0.147, "step": 8150 }, { "epoch": 0.23779100297566952, "grad_norm": 0.7228118620003549, "learning_rate": 8.909977831005403e-06, "loss": 0.1408, "step": 8151 }, { "epoch": 0.2378201762063131, "grad_norm": 1.1023290703753612, "learning_rate": 8.909683352905267e-06, "loss": 0.1888, "step": 8152 }, { "epoch": 0.23784934943695665, "grad_norm": 0.8220589699407617, "learning_rate": 8.90938883990041e-06, "loss": 0.1543, "step": 8153 }, { "epoch": 0.2378785226676002, "grad_norm": 0.9489623908894284, "learning_rate": 8.909094291993464e-06, "loss": 0.157, "step": 8154 }, { "epoch": 0.23790769589824376, "grad_norm": 0.7905877103053048, "learning_rate": 8.908799709187057e-06, "loss": 0.1531, "step": 8155 }, { "epoch": 0.23793686912888734, "grad_norm": 0.8957629797758214, "learning_rate": 8.908505091483819e-06, "loss": 0.1514, "step": 8156 }, { "epoch": 0.2379660423595309, "grad_norm": 0.9786727450738257, "learning_rate": 8.90821043888638e-06, "loss": 0.1827, "step": 8157 }, { "epoch": 0.23799521559017445, "grad_norm": 0.7937521194492156, "learning_rate": 8.907915751397372e-06, "loss": 0.1646, "step": 8158 }, { "epoch": 0.23802438882081803, "grad_norm": 0.702109595124551, "learning_rate": 8.907621029019425e-06, "loss": 0.1629, "step": 8159 }, { "epoch": 0.23805356205146158, "grad_norm": 0.876485020854691, "learning_rate": 8.907326271755171e-06, "loss": 0.1617, "step": 8160 }, { "epoch": 0.23808273528210513, "grad_norm": 0.8172164952933865, "learning_rate": 8.90703147960724e-06, "loss": 0.1543, "step": 8161 }, { "epoch": 0.23811190851274872, "grad_norm": 1.0589003402978017, "learning_rate": 8.906736652578264e-06, "loss": 0.1711, "step": 8162 }, { "epoch": 0.23814108174339227, "grad_norm": 0.8320959954646029, "learning_rate": 8.906441790670877e-06, "loss": 0.1676, "step": 8163 }, { "epoch": 0.23817025497403582, "grad_norm": 0.7943563348025561, "learning_rate": 8.906146893887708e-06, "loss": 0.1526, "step": 8164 }, { "epoch": 0.23819942820467938, "grad_norm": 0.9243283488288019, "learning_rate": 8.905851962231393e-06, "loss": 0.1519, "step": 8165 }, { "epoch": 0.23822860143532296, "grad_norm": 0.6483119265403959, "learning_rate": 8.905556995704566e-06, "loss": 0.1452, "step": 8166 }, { "epoch": 0.2382577746659665, "grad_norm": 1.0631456207996153, "learning_rate": 8.905261994309857e-06, "loss": 0.1974, "step": 8167 }, { "epoch": 0.23828694789661006, "grad_norm": 0.7426459150555751, "learning_rate": 8.9049669580499e-06, "loss": 0.1361, "step": 8168 }, { "epoch": 0.23831612112725364, "grad_norm": 0.7508657482341585, "learning_rate": 8.904671886927334e-06, "loss": 0.1559, "step": 8169 }, { "epoch": 0.2383452943578972, "grad_norm": 0.9359231183611243, "learning_rate": 8.904376780944786e-06, "loss": 0.1678, "step": 8170 }, { "epoch": 0.23837446758854075, "grad_norm": 0.8394916958169752, "learning_rate": 8.904081640104895e-06, "loss": 0.1437, "step": 8171 }, { "epoch": 0.2384036408191843, "grad_norm": 0.7594288412337721, "learning_rate": 8.903786464410295e-06, "loss": 0.1254, "step": 8172 }, { "epoch": 0.23843281404982788, "grad_norm": 0.8464185946780548, "learning_rate": 8.903491253863622e-06, "loss": 0.1525, "step": 8173 }, { "epoch": 0.23846198728047144, "grad_norm": 0.8671937451759861, "learning_rate": 8.903196008467511e-06, "loss": 0.1619, "step": 8174 }, { "epoch": 0.238491160511115, "grad_norm": 0.9450577389185771, "learning_rate": 8.902900728224597e-06, "loss": 0.1697, "step": 8175 }, { "epoch": 0.23852033374175857, "grad_norm": 0.6310459193767253, "learning_rate": 8.902605413137517e-06, "loss": 0.1456, "step": 8176 }, { "epoch": 0.23854950697240213, "grad_norm": 0.8438666745577079, "learning_rate": 8.902310063208907e-06, "loss": 0.1753, "step": 8177 }, { "epoch": 0.23857868020304568, "grad_norm": 0.7813218821139011, "learning_rate": 8.902014678441406e-06, "loss": 0.1585, "step": 8178 }, { "epoch": 0.23860785343368926, "grad_norm": 0.7184763683271836, "learning_rate": 8.90171925883765e-06, "loss": 0.179, "step": 8179 }, { "epoch": 0.2386370266643328, "grad_norm": 0.788171670428232, "learning_rate": 8.901423804400273e-06, "loss": 0.1457, "step": 8180 }, { "epoch": 0.23866619989497637, "grad_norm": 0.8076824354053939, "learning_rate": 8.901128315131917e-06, "loss": 0.147, "step": 8181 }, { "epoch": 0.23869537312561992, "grad_norm": 0.7033556840440839, "learning_rate": 8.900832791035218e-06, "loss": 0.1428, "step": 8182 }, { "epoch": 0.2387245463562635, "grad_norm": 0.7371450897019787, "learning_rate": 8.900537232112816e-06, "loss": 0.1681, "step": 8183 }, { "epoch": 0.23875371958690705, "grad_norm": 0.7652072647129703, "learning_rate": 8.90024163836735e-06, "loss": 0.1788, "step": 8184 }, { "epoch": 0.2387828928175506, "grad_norm": 0.9637137372175031, "learning_rate": 8.899946009801455e-06, "loss": 0.1649, "step": 8185 }, { "epoch": 0.2388120660481942, "grad_norm": 0.8459098693789464, "learning_rate": 8.899650346417773e-06, "loss": 0.1829, "step": 8186 }, { "epoch": 0.23884123927883774, "grad_norm": 1.0011578724095007, "learning_rate": 8.899354648218947e-06, "loss": 0.1615, "step": 8187 }, { "epoch": 0.2388704125094813, "grad_norm": 0.7948233494079993, "learning_rate": 8.899058915207611e-06, "loss": 0.1681, "step": 8188 }, { "epoch": 0.23889958574012485, "grad_norm": 0.9046111595988586, "learning_rate": 8.898763147386408e-06, "loss": 0.1338, "step": 8189 }, { "epoch": 0.23892875897076843, "grad_norm": 0.8962855423839565, "learning_rate": 8.898467344757979e-06, "loss": 0.1772, "step": 8190 }, { "epoch": 0.23895793220141198, "grad_norm": 0.7896687967471685, "learning_rate": 8.898171507324964e-06, "loss": 0.1607, "step": 8191 }, { "epoch": 0.23898710543205554, "grad_norm": 1.0707583006921217, "learning_rate": 8.897875635090005e-06, "loss": 0.1556, "step": 8192 }, { "epoch": 0.23901627866269912, "grad_norm": 0.909136269454888, "learning_rate": 8.89757972805574e-06, "loss": 0.1516, "step": 8193 }, { "epoch": 0.23904545189334267, "grad_norm": 0.8802805681779381, "learning_rate": 8.897283786224817e-06, "loss": 0.1511, "step": 8194 }, { "epoch": 0.23907462512398622, "grad_norm": 0.749464207805033, "learning_rate": 8.896987809599874e-06, "loss": 0.1702, "step": 8195 }, { "epoch": 0.2391037983546298, "grad_norm": 0.6808348804245334, "learning_rate": 8.896691798183552e-06, "loss": 0.1477, "step": 8196 }, { "epoch": 0.23913297158527336, "grad_norm": 0.7649554312826676, "learning_rate": 8.896395751978498e-06, "loss": 0.1527, "step": 8197 }, { "epoch": 0.2391621448159169, "grad_norm": 0.9094753134461215, "learning_rate": 8.896099670987351e-06, "loss": 0.1826, "step": 8198 }, { "epoch": 0.23919131804656046, "grad_norm": 0.8199112485425468, "learning_rate": 8.895803555212757e-06, "loss": 0.1668, "step": 8199 }, { "epoch": 0.23922049127720404, "grad_norm": 0.7843508250661938, "learning_rate": 8.89550740465736e-06, "loss": 0.1617, "step": 8200 }, { "epoch": 0.2392496645078476, "grad_norm": 0.7023619564626988, "learning_rate": 8.895211219323802e-06, "loss": 0.1416, "step": 8201 }, { "epoch": 0.23927883773849115, "grad_norm": 0.8051786204993654, "learning_rate": 8.894914999214727e-06, "loss": 0.1609, "step": 8202 }, { "epoch": 0.23930801096913473, "grad_norm": 0.7610729256588492, "learning_rate": 8.894618744332783e-06, "loss": 0.1716, "step": 8203 }, { "epoch": 0.23933718419977829, "grad_norm": 0.8722026870288849, "learning_rate": 8.89432245468061e-06, "loss": 0.1743, "step": 8204 }, { "epoch": 0.23936635743042184, "grad_norm": 0.7253988078472988, "learning_rate": 8.894026130260858e-06, "loss": 0.1566, "step": 8205 }, { "epoch": 0.23939553066106542, "grad_norm": 0.7558594801533177, "learning_rate": 8.89372977107617e-06, "loss": 0.1722, "step": 8206 }, { "epoch": 0.23942470389170897, "grad_norm": 0.715904179026281, "learning_rate": 8.89343337712919e-06, "loss": 0.1419, "step": 8207 }, { "epoch": 0.23945387712235253, "grad_norm": 0.8812508029464208, "learning_rate": 8.893136948422569e-06, "loss": 0.1581, "step": 8208 }, { "epoch": 0.23948305035299608, "grad_norm": 0.8586838495986173, "learning_rate": 8.89284048495895e-06, "loss": 0.1779, "step": 8209 }, { "epoch": 0.23951222358363966, "grad_norm": 0.7406003304511822, "learning_rate": 8.892543986740979e-06, "loss": 0.1473, "step": 8210 }, { "epoch": 0.23954139681428321, "grad_norm": 0.8979035192711243, "learning_rate": 8.892247453771306e-06, "loss": 0.1515, "step": 8211 }, { "epoch": 0.23957057004492677, "grad_norm": 0.6963097051903574, "learning_rate": 8.891950886052576e-06, "loss": 0.1394, "step": 8212 }, { "epoch": 0.23959974327557035, "grad_norm": 1.267749439137429, "learning_rate": 8.891654283587438e-06, "loss": 0.165, "step": 8213 }, { "epoch": 0.2396289165062139, "grad_norm": 1.1354387491417997, "learning_rate": 8.891357646378538e-06, "loss": 0.1672, "step": 8214 }, { "epoch": 0.23965808973685745, "grad_norm": 0.7444284210006101, "learning_rate": 8.891060974428528e-06, "loss": 0.1744, "step": 8215 }, { "epoch": 0.239687262967501, "grad_norm": 0.6855454737424209, "learning_rate": 8.890764267740053e-06, "loss": 0.1379, "step": 8216 }, { "epoch": 0.2397164361981446, "grad_norm": 0.9024297879509084, "learning_rate": 8.890467526315765e-06, "loss": 0.1595, "step": 8217 }, { "epoch": 0.23974560942878814, "grad_norm": 0.7834463539446351, "learning_rate": 8.89017075015831e-06, "loss": 0.1689, "step": 8218 }, { "epoch": 0.2397747826594317, "grad_norm": 0.988813715135792, "learning_rate": 8.889873939270341e-06, "loss": 0.1837, "step": 8219 }, { "epoch": 0.23980395589007528, "grad_norm": 0.7267918856114794, "learning_rate": 8.889577093654504e-06, "loss": 0.1419, "step": 8220 }, { "epoch": 0.23983312912071883, "grad_norm": 0.7787868909140904, "learning_rate": 8.889280213313454e-06, "loss": 0.1544, "step": 8221 }, { "epoch": 0.23986230235136238, "grad_norm": 1.0797811006635678, "learning_rate": 8.888983298249838e-06, "loss": 0.1623, "step": 8222 }, { "epoch": 0.23989147558200596, "grad_norm": 0.8443592671417744, "learning_rate": 8.888686348466305e-06, "loss": 0.1674, "step": 8223 }, { "epoch": 0.23992064881264952, "grad_norm": 1.0697659872943215, "learning_rate": 8.88838936396551e-06, "loss": 0.1826, "step": 8224 }, { "epoch": 0.23994982204329307, "grad_norm": 0.7664760824723789, "learning_rate": 8.888092344750103e-06, "loss": 0.1366, "step": 8225 }, { "epoch": 0.23997899527393662, "grad_norm": 0.9754768917787945, "learning_rate": 8.887795290822736e-06, "loss": 0.1776, "step": 8226 }, { "epoch": 0.2400081685045802, "grad_norm": 0.8573701254119962, "learning_rate": 8.887498202186062e-06, "loss": 0.1644, "step": 8227 }, { "epoch": 0.24003734173522376, "grad_norm": 0.9220847422993835, "learning_rate": 8.88720107884273e-06, "loss": 0.1358, "step": 8228 }, { "epoch": 0.2400665149658673, "grad_norm": 0.9619544322343004, "learning_rate": 8.886903920795396e-06, "loss": 0.1627, "step": 8229 }, { "epoch": 0.2400956881965109, "grad_norm": 0.9877543280422053, "learning_rate": 8.88660672804671e-06, "loss": 0.1609, "step": 8230 }, { "epoch": 0.24012486142715445, "grad_norm": 0.770908935040861, "learning_rate": 8.886309500599328e-06, "loss": 0.1835, "step": 8231 }, { "epoch": 0.240154034657798, "grad_norm": 1.1567854629304466, "learning_rate": 8.886012238455903e-06, "loss": 0.1808, "step": 8232 }, { "epoch": 0.24018320788844155, "grad_norm": 1.1332016046396984, "learning_rate": 8.885714941619087e-06, "loss": 0.1654, "step": 8233 }, { "epoch": 0.24021238111908513, "grad_norm": 0.9002091516933857, "learning_rate": 8.885417610091536e-06, "loss": 0.1418, "step": 8234 }, { "epoch": 0.2402415543497287, "grad_norm": 1.2171802257120898, "learning_rate": 8.885120243875905e-06, "loss": 0.1466, "step": 8235 }, { "epoch": 0.24027072758037224, "grad_norm": 0.9221963422303984, "learning_rate": 8.884822842974847e-06, "loss": 0.1697, "step": 8236 }, { "epoch": 0.24029990081101582, "grad_norm": 0.9012719303878594, "learning_rate": 8.88452540739102e-06, "loss": 0.1695, "step": 8237 }, { "epoch": 0.24032907404165937, "grad_norm": 0.8564097549638308, "learning_rate": 8.884227937127076e-06, "loss": 0.1563, "step": 8238 }, { "epoch": 0.24035824727230293, "grad_norm": 0.8370971704673468, "learning_rate": 8.883930432185673e-06, "loss": 0.1455, "step": 8239 }, { "epoch": 0.2403874205029465, "grad_norm": 1.0679698209456254, "learning_rate": 8.883632892569466e-06, "loss": 0.1617, "step": 8240 }, { "epoch": 0.24041659373359006, "grad_norm": 1.118725383962962, "learning_rate": 8.88333531828111e-06, "loss": 0.1589, "step": 8241 }, { "epoch": 0.24044576696423361, "grad_norm": 0.9374701175420922, "learning_rate": 8.883037709323263e-06, "loss": 0.1632, "step": 8242 }, { "epoch": 0.24047494019487717, "grad_norm": 0.894106912041009, "learning_rate": 8.882740065698586e-06, "loss": 0.1641, "step": 8243 }, { "epoch": 0.24050411342552075, "grad_norm": 0.88794811179921, "learning_rate": 8.882442387409729e-06, "loss": 0.1426, "step": 8244 }, { "epoch": 0.2405332866561643, "grad_norm": 0.8991441307822194, "learning_rate": 8.882144674459354e-06, "loss": 0.1781, "step": 8245 }, { "epoch": 0.24056245988680786, "grad_norm": 0.8587688129744776, "learning_rate": 8.88184692685012e-06, "loss": 0.1424, "step": 8246 }, { "epoch": 0.24059163311745144, "grad_norm": 1.0713316984903607, "learning_rate": 8.88154914458468e-06, "loss": 0.1662, "step": 8247 }, { "epoch": 0.240620806348095, "grad_norm": 0.8101948080509492, "learning_rate": 8.881251327665699e-06, "loss": 0.1503, "step": 8248 }, { "epoch": 0.24064997957873854, "grad_norm": 0.7635063527922878, "learning_rate": 8.88095347609583e-06, "loss": 0.1642, "step": 8249 }, { "epoch": 0.24067915280938212, "grad_norm": 0.882359914169995, "learning_rate": 8.880655589877737e-06, "loss": 0.163, "step": 8250 }, { "epoch": 0.24070832604002568, "grad_norm": 0.6826838905996303, "learning_rate": 8.880357669014077e-06, "loss": 0.1562, "step": 8251 }, { "epoch": 0.24073749927066923, "grad_norm": 0.7851893250684556, "learning_rate": 8.88005971350751e-06, "loss": 0.1518, "step": 8252 }, { "epoch": 0.24076667250131278, "grad_norm": 0.8064569725792964, "learning_rate": 8.879761723360695e-06, "loss": 0.1783, "step": 8253 }, { "epoch": 0.24079584573195636, "grad_norm": 0.7934433704487039, "learning_rate": 8.879463698576294e-06, "loss": 0.1567, "step": 8254 }, { "epoch": 0.24082501896259992, "grad_norm": 0.7199144588868956, "learning_rate": 8.879165639156968e-06, "loss": 0.1509, "step": 8255 }, { "epoch": 0.24085419219324347, "grad_norm": 0.8761875305665132, "learning_rate": 8.878867545105377e-06, "loss": 0.1502, "step": 8256 }, { "epoch": 0.24088336542388705, "grad_norm": 1.013003472781489, "learning_rate": 8.87856941642418e-06, "loss": 0.1408, "step": 8257 }, { "epoch": 0.2409125386545306, "grad_norm": 0.9854636634111242, "learning_rate": 8.878271253116044e-06, "loss": 0.1632, "step": 8258 }, { "epoch": 0.24094171188517416, "grad_norm": 0.7951311412085198, "learning_rate": 8.877973055183629e-06, "loss": 0.2009, "step": 8259 }, { "epoch": 0.2409708851158177, "grad_norm": 1.0186181133174064, "learning_rate": 8.877674822629595e-06, "loss": 0.1964, "step": 8260 }, { "epoch": 0.2410000583464613, "grad_norm": 0.9672603265242955, "learning_rate": 8.877376555456604e-06, "loss": 0.1573, "step": 8261 }, { "epoch": 0.24102923157710485, "grad_norm": 0.8894235143992626, "learning_rate": 8.877078253667321e-06, "loss": 0.1908, "step": 8262 }, { "epoch": 0.2410584048077484, "grad_norm": 0.8754001686628599, "learning_rate": 8.876779917264412e-06, "loss": 0.1466, "step": 8263 }, { "epoch": 0.24108757803839198, "grad_norm": 1.0628663388415525, "learning_rate": 8.876481546250535e-06, "loss": 0.157, "step": 8264 }, { "epoch": 0.24111675126903553, "grad_norm": 0.9723407257991477, "learning_rate": 8.876183140628355e-06, "loss": 0.145, "step": 8265 }, { "epoch": 0.2411459244996791, "grad_norm": 0.9469882403125053, "learning_rate": 8.87588470040054e-06, "loss": 0.1625, "step": 8266 }, { "epoch": 0.24117509773032267, "grad_norm": 0.8699907055802648, "learning_rate": 8.87558622556975e-06, "loss": 0.1407, "step": 8267 }, { "epoch": 0.24120427096096622, "grad_norm": 0.9010022926823396, "learning_rate": 8.875287716138651e-06, "loss": 0.1565, "step": 8268 }, { "epoch": 0.24123344419160977, "grad_norm": 0.8853167383305223, "learning_rate": 8.87498917210991e-06, "loss": 0.147, "step": 8269 }, { "epoch": 0.24126261742225333, "grad_norm": 0.7458998076653564, "learning_rate": 8.87469059348619e-06, "loss": 0.1628, "step": 8270 }, { "epoch": 0.2412917906528969, "grad_norm": 0.9189429055503623, "learning_rate": 8.874391980270157e-06, "loss": 0.1468, "step": 8271 }, { "epoch": 0.24132096388354046, "grad_norm": 0.7682745223848754, "learning_rate": 8.874093332464477e-06, "loss": 0.15, "step": 8272 }, { "epoch": 0.24135013711418402, "grad_norm": 0.7591197071634669, "learning_rate": 8.873794650071819e-06, "loss": 0.1593, "step": 8273 }, { "epoch": 0.2413793103448276, "grad_norm": 1.1246996911394547, "learning_rate": 8.873495933094844e-06, "loss": 0.1581, "step": 8274 }, { "epoch": 0.24140848357547115, "grad_norm": 0.7076735575515133, "learning_rate": 8.873197181536223e-06, "loss": 0.1543, "step": 8275 }, { "epoch": 0.2414376568061147, "grad_norm": 0.9029068568351564, "learning_rate": 8.872898395398624e-06, "loss": 0.1755, "step": 8276 }, { "epoch": 0.24146683003675828, "grad_norm": 0.7549673909441204, "learning_rate": 8.87259957468471e-06, "loss": 0.1629, "step": 8277 }, { "epoch": 0.24149600326740184, "grad_norm": 0.7575082299161752, "learning_rate": 8.872300719397152e-06, "loss": 0.1514, "step": 8278 }, { "epoch": 0.2415251764980454, "grad_norm": 0.8352503239311698, "learning_rate": 8.872001829538619e-06, "loss": 0.1987, "step": 8279 }, { "epoch": 0.24155434972868894, "grad_norm": 0.7337178680236806, "learning_rate": 8.871702905111776e-06, "loss": 0.167, "step": 8280 }, { "epoch": 0.24158352295933253, "grad_norm": 0.7666388414207655, "learning_rate": 8.871403946119294e-06, "loss": 0.1666, "step": 8281 }, { "epoch": 0.24161269618997608, "grad_norm": 0.8030265459227783, "learning_rate": 8.871104952563843e-06, "loss": 0.174, "step": 8282 }, { "epoch": 0.24164186942061963, "grad_norm": 0.906392696766294, "learning_rate": 8.870805924448091e-06, "loss": 0.1606, "step": 8283 }, { "epoch": 0.2416710426512632, "grad_norm": 0.6662394594558032, "learning_rate": 8.870506861774708e-06, "loss": 0.1489, "step": 8284 }, { "epoch": 0.24170021588190677, "grad_norm": 0.8848343698281591, "learning_rate": 8.870207764546363e-06, "loss": 0.1632, "step": 8285 }, { "epoch": 0.24172938911255032, "grad_norm": 0.9298968774587074, "learning_rate": 8.869908632765727e-06, "loss": 0.1734, "step": 8286 }, { "epoch": 0.24175856234319387, "grad_norm": 0.8343997794773508, "learning_rate": 8.86960946643547e-06, "loss": 0.1832, "step": 8287 }, { "epoch": 0.24178773557383745, "grad_norm": 1.220524871783636, "learning_rate": 8.869310265558264e-06, "loss": 0.1592, "step": 8288 }, { "epoch": 0.241816908804481, "grad_norm": 0.8262776044773047, "learning_rate": 8.869011030136781e-06, "loss": 0.1787, "step": 8289 }, { "epoch": 0.24184608203512456, "grad_norm": 1.1037398922421195, "learning_rate": 8.868711760173688e-06, "loss": 0.1785, "step": 8290 }, { "epoch": 0.24187525526576814, "grad_norm": 0.7803727084389832, "learning_rate": 8.868412455671663e-06, "loss": 0.1632, "step": 8291 }, { "epoch": 0.2419044284964117, "grad_norm": 0.9759382256345664, "learning_rate": 8.868113116633374e-06, "loss": 0.184, "step": 8292 }, { "epoch": 0.24193360172705525, "grad_norm": 0.887876272034532, "learning_rate": 8.867813743061493e-06, "loss": 0.1361, "step": 8293 }, { "epoch": 0.24196277495769883, "grad_norm": 0.807529058890851, "learning_rate": 8.867514334958696e-06, "loss": 0.1425, "step": 8294 }, { "epoch": 0.24199194818834238, "grad_norm": 0.7849043958733483, "learning_rate": 8.867214892327653e-06, "loss": 0.1573, "step": 8295 }, { "epoch": 0.24202112141898594, "grad_norm": 0.9015404177738828, "learning_rate": 8.86691541517104e-06, "loss": 0.1952, "step": 8296 }, { "epoch": 0.2420502946496295, "grad_norm": 0.9219305059684217, "learning_rate": 8.866615903491529e-06, "loss": 0.152, "step": 8297 }, { "epoch": 0.24207946788027307, "grad_norm": 0.8353303290023836, "learning_rate": 8.866316357291793e-06, "loss": 0.161, "step": 8298 }, { "epoch": 0.24210864111091662, "grad_norm": 0.8584782625626277, "learning_rate": 8.866016776574509e-06, "loss": 0.1634, "step": 8299 }, { "epoch": 0.24213781434156018, "grad_norm": 0.8010399387149826, "learning_rate": 8.865717161342348e-06, "loss": 0.1501, "step": 8300 }, { "epoch": 0.24216698757220376, "grad_norm": 0.8403769802589146, "learning_rate": 8.86541751159799e-06, "loss": 0.1468, "step": 8301 }, { "epoch": 0.2421961608028473, "grad_norm": 0.9557938237322129, "learning_rate": 8.865117827344106e-06, "loss": 0.1571, "step": 8302 }, { "epoch": 0.24222533403349086, "grad_norm": 1.036574215317314, "learning_rate": 8.864818108583372e-06, "loss": 0.1533, "step": 8303 }, { "epoch": 0.24225450726413442, "grad_norm": 1.0665467470789982, "learning_rate": 8.864518355318465e-06, "loss": 0.1583, "step": 8304 }, { "epoch": 0.242283680494778, "grad_norm": 0.8899892947644182, "learning_rate": 8.864218567552061e-06, "loss": 0.16, "step": 8305 }, { "epoch": 0.24231285372542155, "grad_norm": 0.8293750946816629, "learning_rate": 8.863918745286836e-06, "loss": 0.1801, "step": 8306 }, { "epoch": 0.2423420269560651, "grad_norm": 0.9905375946112777, "learning_rate": 8.863618888525466e-06, "loss": 0.1587, "step": 8307 }, { "epoch": 0.24237120018670869, "grad_norm": 0.6796720878474997, "learning_rate": 8.863318997270628e-06, "loss": 0.1318, "step": 8308 }, { "epoch": 0.24240037341735224, "grad_norm": 0.8107026806590917, "learning_rate": 8.863019071525004e-06, "loss": 0.1567, "step": 8309 }, { "epoch": 0.2424295466479958, "grad_norm": 1.2963652228907871, "learning_rate": 8.862719111291265e-06, "loss": 0.1704, "step": 8310 }, { "epoch": 0.24245871987863937, "grad_norm": 0.7919308649984564, "learning_rate": 8.862419116572091e-06, "loss": 0.168, "step": 8311 }, { "epoch": 0.24248789310928293, "grad_norm": 0.7014596902903338, "learning_rate": 8.862119087370164e-06, "loss": 0.1485, "step": 8312 }, { "epoch": 0.24251706633992648, "grad_norm": 0.8910306467453531, "learning_rate": 8.861819023688158e-06, "loss": 0.1802, "step": 8313 }, { "epoch": 0.24254623957057003, "grad_norm": 0.6840901742528571, "learning_rate": 8.861518925528753e-06, "loss": 0.1311, "step": 8314 }, { "epoch": 0.2425754128012136, "grad_norm": 0.6571646828440548, "learning_rate": 8.861218792894631e-06, "loss": 0.1485, "step": 8315 }, { "epoch": 0.24260458603185717, "grad_norm": 0.9296671671227267, "learning_rate": 8.860918625788468e-06, "loss": 0.1413, "step": 8316 }, { "epoch": 0.24263375926250072, "grad_norm": 0.7680292881436243, "learning_rate": 8.860618424212945e-06, "loss": 0.1631, "step": 8317 }, { "epoch": 0.2426629324931443, "grad_norm": 0.7553524615331095, "learning_rate": 8.860318188170744e-06, "loss": 0.1505, "step": 8318 }, { "epoch": 0.24269210572378785, "grad_norm": 0.7744109809950512, "learning_rate": 8.860017917664543e-06, "loss": 0.1543, "step": 8319 }, { "epoch": 0.2427212789544314, "grad_norm": 0.741122023611664, "learning_rate": 8.859717612697023e-06, "loss": 0.1649, "step": 8320 }, { "epoch": 0.242750452185075, "grad_norm": 0.8842410624110942, "learning_rate": 8.859417273270866e-06, "loss": 0.146, "step": 8321 }, { "epoch": 0.24277962541571854, "grad_norm": 0.8396582509731948, "learning_rate": 8.859116899388752e-06, "loss": 0.1991, "step": 8322 }, { "epoch": 0.2428087986463621, "grad_norm": 0.8399060739518684, "learning_rate": 8.858816491053364e-06, "loss": 0.1617, "step": 8323 }, { "epoch": 0.24283797187700565, "grad_norm": 0.932869570379997, "learning_rate": 8.858516048267383e-06, "loss": 0.1553, "step": 8324 }, { "epoch": 0.24286714510764923, "grad_norm": 0.9219004107075606, "learning_rate": 8.85821557103349e-06, "loss": 0.1521, "step": 8325 }, { "epoch": 0.24289631833829278, "grad_norm": 0.7569424998936463, "learning_rate": 8.857915059354373e-06, "loss": 0.1229, "step": 8326 }, { "epoch": 0.24292549156893634, "grad_norm": 0.9706743467715019, "learning_rate": 8.85761451323271e-06, "loss": 0.155, "step": 8327 }, { "epoch": 0.24295466479957992, "grad_norm": 0.9094527668747412, "learning_rate": 8.857313932671186e-06, "loss": 0.1528, "step": 8328 }, { "epoch": 0.24298383803022347, "grad_norm": 0.9351577674569229, "learning_rate": 8.857013317672484e-06, "loss": 0.1449, "step": 8329 }, { "epoch": 0.24301301126086702, "grad_norm": 0.7590114397369029, "learning_rate": 8.856712668239287e-06, "loss": 0.144, "step": 8330 }, { "epoch": 0.24304218449151058, "grad_norm": 0.8761829605476987, "learning_rate": 8.85641198437428e-06, "loss": 0.1637, "step": 8331 }, { "epoch": 0.24307135772215416, "grad_norm": 0.9394578365099572, "learning_rate": 8.856111266080149e-06, "loss": 0.1459, "step": 8332 }, { "epoch": 0.2431005309527977, "grad_norm": 0.7659881061961028, "learning_rate": 8.855810513359574e-06, "loss": 0.1548, "step": 8333 }, { "epoch": 0.24312970418344126, "grad_norm": 0.8870597339994251, "learning_rate": 8.855509726215247e-06, "loss": 0.1614, "step": 8334 }, { "epoch": 0.24315887741408485, "grad_norm": 0.9362402687968784, "learning_rate": 8.855208904649848e-06, "loss": 0.1602, "step": 8335 }, { "epoch": 0.2431880506447284, "grad_norm": 0.920572264713605, "learning_rate": 8.854908048666064e-06, "loss": 0.1549, "step": 8336 }, { "epoch": 0.24321722387537195, "grad_norm": 0.7838104690539485, "learning_rate": 8.85460715826658e-06, "loss": 0.1584, "step": 8337 }, { "epoch": 0.24324639710601553, "grad_norm": 0.8296277648008216, "learning_rate": 8.854306233454085e-06, "loss": 0.1636, "step": 8338 }, { "epoch": 0.24327557033665909, "grad_norm": 0.9261275509176963, "learning_rate": 8.854005274231264e-06, "loss": 0.1583, "step": 8339 }, { "epoch": 0.24330474356730264, "grad_norm": 0.7284314849938635, "learning_rate": 8.853704280600803e-06, "loss": 0.151, "step": 8340 }, { "epoch": 0.2433339167979462, "grad_norm": 0.855592558560831, "learning_rate": 8.853403252565391e-06, "loss": 0.1499, "step": 8341 }, { "epoch": 0.24336309002858977, "grad_norm": 1.0361960340942988, "learning_rate": 8.853102190127714e-06, "loss": 0.1929, "step": 8342 }, { "epoch": 0.24339226325923333, "grad_norm": 0.8606824807586467, "learning_rate": 8.852801093290461e-06, "loss": 0.1676, "step": 8343 }, { "epoch": 0.24342143648987688, "grad_norm": 0.802848091187224, "learning_rate": 8.852499962056321e-06, "loss": 0.1475, "step": 8344 }, { "epoch": 0.24345060972052046, "grad_norm": 0.941050678114156, "learning_rate": 8.852198796427978e-06, "loss": 0.1586, "step": 8345 }, { "epoch": 0.24347978295116401, "grad_norm": 0.8658456740404586, "learning_rate": 8.851897596408125e-06, "loss": 0.1487, "step": 8346 }, { "epoch": 0.24350895618180757, "grad_norm": 0.7557942088182621, "learning_rate": 8.85159636199945e-06, "loss": 0.1837, "step": 8347 }, { "epoch": 0.24353812941245115, "grad_norm": 0.9696259629692668, "learning_rate": 8.851295093204642e-06, "loss": 0.1666, "step": 8348 }, { "epoch": 0.2435673026430947, "grad_norm": 1.1376486002663677, "learning_rate": 8.850993790026391e-06, "loss": 0.1538, "step": 8349 }, { "epoch": 0.24359647587373826, "grad_norm": 0.7397184643381797, "learning_rate": 8.850692452467387e-06, "loss": 0.1669, "step": 8350 }, { "epoch": 0.2436256491043818, "grad_norm": 1.0140075529701373, "learning_rate": 8.850391080530319e-06, "loss": 0.1752, "step": 8351 }, { "epoch": 0.2436548223350254, "grad_norm": 1.0352251266250718, "learning_rate": 8.850089674217879e-06, "loss": 0.1754, "step": 8352 }, { "epoch": 0.24368399556566894, "grad_norm": 0.7166052530629659, "learning_rate": 8.849788233532759e-06, "loss": 0.1357, "step": 8353 }, { "epoch": 0.2437131687963125, "grad_norm": 0.8722454672563336, "learning_rate": 8.849486758477647e-06, "loss": 0.1563, "step": 8354 }, { "epoch": 0.24374234202695608, "grad_norm": 0.787635601188435, "learning_rate": 8.849185249055236e-06, "loss": 0.1512, "step": 8355 }, { "epoch": 0.24377151525759963, "grad_norm": 0.7581295989698724, "learning_rate": 8.848883705268219e-06, "loss": 0.1546, "step": 8356 }, { "epoch": 0.24380068848824318, "grad_norm": 0.850561381936157, "learning_rate": 8.848582127119285e-06, "loss": 0.1618, "step": 8357 }, { "epoch": 0.24382986171888674, "grad_norm": 0.7886592090366925, "learning_rate": 8.84828051461113e-06, "loss": 0.1723, "step": 8358 }, { "epoch": 0.24385903494953032, "grad_norm": 0.8313972585497709, "learning_rate": 8.847978867746446e-06, "loss": 0.1518, "step": 8359 }, { "epoch": 0.24388820818017387, "grad_norm": 0.8607898970493084, "learning_rate": 8.847677186527924e-06, "loss": 0.1642, "step": 8360 }, { "epoch": 0.24391738141081742, "grad_norm": 0.7871688502088359, "learning_rate": 8.84737547095826e-06, "loss": 0.1494, "step": 8361 }, { "epoch": 0.243946554641461, "grad_norm": 0.8363080403057463, "learning_rate": 8.847073721040145e-06, "loss": 0.1779, "step": 8362 }, { "epoch": 0.24397572787210456, "grad_norm": 0.8484381925285275, "learning_rate": 8.846771936776275e-06, "loss": 0.1782, "step": 8363 }, { "epoch": 0.2440049011027481, "grad_norm": 1.0067802342523158, "learning_rate": 8.846470118169343e-06, "loss": 0.1451, "step": 8364 }, { "epoch": 0.2440340743333917, "grad_norm": 0.8955634481195281, "learning_rate": 8.846168265222044e-06, "loss": 0.148, "step": 8365 }, { "epoch": 0.24406324756403525, "grad_norm": 1.0444161973369948, "learning_rate": 8.845866377937073e-06, "loss": 0.1753, "step": 8366 }, { "epoch": 0.2440924207946788, "grad_norm": 0.906266243851301, "learning_rate": 8.845564456317124e-06, "loss": 0.1451, "step": 8367 }, { "epoch": 0.24412159402532235, "grad_norm": 1.0563299544092395, "learning_rate": 8.845262500364896e-06, "loss": 0.1526, "step": 8368 }, { "epoch": 0.24415076725596593, "grad_norm": 0.8454233917176577, "learning_rate": 8.84496051008308e-06, "loss": 0.1537, "step": 8369 }, { "epoch": 0.2441799404866095, "grad_norm": 1.0749430110370448, "learning_rate": 8.844658485474376e-06, "loss": 0.1633, "step": 8370 }, { "epoch": 0.24420911371725304, "grad_norm": 1.0658178285046038, "learning_rate": 8.844356426541476e-06, "loss": 0.1558, "step": 8371 }, { "epoch": 0.24423828694789662, "grad_norm": 0.8166177664164356, "learning_rate": 8.844054333287081e-06, "loss": 0.1927, "step": 8372 }, { "epoch": 0.24426746017854017, "grad_norm": 1.1520638365934748, "learning_rate": 8.84375220571389e-06, "loss": 0.1737, "step": 8373 }, { "epoch": 0.24429663340918373, "grad_norm": 0.9214527141771043, "learning_rate": 8.843450043824593e-06, "loss": 0.1859, "step": 8374 }, { "epoch": 0.24432580663982728, "grad_norm": 0.7492326427282036, "learning_rate": 8.843147847621893e-06, "loss": 0.1419, "step": 8375 }, { "epoch": 0.24435497987047086, "grad_norm": 0.7411821285949995, "learning_rate": 8.842845617108485e-06, "loss": 0.1607, "step": 8376 }, { "epoch": 0.24438415310111442, "grad_norm": 0.7154000436837367, "learning_rate": 8.842543352287069e-06, "loss": 0.1481, "step": 8377 }, { "epoch": 0.24441332633175797, "grad_norm": 0.9138082082050378, "learning_rate": 8.842241053160345e-06, "loss": 0.1662, "step": 8378 }, { "epoch": 0.24444249956240155, "grad_norm": 0.964306699508199, "learning_rate": 8.841938719731008e-06, "loss": 0.1418, "step": 8379 }, { "epoch": 0.2444716727930451, "grad_norm": 0.7727967753547276, "learning_rate": 8.841636352001762e-06, "loss": 0.1754, "step": 8380 }, { "epoch": 0.24450084602368866, "grad_norm": 0.8789704182148401, "learning_rate": 8.841333949975302e-06, "loss": 0.1444, "step": 8381 }, { "epoch": 0.24453001925433224, "grad_norm": 0.7411244681921415, "learning_rate": 8.84103151365433e-06, "loss": 0.1458, "step": 8382 }, { "epoch": 0.2445591924849758, "grad_norm": 0.8487622912626777, "learning_rate": 8.840729043041545e-06, "loss": 0.1606, "step": 8383 }, { "epoch": 0.24458836571561934, "grad_norm": 0.9033530842049082, "learning_rate": 8.840426538139647e-06, "loss": 0.1865, "step": 8384 }, { "epoch": 0.2446175389462629, "grad_norm": 0.8992042468042295, "learning_rate": 8.84012399895134e-06, "loss": 0.1704, "step": 8385 }, { "epoch": 0.24464671217690648, "grad_norm": 0.7081280719070593, "learning_rate": 8.83982142547932e-06, "loss": 0.1592, "step": 8386 }, { "epoch": 0.24467588540755003, "grad_norm": 1.0275869945524918, "learning_rate": 8.839518817726293e-06, "loss": 0.1369, "step": 8387 }, { "epoch": 0.24470505863819358, "grad_norm": 0.7760137392426738, "learning_rate": 8.839216175694957e-06, "loss": 0.1523, "step": 8388 }, { "epoch": 0.24473423186883717, "grad_norm": 0.8148903229715854, "learning_rate": 8.838913499388018e-06, "loss": 0.1679, "step": 8389 }, { "epoch": 0.24476340509948072, "grad_norm": 0.9797201186920089, "learning_rate": 8.838610788808173e-06, "loss": 0.1422, "step": 8390 }, { "epoch": 0.24479257833012427, "grad_norm": 1.0186898166413612, "learning_rate": 8.838308043958128e-06, "loss": 0.1759, "step": 8391 }, { "epoch": 0.24482175156076785, "grad_norm": 1.027818751354689, "learning_rate": 8.838005264840585e-06, "loss": 0.1646, "step": 8392 }, { "epoch": 0.2448509247914114, "grad_norm": 1.225498641750817, "learning_rate": 8.837702451458248e-06, "loss": 0.1519, "step": 8393 }, { "epoch": 0.24488009802205496, "grad_norm": 0.9275017187124127, "learning_rate": 8.83739960381382e-06, "loss": 0.1519, "step": 8394 }, { "epoch": 0.2449092712526985, "grad_norm": 1.0713892531659432, "learning_rate": 8.837096721910004e-06, "loss": 0.1524, "step": 8395 }, { "epoch": 0.2449384444833421, "grad_norm": 1.162071648834617, "learning_rate": 8.836793805749504e-06, "loss": 0.1648, "step": 8396 }, { "epoch": 0.24496761771398565, "grad_norm": 0.7598401323146186, "learning_rate": 8.836490855335026e-06, "loss": 0.1681, "step": 8397 }, { "epoch": 0.2449967909446292, "grad_norm": 0.7747140963682789, "learning_rate": 8.83618787066927e-06, "loss": 0.1494, "step": 8398 }, { "epoch": 0.24502596417527278, "grad_norm": 0.8215032281001173, "learning_rate": 8.835884851754948e-06, "loss": 0.1581, "step": 8399 }, { "epoch": 0.24505513740591633, "grad_norm": 0.7220283583031136, "learning_rate": 8.83558179859476e-06, "loss": 0.155, "step": 8400 }, { "epoch": 0.2450843106365599, "grad_norm": 0.6641240567457919, "learning_rate": 8.835278711191414e-06, "loss": 0.1304, "step": 8401 }, { "epoch": 0.24511348386720344, "grad_norm": 0.7538929804627872, "learning_rate": 8.834975589547616e-06, "loss": 0.1456, "step": 8402 }, { "epoch": 0.24514265709784702, "grad_norm": 0.8066346810016369, "learning_rate": 8.83467243366607e-06, "loss": 0.1367, "step": 8403 }, { "epoch": 0.24517183032849058, "grad_norm": 0.7457877340021333, "learning_rate": 8.834369243549484e-06, "loss": 0.1419, "step": 8404 }, { "epoch": 0.24520100355913413, "grad_norm": 0.687374925843236, "learning_rate": 8.834066019200566e-06, "loss": 0.1444, "step": 8405 }, { "epoch": 0.2452301767897777, "grad_norm": 0.9135412221902482, "learning_rate": 8.83376276062202e-06, "loss": 0.1415, "step": 8406 }, { "epoch": 0.24525935002042126, "grad_norm": 0.8808876465180047, "learning_rate": 8.833459467816557e-06, "loss": 0.1384, "step": 8407 }, { "epoch": 0.24528852325106482, "grad_norm": 0.7754192923566724, "learning_rate": 8.833156140786883e-06, "loss": 0.1786, "step": 8408 }, { "epoch": 0.2453176964817084, "grad_norm": 0.8651372266868499, "learning_rate": 8.832852779535704e-06, "loss": 0.18, "step": 8409 }, { "epoch": 0.24534686971235195, "grad_norm": 0.8809853052651581, "learning_rate": 8.832549384065732e-06, "loss": 0.1696, "step": 8410 }, { "epoch": 0.2453760429429955, "grad_norm": 0.8133090326836228, "learning_rate": 8.832245954379674e-06, "loss": 0.1438, "step": 8411 }, { "epoch": 0.24540521617363906, "grad_norm": 0.9548720252177634, "learning_rate": 8.831942490480238e-06, "loss": 0.1598, "step": 8412 }, { "epoch": 0.24543438940428264, "grad_norm": 0.9654900151038924, "learning_rate": 8.831638992370136e-06, "loss": 0.164, "step": 8413 }, { "epoch": 0.2454635626349262, "grad_norm": 1.0402333658753493, "learning_rate": 8.831335460052075e-06, "loss": 0.1499, "step": 8414 }, { "epoch": 0.24549273586556974, "grad_norm": 1.0229072464196518, "learning_rate": 8.831031893528765e-06, "loss": 0.1668, "step": 8415 }, { "epoch": 0.24552190909621333, "grad_norm": 1.1475131038509692, "learning_rate": 8.830728292802917e-06, "loss": 0.1671, "step": 8416 }, { "epoch": 0.24555108232685688, "grad_norm": 0.9151730725841057, "learning_rate": 8.830424657877241e-06, "loss": 0.1521, "step": 8417 }, { "epoch": 0.24558025555750043, "grad_norm": 0.7484544291478948, "learning_rate": 8.830120988754448e-06, "loss": 0.1581, "step": 8418 }, { "epoch": 0.24560942878814399, "grad_norm": 0.835116926310824, "learning_rate": 8.82981728543725e-06, "loss": 0.1717, "step": 8419 }, { "epoch": 0.24563860201878757, "grad_norm": 2.437720013564385, "learning_rate": 8.829513547928357e-06, "loss": 0.186, "step": 8420 }, { "epoch": 0.24566777524943112, "grad_norm": 0.9567922993993235, "learning_rate": 8.829209776230481e-06, "loss": 0.1402, "step": 8421 }, { "epoch": 0.24569694848007467, "grad_norm": 0.6785539352419789, "learning_rate": 8.828905970346333e-06, "loss": 0.1562, "step": 8422 }, { "epoch": 0.24572612171071825, "grad_norm": 1.0096327952414297, "learning_rate": 8.82860213027863e-06, "loss": 0.1565, "step": 8423 }, { "epoch": 0.2457552949413618, "grad_norm": 0.9654423138572058, "learning_rate": 8.828298256030078e-06, "loss": 0.1736, "step": 8424 }, { "epoch": 0.24578446817200536, "grad_norm": 0.869416426857183, "learning_rate": 8.827994347603395e-06, "loss": 0.1433, "step": 8425 }, { "epoch": 0.24581364140264894, "grad_norm": 0.654907225293584, "learning_rate": 8.82769040500129e-06, "loss": 0.1526, "step": 8426 }, { "epoch": 0.2458428146332925, "grad_norm": 1.0479087753183833, "learning_rate": 8.827386428226481e-06, "loss": 0.1591, "step": 8427 }, { "epoch": 0.24587198786393605, "grad_norm": 0.86967230802673, "learning_rate": 8.827082417281679e-06, "loss": 0.1478, "step": 8428 }, { "epoch": 0.2459011610945796, "grad_norm": 0.7681872088185056, "learning_rate": 8.826778372169599e-06, "loss": 0.1431, "step": 8429 }, { "epoch": 0.24593033432522318, "grad_norm": 0.9663204434008695, "learning_rate": 8.826474292892954e-06, "loss": 0.1547, "step": 8430 }, { "epoch": 0.24595950755586674, "grad_norm": 0.8851904730109313, "learning_rate": 8.82617017945446e-06, "loss": 0.1549, "step": 8431 }, { "epoch": 0.2459886807865103, "grad_norm": 0.7937661421652855, "learning_rate": 8.825866031856833e-06, "loss": 0.1739, "step": 8432 }, { "epoch": 0.24601785401715387, "grad_norm": 0.7341655041980866, "learning_rate": 8.825561850102788e-06, "loss": 0.1457, "step": 8433 }, { "epoch": 0.24604702724779742, "grad_norm": 1.0127929178350177, "learning_rate": 8.82525763419504e-06, "loss": 0.1962, "step": 8434 }, { "epoch": 0.24607620047844098, "grad_norm": 0.9877385349978934, "learning_rate": 8.824953384136305e-06, "loss": 0.1924, "step": 8435 }, { "epoch": 0.24610537370908456, "grad_norm": 0.8526552191336515, "learning_rate": 8.824649099929297e-06, "loss": 0.1591, "step": 8436 }, { "epoch": 0.2461345469397281, "grad_norm": 0.8486905635216188, "learning_rate": 8.824344781576736e-06, "loss": 0.1717, "step": 8437 }, { "epoch": 0.24616372017037166, "grad_norm": 0.9146396786883396, "learning_rate": 8.82404042908134e-06, "loss": 0.1603, "step": 8438 }, { "epoch": 0.24619289340101522, "grad_norm": 0.9111314156629063, "learning_rate": 8.823736042445822e-06, "loss": 0.1741, "step": 8439 }, { "epoch": 0.2462220666316588, "grad_norm": 0.7504101195768871, "learning_rate": 8.8234316216729e-06, "loss": 0.1656, "step": 8440 }, { "epoch": 0.24625123986230235, "grad_norm": 0.8826741149882912, "learning_rate": 8.823127166765296e-06, "loss": 0.1782, "step": 8441 }, { "epoch": 0.2462804130929459, "grad_norm": 1.079721249237678, "learning_rate": 8.822822677725725e-06, "loss": 0.1427, "step": 8442 }, { "epoch": 0.24630958632358949, "grad_norm": 0.8250626002008481, "learning_rate": 8.822518154556904e-06, "loss": 0.1471, "step": 8443 }, { "epoch": 0.24633875955423304, "grad_norm": 2.595163956846552, "learning_rate": 8.822213597261553e-06, "loss": 0.1564, "step": 8444 }, { "epoch": 0.2463679327848766, "grad_norm": 1.028256115729576, "learning_rate": 8.821909005842393e-06, "loss": 0.1628, "step": 8445 }, { "epoch": 0.24639710601552015, "grad_norm": 0.8793545771654575, "learning_rate": 8.821604380302141e-06, "loss": 0.1517, "step": 8446 }, { "epoch": 0.24642627924616373, "grad_norm": 0.6827770932932468, "learning_rate": 8.82129972064352e-06, "loss": 0.1579, "step": 8447 }, { "epoch": 0.24645545247680728, "grad_norm": 0.8235675301541698, "learning_rate": 8.820995026869244e-06, "loss": 0.1639, "step": 8448 }, { "epoch": 0.24648462570745083, "grad_norm": 0.9152598512939685, "learning_rate": 8.820690298982037e-06, "loss": 0.1788, "step": 8449 }, { "epoch": 0.24651379893809441, "grad_norm": 0.840929740765244, "learning_rate": 8.82038553698462e-06, "loss": 0.1414, "step": 8450 }, { "epoch": 0.24654297216873797, "grad_norm": 0.7697768271526885, "learning_rate": 8.820080740879713e-06, "loss": 0.1521, "step": 8451 }, { "epoch": 0.24657214539938152, "grad_norm": 3.034581115019779, "learning_rate": 8.819775910670036e-06, "loss": 0.1517, "step": 8452 }, { "epoch": 0.2466013186300251, "grad_norm": 0.8956114868858843, "learning_rate": 8.819471046358313e-06, "loss": 0.1552, "step": 8453 }, { "epoch": 0.24663049186066865, "grad_norm": 1.075827573742953, "learning_rate": 8.819166147947263e-06, "loss": 0.1694, "step": 8454 }, { "epoch": 0.2466596650913122, "grad_norm": 0.7881868658598621, "learning_rate": 8.81886121543961e-06, "loss": 0.1531, "step": 8455 }, { "epoch": 0.24668883832195576, "grad_norm": 0.729149444478052, "learning_rate": 8.818556248838075e-06, "loss": 0.1895, "step": 8456 }, { "epoch": 0.24671801155259934, "grad_norm": 0.8361171197534403, "learning_rate": 8.818251248145382e-06, "loss": 0.1638, "step": 8457 }, { "epoch": 0.2467471847832429, "grad_norm": 0.8258725227718289, "learning_rate": 8.817946213364254e-06, "loss": 0.17, "step": 8458 }, { "epoch": 0.24677635801388645, "grad_norm": 1.0109646348966774, "learning_rate": 8.817641144497413e-06, "loss": 0.1714, "step": 8459 }, { "epoch": 0.24680553124453003, "grad_norm": 0.8278978580142614, "learning_rate": 8.817336041547582e-06, "loss": 0.1489, "step": 8460 }, { "epoch": 0.24683470447517358, "grad_norm": 0.8980349193116316, "learning_rate": 8.817030904517488e-06, "loss": 0.1738, "step": 8461 }, { "epoch": 0.24686387770581714, "grad_norm": 1.0060006588202914, "learning_rate": 8.816725733409852e-06, "loss": 0.1641, "step": 8462 }, { "epoch": 0.24689305093646072, "grad_norm": 1.1469604360476755, "learning_rate": 8.8164205282274e-06, "loss": 0.1559, "step": 8463 }, { "epoch": 0.24692222416710427, "grad_norm": 0.9779373329827378, "learning_rate": 8.816115288972857e-06, "loss": 0.1691, "step": 8464 }, { "epoch": 0.24695139739774782, "grad_norm": 1.5967700458984158, "learning_rate": 8.815810015648947e-06, "loss": 0.1711, "step": 8465 }, { "epoch": 0.24698057062839138, "grad_norm": 0.9709621836414194, "learning_rate": 8.815504708258398e-06, "loss": 0.1709, "step": 8466 }, { "epoch": 0.24700974385903496, "grad_norm": 0.9165203118739379, "learning_rate": 8.815199366803932e-06, "loss": 0.1869, "step": 8467 }, { "epoch": 0.2470389170896785, "grad_norm": 0.8066612396376657, "learning_rate": 8.814893991288277e-06, "loss": 0.1504, "step": 8468 }, { "epoch": 0.24706809032032206, "grad_norm": 0.7944070270789971, "learning_rate": 8.814588581714158e-06, "loss": 0.179, "step": 8469 }, { "epoch": 0.24709726355096565, "grad_norm": 0.8136541392797165, "learning_rate": 8.814283138084305e-06, "loss": 0.1585, "step": 8470 }, { "epoch": 0.2471264367816092, "grad_norm": 0.8970606040478348, "learning_rate": 8.813977660401442e-06, "loss": 0.1446, "step": 8471 }, { "epoch": 0.24715561001225275, "grad_norm": 0.6884550119953908, "learning_rate": 8.813672148668296e-06, "loss": 0.1317, "step": 8472 }, { "epoch": 0.2471847832428963, "grad_norm": 0.792880746072291, "learning_rate": 8.813366602887596e-06, "loss": 0.1705, "step": 8473 }, { "epoch": 0.2472139564735399, "grad_norm": 0.8942780596943223, "learning_rate": 8.81306102306207e-06, "loss": 0.1696, "step": 8474 }, { "epoch": 0.24724312970418344, "grad_norm": 0.9207073793628455, "learning_rate": 8.812755409194444e-06, "loss": 0.1902, "step": 8475 }, { "epoch": 0.247272302934827, "grad_norm": 0.7593633274691847, "learning_rate": 8.81244976128745e-06, "loss": 0.1565, "step": 8476 }, { "epoch": 0.24730147616547057, "grad_norm": 0.9856712072108497, "learning_rate": 8.812144079343814e-06, "loss": 0.1913, "step": 8477 }, { "epoch": 0.24733064939611413, "grad_norm": 0.9154848599457953, "learning_rate": 8.811838363366263e-06, "loss": 0.1612, "step": 8478 }, { "epoch": 0.24735982262675768, "grad_norm": 0.7981364221196089, "learning_rate": 8.811532613357532e-06, "loss": 0.1938, "step": 8479 }, { "epoch": 0.24738899585740126, "grad_norm": 0.8215717143394589, "learning_rate": 8.811226829320347e-06, "loss": 0.1933, "step": 8480 }, { "epoch": 0.24741816908804481, "grad_norm": 0.8889634002739583, "learning_rate": 8.810921011257439e-06, "loss": 0.16, "step": 8481 }, { "epoch": 0.24744734231868837, "grad_norm": 0.7136621367510201, "learning_rate": 8.810615159171539e-06, "loss": 0.1527, "step": 8482 }, { "epoch": 0.24747651554933192, "grad_norm": 0.768581865624633, "learning_rate": 8.810309273065374e-06, "loss": 0.1462, "step": 8483 }, { "epoch": 0.2475056887799755, "grad_norm": 0.7718778497787705, "learning_rate": 8.810003352941679e-06, "loss": 0.1498, "step": 8484 }, { "epoch": 0.24753486201061906, "grad_norm": 0.8938330577938709, "learning_rate": 8.809697398803183e-06, "loss": 0.1766, "step": 8485 }, { "epoch": 0.2475640352412626, "grad_norm": 0.7270221163387063, "learning_rate": 8.809391410652618e-06, "loss": 0.1661, "step": 8486 }, { "epoch": 0.2475932084719062, "grad_norm": 0.8740024671496396, "learning_rate": 8.809085388492716e-06, "loss": 0.167, "step": 8487 }, { "epoch": 0.24762238170254974, "grad_norm": 1.0996909584243546, "learning_rate": 8.808779332326208e-06, "loss": 0.1993, "step": 8488 }, { "epoch": 0.2476515549331933, "grad_norm": 0.8376569971712228, "learning_rate": 8.808473242155828e-06, "loss": 0.1428, "step": 8489 }, { "epoch": 0.24768072816383685, "grad_norm": 0.8162306345311063, "learning_rate": 8.808167117984308e-06, "loss": 0.1546, "step": 8490 }, { "epoch": 0.24770990139448043, "grad_norm": 1.145727284508113, "learning_rate": 8.807860959814381e-06, "loss": 0.2119, "step": 8491 }, { "epoch": 0.24773907462512398, "grad_norm": 0.8791723337086851, "learning_rate": 8.807554767648782e-06, "loss": 0.1759, "step": 8492 }, { "epoch": 0.24776824785576754, "grad_norm": 0.7711098477920081, "learning_rate": 8.80724854149024e-06, "loss": 0.1558, "step": 8493 }, { "epoch": 0.24779742108641112, "grad_norm": 0.879832302265608, "learning_rate": 8.806942281341496e-06, "loss": 0.165, "step": 8494 }, { "epoch": 0.24782659431705467, "grad_norm": 1.0633651975194989, "learning_rate": 8.806635987205276e-06, "loss": 0.149, "step": 8495 }, { "epoch": 0.24785576754769822, "grad_norm": 0.9650721712161302, "learning_rate": 8.80632965908432e-06, "loss": 0.1769, "step": 8496 }, { "epoch": 0.2478849407783418, "grad_norm": 1.115058116156931, "learning_rate": 8.806023296981364e-06, "loss": 0.1764, "step": 8497 }, { "epoch": 0.24791411400898536, "grad_norm": 0.8621447225082789, "learning_rate": 8.805716900899137e-06, "loss": 0.1581, "step": 8498 }, { "epoch": 0.2479432872396289, "grad_norm": 0.877791472812501, "learning_rate": 8.805410470840378e-06, "loss": 0.1538, "step": 8499 }, { "epoch": 0.24797246047027247, "grad_norm": 0.850704153107941, "learning_rate": 8.805104006807825e-06, "loss": 0.1705, "step": 8500 }, { "epoch": 0.24800163370091605, "grad_norm": 1.1570560093546498, "learning_rate": 8.80479750880421e-06, "loss": 0.1484, "step": 8501 }, { "epoch": 0.2480308069315596, "grad_norm": 1.1830242487341915, "learning_rate": 8.804490976832272e-06, "loss": 0.1528, "step": 8502 }, { "epoch": 0.24805998016220315, "grad_norm": 0.8592503509249563, "learning_rate": 8.804184410894747e-06, "loss": 0.1621, "step": 8503 }, { "epoch": 0.24808915339284673, "grad_norm": 1.5333932107566217, "learning_rate": 8.803877810994373e-06, "loss": 0.1643, "step": 8504 }, { "epoch": 0.2481183266234903, "grad_norm": 0.9298591687473505, "learning_rate": 8.803571177133884e-06, "loss": 0.1664, "step": 8505 }, { "epoch": 0.24814749985413384, "grad_norm": 0.9366701256850264, "learning_rate": 8.80326450931602e-06, "loss": 0.1675, "step": 8506 }, { "epoch": 0.24817667308477742, "grad_norm": 1.0417008665791745, "learning_rate": 8.802957807543517e-06, "loss": 0.1484, "step": 8507 }, { "epoch": 0.24820584631542097, "grad_norm": 0.6713970157192196, "learning_rate": 8.802651071819118e-06, "loss": 0.1784, "step": 8508 }, { "epoch": 0.24823501954606453, "grad_norm": 0.7218572054930797, "learning_rate": 8.802344302145555e-06, "loss": 0.1619, "step": 8509 }, { "epoch": 0.24826419277670808, "grad_norm": 0.8067575500397708, "learning_rate": 8.80203749852557e-06, "loss": 0.1829, "step": 8510 }, { "epoch": 0.24829336600735166, "grad_norm": 0.6142079062235674, "learning_rate": 8.801730660961902e-06, "loss": 0.1457, "step": 8511 }, { "epoch": 0.24832253923799522, "grad_norm": 0.7805665552257766, "learning_rate": 8.80142378945729e-06, "loss": 0.141, "step": 8512 }, { "epoch": 0.24835171246863877, "grad_norm": 0.8006755079437554, "learning_rate": 8.801116884014475e-06, "loss": 0.162, "step": 8513 }, { "epoch": 0.24838088569928235, "grad_norm": 0.9079863639047883, "learning_rate": 8.800809944636195e-06, "loss": 0.1377, "step": 8514 }, { "epoch": 0.2484100589299259, "grad_norm": 0.7832487583579651, "learning_rate": 8.800502971325193e-06, "loss": 0.1521, "step": 8515 }, { "epoch": 0.24843923216056946, "grad_norm": 0.892213394264103, "learning_rate": 8.800195964084205e-06, "loss": 0.185, "step": 8516 }, { "epoch": 0.248468405391213, "grad_norm": 0.9631334992011685, "learning_rate": 8.799888922915975e-06, "loss": 0.1519, "step": 8517 }, { "epoch": 0.2484975786218566, "grad_norm": 0.7927291508948916, "learning_rate": 8.799581847823247e-06, "loss": 0.1579, "step": 8518 }, { "epoch": 0.24852675185250014, "grad_norm": 0.782056273651509, "learning_rate": 8.799274738808757e-06, "loss": 0.1464, "step": 8519 }, { "epoch": 0.2485559250831437, "grad_norm": 0.8473680251759129, "learning_rate": 8.798967595875247e-06, "loss": 0.1684, "step": 8520 }, { "epoch": 0.24858509831378728, "grad_norm": 0.8015432953151329, "learning_rate": 8.798660419025464e-06, "loss": 0.1628, "step": 8521 }, { "epoch": 0.24861427154443083, "grad_norm": 1.0293800914323845, "learning_rate": 8.798353208262147e-06, "loss": 0.187, "step": 8522 }, { "epoch": 0.24864344477507438, "grad_norm": 0.908218371022314, "learning_rate": 8.79804596358804e-06, "loss": 0.1727, "step": 8523 }, { "epoch": 0.24867261800571797, "grad_norm": 1.1202371764316217, "learning_rate": 8.797738685005883e-06, "loss": 0.1757, "step": 8524 }, { "epoch": 0.24870179123636152, "grad_norm": 1.0151239456504162, "learning_rate": 8.797431372518424e-06, "loss": 0.151, "step": 8525 }, { "epoch": 0.24873096446700507, "grad_norm": 0.7940225277400671, "learning_rate": 8.797124026128403e-06, "loss": 0.1655, "step": 8526 }, { "epoch": 0.24876013769764863, "grad_norm": 0.725528592028752, "learning_rate": 8.796816645838566e-06, "loss": 0.1543, "step": 8527 }, { "epoch": 0.2487893109282922, "grad_norm": 0.9403967739044864, "learning_rate": 8.796509231651655e-06, "loss": 0.1857, "step": 8528 }, { "epoch": 0.24881848415893576, "grad_norm": 0.9240299974494754, "learning_rate": 8.796201783570417e-06, "loss": 0.1642, "step": 8529 }, { "epoch": 0.2488476573895793, "grad_norm": 0.7519665485351806, "learning_rate": 8.795894301597596e-06, "loss": 0.1623, "step": 8530 }, { "epoch": 0.2488768306202229, "grad_norm": 0.8571661523051104, "learning_rate": 8.795586785735935e-06, "loss": 0.1726, "step": 8531 }, { "epoch": 0.24890600385086645, "grad_norm": 0.947172515776725, "learning_rate": 8.795279235988183e-06, "loss": 0.1604, "step": 8532 }, { "epoch": 0.24893517708151, "grad_norm": 0.8151591283214685, "learning_rate": 8.794971652357083e-06, "loss": 0.1589, "step": 8533 }, { "epoch": 0.24896435031215358, "grad_norm": 0.8089411185537707, "learning_rate": 8.794664034845383e-06, "loss": 0.1738, "step": 8534 }, { "epoch": 0.24899352354279713, "grad_norm": 0.7158728657870362, "learning_rate": 8.794356383455826e-06, "loss": 0.1651, "step": 8535 }, { "epoch": 0.2490226967734407, "grad_norm": 0.8113539840458991, "learning_rate": 8.794048698191165e-06, "loss": 0.1578, "step": 8536 }, { "epoch": 0.24905187000408424, "grad_norm": 0.6200616649387198, "learning_rate": 8.79374097905414e-06, "loss": 0.1495, "step": 8537 }, { "epoch": 0.24908104323472782, "grad_norm": 0.9316102662343403, "learning_rate": 8.793433226047501e-06, "loss": 0.1726, "step": 8538 }, { "epoch": 0.24911021646537138, "grad_norm": 0.8846989390645414, "learning_rate": 8.793125439173997e-06, "loss": 0.1464, "step": 8539 }, { "epoch": 0.24913938969601493, "grad_norm": 0.7452266622871222, "learning_rate": 8.792817618436375e-06, "loss": 0.164, "step": 8540 }, { "epoch": 0.2491685629266585, "grad_norm": 0.8744304124780296, "learning_rate": 8.792509763837382e-06, "loss": 0.1474, "step": 8541 }, { "epoch": 0.24919773615730206, "grad_norm": 0.8878039406492998, "learning_rate": 8.792201875379767e-06, "loss": 0.1688, "step": 8542 }, { "epoch": 0.24922690938794562, "grad_norm": 0.9490768869872591, "learning_rate": 8.791893953066279e-06, "loss": 0.1524, "step": 8543 }, { "epoch": 0.24925608261858917, "grad_norm": 0.8695160326896547, "learning_rate": 8.791585996899667e-06, "loss": 0.158, "step": 8544 }, { "epoch": 0.24928525584923275, "grad_norm": 0.9238782009657288, "learning_rate": 8.79127800688268e-06, "loss": 0.1801, "step": 8545 }, { "epoch": 0.2493144290798763, "grad_norm": 0.911790527348034, "learning_rate": 8.790969983018067e-06, "loss": 0.1786, "step": 8546 }, { "epoch": 0.24934360231051986, "grad_norm": 0.9008959008504507, "learning_rate": 8.790661925308582e-06, "loss": 0.1675, "step": 8547 }, { "epoch": 0.24937277554116344, "grad_norm": 0.8888191495930848, "learning_rate": 8.79035383375697e-06, "loss": 0.157, "step": 8548 }, { "epoch": 0.249401948771807, "grad_norm": 0.9852866117505581, "learning_rate": 8.790045708365983e-06, "loss": 0.1563, "step": 8549 }, { "epoch": 0.24943112200245054, "grad_norm": 0.8088381591791408, "learning_rate": 8.789737549138376e-06, "loss": 0.1505, "step": 8550 }, { "epoch": 0.24946029523309413, "grad_norm": 0.7571559136577702, "learning_rate": 8.789429356076895e-06, "loss": 0.1529, "step": 8551 }, { "epoch": 0.24948946846373768, "grad_norm": 0.9057581284918355, "learning_rate": 8.789121129184292e-06, "loss": 0.1319, "step": 8552 }, { "epoch": 0.24951864169438123, "grad_norm": 0.7759745420464187, "learning_rate": 8.78881286846332e-06, "loss": 0.145, "step": 8553 }, { "epoch": 0.24954781492502479, "grad_norm": 0.693580534777219, "learning_rate": 8.788504573916735e-06, "loss": 0.15, "step": 8554 }, { "epoch": 0.24957698815566837, "grad_norm": 0.7697521924643248, "learning_rate": 8.788196245547283e-06, "loss": 0.1468, "step": 8555 }, { "epoch": 0.24960616138631192, "grad_norm": 0.7953894129472417, "learning_rate": 8.787887883357718e-06, "loss": 0.1826, "step": 8556 }, { "epoch": 0.24963533461695547, "grad_norm": 0.7171112382319759, "learning_rate": 8.787579487350795e-06, "loss": 0.1663, "step": 8557 }, { "epoch": 0.24966450784759905, "grad_norm": 0.8971018760703712, "learning_rate": 8.787271057529267e-06, "loss": 0.168, "step": 8558 }, { "epoch": 0.2496936810782426, "grad_norm": 0.914590183249974, "learning_rate": 8.786962593895887e-06, "loss": 0.1697, "step": 8559 }, { "epoch": 0.24972285430888616, "grad_norm": 0.7356187797125372, "learning_rate": 8.786654096453411e-06, "loss": 0.1627, "step": 8560 }, { "epoch": 0.24975202753952971, "grad_norm": 0.7085676442730278, "learning_rate": 8.786345565204588e-06, "loss": 0.1481, "step": 8561 }, { "epoch": 0.2497812007701733, "grad_norm": 0.8035803998376765, "learning_rate": 8.786037000152176e-06, "loss": 0.1521, "step": 8562 }, { "epoch": 0.24981037400081685, "grad_norm": 0.8456489905190574, "learning_rate": 8.785728401298931e-06, "loss": 0.1561, "step": 8563 }, { "epoch": 0.2498395472314604, "grad_norm": 0.806132692376375, "learning_rate": 8.785419768647606e-06, "loss": 0.1635, "step": 8564 }, { "epoch": 0.24986872046210398, "grad_norm": 0.697632255673314, "learning_rate": 8.785111102200958e-06, "loss": 0.1273, "step": 8565 }, { "epoch": 0.24989789369274754, "grad_norm": 0.7291816114635233, "learning_rate": 8.78480240196174e-06, "loss": 0.1439, "step": 8566 }, { "epoch": 0.2499270669233911, "grad_norm": 0.8278397948964653, "learning_rate": 8.784493667932709e-06, "loss": 0.1608, "step": 8567 }, { "epoch": 0.24995624015403467, "grad_norm": 0.8764478466593116, "learning_rate": 8.784184900116623e-06, "loss": 0.1725, "step": 8568 }, { "epoch": 0.24998541338467822, "grad_norm": 0.8910826173893902, "learning_rate": 8.783876098516239e-06, "loss": 0.1613, "step": 8569 }, { "epoch": 0.2500145866153218, "grad_norm": 0.9454110100942791, "learning_rate": 8.783567263134312e-06, "loss": 0.1909, "step": 8570 }, { "epoch": 0.25004375984596533, "grad_norm": 0.7174902051083285, "learning_rate": 8.783258393973597e-06, "loss": 0.1531, "step": 8571 }, { "epoch": 0.2500729330766089, "grad_norm": 0.8168221964766684, "learning_rate": 8.782949491036856e-06, "loss": 0.1601, "step": 8572 }, { "epoch": 0.2501021063072525, "grad_norm": 0.7669406975016696, "learning_rate": 8.782640554326847e-06, "loss": 0.1343, "step": 8573 }, { "epoch": 0.25013127953789605, "grad_norm": 0.8471735608476396, "learning_rate": 8.782331583846323e-06, "loss": 0.1638, "step": 8574 }, { "epoch": 0.2501604527685396, "grad_norm": 0.6773378519508466, "learning_rate": 8.782022579598046e-06, "loss": 0.135, "step": 8575 }, { "epoch": 0.25018962599918315, "grad_norm": 0.9158554370246568, "learning_rate": 8.781713541584775e-06, "loss": 0.1768, "step": 8576 }, { "epoch": 0.2502187992298267, "grad_norm": 1.1025626378261217, "learning_rate": 8.78140446980927e-06, "loss": 0.1492, "step": 8577 }, { "epoch": 0.25024797246047026, "grad_norm": 0.9874043507033101, "learning_rate": 8.781095364274286e-06, "loss": 0.151, "step": 8578 }, { "epoch": 0.2502771456911138, "grad_norm": 0.9573220286234951, "learning_rate": 8.780786224982585e-06, "loss": 0.1588, "step": 8579 }, { "epoch": 0.2503063189217574, "grad_norm": 0.8466670019844869, "learning_rate": 8.780477051936928e-06, "loss": 0.1523, "step": 8580 }, { "epoch": 0.250335492152401, "grad_norm": 0.8922545595963715, "learning_rate": 8.780167845140075e-06, "loss": 0.1722, "step": 8581 }, { "epoch": 0.2503646653830445, "grad_norm": 0.7860626236954013, "learning_rate": 8.779858604594786e-06, "loss": 0.1589, "step": 8582 }, { "epoch": 0.2503938386136881, "grad_norm": 0.8939785190141168, "learning_rate": 8.779549330303822e-06, "loss": 0.1522, "step": 8583 }, { "epoch": 0.25042301184433163, "grad_norm": 0.9091443804093601, "learning_rate": 8.779240022269941e-06, "loss": 0.172, "step": 8584 }, { "epoch": 0.2504521850749752, "grad_norm": 0.7720207746400931, "learning_rate": 8.778930680495911e-06, "loss": 0.1697, "step": 8585 }, { "epoch": 0.25048135830561874, "grad_norm": 0.8593576498178653, "learning_rate": 8.778621304984487e-06, "loss": 0.1549, "step": 8586 }, { "epoch": 0.25051053153626235, "grad_norm": 0.8583096187012206, "learning_rate": 8.778311895738436e-06, "loss": 0.1396, "step": 8587 }, { "epoch": 0.2505397047669059, "grad_norm": 0.7815697983140117, "learning_rate": 8.778002452760517e-06, "loss": 0.1694, "step": 8588 }, { "epoch": 0.25056887799754946, "grad_norm": 0.9921630331516273, "learning_rate": 8.777692976053496e-06, "loss": 0.1823, "step": 8589 }, { "epoch": 0.250598051228193, "grad_norm": 0.9075847254953955, "learning_rate": 8.77738346562013e-06, "loss": 0.1536, "step": 8590 }, { "epoch": 0.25062722445883656, "grad_norm": 0.9035407785795335, "learning_rate": 8.77707392146319e-06, "loss": 0.1569, "step": 8591 }, { "epoch": 0.2506563976894801, "grad_norm": 0.9623455556518227, "learning_rate": 8.776764343585434e-06, "loss": 0.1777, "step": 8592 }, { "epoch": 0.25068557092012367, "grad_norm": 0.7794053355405665, "learning_rate": 8.776454731989628e-06, "loss": 0.1787, "step": 8593 }, { "epoch": 0.2507147441507673, "grad_norm": 0.8189228578184276, "learning_rate": 8.776145086678535e-06, "loss": 0.1457, "step": 8594 }, { "epoch": 0.25074391738141083, "grad_norm": 0.8106201577521042, "learning_rate": 8.775835407654922e-06, "loss": 0.1808, "step": 8595 }, { "epoch": 0.2507730906120544, "grad_norm": 1.0761468091271227, "learning_rate": 8.77552569492155e-06, "loss": 0.1709, "step": 8596 }, { "epoch": 0.25080226384269794, "grad_norm": 0.7975032316074803, "learning_rate": 8.775215948481187e-06, "loss": 0.1448, "step": 8597 }, { "epoch": 0.2508314370733415, "grad_norm": 1.0595088346673573, "learning_rate": 8.774906168336595e-06, "loss": 0.1569, "step": 8598 }, { "epoch": 0.25086061030398504, "grad_norm": 0.7694985571630539, "learning_rate": 8.774596354490544e-06, "loss": 0.159, "step": 8599 }, { "epoch": 0.25088978353462865, "grad_norm": 1.1726649632400676, "learning_rate": 8.774286506945797e-06, "loss": 0.1853, "step": 8600 }, { "epoch": 0.2509189567652722, "grad_norm": 1.010183535285587, "learning_rate": 8.773976625705122e-06, "loss": 0.1998, "step": 8601 }, { "epoch": 0.25094812999591576, "grad_norm": 0.8032566842963813, "learning_rate": 8.773666710771283e-06, "loss": 0.1576, "step": 8602 }, { "epoch": 0.2509773032265593, "grad_norm": 0.7919812675501868, "learning_rate": 8.77335676214705e-06, "loss": 0.1656, "step": 8603 }, { "epoch": 0.25100647645720287, "grad_norm": 0.9404814588709256, "learning_rate": 8.773046779835189e-06, "loss": 0.158, "step": 8604 }, { "epoch": 0.2510356496878464, "grad_norm": 0.8010105402020736, "learning_rate": 8.772736763838466e-06, "loss": 0.1669, "step": 8605 }, { "epoch": 0.25106482291848997, "grad_norm": 1.118376939418988, "learning_rate": 8.772426714159648e-06, "loss": 0.1547, "step": 8606 }, { "epoch": 0.2510939961491336, "grad_norm": 0.7799989234003954, "learning_rate": 8.772116630801506e-06, "loss": 0.1542, "step": 8607 }, { "epoch": 0.25112316937977713, "grad_norm": 0.712752334450024, "learning_rate": 8.77180651376681e-06, "loss": 0.1582, "step": 8608 }, { "epoch": 0.2511523426104207, "grad_norm": 0.7919712574181268, "learning_rate": 8.771496363058323e-06, "loss": 0.1809, "step": 8609 }, { "epoch": 0.25118151584106424, "grad_norm": 1.0160139063748992, "learning_rate": 8.771186178678817e-06, "loss": 0.1618, "step": 8610 }, { "epoch": 0.2512106890717078, "grad_norm": 0.8429941639356404, "learning_rate": 8.770875960631063e-06, "loss": 0.1639, "step": 8611 }, { "epoch": 0.25123986230235135, "grad_norm": 1.0152709983300994, "learning_rate": 8.770565708917826e-06, "loss": 0.1607, "step": 8612 }, { "epoch": 0.2512690355329949, "grad_norm": 0.9282003931382112, "learning_rate": 8.77025542354188e-06, "loss": 0.1421, "step": 8613 }, { "epoch": 0.2512982087636385, "grad_norm": 0.8633570939081969, "learning_rate": 8.769945104505992e-06, "loss": 0.1563, "step": 8614 }, { "epoch": 0.25132738199428206, "grad_norm": 0.764496718697493, "learning_rate": 8.769634751812937e-06, "loss": 0.1615, "step": 8615 }, { "epoch": 0.2513565552249256, "grad_norm": 0.8117302432475778, "learning_rate": 8.769324365465482e-06, "loss": 0.1425, "step": 8616 }, { "epoch": 0.25138572845556917, "grad_norm": 0.9333843792401662, "learning_rate": 8.769013945466396e-06, "loss": 0.1695, "step": 8617 }, { "epoch": 0.2514149016862127, "grad_norm": 1.120504946358395, "learning_rate": 8.768703491818455e-06, "loss": 0.1693, "step": 8618 }, { "epoch": 0.2514440749168563, "grad_norm": 0.8375545723038, "learning_rate": 8.76839300452443e-06, "loss": 0.1578, "step": 8619 }, { "epoch": 0.25147324814749983, "grad_norm": 0.9141436940891724, "learning_rate": 8.76808248358709e-06, "loss": 0.1502, "step": 8620 }, { "epoch": 0.25150242137814344, "grad_norm": 0.9099046009644094, "learning_rate": 8.76777192900921e-06, "loss": 0.1797, "step": 8621 }, { "epoch": 0.251531594608787, "grad_norm": 0.7826325768943171, "learning_rate": 8.767461340793563e-06, "loss": 0.1521, "step": 8622 }, { "epoch": 0.25156076783943054, "grad_norm": 0.8693445201995545, "learning_rate": 8.767150718942919e-06, "loss": 0.1623, "step": 8623 }, { "epoch": 0.2515899410700741, "grad_norm": 1.0338015467666206, "learning_rate": 8.766840063460054e-06, "loss": 0.1561, "step": 8624 }, { "epoch": 0.25161911430071765, "grad_norm": 0.9980300350500342, "learning_rate": 8.766529374347738e-06, "loss": 0.1593, "step": 8625 }, { "epoch": 0.2516482875313612, "grad_norm": 0.8951577122410843, "learning_rate": 8.766218651608748e-06, "loss": 0.1547, "step": 8626 }, { "epoch": 0.2516774607620048, "grad_norm": 0.8125359105854942, "learning_rate": 8.765907895245857e-06, "loss": 0.1576, "step": 8627 }, { "epoch": 0.25170663399264837, "grad_norm": 0.9954761177039216, "learning_rate": 8.765597105261838e-06, "loss": 0.1688, "step": 8628 }, { "epoch": 0.2517358072232919, "grad_norm": 0.9543299198439987, "learning_rate": 8.765286281659469e-06, "loss": 0.1483, "step": 8629 }, { "epoch": 0.25176498045393547, "grad_norm": 0.8071287681668878, "learning_rate": 8.764975424441522e-06, "loss": 0.157, "step": 8630 }, { "epoch": 0.251794153684579, "grad_norm": 0.8540025790801501, "learning_rate": 8.764664533610774e-06, "loss": 0.1868, "step": 8631 }, { "epoch": 0.2518233269152226, "grad_norm": 0.9667079527671832, "learning_rate": 8.764353609169997e-06, "loss": 0.1524, "step": 8632 }, { "epoch": 0.25185250014586613, "grad_norm": 0.9058758410702621, "learning_rate": 8.764042651121973e-06, "loss": 0.1516, "step": 8633 }, { "epoch": 0.25188167337650974, "grad_norm": 0.8409416216707294, "learning_rate": 8.763731659469473e-06, "loss": 0.1475, "step": 8634 }, { "epoch": 0.2519108466071533, "grad_norm": 0.8538141851741363, "learning_rate": 8.763420634215277e-06, "loss": 0.1428, "step": 8635 }, { "epoch": 0.25194001983779685, "grad_norm": 0.9442534273461484, "learning_rate": 8.763109575362156e-06, "loss": 0.1615, "step": 8636 }, { "epoch": 0.2519691930684404, "grad_norm": 0.805499568697246, "learning_rate": 8.762798482912895e-06, "loss": 0.1632, "step": 8637 }, { "epoch": 0.25199836629908395, "grad_norm": 0.6903477934349642, "learning_rate": 8.762487356870267e-06, "loss": 0.1724, "step": 8638 }, { "epoch": 0.2520275395297275, "grad_norm": 0.7868907082652199, "learning_rate": 8.762176197237048e-06, "loss": 0.1323, "step": 8639 }, { "epoch": 0.25205671276037106, "grad_norm": 0.6980466504190983, "learning_rate": 8.76186500401602e-06, "loss": 0.1552, "step": 8640 }, { "epoch": 0.25208588599101467, "grad_norm": 0.792164302645313, "learning_rate": 8.761553777209957e-06, "loss": 0.1546, "step": 8641 }, { "epoch": 0.2521150592216582, "grad_norm": 0.94007538803066, "learning_rate": 8.761242516821642e-06, "loss": 0.1372, "step": 8642 }, { "epoch": 0.2521442324523018, "grad_norm": 0.8307347612519377, "learning_rate": 8.760931222853851e-06, "loss": 0.1457, "step": 8643 }, { "epoch": 0.25217340568294533, "grad_norm": 0.9547156420905087, "learning_rate": 8.760619895309364e-06, "loss": 0.1611, "step": 8644 }, { "epoch": 0.2522025789135889, "grad_norm": 1.2277498281310582, "learning_rate": 8.76030853419096e-06, "loss": 0.1869, "step": 8645 }, { "epoch": 0.25223175214423244, "grad_norm": 0.7589041422482073, "learning_rate": 8.759997139501418e-06, "loss": 0.1893, "step": 8646 }, { "epoch": 0.252260925374876, "grad_norm": 1.1921327947013587, "learning_rate": 8.759685711243519e-06, "loss": 0.1566, "step": 8647 }, { "epoch": 0.2522900986055196, "grad_norm": 1.0194506263989331, "learning_rate": 8.759374249420046e-06, "loss": 0.1679, "step": 8648 }, { "epoch": 0.25231927183616315, "grad_norm": 0.8559406641656732, "learning_rate": 8.759062754033776e-06, "loss": 0.1517, "step": 8649 }, { "epoch": 0.2523484450668067, "grad_norm": 0.8029563133814118, "learning_rate": 8.75875122508749e-06, "loss": 0.1579, "step": 8650 }, { "epoch": 0.25237761829745026, "grad_norm": 1.0332525877684775, "learning_rate": 8.758439662583972e-06, "loss": 0.1528, "step": 8651 }, { "epoch": 0.2524067915280938, "grad_norm": 1.0776228863205328, "learning_rate": 8.758128066526002e-06, "loss": 0.165, "step": 8652 }, { "epoch": 0.25243596475873736, "grad_norm": 0.9647404425923645, "learning_rate": 8.75781643691636e-06, "loss": 0.1493, "step": 8653 }, { "epoch": 0.2524651379893809, "grad_norm": 1.3075908157331049, "learning_rate": 8.757504773757831e-06, "loss": 0.1519, "step": 8654 }, { "epoch": 0.2524943112200245, "grad_norm": 0.9019791019347585, "learning_rate": 8.757193077053197e-06, "loss": 0.1432, "step": 8655 }, { "epoch": 0.2525234844506681, "grad_norm": 0.8670379469726758, "learning_rate": 8.756881346805238e-06, "loss": 0.1371, "step": 8656 }, { "epoch": 0.25255265768131163, "grad_norm": 0.9192781121346693, "learning_rate": 8.75656958301674e-06, "loss": 0.1419, "step": 8657 }, { "epoch": 0.2525818309119552, "grad_norm": 0.8267824674551398, "learning_rate": 8.756257785690488e-06, "loss": 0.1417, "step": 8658 }, { "epoch": 0.25261100414259874, "grad_norm": 0.7242660404992772, "learning_rate": 8.755945954829259e-06, "loss": 0.1454, "step": 8659 }, { "epoch": 0.2526401773732423, "grad_norm": 0.9708102460840208, "learning_rate": 8.755634090435845e-06, "loss": 0.158, "step": 8660 }, { "epoch": 0.2526693506038859, "grad_norm": 0.8964518881575788, "learning_rate": 8.755322192513026e-06, "loss": 0.154, "step": 8661 }, { "epoch": 0.25269852383452945, "grad_norm": 0.7597045241854307, "learning_rate": 8.755010261063583e-06, "loss": 0.1633, "step": 8662 }, { "epoch": 0.252727697065173, "grad_norm": 0.6959537002629588, "learning_rate": 8.754698296090306e-06, "loss": 0.1346, "step": 8663 }, { "epoch": 0.25275687029581656, "grad_norm": 1.1404850589425124, "learning_rate": 8.754386297595982e-06, "loss": 0.1723, "step": 8664 }, { "epoch": 0.2527860435264601, "grad_norm": 0.8324400735924496, "learning_rate": 8.754074265583391e-06, "loss": 0.1613, "step": 8665 }, { "epoch": 0.25281521675710367, "grad_norm": 0.8231664541683732, "learning_rate": 8.753762200055323e-06, "loss": 0.1457, "step": 8666 }, { "epoch": 0.2528443899877472, "grad_norm": 0.7615114736289824, "learning_rate": 8.75345010101456e-06, "loss": 0.148, "step": 8667 }, { "epoch": 0.25287356321839083, "grad_norm": 0.6634927283572258, "learning_rate": 8.753137968463891e-06, "loss": 0.1614, "step": 8668 }, { "epoch": 0.2529027364490344, "grad_norm": 0.7274235145822803, "learning_rate": 8.752825802406104e-06, "loss": 0.1622, "step": 8669 }, { "epoch": 0.25293190967967794, "grad_norm": 0.8576516402955692, "learning_rate": 8.752513602843984e-06, "loss": 0.1695, "step": 8670 }, { "epoch": 0.2529610829103215, "grad_norm": 3.6622315857967838, "learning_rate": 8.752201369780317e-06, "loss": 0.156, "step": 8671 }, { "epoch": 0.25299025614096504, "grad_norm": 1.0653715826093315, "learning_rate": 8.751889103217892e-06, "loss": 0.1438, "step": 8672 }, { "epoch": 0.2530194293716086, "grad_norm": 0.9536320888241338, "learning_rate": 8.751576803159495e-06, "loss": 0.1403, "step": 8673 }, { "epoch": 0.25304860260225215, "grad_norm": 0.9389850085117988, "learning_rate": 8.751264469607919e-06, "loss": 0.1532, "step": 8674 }, { "epoch": 0.25307777583289576, "grad_norm": 0.9264477765314943, "learning_rate": 8.750952102565949e-06, "loss": 0.1612, "step": 8675 }, { "epoch": 0.2531069490635393, "grad_norm": 0.7896236278290961, "learning_rate": 8.750639702036372e-06, "loss": 0.1576, "step": 8676 }, { "epoch": 0.25313612229418286, "grad_norm": 1.089660135807044, "learning_rate": 8.75032726802198e-06, "loss": 0.1816, "step": 8677 }, { "epoch": 0.2531652955248264, "grad_norm": 0.7963842850599411, "learning_rate": 8.75001480052556e-06, "loss": 0.167, "step": 8678 }, { "epoch": 0.25319446875546997, "grad_norm": 0.8047992319509558, "learning_rate": 8.749702299549908e-06, "loss": 0.159, "step": 8679 }, { "epoch": 0.2532236419861135, "grad_norm": 0.8361255619347895, "learning_rate": 8.749389765097805e-06, "loss": 0.1534, "step": 8680 }, { "epoch": 0.2532528152167571, "grad_norm": 0.8121742773997878, "learning_rate": 8.749077197172044e-06, "loss": 0.1673, "step": 8681 }, { "epoch": 0.2532819884474007, "grad_norm": 0.7805005133041115, "learning_rate": 8.74876459577542e-06, "loss": 0.174, "step": 8682 }, { "epoch": 0.25331116167804424, "grad_norm": 0.7274078253360706, "learning_rate": 8.748451960910718e-06, "loss": 0.1759, "step": 8683 }, { "epoch": 0.2533403349086878, "grad_norm": 1.0429367089407238, "learning_rate": 8.748139292580733e-06, "loss": 0.2104, "step": 8684 }, { "epoch": 0.25336950813933135, "grad_norm": 0.8506984585250397, "learning_rate": 8.747826590788256e-06, "loss": 0.1652, "step": 8685 }, { "epoch": 0.2533986813699749, "grad_norm": 1.1421366549445577, "learning_rate": 8.747513855536077e-06, "loss": 0.1337, "step": 8686 }, { "epoch": 0.25342785460061845, "grad_norm": 1.1113881068498233, "learning_rate": 8.747201086826989e-06, "loss": 0.1407, "step": 8687 }, { "epoch": 0.25345702783126206, "grad_norm": 0.9908063541335966, "learning_rate": 8.746888284663784e-06, "loss": 0.174, "step": 8688 }, { "epoch": 0.2534862010619056, "grad_norm": 0.7712802509983608, "learning_rate": 8.746575449049255e-06, "loss": 0.1406, "step": 8689 }, { "epoch": 0.25351537429254917, "grad_norm": 0.7431100563887824, "learning_rate": 8.746262579986194e-06, "loss": 0.1513, "step": 8690 }, { "epoch": 0.2535445475231927, "grad_norm": 0.7644407130429836, "learning_rate": 8.745949677477396e-06, "loss": 0.1537, "step": 8691 }, { "epoch": 0.2535737207538363, "grad_norm": 0.9870168221703469, "learning_rate": 8.745636741525654e-06, "loss": 0.1978, "step": 8692 }, { "epoch": 0.2536028939844798, "grad_norm": 0.8982865652069405, "learning_rate": 8.745323772133761e-06, "loss": 0.1409, "step": 8693 }, { "epoch": 0.2536320672151234, "grad_norm": 0.9042036941833452, "learning_rate": 8.745010769304509e-06, "loss": 0.1413, "step": 8694 }, { "epoch": 0.253661240445767, "grad_norm": 0.9012370192724426, "learning_rate": 8.744697733040696e-06, "loss": 0.177, "step": 8695 }, { "epoch": 0.25369041367641054, "grad_norm": 0.7438421399011913, "learning_rate": 8.744384663345118e-06, "loss": 0.1522, "step": 8696 }, { "epoch": 0.2537195869070541, "grad_norm": 0.843966325383359, "learning_rate": 8.744071560220567e-06, "loss": 0.1901, "step": 8697 }, { "epoch": 0.25374876013769765, "grad_norm": 0.7354098162433835, "learning_rate": 8.743758423669837e-06, "loss": 0.1535, "step": 8698 }, { "epoch": 0.2537779333683412, "grad_norm": 1.2117025504985062, "learning_rate": 8.743445253695725e-06, "loss": 0.1744, "step": 8699 }, { "epoch": 0.25380710659898476, "grad_norm": 0.9362173993868674, "learning_rate": 8.743132050301031e-06, "loss": 0.1653, "step": 8700 }, { "epoch": 0.2538362798296283, "grad_norm": 0.8310313706059644, "learning_rate": 8.742818813488545e-06, "loss": 0.15, "step": 8701 }, { "epoch": 0.2538654530602719, "grad_norm": 0.8139816868476868, "learning_rate": 8.742505543261066e-06, "loss": 0.1394, "step": 8702 }, { "epoch": 0.25389462629091547, "grad_norm": 0.7197049985343916, "learning_rate": 8.742192239621391e-06, "loss": 0.159, "step": 8703 }, { "epoch": 0.253923799521559, "grad_norm": 0.7073837646585768, "learning_rate": 8.741878902572318e-06, "loss": 0.1603, "step": 8704 }, { "epoch": 0.2539529727522026, "grad_norm": 0.7041714988132413, "learning_rate": 8.741565532116643e-06, "loss": 0.1687, "step": 8705 }, { "epoch": 0.25398214598284613, "grad_norm": 0.7522600859004025, "learning_rate": 8.741252128257164e-06, "loss": 0.1433, "step": 8706 }, { "epoch": 0.2540113192134897, "grad_norm": 0.7407409471862808, "learning_rate": 8.740938690996678e-06, "loss": 0.1498, "step": 8707 }, { "epoch": 0.25404049244413324, "grad_norm": 0.8667489606012565, "learning_rate": 8.740625220337987e-06, "loss": 0.1435, "step": 8708 }, { "epoch": 0.25406966567477685, "grad_norm": 0.6547311529300278, "learning_rate": 8.740311716283884e-06, "loss": 0.1362, "step": 8709 }, { "epoch": 0.2540988389054204, "grad_norm": 0.8371369414539442, "learning_rate": 8.739998178837172e-06, "loss": 0.1499, "step": 8710 }, { "epoch": 0.25412801213606395, "grad_norm": 0.8356802734640116, "learning_rate": 8.739684608000651e-06, "loss": 0.1419, "step": 8711 }, { "epoch": 0.2541571853667075, "grad_norm": 0.7991211929141547, "learning_rate": 8.739371003777117e-06, "loss": 0.1506, "step": 8712 }, { "epoch": 0.25418635859735106, "grad_norm": 0.7427853021170802, "learning_rate": 8.73905736616937e-06, "loss": 0.1538, "step": 8713 }, { "epoch": 0.2542155318279946, "grad_norm": 0.8113598601341298, "learning_rate": 8.738743695180214e-06, "loss": 0.1589, "step": 8714 }, { "epoch": 0.2542447050586382, "grad_norm": 0.8569630973636471, "learning_rate": 8.738429990812445e-06, "loss": 0.1637, "step": 8715 }, { "epoch": 0.2542738782892818, "grad_norm": 0.9100324245795195, "learning_rate": 8.738116253068866e-06, "loss": 0.16, "step": 8716 }, { "epoch": 0.2543030515199253, "grad_norm": 0.8638207685398084, "learning_rate": 8.737802481952277e-06, "loss": 0.1536, "step": 8717 }, { "epoch": 0.2543322247505689, "grad_norm": 0.845447471564934, "learning_rate": 8.73748867746548e-06, "loss": 0.1394, "step": 8718 }, { "epoch": 0.25436139798121243, "grad_norm": 0.873617854106875, "learning_rate": 8.737174839611277e-06, "loss": 0.1692, "step": 8719 }, { "epoch": 0.254390571211856, "grad_norm": 0.9311736290655294, "learning_rate": 8.736860968392469e-06, "loss": 0.1448, "step": 8720 }, { "epoch": 0.25441974444249954, "grad_norm": 1.1191446734340702, "learning_rate": 8.736547063811858e-06, "loss": 0.1659, "step": 8721 }, { "epoch": 0.25444891767314315, "grad_norm": 1.1348476700458443, "learning_rate": 8.736233125872247e-06, "loss": 0.1582, "step": 8722 }, { "epoch": 0.2544780909037867, "grad_norm": 1.1460856678321898, "learning_rate": 8.735919154576438e-06, "loss": 0.1621, "step": 8723 }, { "epoch": 0.25450726413443026, "grad_norm": 1.1746904149793125, "learning_rate": 8.735605149927236e-06, "loss": 0.1658, "step": 8724 }, { "epoch": 0.2545364373650738, "grad_norm": 1.329046099020268, "learning_rate": 8.735291111927441e-06, "loss": 0.1516, "step": 8725 }, { "epoch": 0.25456561059571736, "grad_norm": 1.1640390457061447, "learning_rate": 8.73497704057986e-06, "loss": 0.1882, "step": 8726 }, { "epoch": 0.2545947838263609, "grad_norm": 1.0280124658537055, "learning_rate": 8.734662935887295e-06, "loss": 0.1573, "step": 8727 }, { "epoch": 0.25462395705700447, "grad_norm": 0.9639943602244389, "learning_rate": 8.73434879785255e-06, "loss": 0.17, "step": 8728 }, { "epoch": 0.2546531302876481, "grad_norm": 0.825427524115082, "learning_rate": 8.734034626478432e-06, "loss": 0.1587, "step": 8729 }, { "epoch": 0.25468230351829163, "grad_norm": 0.9130862061586684, "learning_rate": 8.733720421767744e-06, "loss": 0.1663, "step": 8730 }, { "epoch": 0.2547114767489352, "grad_norm": 0.8498724775905401, "learning_rate": 8.733406183723293e-06, "loss": 0.1486, "step": 8731 }, { "epoch": 0.25474064997957874, "grad_norm": 1.0775770106421836, "learning_rate": 8.73309191234788e-06, "loss": 0.1389, "step": 8732 }, { "epoch": 0.2547698232102223, "grad_norm": 0.7640333168428327, "learning_rate": 8.732777607644314e-06, "loss": 0.1382, "step": 8733 }, { "epoch": 0.25479899644086584, "grad_norm": 0.8952833004604634, "learning_rate": 8.7324632696154e-06, "loss": 0.1836, "step": 8734 }, { "epoch": 0.2548281696715094, "grad_norm": 0.849439438598427, "learning_rate": 8.732148898263946e-06, "loss": 0.1611, "step": 8735 }, { "epoch": 0.254857342902153, "grad_norm": 1.0782415390561326, "learning_rate": 8.73183449359276e-06, "loss": 0.196, "step": 8736 }, { "epoch": 0.25488651613279656, "grad_norm": 1.0005187587540993, "learning_rate": 8.731520055604642e-06, "loss": 0.2017, "step": 8737 }, { "epoch": 0.2549156893634401, "grad_norm": 0.7896312246550041, "learning_rate": 8.731205584302406e-06, "loss": 0.1589, "step": 8738 }, { "epoch": 0.25494486259408367, "grad_norm": 0.944690480624181, "learning_rate": 8.730891079688856e-06, "loss": 0.1515, "step": 8739 }, { "epoch": 0.2549740358247272, "grad_norm": 1.028373765804265, "learning_rate": 8.730576541766803e-06, "loss": 0.1718, "step": 8740 }, { "epoch": 0.25500320905537077, "grad_norm": 0.9030042930995393, "learning_rate": 8.730261970539052e-06, "loss": 0.1606, "step": 8741 }, { "epoch": 0.2550323822860144, "grad_norm": 0.8539913410412551, "learning_rate": 8.729947366008413e-06, "loss": 0.1404, "step": 8742 }, { "epoch": 0.25506155551665793, "grad_norm": 0.8820475575043571, "learning_rate": 8.729632728177695e-06, "loss": 0.1377, "step": 8743 }, { "epoch": 0.2550907287473015, "grad_norm": 0.7477643030610125, "learning_rate": 8.729318057049704e-06, "loss": 0.1599, "step": 8744 }, { "epoch": 0.25511990197794504, "grad_norm": 0.7396971754806884, "learning_rate": 8.729003352627255e-06, "loss": 0.1551, "step": 8745 }, { "epoch": 0.2551490752085886, "grad_norm": 1.1071026612148385, "learning_rate": 8.728688614913152e-06, "loss": 0.1372, "step": 8746 }, { "epoch": 0.25517824843923215, "grad_norm": 0.9409646097818506, "learning_rate": 8.728373843910207e-06, "loss": 0.1521, "step": 8747 }, { "epoch": 0.2552074216698757, "grad_norm": 0.9150115926042979, "learning_rate": 8.728059039621231e-06, "loss": 0.1638, "step": 8748 }, { "epoch": 0.2552365949005193, "grad_norm": 0.9937005959563189, "learning_rate": 8.727744202049035e-06, "loss": 0.1609, "step": 8749 }, { "epoch": 0.25526576813116286, "grad_norm": 0.8958513950848999, "learning_rate": 8.727429331196426e-06, "loss": 0.1703, "step": 8750 }, { "epoch": 0.2552949413618064, "grad_norm": 0.8248501867962532, "learning_rate": 8.72711442706622e-06, "loss": 0.1424, "step": 8751 }, { "epoch": 0.25532411459244997, "grad_norm": 0.9571136844958456, "learning_rate": 8.726799489661225e-06, "loss": 0.1659, "step": 8752 }, { "epoch": 0.2553532878230935, "grad_norm": 1.0667095653183198, "learning_rate": 8.726484518984256e-06, "loss": 0.1616, "step": 8753 }, { "epoch": 0.2553824610537371, "grad_norm": 0.8618680921339075, "learning_rate": 8.72616951503812e-06, "loss": 0.1573, "step": 8754 }, { "epoch": 0.25541163428438063, "grad_norm": 0.8299326664354898, "learning_rate": 8.725854477825632e-06, "loss": 0.179, "step": 8755 }, { "epoch": 0.25544080751502424, "grad_norm": 0.8713894055508062, "learning_rate": 8.725539407349606e-06, "loss": 0.1391, "step": 8756 }, { "epoch": 0.2554699807456678, "grad_norm": 0.8007962491614893, "learning_rate": 8.725224303612854e-06, "loss": 0.1533, "step": 8757 }, { "epoch": 0.25549915397631134, "grad_norm": 0.971945767080681, "learning_rate": 8.724909166618187e-06, "loss": 0.1811, "step": 8758 }, { "epoch": 0.2555283272069549, "grad_norm": 0.8146285945171134, "learning_rate": 8.724593996368422e-06, "loss": 0.1613, "step": 8759 }, { "epoch": 0.25555750043759845, "grad_norm": 0.8374369013562248, "learning_rate": 8.72427879286637e-06, "loss": 0.1497, "step": 8760 }, { "epoch": 0.255586673668242, "grad_norm": 0.9625265198360471, "learning_rate": 8.723963556114847e-06, "loss": 0.1552, "step": 8761 }, { "epoch": 0.25561584689888556, "grad_norm": 0.7911600922588395, "learning_rate": 8.723648286116664e-06, "loss": 0.1567, "step": 8762 }, { "epoch": 0.25564502012952917, "grad_norm": 0.6944363270822046, "learning_rate": 8.723332982874639e-06, "loss": 0.16, "step": 8763 }, { "epoch": 0.2556741933601727, "grad_norm": 0.7716485153147591, "learning_rate": 8.723017646391587e-06, "loss": 0.1689, "step": 8764 }, { "epoch": 0.2557033665908163, "grad_norm": 0.8354276672058369, "learning_rate": 8.722702276670323e-06, "loss": 0.1541, "step": 8765 }, { "epoch": 0.2557325398214598, "grad_norm": 0.695295395783904, "learning_rate": 8.72238687371366e-06, "loss": 0.1488, "step": 8766 }, { "epoch": 0.2557617130521034, "grad_norm": 0.7860631251591896, "learning_rate": 8.722071437524415e-06, "loss": 0.1492, "step": 8767 }, { "epoch": 0.25579088628274693, "grad_norm": 0.8912421457723493, "learning_rate": 8.721755968105406e-06, "loss": 0.1689, "step": 8768 }, { "epoch": 0.2558200595133905, "grad_norm": 0.7598400483680359, "learning_rate": 8.721440465459448e-06, "loss": 0.1569, "step": 8769 }, { "epoch": 0.2558492327440341, "grad_norm": 0.857580910539667, "learning_rate": 8.721124929589358e-06, "loss": 0.1437, "step": 8770 }, { "epoch": 0.25587840597467765, "grad_norm": 0.6838812636405465, "learning_rate": 8.720809360497953e-06, "loss": 0.1589, "step": 8771 }, { "epoch": 0.2559075792053212, "grad_norm": 0.9153310537215729, "learning_rate": 8.720493758188049e-06, "loss": 0.1506, "step": 8772 }, { "epoch": 0.25593675243596475, "grad_norm": 0.82615409438108, "learning_rate": 8.720178122662466e-06, "loss": 0.152, "step": 8773 }, { "epoch": 0.2559659256666083, "grad_norm": 0.8605169310453629, "learning_rate": 8.71986245392402e-06, "loss": 0.1637, "step": 8774 }, { "epoch": 0.25599509889725186, "grad_norm": 0.9090589536210341, "learning_rate": 8.719546751975531e-06, "loss": 0.1457, "step": 8775 }, { "epoch": 0.25602427212789547, "grad_norm": 0.7772837033436167, "learning_rate": 8.719231016819817e-06, "loss": 0.1443, "step": 8776 }, { "epoch": 0.256053445358539, "grad_norm": 1.0667870199913696, "learning_rate": 8.718915248459697e-06, "loss": 0.1499, "step": 8777 }, { "epoch": 0.2560826185891826, "grad_norm": 0.8081053989600095, "learning_rate": 8.718599446897987e-06, "loss": 0.1692, "step": 8778 }, { "epoch": 0.25611179181982613, "grad_norm": 0.983564666362862, "learning_rate": 8.718283612137508e-06, "loss": 0.1746, "step": 8779 }, { "epoch": 0.2561409650504697, "grad_norm": 0.8528117300126145, "learning_rate": 8.717967744181084e-06, "loss": 0.1757, "step": 8780 }, { "epoch": 0.25617013828111324, "grad_norm": 0.741080976782169, "learning_rate": 8.717651843031529e-06, "loss": 0.1547, "step": 8781 }, { "epoch": 0.2561993115117568, "grad_norm": 1.1153756248130342, "learning_rate": 8.717335908691667e-06, "loss": 0.1674, "step": 8782 }, { "epoch": 0.2562284847424004, "grad_norm": 0.8409951078676309, "learning_rate": 8.717019941164317e-06, "loss": 0.1452, "step": 8783 }, { "epoch": 0.25625765797304395, "grad_norm": 0.6809635702314836, "learning_rate": 8.7167039404523e-06, "loss": 0.1911, "step": 8784 }, { "epoch": 0.2562868312036875, "grad_norm": 1.1079818340434244, "learning_rate": 8.71638790655844e-06, "loss": 0.1412, "step": 8785 }, { "epoch": 0.25631600443433106, "grad_norm": 0.9838662069191417, "learning_rate": 8.716071839485552e-06, "loss": 0.1679, "step": 8786 }, { "epoch": 0.2563451776649746, "grad_norm": 0.6506205885962708, "learning_rate": 8.715755739236464e-06, "loss": 0.1546, "step": 8787 }, { "epoch": 0.25637435089561816, "grad_norm": 0.718062480563348, "learning_rate": 8.715439605813994e-06, "loss": 0.1545, "step": 8788 }, { "epoch": 0.2564035241262617, "grad_norm": 0.8612418407748099, "learning_rate": 8.715123439220968e-06, "loss": 0.141, "step": 8789 }, { "epoch": 0.2564326973569053, "grad_norm": 0.634017237768269, "learning_rate": 8.714807239460206e-06, "loss": 0.1696, "step": 8790 }, { "epoch": 0.2564618705875489, "grad_norm": 0.7110658042582967, "learning_rate": 8.714491006534532e-06, "loss": 0.155, "step": 8791 }, { "epoch": 0.25649104381819243, "grad_norm": 1.2230597945265116, "learning_rate": 8.714174740446769e-06, "loss": 0.1521, "step": 8792 }, { "epoch": 0.256520217048836, "grad_norm": 0.7733159226290556, "learning_rate": 8.713858441199741e-06, "loss": 0.179, "step": 8793 }, { "epoch": 0.25654939027947954, "grad_norm": 0.9072653388804234, "learning_rate": 8.713542108796271e-06, "loss": 0.1584, "step": 8794 }, { "epoch": 0.2565785635101231, "grad_norm": 1.1565474412328696, "learning_rate": 8.713225743239183e-06, "loss": 0.1729, "step": 8795 }, { "epoch": 0.25660773674076665, "grad_norm": 0.8551553752200075, "learning_rate": 8.712909344531302e-06, "loss": 0.1369, "step": 8796 }, { "epoch": 0.25663690997141025, "grad_norm": 0.9607543688190029, "learning_rate": 8.712592912675454e-06, "loss": 0.1688, "step": 8797 }, { "epoch": 0.2566660832020538, "grad_norm": 0.9504308927374339, "learning_rate": 8.712276447674462e-06, "loss": 0.1917, "step": 8798 }, { "epoch": 0.25669525643269736, "grad_norm": 1.0782985099457012, "learning_rate": 8.711959949531152e-06, "loss": 0.183, "step": 8799 }, { "epoch": 0.2567244296633409, "grad_norm": 1.1792973780465728, "learning_rate": 8.71164341824835e-06, "loss": 0.1563, "step": 8800 }, { "epoch": 0.25675360289398447, "grad_norm": 0.8086254196557137, "learning_rate": 8.711326853828881e-06, "loss": 0.1761, "step": 8801 }, { "epoch": 0.256782776124628, "grad_norm": 0.8422364878747902, "learning_rate": 8.711010256275572e-06, "loss": 0.1523, "step": 8802 }, { "epoch": 0.25681194935527163, "grad_norm": 1.0218613293672558, "learning_rate": 8.710693625591249e-06, "loss": 0.1478, "step": 8803 }, { "epoch": 0.2568411225859152, "grad_norm": 1.067926623148478, "learning_rate": 8.71037696177874e-06, "loss": 0.1506, "step": 8804 }, { "epoch": 0.25687029581655874, "grad_norm": 0.8521451983426845, "learning_rate": 8.710060264840872e-06, "loss": 0.1609, "step": 8805 }, { "epoch": 0.2568994690472023, "grad_norm": 1.0538250897279264, "learning_rate": 8.70974353478047e-06, "loss": 0.1773, "step": 8806 }, { "epoch": 0.25692864227784584, "grad_norm": 0.8845549457939076, "learning_rate": 8.709426771600363e-06, "loss": 0.1599, "step": 8807 }, { "epoch": 0.2569578155084894, "grad_norm": 1.0316144930372386, "learning_rate": 8.70910997530338e-06, "loss": 0.1897, "step": 8808 }, { "epoch": 0.25698698873913295, "grad_norm": 0.8275803806540443, "learning_rate": 8.70879314589235e-06, "loss": 0.1785, "step": 8809 }, { "epoch": 0.25701616196977656, "grad_norm": 0.9629761451867813, "learning_rate": 8.708476283370098e-06, "loss": 0.1811, "step": 8810 }, { "epoch": 0.2570453352004201, "grad_norm": 0.9505803037569778, "learning_rate": 8.708159387739456e-06, "loss": 0.158, "step": 8811 }, { "epoch": 0.25707450843106366, "grad_norm": 0.7218256849695398, "learning_rate": 8.70784245900325e-06, "loss": 0.1709, "step": 8812 }, { "epoch": 0.2571036816617072, "grad_norm": 0.8765072740167568, "learning_rate": 8.707525497164316e-06, "loss": 0.1731, "step": 8813 }, { "epoch": 0.25713285489235077, "grad_norm": 0.8155519387229843, "learning_rate": 8.707208502225476e-06, "loss": 0.1466, "step": 8814 }, { "epoch": 0.2571620281229943, "grad_norm": 0.7103380025975924, "learning_rate": 8.706891474189566e-06, "loss": 0.1564, "step": 8815 }, { "epoch": 0.2571912013536379, "grad_norm": 0.7816166744788744, "learning_rate": 8.706574413059411e-06, "loss": 0.148, "step": 8816 }, { "epoch": 0.2572203745842815, "grad_norm": 0.6888803989050883, "learning_rate": 8.706257318837846e-06, "loss": 0.1337, "step": 8817 }, { "epoch": 0.25724954781492504, "grad_norm": 0.7799227031136653, "learning_rate": 8.7059401915277e-06, "loss": 0.1365, "step": 8818 }, { "epoch": 0.2572787210455686, "grad_norm": 0.8660913193244866, "learning_rate": 8.705623031131805e-06, "loss": 0.1838, "step": 8819 }, { "epoch": 0.25730789427621215, "grad_norm": 0.6483985644983833, "learning_rate": 8.70530583765299e-06, "loss": 0.1331, "step": 8820 }, { "epoch": 0.2573370675068557, "grad_norm": 1.2646915220802024, "learning_rate": 8.704988611094093e-06, "loss": 0.1595, "step": 8821 }, { "epoch": 0.25736624073749925, "grad_norm": 0.7979583065309406, "learning_rate": 8.704671351457941e-06, "loss": 0.1565, "step": 8822 }, { "epoch": 0.2573954139681428, "grad_norm": 0.7451983996609904, "learning_rate": 8.704354058747366e-06, "loss": 0.1467, "step": 8823 }, { "epoch": 0.2574245871987864, "grad_norm": 1.0089067781043577, "learning_rate": 8.704036732965202e-06, "loss": 0.1668, "step": 8824 }, { "epoch": 0.25745376042942997, "grad_norm": 1.0520293847921796, "learning_rate": 8.703719374114283e-06, "loss": 0.1518, "step": 8825 }, { "epoch": 0.2574829336600735, "grad_norm": 0.8712651565054161, "learning_rate": 8.703401982197444e-06, "loss": 0.1662, "step": 8826 }, { "epoch": 0.2575121068907171, "grad_norm": 0.9988059629732003, "learning_rate": 8.703084557217513e-06, "loss": 0.1497, "step": 8827 }, { "epoch": 0.2575412801213606, "grad_norm": 0.8589199021224142, "learning_rate": 8.702767099177328e-06, "loss": 0.1649, "step": 8828 }, { "epoch": 0.2575704533520042, "grad_norm": 0.7726342037518046, "learning_rate": 8.702449608079722e-06, "loss": 0.1294, "step": 8829 }, { "epoch": 0.2575996265826478, "grad_norm": 0.7079604464746598, "learning_rate": 8.70213208392753e-06, "loss": 0.1504, "step": 8830 }, { "epoch": 0.25762879981329134, "grad_norm": 0.8668711322604143, "learning_rate": 8.701814526723588e-06, "loss": 0.1645, "step": 8831 }, { "epoch": 0.2576579730439349, "grad_norm": 0.8689087172820066, "learning_rate": 8.701496936470728e-06, "loss": 0.1598, "step": 8832 }, { "epoch": 0.25768714627457845, "grad_norm": 0.729254569499863, "learning_rate": 8.701179313171787e-06, "loss": 0.1745, "step": 8833 }, { "epoch": 0.257716319505222, "grad_norm": 0.7750600007600736, "learning_rate": 8.7008616568296e-06, "loss": 0.161, "step": 8834 }, { "epoch": 0.25774549273586556, "grad_norm": 1.1576861073778233, "learning_rate": 8.700543967447005e-06, "loss": 0.1961, "step": 8835 }, { "epoch": 0.2577746659665091, "grad_norm": 0.7936500970083683, "learning_rate": 8.700226245026838e-06, "loss": 0.1596, "step": 8836 }, { "epoch": 0.2578038391971527, "grad_norm": 0.82538331026121, "learning_rate": 8.699908489571931e-06, "loss": 0.1334, "step": 8837 }, { "epoch": 0.25783301242779627, "grad_norm": 0.7553905612520914, "learning_rate": 8.699590701085125e-06, "loss": 0.1621, "step": 8838 }, { "epoch": 0.2578621856584398, "grad_norm": 0.7823037908029749, "learning_rate": 8.699272879569258e-06, "loss": 0.161, "step": 8839 }, { "epoch": 0.2578913588890834, "grad_norm": 0.8909308772809414, "learning_rate": 8.698955025027165e-06, "loss": 0.1622, "step": 8840 }, { "epoch": 0.25792053211972693, "grad_norm": 0.6483082566578128, "learning_rate": 8.698637137461685e-06, "loss": 0.147, "step": 8841 }, { "epoch": 0.2579497053503705, "grad_norm": 1.1071666630884744, "learning_rate": 8.698319216875656e-06, "loss": 0.1652, "step": 8842 }, { "epoch": 0.25797887858101404, "grad_norm": 0.8406033754314253, "learning_rate": 8.698001263271914e-06, "loss": 0.1398, "step": 8843 }, { "epoch": 0.25800805181165765, "grad_norm": 0.7163675769592432, "learning_rate": 8.697683276653302e-06, "loss": 0.1765, "step": 8844 }, { "epoch": 0.2580372250423012, "grad_norm": 0.8801074754734578, "learning_rate": 8.697365257022654e-06, "loss": 0.1617, "step": 8845 }, { "epoch": 0.25806639827294475, "grad_norm": 0.8879008470690003, "learning_rate": 8.697047204382813e-06, "loss": 0.1408, "step": 8846 }, { "epoch": 0.2580955715035883, "grad_norm": 0.7716951003806196, "learning_rate": 8.696729118736618e-06, "loss": 0.1424, "step": 8847 }, { "epoch": 0.25812474473423186, "grad_norm": 0.8899398981891741, "learning_rate": 8.696411000086906e-06, "loss": 0.1855, "step": 8848 }, { "epoch": 0.2581539179648754, "grad_norm": 0.8839715875952767, "learning_rate": 8.69609284843652e-06, "loss": 0.1559, "step": 8849 }, { "epoch": 0.25818309119551897, "grad_norm": 1.0712819701383225, "learning_rate": 8.695774663788299e-06, "loss": 0.1678, "step": 8850 }, { "epoch": 0.2582122644261626, "grad_norm": 0.9844537685094489, "learning_rate": 8.695456446145084e-06, "loss": 0.1464, "step": 8851 }, { "epoch": 0.2582414376568061, "grad_norm": 1.0907816708370612, "learning_rate": 8.695138195509715e-06, "loss": 0.1625, "step": 8852 }, { "epoch": 0.2582706108874497, "grad_norm": 1.0740844466516966, "learning_rate": 8.694819911885034e-06, "loss": 0.1758, "step": 8853 }, { "epoch": 0.25829978411809323, "grad_norm": 0.8891126516343939, "learning_rate": 8.694501595273887e-06, "loss": 0.153, "step": 8854 }, { "epoch": 0.2583289573487368, "grad_norm": 0.8336304641396416, "learning_rate": 8.694183245679108e-06, "loss": 0.1636, "step": 8855 }, { "epoch": 0.25835813057938034, "grad_norm": 0.9731712741496811, "learning_rate": 8.693864863103546e-06, "loss": 0.1687, "step": 8856 }, { "epoch": 0.25838730381002395, "grad_norm": 0.9648990726717422, "learning_rate": 8.693546447550036e-06, "loss": 0.173, "step": 8857 }, { "epoch": 0.2584164770406675, "grad_norm": 0.904415710537177, "learning_rate": 8.693227999021428e-06, "loss": 0.1546, "step": 8858 }, { "epoch": 0.25844565027131106, "grad_norm": 0.7976806557549195, "learning_rate": 8.69290951752056e-06, "loss": 0.1497, "step": 8859 }, { "epoch": 0.2584748235019546, "grad_norm": 0.8187815959775875, "learning_rate": 8.69259100305028e-06, "loss": 0.1555, "step": 8860 }, { "epoch": 0.25850399673259816, "grad_norm": 0.7853515194328271, "learning_rate": 8.692272455613427e-06, "loss": 0.1534, "step": 8861 }, { "epoch": 0.2585331699632417, "grad_norm": 0.7234433521282186, "learning_rate": 8.691953875212848e-06, "loss": 0.1656, "step": 8862 }, { "epoch": 0.25856234319388527, "grad_norm": 0.8750074985201797, "learning_rate": 8.691635261851385e-06, "loss": 0.1457, "step": 8863 }, { "epoch": 0.2585915164245289, "grad_norm": 0.7847218231122696, "learning_rate": 8.691316615531885e-06, "loss": 0.1459, "step": 8864 }, { "epoch": 0.25862068965517243, "grad_norm": 0.7680291020795362, "learning_rate": 8.690997936257191e-06, "loss": 0.1697, "step": 8865 }, { "epoch": 0.258649862885816, "grad_norm": 0.6764550047370509, "learning_rate": 8.690679224030149e-06, "loss": 0.156, "step": 8866 }, { "epoch": 0.25867903611645954, "grad_norm": 0.8041923507195612, "learning_rate": 8.690360478853603e-06, "loss": 0.1414, "step": 8867 }, { "epoch": 0.2587082093471031, "grad_norm": 0.8314780058372228, "learning_rate": 8.6900417007304e-06, "loss": 0.145, "step": 8868 }, { "epoch": 0.25873738257774664, "grad_norm": 0.8227874731554615, "learning_rate": 8.689722889663386e-06, "loss": 0.1571, "step": 8869 }, { "epoch": 0.2587665558083902, "grad_norm": 0.7284739481406283, "learning_rate": 8.689404045655406e-06, "loss": 0.1593, "step": 8870 }, { "epoch": 0.2587957290390338, "grad_norm": 1.277646345397803, "learning_rate": 8.689085168709309e-06, "loss": 0.1556, "step": 8871 }, { "epoch": 0.25882490226967736, "grad_norm": 0.8730722697824033, "learning_rate": 8.688766258827938e-06, "loss": 0.1789, "step": 8872 }, { "epoch": 0.2588540755003209, "grad_norm": 0.8774586082077487, "learning_rate": 8.688447316014144e-06, "loss": 0.1462, "step": 8873 }, { "epoch": 0.25888324873096447, "grad_norm": 0.7898122849796687, "learning_rate": 8.688128340270772e-06, "loss": 0.1662, "step": 8874 }, { "epoch": 0.258912421961608, "grad_norm": 0.7686793143659327, "learning_rate": 8.68780933160067e-06, "loss": 0.147, "step": 8875 }, { "epoch": 0.2589415951922516, "grad_norm": 0.9601607688264278, "learning_rate": 8.687490290006689e-06, "loss": 0.1646, "step": 8876 }, { "epoch": 0.2589707684228951, "grad_norm": 0.9082518558436797, "learning_rate": 8.687171215491673e-06, "loss": 0.1979, "step": 8877 }, { "epoch": 0.25899994165353873, "grad_norm": 0.761382025265103, "learning_rate": 8.686852108058472e-06, "loss": 0.1427, "step": 8878 }, { "epoch": 0.2590291148841823, "grad_norm": 0.7154054704953288, "learning_rate": 8.686532967709938e-06, "loss": 0.1495, "step": 8879 }, { "epoch": 0.25905828811482584, "grad_norm": 0.7285945560701206, "learning_rate": 8.686213794448914e-06, "loss": 0.1534, "step": 8880 }, { "epoch": 0.2590874613454694, "grad_norm": 0.8378752925761715, "learning_rate": 8.685894588278256e-06, "loss": 0.148, "step": 8881 }, { "epoch": 0.25911663457611295, "grad_norm": 0.9615955944272917, "learning_rate": 8.685575349200812e-06, "loss": 0.164, "step": 8882 }, { "epoch": 0.2591458078067565, "grad_norm": 0.7677578971911746, "learning_rate": 8.685256077219428e-06, "loss": 0.1464, "step": 8883 }, { "epoch": 0.25917498103740005, "grad_norm": 0.877176485121053, "learning_rate": 8.684936772336961e-06, "loss": 0.1421, "step": 8884 }, { "epoch": 0.25920415426804366, "grad_norm": 0.9925513683909988, "learning_rate": 8.684617434556255e-06, "loss": 0.1452, "step": 8885 }, { "epoch": 0.2592333274986872, "grad_norm": 0.8638704131644256, "learning_rate": 8.684298063880166e-06, "loss": 0.1629, "step": 8886 }, { "epoch": 0.25926250072933077, "grad_norm": 0.7770560173656657, "learning_rate": 8.683978660311542e-06, "loss": 0.168, "step": 8887 }, { "epoch": 0.2592916739599743, "grad_norm": 0.8800620468543845, "learning_rate": 8.683659223853238e-06, "loss": 0.1867, "step": 8888 }, { "epoch": 0.2593208471906179, "grad_norm": 0.9077449434044605, "learning_rate": 8.683339754508102e-06, "loss": 0.1581, "step": 8889 }, { "epoch": 0.25935002042126143, "grad_norm": 0.7541267098540116, "learning_rate": 8.683020252278988e-06, "loss": 0.152, "step": 8890 }, { "epoch": 0.25937919365190504, "grad_norm": 0.9230556082843885, "learning_rate": 8.68270071716875e-06, "loss": 0.1671, "step": 8891 }, { "epoch": 0.2594083668825486, "grad_norm": 0.9330205850452694, "learning_rate": 8.682381149180239e-06, "loss": 0.1989, "step": 8892 }, { "epoch": 0.25943754011319214, "grad_norm": 0.8778281677680428, "learning_rate": 8.682061548316307e-06, "loss": 0.1331, "step": 8893 }, { "epoch": 0.2594667133438357, "grad_norm": 0.8308052619745485, "learning_rate": 8.681741914579807e-06, "loss": 0.1745, "step": 8894 }, { "epoch": 0.25949588657447925, "grad_norm": 0.7996080524419938, "learning_rate": 8.681422247973596e-06, "loss": 0.1615, "step": 8895 }, { "epoch": 0.2595250598051228, "grad_norm": 0.9020034260693374, "learning_rate": 8.681102548500526e-06, "loss": 0.1636, "step": 8896 }, { "epoch": 0.25955423303576636, "grad_norm": 0.8887946298156156, "learning_rate": 8.68078281616345e-06, "loss": 0.1743, "step": 8897 }, { "epoch": 0.25958340626640997, "grad_norm": 0.7243999100222906, "learning_rate": 8.680463050965227e-06, "loss": 0.1558, "step": 8898 }, { "epoch": 0.2596125794970535, "grad_norm": 0.8516601822735672, "learning_rate": 8.680143252908704e-06, "loss": 0.1774, "step": 8899 }, { "epoch": 0.2596417527276971, "grad_norm": 0.9026393380363806, "learning_rate": 8.679823421996745e-06, "loss": 0.1433, "step": 8900 }, { "epoch": 0.2596709259583406, "grad_norm": 0.6900676970466267, "learning_rate": 8.679503558232197e-06, "loss": 0.1449, "step": 8901 }, { "epoch": 0.2597000991889842, "grad_norm": 0.9630417895997558, "learning_rate": 8.679183661617923e-06, "loss": 0.1735, "step": 8902 }, { "epoch": 0.25972927241962773, "grad_norm": 0.76408418310192, "learning_rate": 8.678863732156773e-06, "loss": 0.1499, "step": 8903 }, { "epoch": 0.2597584456502713, "grad_norm": 0.8353763808398833, "learning_rate": 8.678543769851606e-06, "loss": 0.1333, "step": 8904 }, { "epoch": 0.2597876188809149, "grad_norm": 0.7340719249282294, "learning_rate": 8.678223774705279e-06, "loss": 0.1486, "step": 8905 }, { "epoch": 0.25981679211155845, "grad_norm": 0.7884093911045592, "learning_rate": 8.677903746720648e-06, "loss": 0.1886, "step": 8906 }, { "epoch": 0.259845965342202, "grad_norm": 0.9833278264474146, "learning_rate": 8.677583685900572e-06, "loss": 0.169, "step": 8907 }, { "epoch": 0.25987513857284555, "grad_norm": 0.9930250670443083, "learning_rate": 8.677263592247905e-06, "loss": 0.1481, "step": 8908 }, { "epoch": 0.2599043118034891, "grad_norm": 0.9653846703152127, "learning_rate": 8.676943465765506e-06, "loss": 0.1324, "step": 8909 }, { "epoch": 0.25993348503413266, "grad_norm": 0.9202175390113022, "learning_rate": 8.676623306456235e-06, "loss": 0.1332, "step": 8910 }, { "epoch": 0.2599626582647762, "grad_norm": 0.8731167581220094, "learning_rate": 8.676303114322948e-06, "loss": 0.1493, "step": 8911 }, { "epoch": 0.2599918314954198, "grad_norm": 0.886676344309571, "learning_rate": 8.675982889368503e-06, "loss": 0.1739, "step": 8912 }, { "epoch": 0.2600210047260634, "grad_norm": 0.7860685323189874, "learning_rate": 8.675662631595762e-06, "loss": 0.1703, "step": 8913 }, { "epoch": 0.26005017795670693, "grad_norm": 1.1572269862127893, "learning_rate": 8.675342341007582e-06, "loss": 0.1552, "step": 8914 }, { "epoch": 0.2600793511873505, "grad_norm": 0.7990010528377889, "learning_rate": 8.675022017606824e-06, "loss": 0.1511, "step": 8915 }, { "epoch": 0.26010852441799404, "grad_norm": 0.9849423360542807, "learning_rate": 8.674701661396345e-06, "loss": 0.1535, "step": 8916 }, { "epoch": 0.2601376976486376, "grad_norm": 0.7597937933589964, "learning_rate": 8.674381272379008e-06, "loss": 0.1351, "step": 8917 }, { "epoch": 0.2601668708792812, "grad_norm": 0.8573456198285051, "learning_rate": 8.674060850557673e-06, "loss": 0.154, "step": 8918 }, { "epoch": 0.26019604410992475, "grad_norm": 0.9188513446682759, "learning_rate": 8.673740395935198e-06, "loss": 0.1729, "step": 8919 }, { "epoch": 0.2602252173405683, "grad_norm": 1.2466147536443686, "learning_rate": 8.673419908514447e-06, "loss": 0.1777, "step": 8920 }, { "epoch": 0.26025439057121186, "grad_norm": 1.0470505186834715, "learning_rate": 8.67309938829828e-06, "loss": 0.1642, "step": 8921 }, { "epoch": 0.2602835638018554, "grad_norm": 1.1318805959368543, "learning_rate": 8.672778835289556e-06, "loss": 0.1586, "step": 8922 }, { "epoch": 0.26031273703249896, "grad_norm": 0.8774068696043981, "learning_rate": 8.672458249491143e-06, "loss": 0.1549, "step": 8923 }, { "epoch": 0.2603419102631425, "grad_norm": 0.9910230944103623, "learning_rate": 8.672137630905897e-06, "loss": 0.1942, "step": 8924 }, { "epoch": 0.2603710834937861, "grad_norm": 1.0426900066770715, "learning_rate": 8.671816979536682e-06, "loss": 0.1459, "step": 8925 }, { "epoch": 0.2604002567244297, "grad_norm": 0.872076369217735, "learning_rate": 8.671496295386363e-06, "loss": 0.1679, "step": 8926 }, { "epoch": 0.26042942995507323, "grad_norm": 0.7784845171279845, "learning_rate": 8.671175578457803e-06, "loss": 0.1801, "step": 8927 }, { "epoch": 0.2604586031857168, "grad_norm": 1.1143806082736285, "learning_rate": 8.670854828753862e-06, "loss": 0.1559, "step": 8928 }, { "epoch": 0.26048777641636034, "grad_norm": 0.8231650852474093, "learning_rate": 8.670534046277405e-06, "loss": 0.1738, "step": 8929 }, { "epoch": 0.2605169496470039, "grad_norm": 0.7152319986232941, "learning_rate": 8.670213231031299e-06, "loss": 0.1518, "step": 8930 }, { "epoch": 0.26054612287764745, "grad_norm": 0.8815074491040681, "learning_rate": 8.669892383018402e-06, "loss": 0.1901, "step": 8931 }, { "epoch": 0.26057529610829105, "grad_norm": 0.9281452692370107, "learning_rate": 8.669571502241582e-06, "loss": 0.1405, "step": 8932 }, { "epoch": 0.2606044693389346, "grad_norm": 0.843511794898961, "learning_rate": 8.669250588703706e-06, "loss": 0.1507, "step": 8933 }, { "epoch": 0.26063364256957816, "grad_norm": 0.8313034900109824, "learning_rate": 8.668929642407634e-06, "loss": 0.1737, "step": 8934 }, { "epoch": 0.2606628158002217, "grad_norm": 0.8363379483768809, "learning_rate": 8.668608663356237e-06, "loss": 0.1558, "step": 8935 }, { "epoch": 0.26069198903086527, "grad_norm": 0.8525148222277982, "learning_rate": 8.668287651552377e-06, "loss": 0.1657, "step": 8936 }, { "epoch": 0.2607211622615088, "grad_norm": 0.8191935090884097, "learning_rate": 8.66796660699892e-06, "loss": 0.1445, "step": 8937 }, { "epoch": 0.2607503354921524, "grad_norm": 0.8618948197418193, "learning_rate": 8.667645529698731e-06, "loss": 0.1542, "step": 8938 }, { "epoch": 0.260779508722796, "grad_norm": 0.8259232628387545, "learning_rate": 8.66732441965468e-06, "loss": 0.1305, "step": 8939 }, { "epoch": 0.26080868195343954, "grad_norm": 0.710543349995988, "learning_rate": 8.667003276869632e-06, "loss": 0.1302, "step": 8940 }, { "epoch": 0.2608378551840831, "grad_norm": 0.8812651562659243, "learning_rate": 8.666682101346456e-06, "loss": 0.1849, "step": 8941 }, { "epoch": 0.26086702841472664, "grad_norm": 0.8899631092618145, "learning_rate": 8.666360893088015e-06, "loss": 0.172, "step": 8942 }, { "epoch": 0.2608962016453702, "grad_norm": 0.8699659262285109, "learning_rate": 8.666039652097178e-06, "loss": 0.1272, "step": 8943 }, { "epoch": 0.26092537487601375, "grad_norm": 0.8170363413887268, "learning_rate": 8.665718378376816e-06, "loss": 0.1593, "step": 8944 }, { "epoch": 0.26095454810665736, "grad_norm": 0.980424642282014, "learning_rate": 8.665397071929796e-06, "loss": 0.1715, "step": 8945 }, { "epoch": 0.2609837213373009, "grad_norm": 0.8161710016929798, "learning_rate": 8.665075732758985e-06, "loss": 0.1482, "step": 8946 }, { "epoch": 0.26101289456794446, "grad_norm": 0.8648771296327689, "learning_rate": 8.664754360867252e-06, "loss": 0.1674, "step": 8947 }, { "epoch": 0.261042067798588, "grad_norm": 1.1281315541060049, "learning_rate": 8.664432956257468e-06, "loss": 0.1571, "step": 8948 }, { "epoch": 0.26107124102923157, "grad_norm": 0.9988097399923852, "learning_rate": 8.664111518932501e-06, "loss": 0.1841, "step": 8949 }, { "epoch": 0.2611004142598751, "grad_norm": 0.9501992764518179, "learning_rate": 8.663790048895222e-06, "loss": 0.1608, "step": 8950 }, { "epoch": 0.2611295874905187, "grad_norm": 0.9569829937773743, "learning_rate": 8.6634685461485e-06, "loss": 0.1523, "step": 8951 }, { "epoch": 0.2611587607211623, "grad_norm": 1.122425336542205, "learning_rate": 8.663147010695202e-06, "loss": 0.1558, "step": 8952 }, { "epoch": 0.26118793395180584, "grad_norm": 1.0707956436614317, "learning_rate": 8.662825442538206e-06, "loss": 0.1465, "step": 8953 }, { "epoch": 0.2612171071824494, "grad_norm": 0.6557527786271601, "learning_rate": 8.662503841680377e-06, "loss": 0.1436, "step": 8954 }, { "epoch": 0.26124628041309295, "grad_norm": 1.0398305195766397, "learning_rate": 8.662182208124588e-06, "loss": 0.1681, "step": 8955 }, { "epoch": 0.2612754536437365, "grad_norm": 1.0251881764708946, "learning_rate": 8.661860541873712e-06, "loss": 0.172, "step": 8956 }, { "epoch": 0.26130462687438005, "grad_norm": 1.1542249340298345, "learning_rate": 8.661538842930617e-06, "loss": 0.166, "step": 8957 }, { "epoch": 0.2613338001050236, "grad_norm": 0.8961954416241569, "learning_rate": 8.661217111298179e-06, "loss": 0.1572, "step": 8958 }, { "epoch": 0.2613629733356672, "grad_norm": 1.0721378267867974, "learning_rate": 8.660895346979268e-06, "loss": 0.1571, "step": 8959 }, { "epoch": 0.26139214656631077, "grad_norm": 1.233492676885101, "learning_rate": 8.660573549976755e-06, "loss": 0.1747, "step": 8960 }, { "epoch": 0.2614213197969543, "grad_norm": 0.7274276762163758, "learning_rate": 8.66025172029352e-06, "loss": 0.1454, "step": 8961 }, { "epoch": 0.2614504930275979, "grad_norm": 0.8377190903857192, "learning_rate": 8.65992985793243e-06, "loss": 0.1604, "step": 8962 }, { "epoch": 0.2614796662582414, "grad_norm": 1.08457859515976, "learning_rate": 8.659607962896356e-06, "loss": 0.1585, "step": 8963 }, { "epoch": 0.261508839488885, "grad_norm": 0.692446274962691, "learning_rate": 8.65928603518818e-06, "loss": 0.1327, "step": 8964 }, { "epoch": 0.26153801271952853, "grad_norm": 0.9453876214076944, "learning_rate": 8.65896407481077e-06, "loss": 0.1535, "step": 8965 }, { "epoch": 0.26156718595017214, "grad_norm": 0.8171866133815451, "learning_rate": 8.658642081767003e-06, "loss": 0.1611, "step": 8966 }, { "epoch": 0.2615963591808157, "grad_norm": 0.7267011421363825, "learning_rate": 8.658320056059752e-06, "loss": 0.1756, "step": 8967 }, { "epoch": 0.26162553241145925, "grad_norm": 0.6845048571919239, "learning_rate": 8.657997997691893e-06, "loss": 0.1624, "step": 8968 }, { "epoch": 0.2616547056421028, "grad_norm": 0.8526214663746644, "learning_rate": 8.657675906666301e-06, "loss": 0.1786, "step": 8969 }, { "epoch": 0.26168387887274636, "grad_norm": 0.7279969395521828, "learning_rate": 8.657353782985853e-06, "loss": 0.144, "step": 8970 }, { "epoch": 0.2617130521033899, "grad_norm": 6.897200323623024, "learning_rate": 8.657031626653423e-06, "loss": 0.2039, "step": 8971 }, { "epoch": 0.2617422253340335, "grad_norm": 0.9029048669461691, "learning_rate": 8.656709437671886e-06, "loss": 0.1441, "step": 8972 }, { "epoch": 0.26177139856467707, "grad_norm": 0.793420907658017, "learning_rate": 8.656387216044122e-06, "loss": 0.1268, "step": 8973 }, { "epoch": 0.2618005717953206, "grad_norm": 0.7511495038593089, "learning_rate": 8.656064961773006e-06, "loss": 0.1498, "step": 8974 }, { "epoch": 0.2618297450259642, "grad_norm": 0.9088885928694428, "learning_rate": 8.655742674861414e-06, "loss": 0.1448, "step": 8975 }, { "epoch": 0.26185891825660773, "grad_norm": 0.6745868296167258, "learning_rate": 8.655420355312224e-06, "loss": 0.1746, "step": 8976 }, { "epoch": 0.2618880914872513, "grad_norm": 1.1519673108011803, "learning_rate": 8.655098003128312e-06, "loss": 0.1735, "step": 8977 }, { "epoch": 0.26191726471789484, "grad_norm": 1.231659580622008, "learning_rate": 8.654775618312561e-06, "loss": 0.1455, "step": 8978 }, { "epoch": 0.26194643794853845, "grad_norm": 0.7365798841092727, "learning_rate": 8.654453200867842e-06, "loss": 0.1909, "step": 8979 }, { "epoch": 0.261975611179182, "grad_norm": 0.8951672978577805, "learning_rate": 8.654130750797041e-06, "loss": 0.156, "step": 8980 }, { "epoch": 0.26200478440982555, "grad_norm": 1.0670266903104302, "learning_rate": 8.65380826810303e-06, "loss": 0.1511, "step": 8981 }, { "epoch": 0.2620339576404691, "grad_norm": 0.7877643841195099, "learning_rate": 8.653485752788692e-06, "loss": 0.1423, "step": 8982 }, { "epoch": 0.26206313087111266, "grad_norm": 0.863593991044593, "learning_rate": 8.653163204856906e-06, "loss": 0.1511, "step": 8983 }, { "epoch": 0.2620923041017562, "grad_norm": 0.8138077050375636, "learning_rate": 8.65284062431055e-06, "loss": 0.1583, "step": 8984 }, { "epoch": 0.26212147733239977, "grad_norm": 0.7585225714676208, "learning_rate": 8.652518011152507e-06, "loss": 0.1727, "step": 8985 }, { "epoch": 0.2621506505630434, "grad_norm": 0.9090473772890441, "learning_rate": 8.652195365385652e-06, "loss": 0.1228, "step": 8986 }, { "epoch": 0.26217982379368693, "grad_norm": 0.7627118636691527, "learning_rate": 8.651872687012871e-06, "loss": 0.1982, "step": 8987 }, { "epoch": 0.2622089970243305, "grad_norm": 0.7793849449518128, "learning_rate": 8.651549976037042e-06, "loss": 0.1645, "step": 8988 }, { "epoch": 0.26223817025497403, "grad_norm": 0.6968624570367113, "learning_rate": 8.651227232461045e-06, "loss": 0.1493, "step": 8989 }, { "epoch": 0.2622673434856176, "grad_norm": 0.8330253397591032, "learning_rate": 8.650904456287765e-06, "loss": 0.197, "step": 8990 }, { "epoch": 0.26229651671626114, "grad_norm": 0.9423238449724195, "learning_rate": 8.65058164752008e-06, "loss": 0.1647, "step": 8991 }, { "epoch": 0.2623256899469047, "grad_norm": 0.7795221338227699, "learning_rate": 8.650258806160874e-06, "loss": 0.1633, "step": 8992 }, { "epoch": 0.2623548631775483, "grad_norm": 0.9439610475194946, "learning_rate": 8.649935932213029e-06, "loss": 0.1692, "step": 8993 }, { "epoch": 0.26238403640819186, "grad_norm": 1.2520198627267944, "learning_rate": 8.649613025679428e-06, "loss": 0.1469, "step": 8994 }, { "epoch": 0.2624132096388354, "grad_norm": 0.8153986117003251, "learning_rate": 8.649290086562952e-06, "loss": 0.1635, "step": 8995 }, { "epoch": 0.26244238286947896, "grad_norm": 0.9706801494347831, "learning_rate": 8.648967114866485e-06, "loss": 0.1399, "step": 8996 }, { "epoch": 0.2624715561001225, "grad_norm": 1.0048217905972299, "learning_rate": 8.648644110592912e-06, "loss": 0.158, "step": 8997 }, { "epoch": 0.26250072933076607, "grad_norm": 0.6717974086650561, "learning_rate": 8.648321073745113e-06, "loss": 0.1535, "step": 8998 }, { "epoch": 0.2625299025614097, "grad_norm": 1.2447905270372501, "learning_rate": 8.647998004325977e-06, "loss": 0.1781, "step": 8999 }, { "epoch": 0.26255907579205323, "grad_norm": 0.9021571185221067, "learning_rate": 8.647674902338384e-06, "loss": 0.158, "step": 9000 }, { "epoch": 0.2625882490226968, "grad_norm": 0.7515560073859457, "learning_rate": 8.647351767785221e-06, "loss": 0.1625, "step": 9001 }, { "epoch": 0.26261742225334034, "grad_norm": 0.8906606297136825, "learning_rate": 8.647028600669373e-06, "loss": 0.1646, "step": 9002 }, { "epoch": 0.2626465954839839, "grad_norm": 0.7556433769020066, "learning_rate": 8.646705400993722e-06, "loss": 0.1549, "step": 9003 }, { "epoch": 0.26267576871462744, "grad_norm": 0.7594058956556977, "learning_rate": 8.646382168761159e-06, "loss": 0.1482, "step": 9004 }, { "epoch": 0.262704941945271, "grad_norm": 0.7329656671449547, "learning_rate": 8.646058903974563e-06, "loss": 0.1623, "step": 9005 }, { "epoch": 0.2627341151759146, "grad_norm": 0.7762564542528988, "learning_rate": 8.645735606636825e-06, "loss": 0.1415, "step": 9006 }, { "epoch": 0.26276328840655816, "grad_norm": 0.9856730080145799, "learning_rate": 8.645412276750829e-06, "loss": 0.1517, "step": 9007 }, { "epoch": 0.2627924616372017, "grad_norm": 0.7124863843563982, "learning_rate": 8.645088914319464e-06, "loss": 0.1643, "step": 9008 }, { "epoch": 0.26282163486784527, "grad_norm": 0.9274028579170007, "learning_rate": 8.644765519345615e-06, "loss": 0.1647, "step": 9009 }, { "epoch": 0.2628508080984888, "grad_norm": 0.9549257484451333, "learning_rate": 8.644442091832168e-06, "loss": 0.1537, "step": 9010 }, { "epoch": 0.2628799813291324, "grad_norm": 0.6818318593964277, "learning_rate": 8.644118631782014e-06, "loss": 0.1314, "step": 9011 }, { "epoch": 0.2629091545597759, "grad_norm": 0.8348523017673742, "learning_rate": 8.643795139198037e-06, "loss": 0.1442, "step": 9012 }, { "epoch": 0.26293832779041953, "grad_norm": 0.8854657649503088, "learning_rate": 8.643471614083127e-06, "loss": 0.1873, "step": 9013 }, { "epoch": 0.2629675010210631, "grad_norm": 0.7212956182481403, "learning_rate": 8.643148056440174e-06, "loss": 0.1267, "step": 9014 }, { "epoch": 0.26299667425170664, "grad_norm": 0.7784757188427204, "learning_rate": 8.642824466272065e-06, "loss": 0.1537, "step": 9015 }, { "epoch": 0.2630258474823502, "grad_norm": 0.9231990584958093, "learning_rate": 8.642500843581687e-06, "loss": 0.1672, "step": 9016 }, { "epoch": 0.26305502071299375, "grad_norm": 0.8723099299784459, "learning_rate": 8.64217718837193e-06, "loss": 0.1497, "step": 9017 }, { "epoch": 0.2630841939436373, "grad_norm": 0.9186162309499485, "learning_rate": 8.641853500645685e-06, "loss": 0.1549, "step": 9018 }, { "epoch": 0.26311336717428085, "grad_norm": 0.8548084882452498, "learning_rate": 8.641529780405843e-06, "loss": 0.1778, "step": 9019 }, { "epoch": 0.26314254040492446, "grad_norm": 0.9450437817969356, "learning_rate": 8.641206027655293e-06, "loss": 0.1745, "step": 9020 }, { "epoch": 0.263171713635568, "grad_norm": 0.8052277640189358, "learning_rate": 8.640882242396922e-06, "loss": 0.1454, "step": 9021 }, { "epoch": 0.26320088686621157, "grad_norm": 0.9179526845228608, "learning_rate": 8.640558424633625e-06, "loss": 0.1511, "step": 9022 }, { "epoch": 0.2632300600968551, "grad_norm": 0.9192983316214682, "learning_rate": 8.640234574368292e-06, "loss": 0.1674, "step": 9023 }, { "epoch": 0.2632592333274987, "grad_norm": 0.9721749546684811, "learning_rate": 8.639910691603815e-06, "loss": 0.1589, "step": 9024 }, { "epoch": 0.26328840655814223, "grad_norm": 0.8077257913425597, "learning_rate": 8.63958677634308e-06, "loss": 0.1858, "step": 9025 }, { "epoch": 0.2633175797887858, "grad_norm": 0.738405800801672, "learning_rate": 8.639262828588988e-06, "loss": 0.1564, "step": 9026 }, { "epoch": 0.2633467530194294, "grad_norm": 0.7852468710522374, "learning_rate": 8.638938848344422e-06, "loss": 0.1719, "step": 9027 }, { "epoch": 0.26337592625007294, "grad_norm": 0.7730085408186513, "learning_rate": 8.63861483561228e-06, "loss": 0.1497, "step": 9028 }, { "epoch": 0.2634050994807165, "grad_norm": 0.9883000188980213, "learning_rate": 8.638290790395453e-06, "loss": 0.167, "step": 9029 }, { "epoch": 0.26343427271136005, "grad_norm": 0.8436573459162998, "learning_rate": 8.637966712696837e-06, "loss": 0.1643, "step": 9030 }, { "epoch": 0.2634634459420036, "grad_norm": 0.7092549122404046, "learning_rate": 8.637642602519321e-06, "loss": 0.1694, "step": 9031 }, { "epoch": 0.26349261917264716, "grad_norm": 1.0446338585028105, "learning_rate": 8.6373184598658e-06, "loss": 0.1352, "step": 9032 }, { "epoch": 0.26352179240329077, "grad_norm": 0.7027271973633433, "learning_rate": 8.636994284739167e-06, "loss": 0.1584, "step": 9033 }, { "epoch": 0.2635509656339343, "grad_norm": 0.9778596732464395, "learning_rate": 8.636670077142319e-06, "loss": 0.1623, "step": 9034 }, { "epoch": 0.2635801388645779, "grad_norm": 0.8677475585979214, "learning_rate": 8.636345837078149e-06, "loss": 0.1393, "step": 9035 }, { "epoch": 0.2636093120952214, "grad_norm": 0.8155722533322951, "learning_rate": 8.63602156454955e-06, "loss": 0.1377, "step": 9036 }, { "epoch": 0.263638485325865, "grad_norm": 0.9343654193152432, "learning_rate": 8.63569725955942e-06, "loss": 0.1707, "step": 9037 }, { "epoch": 0.26366765855650853, "grad_norm": 0.920493289169441, "learning_rate": 8.63537292211065e-06, "loss": 0.1643, "step": 9038 }, { "epoch": 0.2636968317871521, "grad_norm": 1.1822121690421965, "learning_rate": 8.63504855220614e-06, "loss": 0.1527, "step": 9039 }, { "epoch": 0.2637260050177957, "grad_norm": 0.8324666194814841, "learning_rate": 8.634724149848785e-06, "loss": 0.1497, "step": 9040 }, { "epoch": 0.26375517824843925, "grad_norm": 0.8232415364228624, "learning_rate": 8.634399715041479e-06, "loss": 0.1547, "step": 9041 }, { "epoch": 0.2637843514790828, "grad_norm": 1.0870219534930903, "learning_rate": 8.634075247787121e-06, "loss": 0.1638, "step": 9042 }, { "epoch": 0.26381352470972635, "grad_norm": 0.8903388992881184, "learning_rate": 8.633750748088608e-06, "loss": 0.1396, "step": 9043 }, { "epoch": 0.2638426979403699, "grad_norm": 1.0044263782715226, "learning_rate": 8.633426215948833e-06, "loss": 0.1733, "step": 9044 }, { "epoch": 0.26387187117101346, "grad_norm": 1.2698529262262845, "learning_rate": 8.633101651370696e-06, "loss": 0.1823, "step": 9045 }, { "epoch": 0.263901044401657, "grad_norm": 1.0729459899868792, "learning_rate": 8.632777054357098e-06, "loss": 0.2038, "step": 9046 }, { "epoch": 0.2639302176323006, "grad_norm": 0.7010983361020153, "learning_rate": 8.632452424910932e-06, "loss": 0.163, "step": 9047 }, { "epoch": 0.2639593908629442, "grad_norm": 1.0570119784118244, "learning_rate": 8.632127763035096e-06, "loss": 0.1752, "step": 9048 }, { "epoch": 0.26398856409358773, "grad_norm": 1.0045126713576789, "learning_rate": 8.631803068732493e-06, "loss": 0.1549, "step": 9049 }, { "epoch": 0.2640177373242313, "grad_norm": 0.966544410497631, "learning_rate": 8.631478342006019e-06, "loss": 0.1407, "step": 9050 }, { "epoch": 0.26404691055487484, "grad_norm": 0.9947474879539469, "learning_rate": 8.631153582858571e-06, "loss": 0.1771, "step": 9051 }, { "epoch": 0.2640760837855184, "grad_norm": 0.8973227562563453, "learning_rate": 8.630828791293053e-06, "loss": 0.1617, "step": 9052 }, { "epoch": 0.26410525701616194, "grad_norm": 1.1181779623509178, "learning_rate": 8.63050396731236e-06, "loss": 0.1372, "step": 9053 }, { "epoch": 0.26413443024680555, "grad_norm": 0.8669198873518774, "learning_rate": 8.630179110919396e-06, "loss": 0.1598, "step": 9054 }, { "epoch": 0.2641636034774491, "grad_norm": 0.8858988726842243, "learning_rate": 8.62985422211706e-06, "loss": 0.148, "step": 9055 }, { "epoch": 0.26419277670809266, "grad_norm": 1.0085743706771297, "learning_rate": 8.629529300908252e-06, "loss": 0.1687, "step": 9056 }, { "epoch": 0.2642219499387362, "grad_norm": 0.7803668383702542, "learning_rate": 8.629204347295871e-06, "loss": 0.1473, "step": 9057 }, { "epoch": 0.26425112316937976, "grad_norm": 0.9095071387023369, "learning_rate": 8.628879361282822e-06, "loss": 0.1715, "step": 9058 }, { "epoch": 0.2642802964000233, "grad_norm": 0.7505477609686817, "learning_rate": 8.628554342872001e-06, "loss": 0.1432, "step": 9059 }, { "epoch": 0.2643094696306669, "grad_norm": 0.799754619139009, "learning_rate": 8.628229292066317e-06, "loss": 0.1556, "step": 9060 }, { "epoch": 0.2643386428613105, "grad_norm": 0.8988449170981903, "learning_rate": 8.627904208868667e-06, "loss": 0.1534, "step": 9061 }, { "epoch": 0.26436781609195403, "grad_norm": 0.8730500814357809, "learning_rate": 8.627579093281954e-06, "loss": 0.144, "step": 9062 }, { "epoch": 0.2643969893225976, "grad_norm": 0.8647646197225216, "learning_rate": 8.62725394530908e-06, "loss": 0.1594, "step": 9063 }, { "epoch": 0.26442616255324114, "grad_norm": 0.704882418996132, "learning_rate": 8.62692876495295e-06, "loss": 0.1342, "step": 9064 }, { "epoch": 0.2644553357838847, "grad_norm": 0.8287469086871684, "learning_rate": 8.626603552216463e-06, "loss": 0.146, "step": 9065 }, { "epoch": 0.26448450901452825, "grad_norm": 0.7172902656043784, "learning_rate": 8.626278307102527e-06, "loss": 0.1673, "step": 9066 }, { "epoch": 0.26451368224517186, "grad_norm": 0.8710773055210546, "learning_rate": 8.625953029614045e-06, "loss": 0.1723, "step": 9067 }, { "epoch": 0.2645428554758154, "grad_norm": 0.9036504981894505, "learning_rate": 8.625627719753919e-06, "loss": 0.1723, "step": 9068 }, { "epoch": 0.26457202870645896, "grad_norm": 0.827967608980461, "learning_rate": 8.625302377525055e-06, "loss": 0.1419, "step": 9069 }, { "epoch": 0.2646012019371025, "grad_norm": 1.1107301199535424, "learning_rate": 8.624977002930356e-06, "loss": 0.1516, "step": 9070 }, { "epoch": 0.26463037516774607, "grad_norm": 1.0366582363229322, "learning_rate": 8.624651595972729e-06, "loss": 0.156, "step": 9071 }, { "epoch": 0.2646595483983896, "grad_norm": 0.8720778348750352, "learning_rate": 8.624326156655075e-06, "loss": 0.1832, "step": 9072 }, { "epoch": 0.2646887216290332, "grad_norm": 1.1158055232479938, "learning_rate": 8.624000684980305e-06, "loss": 0.1605, "step": 9073 }, { "epoch": 0.2647178948596768, "grad_norm": 0.9359495379112925, "learning_rate": 8.62367518095132e-06, "loss": 0.1451, "step": 9074 }, { "epoch": 0.26474706809032034, "grad_norm": 0.797724665799639, "learning_rate": 8.623349644571029e-06, "loss": 0.1413, "step": 9075 }, { "epoch": 0.2647762413209639, "grad_norm": 1.147284726854106, "learning_rate": 8.623024075842337e-06, "loss": 0.1632, "step": 9076 }, { "epoch": 0.26480541455160744, "grad_norm": 0.8992657168090905, "learning_rate": 8.622698474768151e-06, "loss": 0.182, "step": 9077 }, { "epoch": 0.264834587782251, "grad_norm": 0.7815571394446682, "learning_rate": 8.622372841351378e-06, "loss": 0.1639, "step": 9078 }, { "epoch": 0.26486376101289455, "grad_norm": 0.8564192048276966, "learning_rate": 8.622047175594926e-06, "loss": 0.1688, "step": 9079 }, { "epoch": 0.2648929342435381, "grad_norm": 0.8074776318284307, "learning_rate": 8.6217214775017e-06, "loss": 0.1461, "step": 9080 }, { "epoch": 0.2649221074741817, "grad_norm": 0.7674874346703437, "learning_rate": 8.62139574707461e-06, "loss": 0.1711, "step": 9081 }, { "epoch": 0.26495128070482526, "grad_norm": 0.9988993533004416, "learning_rate": 8.621069984316562e-06, "loss": 0.143, "step": 9082 }, { "epoch": 0.2649804539354688, "grad_norm": 0.7562372592021596, "learning_rate": 8.620744189230468e-06, "loss": 0.1508, "step": 9083 }, { "epoch": 0.26500962716611237, "grad_norm": 0.7517946916366848, "learning_rate": 8.620418361819231e-06, "loss": 0.1884, "step": 9084 }, { "epoch": 0.2650388003967559, "grad_norm": 0.8642858531473507, "learning_rate": 8.620092502085766e-06, "loss": 0.149, "step": 9085 }, { "epoch": 0.2650679736273995, "grad_norm": 0.8279634536047271, "learning_rate": 8.619766610032978e-06, "loss": 0.1563, "step": 9086 }, { "epoch": 0.2650971468580431, "grad_norm": 1.0595198042791127, "learning_rate": 8.619440685663777e-06, "loss": 0.1529, "step": 9087 }, { "epoch": 0.26512632008868664, "grad_norm": 0.8305228327930336, "learning_rate": 8.619114728981076e-06, "loss": 0.1795, "step": 9088 }, { "epoch": 0.2651554933193302, "grad_norm": 0.7461641349306766, "learning_rate": 8.61878873998778e-06, "loss": 0.132, "step": 9089 }, { "epoch": 0.26518466654997375, "grad_norm": 1.1785531779312541, "learning_rate": 8.618462718686803e-06, "loss": 0.1636, "step": 9090 }, { "epoch": 0.2652138397806173, "grad_norm": 0.7808261703990087, "learning_rate": 8.618136665081056e-06, "loss": 0.1692, "step": 9091 }, { "epoch": 0.26524301301126085, "grad_norm": 0.9399072229359386, "learning_rate": 8.617810579173448e-06, "loss": 0.1771, "step": 9092 }, { "epoch": 0.2652721862419044, "grad_norm": 0.8716638403910036, "learning_rate": 8.61748446096689e-06, "loss": 0.1472, "step": 9093 }, { "epoch": 0.265301359472548, "grad_norm": 0.8539748051850683, "learning_rate": 8.617158310464295e-06, "loss": 0.1618, "step": 9094 }, { "epoch": 0.26533053270319157, "grad_norm": 0.7179318488689149, "learning_rate": 8.616832127668573e-06, "loss": 0.1631, "step": 9095 }, { "epoch": 0.2653597059338351, "grad_norm": 1.0042392548906347, "learning_rate": 8.616505912582638e-06, "loss": 0.1398, "step": 9096 }, { "epoch": 0.2653888791644787, "grad_norm": 0.8236697306505159, "learning_rate": 8.616179665209402e-06, "loss": 0.1655, "step": 9097 }, { "epoch": 0.26541805239512223, "grad_norm": 0.825549036235817, "learning_rate": 8.615853385551776e-06, "loss": 0.1774, "step": 9098 }, { "epoch": 0.2654472256257658, "grad_norm": 0.9269318908416707, "learning_rate": 8.615527073612675e-06, "loss": 0.1542, "step": 9099 }, { "epoch": 0.26547639885640933, "grad_norm": 0.7275122950988118, "learning_rate": 8.615200729395011e-06, "loss": 0.1437, "step": 9100 }, { "epoch": 0.26550557208705294, "grad_norm": 0.614408299811067, "learning_rate": 8.614874352901698e-06, "loss": 0.1539, "step": 9101 }, { "epoch": 0.2655347453176965, "grad_norm": 0.8434856908633734, "learning_rate": 8.61454794413565e-06, "loss": 0.1675, "step": 9102 }, { "epoch": 0.26556391854834005, "grad_norm": 0.7641500041163647, "learning_rate": 8.61422150309978e-06, "loss": 0.173, "step": 9103 }, { "epoch": 0.2655930917789836, "grad_norm": 0.8790201535354943, "learning_rate": 8.613895029797003e-06, "loss": 0.1704, "step": 9104 }, { "epoch": 0.26562226500962716, "grad_norm": 0.7619920842925546, "learning_rate": 8.613568524230235e-06, "loss": 0.1482, "step": 9105 }, { "epoch": 0.2656514382402707, "grad_norm": 0.9080016098777615, "learning_rate": 8.61324198640239e-06, "loss": 0.1458, "step": 9106 }, { "epoch": 0.26568061147091426, "grad_norm": 0.8153199610735942, "learning_rate": 8.612915416316383e-06, "loss": 0.1588, "step": 9107 }, { "epoch": 0.26570978470155787, "grad_norm": 0.7773071241995667, "learning_rate": 8.612588813975128e-06, "loss": 0.1576, "step": 9108 }, { "epoch": 0.2657389579322014, "grad_norm": 0.7211843174080956, "learning_rate": 8.612262179381546e-06, "loss": 0.1743, "step": 9109 }, { "epoch": 0.265768131162845, "grad_norm": 0.9762819829991857, "learning_rate": 8.611935512538546e-06, "loss": 0.158, "step": 9110 }, { "epoch": 0.26579730439348853, "grad_norm": 0.8956469604263455, "learning_rate": 8.611608813449049e-06, "loss": 0.1683, "step": 9111 }, { "epoch": 0.2658264776241321, "grad_norm": 0.8204892734320686, "learning_rate": 8.61128208211597e-06, "loss": 0.1602, "step": 9112 }, { "epoch": 0.26585565085477564, "grad_norm": 0.7809749207214982, "learning_rate": 8.610955318542228e-06, "loss": 0.1442, "step": 9113 }, { "epoch": 0.26588482408541925, "grad_norm": 0.9870598039420582, "learning_rate": 8.610628522730739e-06, "loss": 0.1586, "step": 9114 }, { "epoch": 0.2659139973160628, "grad_norm": 1.0519012197283568, "learning_rate": 8.61030169468442e-06, "loss": 0.1703, "step": 9115 }, { "epoch": 0.26594317054670635, "grad_norm": 0.8214479126276892, "learning_rate": 8.60997483440619e-06, "loss": 0.1786, "step": 9116 }, { "epoch": 0.2659723437773499, "grad_norm": 0.8068036418888651, "learning_rate": 8.609647941898965e-06, "loss": 0.1677, "step": 9117 }, { "epoch": 0.26600151700799346, "grad_norm": 1.0475768614955818, "learning_rate": 8.609321017165666e-06, "loss": 0.1665, "step": 9118 }, { "epoch": 0.266030690238637, "grad_norm": 0.9301065775618507, "learning_rate": 8.60899406020921e-06, "loss": 0.1467, "step": 9119 }, { "epoch": 0.26605986346928057, "grad_norm": 0.8931493725532034, "learning_rate": 8.608667071032518e-06, "loss": 0.2, "step": 9120 }, { "epoch": 0.2660890366999242, "grad_norm": 0.9897605891146277, "learning_rate": 8.608340049638505e-06, "loss": 0.1879, "step": 9121 }, { "epoch": 0.26611820993056773, "grad_norm": 0.9371094007725559, "learning_rate": 8.608012996030096e-06, "loss": 0.1386, "step": 9122 }, { "epoch": 0.2661473831612113, "grad_norm": 0.7642659985859662, "learning_rate": 8.607685910210207e-06, "loss": 0.157, "step": 9123 }, { "epoch": 0.26617655639185483, "grad_norm": 0.760785170955767, "learning_rate": 8.607358792181758e-06, "loss": 0.1592, "step": 9124 }, { "epoch": 0.2662057296224984, "grad_norm": 0.9363709350453202, "learning_rate": 8.607031641947674e-06, "loss": 0.1596, "step": 9125 }, { "epoch": 0.26623490285314194, "grad_norm": 0.9553890540930121, "learning_rate": 8.60670445951087e-06, "loss": 0.1592, "step": 9126 }, { "epoch": 0.2662640760837855, "grad_norm": 0.8108281807974532, "learning_rate": 8.606377244874272e-06, "loss": 0.164, "step": 9127 }, { "epoch": 0.2662932493144291, "grad_norm": 0.744083984081861, "learning_rate": 8.606049998040798e-06, "loss": 0.1456, "step": 9128 }, { "epoch": 0.26632242254507266, "grad_norm": 0.9122152357575725, "learning_rate": 8.60572271901337e-06, "loss": 0.1505, "step": 9129 }, { "epoch": 0.2663515957757162, "grad_norm": 0.9656027725681182, "learning_rate": 8.60539540779491e-06, "loss": 0.1513, "step": 9130 }, { "epoch": 0.26638076900635976, "grad_norm": 0.6944694313875288, "learning_rate": 8.60506806438834e-06, "loss": 0.1507, "step": 9131 }, { "epoch": 0.2664099422370033, "grad_norm": 1.3818471606419696, "learning_rate": 8.604740688796585e-06, "loss": 0.1453, "step": 9132 }, { "epoch": 0.26643911546764687, "grad_norm": 1.0641447817434493, "learning_rate": 8.604413281022563e-06, "loss": 0.1693, "step": 9133 }, { "epoch": 0.2664682886982904, "grad_norm": 0.8243456389069569, "learning_rate": 8.604085841069202e-06, "loss": 0.1424, "step": 9134 }, { "epoch": 0.26649746192893403, "grad_norm": 1.0053326829075988, "learning_rate": 8.60375836893942e-06, "loss": 0.1505, "step": 9135 }, { "epoch": 0.2665266351595776, "grad_norm": 0.9620008886312827, "learning_rate": 8.603430864636147e-06, "loss": 0.153, "step": 9136 }, { "epoch": 0.26655580839022114, "grad_norm": 0.9968262005020653, "learning_rate": 8.603103328162303e-06, "loss": 0.1624, "step": 9137 }, { "epoch": 0.2665849816208647, "grad_norm": 0.8984514152344192, "learning_rate": 8.602775759520812e-06, "loss": 0.1381, "step": 9138 }, { "epoch": 0.26661415485150824, "grad_norm": 0.9913964856948402, "learning_rate": 8.602448158714598e-06, "loss": 0.1482, "step": 9139 }, { "epoch": 0.2666433280821518, "grad_norm": 0.8265462729925408, "learning_rate": 8.602120525746588e-06, "loss": 0.1453, "step": 9140 }, { "epoch": 0.26667250131279535, "grad_norm": 0.8347403841944075, "learning_rate": 8.601792860619704e-06, "loss": 0.1562, "step": 9141 }, { "epoch": 0.26670167454343896, "grad_norm": 0.8428107837326312, "learning_rate": 8.601465163336875e-06, "loss": 0.167, "step": 9142 }, { "epoch": 0.2667308477740825, "grad_norm": 0.9401110801896505, "learning_rate": 8.601137433901026e-06, "loss": 0.1616, "step": 9143 }, { "epoch": 0.26676002100472607, "grad_norm": 0.7493122862994414, "learning_rate": 8.600809672315079e-06, "loss": 0.161, "step": 9144 }, { "epoch": 0.2667891942353696, "grad_norm": 0.9069616054462428, "learning_rate": 8.600481878581963e-06, "loss": 0.1646, "step": 9145 }, { "epoch": 0.2668183674660132, "grad_norm": 0.7983840351539716, "learning_rate": 8.600154052704606e-06, "loss": 0.1826, "step": 9146 }, { "epoch": 0.2668475406966567, "grad_norm": 0.9543317972020042, "learning_rate": 8.599826194685932e-06, "loss": 0.1525, "step": 9147 }, { "epoch": 0.26687671392730034, "grad_norm": 0.883120951423925, "learning_rate": 8.599498304528869e-06, "loss": 0.1865, "step": 9148 }, { "epoch": 0.2669058871579439, "grad_norm": 0.819372229915627, "learning_rate": 8.599170382236343e-06, "loss": 0.1692, "step": 9149 }, { "epoch": 0.26693506038858744, "grad_norm": 0.8996655475799555, "learning_rate": 8.598842427811286e-06, "loss": 0.1698, "step": 9150 }, { "epoch": 0.266964233619231, "grad_norm": 0.7135545026287398, "learning_rate": 8.598514441256622e-06, "loss": 0.1663, "step": 9151 }, { "epoch": 0.26699340684987455, "grad_norm": 0.8362320184332745, "learning_rate": 8.59818642257528e-06, "loss": 0.1815, "step": 9152 }, { "epoch": 0.2670225800805181, "grad_norm": 0.996673344357169, "learning_rate": 8.597858371770189e-06, "loss": 0.1485, "step": 9153 }, { "epoch": 0.26705175331116165, "grad_norm": 0.8816343547125585, "learning_rate": 8.597530288844275e-06, "loss": 0.1591, "step": 9154 }, { "epoch": 0.26708092654180526, "grad_norm": 0.8489802573039555, "learning_rate": 8.597202173800471e-06, "loss": 0.1665, "step": 9155 }, { "epoch": 0.2671100997724488, "grad_norm": 1.0257458096727483, "learning_rate": 8.596874026641705e-06, "loss": 0.1559, "step": 9156 }, { "epoch": 0.26713927300309237, "grad_norm": 0.9692393709333937, "learning_rate": 8.596545847370904e-06, "loss": 0.1564, "step": 9157 }, { "epoch": 0.2671684462337359, "grad_norm": 0.9106118102650673, "learning_rate": 8.596217635991004e-06, "loss": 0.1649, "step": 9158 }, { "epoch": 0.2671976194643795, "grad_norm": 0.9422265893505043, "learning_rate": 8.59588939250493e-06, "loss": 0.1709, "step": 9159 }, { "epoch": 0.26722679269502303, "grad_norm": 0.8309488586446533, "learning_rate": 8.595561116915613e-06, "loss": 0.163, "step": 9160 }, { "epoch": 0.2672559659256666, "grad_norm": 0.9041403780788585, "learning_rate": 8.595232809225987e-06, "loss": 0.1828, "step": 9161 }, { "epoch": 0.2672851391563102, "grad_norm": 0.8975794841273685, "learning_rate": 8.594904469438979e-06, "loss": 0.1361, "step": 9162 }, { "epoch": 0.26731431238695375, "grad_norm": 1.0592758026772573, "learning_rate": 8.594576097557521e-06, "loss": 0.1608, "step": 9163 }, { "epoch": 0.2673434856175973, "grad_norm": 0.7650386674472878, "learning_rate": 8.594247693584547e-06, "loss": 0.1619, "step": 9164 }, { "epoch": 0.26737265884824085, "grad_norm": 0.840847459811344, "learning_rate": 8.593919257522988e-06, "loss": 0.1519, "step": 9165 }, { "epoch": 0.2674018320788844, "grad_norm": 0.9773620966100263, "learning_rate": 8.593590789375775e-06, "loss": 0.1611, "step": 9166 }, { "epoch": 0.26743100530952796, "grad_norm": 0.8697579112754361, "learning_rate": 8.59326228914584e-06, "loss": 0.1359, "step": 9167 }, { "epoch": 0.2674601785401715, "grad_norm": 0.9788890258356004, "learning_rate": 8.59293375683612e-06, "loss": 0.139, "step": 9168 }, { "epoch": 0.2674893517708151, "grad_norm": 0.8896247117464511, "learning_rate": 8.592605192449543e-06, "loss": 0.1617, "step": 9169 }, { "epoch": 0.2675185250014587, "grad_norm": 1.0945036165838098, "learning_rate": 8.592276595989045e-06, "loss": 0.1648, "step": 9170 }, { "epoch": 0.2675476982321022, "grad_norm": 1.1723938840064165, "learning_rate": 8.59194796745756e-06, "loss": 0.156, "step": 9171 }, { "epoch": 0.2675768714627458, "grad_norm": 0.944501312743487, "learning_rate": 8.591619306858019e-06, "loss": 0.1815, "step": 9172 }, { "epoch": 0.26760604469338933, "grad_norm": 0.9320193758861964, "learning_rate": 8.59129061419336e-06, "loss": 0.1816, "step": 9173 }, { "epoch": 0.2676352179240329, "grad_norm": 1.0080757883311802, "learning_rate": 8.590961889466514e-06, "loss": 0.1503, "step": 9174 }, { "epoch": 0.2676643911546765, "grad_norm": 0.9988390895799796, "learning_rate": 8.590633132680419e-06, "loss": 0.1685, "step": 9175 }, { "epoch": 0.26769356438532005, "grad_norm": 0.9220635792551475, "learning_rate": 8.590304343838008e-06, "loss": 0.1659, "step": 9176 }, { "epoch": 0.2677227376159636, "grad_norm": 0.7333628790025075, "learning_rate": 8.589975522942218e-06, "loss": 0.1321, "step": 9177 }, { "epoch": 0.26775191084660716, "grad_norm": 1.079258263839422, "learning_rate": 8.589646669995983e-06, "loss": 0.1724, "step": 9178 }, { "epoch": 0.2677810840772507, "grad_norm": 0.8545169594228665, "learning_rate": 8.589317785002238e-06, "loss": 0.1295, "step": 9179 }, { "epoch": 0.26781025730789426, "grad_norm": 0.7275041417697529, "learning_rate": 8.588988867963922e-06, "loss": 0.1383, "step": 9180 }, { "epoch": 0.2678394305385378, "grad_norm": 1.0811129047409436, "learning_rate": 8.58865991888397e-06, "loss": 0.1805, "step": 9181 }, { "epoch": 0.2678686037691814, "grad_norm": 0.8539191484350386, "learning_rate": 8.588330937765318e-06, "loss": 0.1751, "step": 9182 }, { "epoch": 0.267897776999825, "grad_norm": 0.7555783785508675, "learning_rate": 8.588001924610905e-06, "loss": 0.1371, "step": 9183 }, { "epoch": 0.26792695023046853, "grad_norm": 0.7832880033592341, "learning_rate": 8.587672879423668e-06, "loss": 0.1533, "step": 9184 }, { "epoch": 0.2679561234611121, "grad_norm": 0.7548938877031783, "learning_rate": 8.587343802206543e-06, "loss": 0.1607, "step": 9185 }, { "epoch": 0.26798529669175564, "grad_norm": 0.7232071859426559, "learning_rate": 8.587014692962468e-06, "loss": 0.1623, "step": 9186 }, { "epoch": 0.2680144699223992, "grad_norm": 0.7363097351957697, "learning_rate": 8.586685551694384e-06, "loss": 0.1472, "step": 9187 }, { "epoch": 0.26804364315304274, "grad_norm": 0.8764554487660825, "learning_rate": 8.586356378405228e-06, "loss": 0.1571, "step": 9188 }, { "epoch": 0.26807281638368635, "grad_norm": 0.6740339109139587, "learning_rate": 8.586027173097935e-06, "loss": 0.1431, "step": 9189 }, { "epoch": 0.2681019896143299, "grad_norm": 0.7955090497724493, "learning_rate": 8.58569793577545e-06, "loss": 0.1666, "step": 9190 }, { "epoch": 0.26813116284497346, "grad_norm": 0.8110620248158532, "learning_rate": 8.58536866644071e-06, "loss": 0.1699, "step": 9191 }, { "epoch": 0.268160336075617, "grad_norm": 0.9302065781587839, "learning_rate": 8.585039365096652e-06, "loss": 0.1531, "step": 9192 }, { "epoch": 0.26818950930626057, "grad_norm": 0.7246321762602458, "learning_rate": 8.584710031746222e-06, "loss": 0.1318, "step": 9193 }, { "epoch": 0.2682186825369041, "grad_norm": 0.8120322319483088, "learning_rate": 8.584380666392354e-06, "loss": 0.1788, "step": 9194 }, { "epoch": 0.26824785576754767, "grad_norm": 0.8096852428493483, "learning_rate": 8.584051269037992e-06, "loss": 0.1534, "step": 9195 }, { "epoch": 0.2682770289981913, "grad_norm": 0.8341933111889306, "learning_rate": 8.583721839686074e-06, "loss": 0.1392, "step": 9196 }, { "epoch": 0.26830620222883483, "grad_norm": 0.8384538346422006, "learning_rate": 8.583392378339546e-06, "loss": 0.1547, "step": 9197 }, { "epoch": 0.2683353754594784, "grad_norm": 0.8337948487907447, "learning_rate": 8.583062885001345e-06, "loss": 0.1398, "step": 9198 }, { "epoch": 0.26836454869012194, "grad_norm": 0.9001017741716905, "learning_rate": 8.582733359674413e-06, "loss": 0.1388, "step": 9199 }, { "epoch": 0.2683937219207655, "grad_norm": 0.8210396932280165, "learning_rate": 8.582403802361694e-06, "loss": 0.1401, "step": 9200 }, { "epoch": 0.26842289515140905, "grad_norm": 0.8767992996359955, "learning_rate": 8.58207421306613e-06, "loss": 0.1399, "step": 9201 }, { "epoch": 0.26845206838205266, "grad_norm": 0.7541149616576392, "learning_rate": 8.58174459179066e-06, "loss": 0.1668, "step": 9202 }, { "epoch": 0.2684812416126962, "grad_norm": 1.0436433724993603, "learning_rate": 8.58141493853823e-06, "loss": 0.1596, "step": 9203 }, { "epoch": 0.26851041484333976, "grad_norm": 0.839183303192571, "learning_rate": 8.581085253311783e-06, "loss": 0.1518, "step": 9204 }, { "epoch": 0.2685395880739833, "grad_norm": 0.885748581867198, "learning_rate": 8.580755536114262e-06, "loss": 0.1646, "step": 9205 }, { "epoch": 0.26856876130462687, "grad_norm": 0.9010468031908282, "learning_rate": 8.58042578694861e-06, "loss": 0.1846, "step": 9206 }, { "epoch": 0.2685979345352704, "grad_norm": 0.9110955868109781, "learning_rate": 8.580096005817771e-06, "loss": 0.1526, "step": 9207 }, { "epoch": 0.268627107765914, "grad_norm": 0.7531201583190602, "learning_rate": 8.57976619272469e-06, "loss": 0.1607, "step": 9208 }, { "epoch": 0.2686562809965576, "grad_norm": 0.7406639129407381, "learning_rate": 8.57943634767231e-06, "loss": 0.1371, "step": 9209 }, { "epoch": 0.26868545422720114, "grad_norm": 0.8488613798188147, "learning_rate": 8.579106470663578e-06, "loss": 0.1679, "step": 9210 }, { "epoch": 0.2687146274578447, "grad_norm": 0.8229960816134502, "learning_rate": 8.578776561701438e-06, "loss": 0.1632, "step": 9211 }, { "epoch": 0.26874380068848824, "grad_norm": 1.1316756306441187, "learning_rate": 8.578446620788834e-06, "loss": 0.1689, "step": 9212 }, { "epoch": 0.2687729739191318, "grad_norm": 0.7629383223594314, "learning_rate": 8.578116647928714e-06, "loss": 0.177, "step": 9213 }, { "epoch": 0.26880214714977535, "grad_norm": 1.161795672400416, "learning_rate": 8.577786643124022e-06, "loss": 0.1656, "step": 9214 }, { "epoch": 0.2688313203804189, "grad_norm": 0.7668669152188602, "learning_rate": 8.577456606377704e-06, "loss": 0.1644, "step": 9215 }, { "epoch": 0.2688604936110625, "grad_norm": 0.7187454993662719, "learning_rate": 8.577126537692707e-06, "loss": 0.1525, "step": 9216 }, { "epoch": 0.26888966684170607, "grad_norm": 0.98976293249415, "learning_rate": 8.576796437071982e-06, "loss": 0.182, "step": 9217 }, { "epoch": 0.2689188400723496, "grad_norm": 0.7323738644458602, "learning_rate": 8.576466304518469e-06, "loss": 0.1478, "step": 9218 }, { "epoch": 0.26894801330299317, "grad_norm": 0.7744388673034995, "learning_rate": 8.57613614003512e-06, "loss": 0.1584, "step": 9219 }, { "epoch": 0.2689771865336367, "grad_norm": 1.4568329485976723, "learning_rate": 8.57580594362488e-06, "loss": 0.1464, "step": 9220 }, { "epoch": 0.2690063597642803, "grad_norm": 0.8150362305649753, "learning_rate": 8.5754757152907e-06, "loss": 0.1603, "step": 9221 }, { "epoch": 0.26903553299492383, "grad_norm": 0.9979086478524237, "learning_rate": 8.575145455035525e-06, "loss": 0.1578, "step": 9222 }, { "epoch": 0.26906470622556744, "grad_norm": 0.8383760282009125, "learning_rate": 8.574815162862305e-06, "loss": 0.1614, "step": 9223 }, { "epoch": 0.269093879456211, "grad_norm": 0.8110399966876206, "learning_rate": 8.574484838773988e-06, "loss": 0.1571, "step": 9224 }, { "epoch": 0.26912305268685455, "grad_norm": 0.9286612870673736, "learning_rate": 8.574154482773524e-06, "loss": 0.1386, "step": 9225 }, { "epoch": 0.2691522259174981, "grad_norm": 0.8942256144728984, "learning_rate": 8.573824094863863e-06, "loss": 0.1884, "step": 9226 }, { "epoch": 0.26918139914814165, "grad_norm": 0.8160018496012329, "learning_rate": 8.573493675047953e-06, "loss": 0.1827, "step": 9227 }, { "epoch": 0.2692105723787852, "grad_norm": 0.9950852190666365, "learning_rate": 8.573163223328744e-06, "loss": 0.1467, "step": 9228 }, { "epoch": 0.2692397456094288, "grad_norm": 0.9832431634164503, "learning_rate": 8.572832739709187e-06, "loss": 0.1525, "step": 9229 }, { "epoch": 0.26926891884007237, "grad_norm": 0.8231481339351706, "learning_rate": 8.572502224192233e-06, "loss": 0.1583, "step": 9230 }, { "epoch": 0.2692980920707159, "grad_norm": 0.924052550024724, "learning_rate": 8.572171676780832e-06, "loss": 0.1417, "step": 9231 }, { "epoch": 0.2693272653013595, "grad_norm": 0.8464288011978807, "learning_rate": 8.571841097477933e-06, "loss": 0.1314, "step": 9232 }, { "epoch": 0.26935643853200303, "grad_norm": 0.8420754487561416, "learning_rate": 8.571510486286492e-06, "loss": 0.1333, "step": 9233 }, { "epoch": 0.2693856117626466, "grad_norm": 1.0327153316506936, "learning_rate": 8.571179843209457e-06, "loss": 0.182, "step": 9234 }, { "epoch": 0.26941478499329014, "grad_norm": 1.0459509761829675, "learning_rate": 8.57084916824978e-06, "loss": 0.1758, "step": 9235 }, { "epoch": 0.26944395822393374, "grad_norm": 0.9712306776005475, "learning_rate": 8.570518461410415e-06, "loss": 0.148, "step": 9236 }, { "epoch": 0.2694731314545773, "grad_norm": 0.7930466124645379, "learning_rate": 8.570187722694312e-06, "loss": 0.1654, "step": 9237 }, { "epoch": 0.26950230468522085, "grad_norm": 0.780649466370968, "learning_rate": 8.569856952104427e-06, "loss": 0.1626, "step": 9238 }, { "epoch": 0.2695314779158644, "grad_norm": 0.9327302768809439, "learning_rate": 8.56952614964371e-06, "loss": 0.1627, "step": 9239 }, { "epoch": 0.26956065114650796, "grad_norm": 0.8298760840573467, "learning_rate": 8.569195315315117e-06, "loss": 0.1435, "step": 9240 }, { "epoch": 0.2695898243771515, "grad_norm": 0.866636920406715, "learning_rate": 8.568864449121599e-06, "loss": 0.1935, "step": 9241 }, { "epoch": 0.26961899760779506, "grad_norm": 0.8687460907291265, "learning_rate": 8.568533551066113e-06, "loss": 0.1839, "step": 9242 }, { "epoch": 0.26964817083843867, "grad_norm": 0.9308339552873933, "learning_rate": 8.56820262115161e-06, "loss": 0.15, "step": 9243 }, { "epoch": 0.2696773440690822, "grad_norm": 1.3052139536501206, "learning_rate": 8.567871659381047e-06, "loss": 0.1576, "step": 9244 }, { "epoch": 0.2697065172997258, "grad_norm": 0.9432496306131262, "learning_rate": 8.567540665757375e-06, "loss": 0.1902, "step": 9245 }, { "epoch": 0.26973569053036933, "grad_norm": 0.7887585699622204, "learning_rate": 8.567209640283553e-06, "loss": 0.1457, "step": 9246 }, { "epoch": 0.2697648637610129, "grad_norm": 0.8241288397490717, "learning_rate": 8.566878582962534e-06, "loss": 0.1569, "step": 9247 }, { "epoch": 0.26979403699165644, "grad_norm": 0.9992394244678918, "learning_rate": 8.566547493797278e-06, "loss": 0.1625, "step": 9248 }, { "epoch": 0.2698232102223, "grad_norm": 0.8744530226184944, "learning_rate": 8.566216372790735e-06, "loss": 0.1481, "step": 9249 }, { "epoch": 0.2698523834529436, "grad_norm": 0.8531297003913738, "learning_rate": 8.565885219945862e-06, "loss": 0.1335, "step": 9250 }, { "epoch": 0.26988155668358715, "grad_norm": 0.721705219093551, "learning_rate": 8.565554035265618e-06, "loss": 0.1431, "step": 9251 }, { "epoch": 0.2699107299142307, "grad_norm": 0.8786835574171571, "learning_rate": 8.565222818752959e-06, "loss": 0.1788, "step": 9252 }, { "epoch": 0.26993990314487426, "grad_norm": 0.7672552773990936, "learning_rate": 8.564891570410842e-06, "loss": 0.1617, "step": 9253 }, { "epoch": 0.2699690763755178, "grad_norm": 0.7098568549686243, "learning_rate": 8.564560290242224e-06, "loss": 0.1427, "step": 9254 }, { "epoch": 0.26999824960616137, "grad_norm": 0.685298869254431, "learning_rate": 8.564228978250062e-06, "loss": 0.1575, "step": 9255 }, { "epoch": 0.2700274228368049, "grad_norm": 0.9341513204785777, "learning_rate": 8.563897634437316e-06, "loss": 0.1572, "step": 9256 }, { "epoch": 0.27005659606744853, "grad_norm": 0.6749370421864948, "learning_rate": 8.563566258806942e-06, "loss": 0.1654, "step": 9257 }, { "epoch": 0.2700857692980921, "grad_norm": 0.6732788259830977, "learning_rate": 8.5632348513619e-06, "loss": 0.1533, "step": 9258 }, { "epoch": 0.27011494252873564, "grad_norm": 0.9100626040318655, "learning_rate": 8.562903412105146e-06, "loss": 0.1599, "step": 9259 }, { "epoch": 0.2701441157593792, "grad_norm": 0.8611613812154474, "learning_rate": 8.562571941039641e-06, "loss": 0.167, "step": 9260 }, { "epoch": 0.27017328899002274, "grad_norm": 0.9697066162839673, "learning_rate": 8.562240438168345e-06, "loss": 0.1409, "step": 9261 }, { "epoch": 0.2702024622206663, "grad_norm": 0.9169661997507168, "learning_rate": 8.561908903494216e-06, "loss": 0.1657, "step": 9262 }, { "epoch": 0.2702316354513099, "grad_norm": 0.6697914017206752, "learning_rate": 8.561577337020217e-06, "loss": 0.1351, "step": 9263 }, { "epoch": 0.27026080868195346, "grad_norm": 0.6985498204266574, "learning_rate": 8.561245738749302e-06, "loss": 0.1519, "step": 9264 }, { "epoch": 0.270289981912597, "grad_norm": 0.6572420921352242, "learning_rate": 8.560914108684437e-06, "loss": 0.1522, "step": 9265 }, { "epoch": 0.27031915514324056, "grad_norm": 0.8025213477402136, "learning_rate": 8.560582446828582e-06, "loss": 0.1455, "step": 9266 }, { "epoch": 0.2703483283738841, "grad_norm": 0.8256942591098151, "learning_rate": 8.560250753184695e-06, "loss": 0.1438, "step": 9267 }, { "epoch": 0.27037750160452767, "grad_norm": 1.6282283347267548, "learning_rate": 8.559919027755741e-06, "loss": 0.1566, "step": 9268 }, { "epoch": 0.2704066748351712, "grad_norm": 0.8542265396487017, "learning_rate": 8.55958727054468e-06, "loss": 0.152, "step": 9269 }, { "epoch": 0.27043584806581483, "grad_norm": 0.8206370962531879, "learning_rate": 8.559255481554471e-06, "loss": 0.1656, "step": 9270 }, { "epoch": 0.2704650212964584, "grad_norm": 0.6080706314194168, "learning_rate": 8.558923660788081e-06, "loss": 0.1317, "step": 9271 }, { "epoch": 0.27049419452710194, "grad_norm": 0.8862959009360586, "learning_rate": 8.558591808248469e-06, "loss": 0.145, "step": 9272 }, { "epoch": 0.2705233677577455, "grad_norm": 0.8417583707498439, "learning_rate": 8.5582599239386e-06, "loss": 0.1855, "step": 9273 }, { "epoch": 0.27055254098838905, "grad_norm": 0.6334134148413211, "learning_rate": 8.557928007861433e-06, "loss": 0.1513, "step": 9274 }, { "epoch": 0.2705817142190326, "grad_norm": 0.7808936451747582, "learning_rate": 8.557596060019936e-06, "loss": 0.1381, "step": 9275 }, { "epoch": 0.27061088744967615, "grad_norm": 0.8780040742489097, "learning_rate": 8.557264080417071e-06, "loss": 0.1418, "step": 9276 }, { "epoch": 0.27064006068031976, "grad_norm": 0.788857754357467, "learning_rate": 8.556932069055803e-06, "loss": 0.1608, "step": 9277 }, { "epoch": 0.2706692339109633, "grad_norm": 0.8843795580702432, "learning_rate": 8.556600025939092e-06, "loss": 0.1721, "step": 9278 }, { "epoch": 0.27069840714160687, "grad_norm": 1.071660232202477, "learning_rate": 8.556267951069906e-06, "loss": 0.1824, "step": 9279 }, { "epoch": 0.2707275803722504, "grad_norm": 0.880605245956067, "learning_rate": 8.555935844451209e-06, "loss": 0.1354, "step": 9280 }, { "epoch": 0.270756753602894, "grad_norm": 0.7316997735124465, "learning_rate": 8.555603706085965e-06, "loss": 0.14, "step": 9281 }, { "epoch": 0.2707859268335375, "grad_norm": 1.1402191306652094, "learning_rate": 8.55527153597714e-06, "loss": 0.1342, "step": 9282 }, { "epoch": 0.2708151000641811, "grad_norm": 0.95009564932012, "learning_rate": 8.5549393341277e-06, "loss": 0.1573, "step": 9283 }, { "epoch": 0.2708442732948247, "grad_norm": 0.6920552480734122, "learning_rate": 8.554607100540609e-06, "loss": 0.1513, "step": 9284 }, { "epoch": 0.27087344652546824, "grad_norm": 1.107356065150215, "learning_rate": 8.554274835218834e-06, "loss": 0.1697, "step": 9285 }, { "epoch": 0.2709026197561118, "grad_norm": 0.8283999436443544, "learning_rate": 8.553942538165344e-06, "loss": 0.1527, "step": 9286 }, { "epoch": 0.27093179298675535, "grad_norm": 0.8953118067974883, "learning_rate": 8.5536102093831e-06, "loss": 0.1472, "step": 9287 }, { "epoch": 0.2709609662173989, "grad_norm": 0.8188603250966501, "learning_rate": 8.553277848875077e-06, "loss": 0.2096, "step": 9288 }, { "epoch": 0.27099013944804246, "grad_norm": 1.0244274232072392, "learning_rate": 8.552945456644234e-06, "loss": 0.1616, "step": 9289 }, { "epoch": 0.27101931267868606, "grad_norm": 0.8385400445991816, "learning_rate": 8.552613032693545e-06, "loss": 0.1605, "step": 9290 }, { "epoch": 0.2710484859093296, "grad_norm": 0.729594948400735, "learning_rate": 8.552280577025972e-06, "loss": 0.1561, "step": 9291 }, { "epoch": 0.27107765913997317, "grad_norm": 0.9424074248978331, "learning_rate": 8.551948089644487e-06, "loss": 0.1773, "step": 9292 }, { "epoch": 0.2711068323706167, "grad_norm": 0.8850560771163402, "learning_rate": 8.551615570552058e-06, "loss": 0.1766, "step": 9293 }, { "epoch": 0.2711360056012603, "grad_norm": 0.7019283025275048, "learning_rate": 8.551283019751652e-06, "loss": 0.1595, "step": 9294 }, { "epoch": 0.27116517883190383, "grad_norm": 0.7876402084548211, "learning_rate": 8.550950437246239e-06, "loss": 0.2028, "step": 9295 }, { "epoch": 0.2711943520625474, "grad_norm": 0.9726648168405307, "learning_rate": 8.55061782303879e-06, "loss": 0.2062, "step": 9296 }, { "epoch": 0.271223525293191, "grad_norm": 0.743316445583261, "learning_rate": 8.550285177132271e-06, "loss": 0.1621, "step": 9297 }, { "epoch": 0.27125269852383455, "grad_norm": 0.897330137360762, "learning_rate": 8.549952499529654e-06, "loss": 0.1589, "step": 9298 }, { "epoch": 0.2712818717544781, "grad_norm": 0.8456954463841175, "learning_rate": 8.54961979023391e-06, "loss": 0.1563, "step": 9299 }, { "epoch": 0.27131104498512165, "grad_norm": 0.8642499948240627, "learning_rate": 8.549287049248006e-06, "loss": 0.1468, "step": 9300 }, { "epoch": 0.2713402182157652, "grad_norm": 0.8681786148279045, "learning_rate": 8.548954276574914e-06, "loss": 0.1511, "step": 9301 }, { "epoch": 0.27136939144640876, "grad_norm": 0.8314107658375232, "learning_rate": 8.548621472217606e-06, "loss": 0.199, "step": 9302 }, { "epoch": 0.2713985646770523, "grad_norm": 0.883653189288504, "learning_rate": 8.548288636179053e-06, "loss": 0.1512, "step": 9303 }, { "epoch": 0.2714277379076959, "grad_norm": 0.9456358639701622, "learning_rate": 8.547955768462226e-06, "loss": 0.1523, "step": 9304 }, { "epoch": 0.2714569111383395, "grad_norm": 0.7569096835080417, "learning_rate": 8.547622869070096e-06, "loss": 0.1696, "step": 9305 }, { "epoch": 0.271486084368983, "grad_norm": 1.030717279030035, "learning_rate": 8.547289938005638e-06, "loss": 0.1561, "step": 9306 }, { "epoch": 0.2715152575996266, "grad_norm": 0.9282325955141839, "learning_rate": 8.54695697527182e-06, "loss": 0.1362, "step": 9307 }, { "epoch": 0.27154443083027013, "grad_norm": 1.1271936814291548, "learning_rate": 8.546623980871617e-06, "loss": 0.1802, "step": 9308 }, { "epoch": 0.2715736040609137, "grad_norm": 1.0773611159059693, "learning_rate": 8.546290954808004e-06, "loss": 0.1555, "step": 9309 }, { "epoch": 0.27160277729155724, "grad_norm": 0.8407925582473406, "learning_rate": 8.54595789708395e-06, "loss": 0.1732, "step": 9310 }, { "epoch": 0.27163195052220085, "grad_norm": 0.9216617540670842, "learning_rate": 8.54562480770243e-06, "loss": 0.1375, "step": 9311 }, { "epoch": 0.2716611237528444, "grad_norm": 0.9849609811805082, "learning_rate": 8.54529168666642e-06, "loss": 0.1558, "step": 9312 }, { "epoch": 0.27169029698348796, "grad_norm": 0.8263233447640331, "learning_rate": 8.544958533978891e-06, "loss": 0.1605, "step": 9313 }, { "epoch": 0.2717194702141315, "grad_norm": 0.7228264966155231, "learning_rate": 8.544625349642818e-06, "loss": 0.1453, "step": 9314 }, { "epoch": 0.27174864344477506, "grad_norm": 0.8717108165395171, "learning_rate": 8.544292133661178e-06, "loss": 0.1437, "step": 9315 }, { "epoch": 0.2717778166754186, "grad_norm": 0.6765522309707471, "learning_rate": 8.543958886036942e-06, "loss": 0.1644, "step": 9316 }, { "epoch": 0.2718069899060622, "grad_norm": 0.9209086600834943, "learning_rate": 8.543625606773088e-06, "loss": 0.1614, "step": 9317 }, { "epoch": 0.2718361631367058, "grad_norm": 0.6893649129641594, "learning_rate": 8.543292295872591e-06, "loss": 0.1314, "step": 9318 }, { "epoch": 0.27186533636734933, "grad_norm": 1.004828097157871, "learning_rate": 8.542958953338424e-06, "loss": 0.1766, "step": 9319 }, { "epoch": 0.2718945095979929, "grad_norm": 0.8221329832359012, "learning_rate": 8.542625579173567e-06, "loss": 0.1761, "step": 9320 }, { "epoch": 0.27192368282863644, "grad_norm": 0.7769165503015246, "learning_rate": 8.542292173380994e-06, "loss": 0.1485, "step": 9321 }, { "epoch": 0.27195285605928, "grad_norm": 0.8324451091458328, "learning_rate": 8.541958735963683e-06, "loss": 0.1413, "step": 9322 }, { "epoch": 0.27198202928992354, "grad_norm": 0.7786202238810676, "learning_rate": 8.54162526692461e-06, "loss": 0.1442, "step": 9323 }, { "epoch": 0.27201120252056715, "grad_norm": 0.74539655946189, "learning_rate": 8.541291766266751e-06, "loss": 0.1439, "step": 9324 }, { "epoch": 0.2720403757512107, "grad_norm": 0.7872587596658237, "learning_rate": 8.540958233993084e-06, "loss": 0.1492, "step": 9325 }, { "epoch": 0.27206954898185426, "grad_norm": 0.9161060159698041, "learning_rate": 8.540624670106587e-06, "loss": 0.1496, "step": 9326 }, { "epoch": 0.2720987222124978, "grad_norm": 0.7337597554029629, "learning_rate": 8.54029107461024e-06, "loss": 0.1731, "step": 9327 }, { "epoch": 0.27212789544314137, "grad_norm": 0.9281427391751094, "learning_rate": 8.539957447507019e-06, "loss": 0.1601, "step": 9328 }, { "epoch": 0.2721570686737849, "grad_norm": 0.8651842214497207, "learning_rate": 8.539623788799903e-06, "loss": 0.1496, "step": 9329 }, { "epoch": 0.27218624190442847, "grad_norm": 0.6802650700621498, "learning_rate": 8.53929009849187e-06, "loss": 0.1468, "step": 9330 }, { "epoch": 0.2722154151350721, "grad_norm": 1.1047225626386818, "learning_rate": 8.5389563765859e-06, "loss": 0.1367, "step": 9331 }, { "epoch": 0.27224458836571563, "grad_norm": 0.9881449060735117, "learning_rate": 8.538622623084973e-06, "loss": 0.1797, "step": 9332 }, { "epoch": 0.2722737615963592, "grad_norm": 0.7753754384464604, "learning_rate": 8.538288837992066e-06, "loss": 0.1803, "step": 9333 }, { "epoch": 0.27230293482700274, "grad_norm": 1.1058551155594245, "learning_rate": 8.537955021310162e-06, "loss": 0.1688, "step": 9334 }, { "epoch": 0.2723321080576463, "grad_norm": 0.7792812827582498, "learning_rate": 8.537621173042241e-06, "loss": 0.1629, "step": 9335 }, { "epoch": 0.27236128128828985, "grad_norm": 0.8340503383022526, "learning_rate": 8.537287293191283e-06, "loss": 0.1583, "step": 9336 }, { "epoch": 0.2723904545189334, "grad_norm": 0.7750323342153583, "learning_rate": 8.536953381760266e-06, "loss": 0.1425, "step": 9337 }, { "epoch": 0.272419627749577, "grad_norm": 0.8793720364119025, "learning_rate": 8.536619438752176e-06, "loss": 0.1596, "step": 9338 }, { "epoch": 0.27244880098022056, "grad_norm": 0.7476810066439837, "learning_rate": 8.536285464169992e-06, "loss": 0.1577, "step": 9339 }, { "epoch": 0.2724779742108641, "grad_norm": 0.7617103675329747, "learning_rate": 8.535951458016693e-06, "loss": 0.1679, "step": 9340 }, { "epoch": 0.27250714744150767, "grad_norm": 0.8757811291725777, "learning_rate": 8.535617420295267e-06, "loss": 0.1488, "step": 9341 }, { "epoch": 0.2725363206721512, "grad_norm": 0.9223600901517369, "learning_rate": 8.53528335100869e-06, "loss": 0.17, "step": 9342 }, { "epoch": 0.2725654939027948, "grad_norm": 0.9502397521295001, "learning_rate": 8.534949250159947e-06, "loss": 0.153, "step": 9343 }, { "epoch": 0.2725946671334384, "grad_norm": 0.8614990086082593, "learning_rate": 8.534615117752024e-06, "loss": 0.1478, "step": 9344 }, { "epoch": 0.27262384036408194, "grad_norm": 0.7105929316816207, "learning_rate": 8.534280953787899e-06, "loss": 0.1413, "step": 9345 }, { "epoch": 0.2726530135947255, "grad_norm": 1.1303127016511665, "learning_rate": 8.533946758270556e-06, "loss": 0.1684, "step": 9346 }, { "epoch": 0.27268218682536904, "grad_norm": 0.7069785066889098, "learning_rate": 8.533612531202981e-06, "loss": 0.1393, "step": 9347 }, { "epoch": 0.2727113600560126, "grad_norm": 0.7996427312250106, "learning_rate": 8.533278272588159e-06, "loss": 0.1656, "step": 9348 }, { "epoch": 0.27274053328665615, "grad_norm": 1.0170876726172817, "learning_rate": 8.53294398242907e-06, "loss": 0.1747, "step": 9349 }, { "epoch": 0.2727697065172997, "grad_norm": 0.7867301607587156, "learning_rate": 8.5326096607287e-06, "loss": 0.1608, "step": 9350 }, { "epoch": 0.2727988797479433, "grad_norm": 0.8138361970107523, "learning_rate": 8.532275307490034e-06, "loss": 0.1786, "step": 9351 }, { "epoch": 0.27282805297858687, "grad_norm": 0.899526759375119, "learning_rate": 8.531940922716058e-06, "loss": 0.1555, "step": 9352 }, { "epoch": 0.2728572262092304, "grad_norm": 0.8441496100724186, "learning_rate": 8.531606506409757e-06, "loss": 0.1737, "step": 9353 }, { "epoch": 0.272886399439874, "grad_norm": 0.8824631930854943, "learning_rate": 8.531272058574116e-06, "loss": 0.1447, "step": 9354 }, { "epoch": 0.2729155726705175, "grad_norm": 0.9767803676552924, "learning_rate": 8.530937579212122e-06, "loss": 0.169, "step": 9355 }, { "epoch": 0.2729447459011611, "grad_norm": 1.5006568498457795, "learning_rate": 8.530603068326759e-06, "loss": 0.168, "step": 9356 }, { "epoch": 0.27297391913180463, "grad_norm": 0.8441209861961562, "learning_rate": 8.530268525921015e-06, "loss": 0.1273, "step": 9357 }, { "epoch": 0.27300309236244824, "grad_norm": 1.0782498860134957, "learning_rate": 8.529933951997875e-06, "loss": 0.1558, "step": 9358 }, { "epoch": 0.2730322655930918, "grad_norm": 0.655681288690383, "learning_rate": 8.52959934656033e-06, "loss": 0.1514, "step": 9359 }, { "epoch": 0.27306143882373535, "grad_norm": 1.041019869902648, "learning_rate": 8.529264709611362e-06, "loss": 0.177, "step": 9360 }, { "epoch": 0.2730906120543789, "grad_norm": 0.8222929072925065, "learning_rate": 8.528930041153962e-06, "loss": 0.1569, "step": 9361 }, { "epoch": 0.27311978528502245, "grad_norm": 0.6549911906259803, "learning_rate": 8.528595341191117e-06, "loss": 0.149, "step": 9362 }, { "epoch": 0.273148958515666, "grad_norm": 0.9660138123216552, "learning_rate": 8.528260609725816e-06, "loss": 0.159, "step": 9363 }, { "epoch": 0.27317813174630956, "grad_norm": 0.8154555060234866, "learning_rate": 8.527925846761046e-06, "loss": 0.1625, "step": 9364 }, { "epoch": 0.27320730497695317, "grad_norm": 0.783703359651396, "learning_rate": 8.527591052299797e-06, "loss": 0.1676, "step": 9365 }, { "epoch": 0.2732364782075967, "grad_norm": 0.9158468762553383, "learning_rate": 8.527256226345056e-06, "loss": 0.1566, "step": 9366 }, { "epoch": 0.2732656514382403, "grad_norm": 0.9611408206840761, "learning_rate": 8.526921368899815e-06, "loss": 0.1441, "step": 9367 }, { "epoch": 0.27329482466888383, "grad_norm": 0.8413751222970743, "learning_rate": 8.52658647996706e-06, "loss": 0.1453, "step": 9368 }, { "epoch": 0.2733239978995274, "grad_norm": 0.9248861010144814, "learning_rate": 8.526251559549783e-06, "loss": 0.1636, "step": 9369 }, { "epoch": 0.27335317113017094, "grad_norm": 0.9136868728947342, "learning_rate": 8.525916607650975e-06, "loss": 0.1564, "step": 9370 }, { "epoch": 0.27338234436081454, "grad_norm": 0.9328527593817365, "learning_rate": 8.525581624273624e-06, "loss": 0.1588, "step": 9371 }, { "epoch": 0.2734115175914581, "grad_norm": 0.9736891820902451, "learning_rate": 8.525246609420724e-06, "loss": 0.1625, "step": 9372 }, { "epoch": 0.27344069082210165, "grad_norm": 0.9497070190972525, "learning_rate": 8.524911563095262e-06, "loss": 0.161, "step": 9373 }, { "epoch": 0.2734698640527452, "grad_norm": 0.927790040062506, "learning_rate": 8.524576485300231e-06, "loss": 0.144, "step": 9374 }, { "epoch": 0.27349903728338876, "grad_norm": 0.7191343826786696, "learning_rate": 8.524241376038623e-06, "loss": 0.1349, "step": 9375 }, { "epoch": 0.2735282105140323, "grad_norm": 0.944773784415641, "learning_rate": 8.523906235313428e-06, "loss": 0.1508, "step": 9376 }, { "epoch": 0.27355738374467586, "grad_norm": 0.7617543791306891, "learning_rate": 8.52357106312764e-06, "loss": 0.1475, "step": 9377 }, { "epoch": 0.2735865569753195, "grad_norm": 0.8210786254523115, "learning_rate": 8.523235859484253e-06, "loss": 0.1479, "step": 9378 }, { "epoch": 0.273615730205963, "grad_norm": 0.9006378895303033, "learning_rate": 8.522900624386254e-06, "loss": 0.1673, "step": 9379 }, { "epoch": 0.2736449034366066, "grad_norm": 0.7785757867311436, "learning_rate": 8.522565357836642e-06, "loss": 0.1475, "step": 9380 }, { "epoch": 0.27367407666725013, "grad_norm": 0.9006336390443282, "learning_rate": 8.522230059838405e-06, "loss": 0.1666, "step": 9381 }, { "epoch": 0.2737032498978937, "grad_norm": 0.9108054518379897, "learning_rate": 8.521894730394541e-06, "loss": 0.1625, "step": 9382 }, { "epoch": 0.27373242312853724, "grad_norm": 0.7737262733776332, "learning_rate": 8.521559369508041e-06, "loss": 0.1566, "step": 9383 }, { "epoch": 0.2737615963591808, "grad_norm": 0.8088826900645648, "learning_rate": 8.5212239771819e-06, "loss": 0.1429, "step": 9384 }, { "epoch": 0.2737907695898244, "grad_norm": 0.7963841820897791, "learning_rate": 8.52088855341911e-06, "loss": 0.1262, "step": 9385 }, { "epoch": 0.27381994282046795, "grad_norm": 0.7710471350944503, "learning_rate": 8.520553098222668e-06, "loss": 0.1207, "step": 9386 }, { "epoch": 0.2738491160511115, "grad_norm": 0.6119562969575766, "learning_rate": 8.52021761159557e-06, "loss": 0.1494, "step": 9387 }, { "epoch": 0.27387828928175506, "grad_norm": 0.8668420715332206, "learning_rate": 8.519882093540808e-06, "loss": 0.1467, "step": 9388 }, { "epoch": 0.2739074625123986, "grad_norm": 0.8984444470974137, "learning_rate": 8.519546544061381e-06, "loss": 0.1644, "step": 9389 }, { "epoch": 0.27393663574304217, "grad_norm": 0.879324483678071, "learning_rate": 8.51921096316028e-06, "loss": 0.1712, "step": 9390 }, { "epoch": 0.2739658089736857, "grad_norm": 0.8891892272802413, "learning_rate": 8.518875350840504e-06, "loss": 0.1455, "step": 9391 }, { "epoch": 0.27399498220432933, "grad_norm": 0.7005306166547945, "learning_rate": 8.51853970710505e-06, "loss": 0.1612, "step": 9392 }, { "epoch": 0.2740241554349729, "grad_norm": 1.0161531778115314, "learning_rate": 8.518204031956913e-06, "loss": 0.1308, "step": 9393 }, { "epoch": 0.27405332866561644, "grad_norm": 0.6532963668948017, "learning_rate": 8.51786832539909e-06, "loss": 0.1543, "step": 9394 }, { "epoch": 0.27408250189626, "grad_norm": 0.8509789010570168, "learning_rate": 8.51753258743458e-06, "loss": 0.1822, "step": 9395 }, { "epoch": 0.27411167512690354, "grad_norm": 0.9009064557668237, "learning_rate": 8.517196818066377e-06, "loss": 0.165, "step": 9396 }, { "epoch": 0.2741408483575471, "grad_norm": 0.6804187319634439, "learning_rate": 8.51686101729748e-06, "loss": 0.1404, "step": 9397 }, { "epoch": 0.27417002158819065, "grad_norm": 0.887818453724445, "learning_rate": 8.516525185130888e-06, "loss": 0.1455, "step": 9398 }, { "epoch": 0.27419919481883426, "grad_norm": 0.7398801558085408, "learning_rate": 8.5161893215696e-06, "loss": 0.142, "step": 9399 }, { "epoch": 0.2742283680494778, "grad_norm": 0.6916633544053097, "learning_rate": 8.515853426616612e-06, "loss": 0.1381, "step": 9400 }, { "epoch": 0.27425754128012136, "grad_norm": 0.7964698108646552, "learning_rate": 8.515517500274923e-06, "loss": 0.1479, "step": 9401 }, { "epoch": 0.2742867145107649, "grad_norm": 0.8310633527532242, "learning_rate": 8.515181542547534e-06, "loss": 0.1467, "step": 9402 }, { "epoch": 0.27431588774140847, "grad_norm": 0.7182431225015057, "learning_rate": 8.514845553437443e-06, "loss": 0.1397, "step": 9403 }, { "epoch": 0.274345060972052, "grad_norm": 1.0091043667757507, "learning_rate": 8.514509532947651e-06, "loss": 0.1658, "step": 9404 }, { "epoch": 0.27437423420269563, "grad_norm": 1.0161650994653664, "learning_rate": 8.514173481081156e-06, "loss": 0.1545, "step": 9405 }, { "epoch": 0.2744034074333392, "grad_norm": 0.8351714662634255, "learning_rate": 8.513837397840958e-06, "loss": 0.1802, "step": 9406 }, { "epoch": 0.27443258066398274, "grad_norm": 1.2116086747951689, "learning_rate": 8.51350128323006e-06, "loss": 0.1461, "step": 9407 }, { "epoch": 0.2744617538946263, "grad_norm": 0.9772749691648196, "learning_rate": 8.513165137251463e-06, "loss": 0.1611, "step": 9408 }, { "epoch": 0.27449092712526985, "grad_norm": 0.8097937103562994, "learning_rate": 8.512828959908162e-06, "loss": 0.1442, "step": 9409 }, { "epoch": 0.2745201003559134, "grad_norm": 0.9182900497252301, "learning_rate": 8.512492751203165e-06, "loss": 0.1462, "step": 9410 }, { "epoch": 0.27454927358655695, "grad_norm": 0.8817319234172042, "learning_rate": 8.512156511139471e-06, "loss": 0.1632, "step": 9411 }, { "epoch": 0.27457844681720056, "grad_norm": 0.8286976994626774, "learning_rate": 8.511820239720084e-06, "loss": 0.1743, "step": 9412 }, { "epoch": 0.2746076200478441, "grad_norm": 0.735404074180732, "learning_rate": 8.511483936948002e-06, "loss": 0.1411, "step": 9413 }, { "epoch": 0.27463679327848767, "grad_norm": 0.9814813185347944, "learning_rate": 8.51114760282623e-06, "loss": 0.1515, "step": 9414 }, { "epoch": 0.2746659665091312, "grad_norm": 0.9934159197137334, "learning_rate": 8.51081123735777e-06, "loss": 0.1687, "step": 9415 }, { "epoch": 0.2746951397397748, "grad_norm": 0.7681812285132499, "learning_rate": 8.510474840545627e-06, "loss": 0.1555, "step": 9416 }, { "epoch": 0.2747243129704183, "grad_norm": 0.9609235946924203, "learning_rate": 8.5101384123928e-06, "loss": 0.1711, "step": 9417 }, { "epoch": 0.2747534862010619, "grad_norm": 1.0237015564079377, "learning_rate": 8.509801952902296e-06, "loss": 0.155, "step": 9418 }, { "epoch": 0.2747826594317055, "grad_norm": 0.7614658059014379, "learning_rate": 8.50946546207712e-06, "loss": 0.1631, "step": 9419 }, { "epoch": 0.27481183266234904, "grad_norm": 0.8529999767142598, "learning_rate": 8.509128939920272e-06, "loss": 0.1568, "step": 9420 }, { "epoch": 0.2748410058929926, "grad_norm": 0.8678651961051029, "learning_rate": 8.508792386434759e-06, "loss": 0.16, "step": 9421 }, { "epoch": 0.27487017912363615, "grad_norm": 0.7979435258787866, "learning_rate": 8.508455801623586e-06, "loss": 0.1792, "step": 9422 }, { "epoch": 0.2748993523542797, "grad_norm": 1.0046558438662228, "learning_rate": 8.508119185489757e-06, "loss": 0.1365, "step": 9423 }, { "epoch": 0.27492852558492326, "grad_norm": 0.7355819122446643, "learning_rate": 8.507782538036276e-06, "loss": 0.1447, "step": 9424 }, { "epoch": 0.2749576988155668, "grad_norm": 0.8460955767277549, "learning_rate": 8.507445859266152e-06, "loss": 0.1353, "step": 9425 }, { "epoch": 0.2749868720462104, "grad_norm": 0.7817880534790024, "learning_rate": 8.507109149182387e-06, "loss": 0.1678, "step": 9426 }, { "epoch": 0.27501604527685397, "grad_norm": 0.7356257704051247, "learning_rate": 8.506772407787988e-06, "loss": 0.176, "step": 9427 }, { "epoch": 0.2750452185074975, "grad_norm": 0.8704584172582093, "learning_rate": 8.506435635085966e-06, "loss": 0.1321, "step": 9428 }, { "epoch": 0.2750743917381411, "grad_norm": 0.8179491883004322, "learning_rate": 8.50609883107932e-06, "loss": 0.1518, "step": 9429 }, { "epoch": 0.27510356496878463, "grad_norm": 0.819497659757066, "learning_rate": 8.505761995771061e-06, "loss": 0.1696, "step": 9430 }, { "epoch": 0.2751327381994282, "grad_norm": 0.917272713572263, "learning_rate": 8.505425129164198e-06, "loss": 0.1654, "step": 9431 }, { "epoch": 0.2751619114300718, "grad_norm": 1.0703230633705194, "learning_rate": 8.505088231261733e-06, "loss": 0.2001, "step": 9432 }, { "epoch": 0.27519108466071535, "grad_norm": 1.0264963239532443, "learning_rate": 8.50475130206668e-06, "loss": 0.173, "step": 9433 }, { "epoch": 0.2752202578913589, "grad_norm": 0.8749461127264444, "learning_rate": 8.504414341582043e-06, "loss": 0.1455, "step": 9434 }, { "epoch": 0.27524943112200245, "grad_norm": 0.7617261193989336, "learning_rate": 8.50407734981083e-06, "loss": 0.1396, "step": 9435 }, { "epoch": 0.275278604352646, "grad_norm": 0.8694962379193559, "learning_rate": 8.503740326756052e-06, "loss": 0.1468, "step": 9436 }, { "epoch": 0.27530777758328956, "grad_norm": 0.785782274498047, "learning_rate": 8.503403272420718e-06, "loss": 0.1726, "step": 9437 }, { "epoch": 0.2753369508139331, "grad_norm": 0.9280001677456263, "learning_rate": 8.503066186807833e-06, "loss": 0.1608, "step": 9438 }, { "epoch": 0.2753661240445767, "grad_norm": 0.7299456908462157, "learning_rate": 8.502729069920412e-06, "loss": 0.1505, "step": 9439 }, { "epoch": 0.2753952972752203, "grad_norm": 0.8092359613359347, "learning_rate": 8.502391921761462e-06, "loss": 0.1811, "step": 9440 }, { "epoch": 0.2754244705058638, "grad_norm": 0.9013716823187342, "learning_rate": 8.502054742333992e-06, "loss": 0.1653, "step": 9441 }, { "epoch": 0.2754536437365074, "grad_norm": 0.9886197901556381, "learning_rate": 8.501717531641012e-06, "loss": 0.1699, "step": 9442 }, { "epoch": 0.27548281696715093, "grad_norm": 0.8659035733514054, "learning_rate": 8.501380289685536e-06, "loss": 0.1467, "step": 9443 }, { "epoch": 0.2755119901977945, "grad_norm": 0.9791734451901493, "learning_rate": 8.501043016470572e-06, "loss": 0.1642, "step": 9444 }, { "epoch": 0.27554116342843804, "grad_norm": 0.7702547427096468, "learning_rate": 8.500705711999131e-06, "loss": 0.1478, "step": 9445 }, { "epoch": 0.27557033665908165, "grad_norm": 0.7875749626466658, "learning_rate": 8.500368376274226e-06, "loss": 0.1364, "step": 9446 }, { "epoch": 0.2755995098897252, "grad_norm": 0.8043242573994069, "learning_rate": 8.500031009298866e-06, "loss": 0.1662, "step": 9447 }, { "epoch": 0.27562868312036876, "grad_norm": 0.8088033639772654, "learning_rate": 8.499693611076067e-06, "loss": 0.1675, "step": 9448 }, { "epoch": 0.2756578563510123, "grad_norm": 0.8821848949372785, "learning_rate": 8.499356181608838e-06, "loss": 0.1531, "step": 9449 }, { "epoch": 0.27568702958165586, "grad_norm": 0.9874441023274517, "learning_rate": 8.499018720900192e-06, "loss": 0.1525, "step": 9450 }, { "epoch": 0.2757162028122994, "grad_norm": 0.7685083610959368, "learning_rate": 8.498681228953143e-06, "loss": 0.1653, "step": 9451 }, { "epoch": 0.27574537604294297, "grad_norm": 0.8866078506983858, "learning_rate": 8.498343705770702e-06, "loss": 0.1714, "step": 9452 }, { "epoch": 0.2757745492735866, "grad_norm": 0.9772436502810337, "learning_rate": 8.498006151355884e-06, "loss": 0.1776, "step": 9453 }, { "epoch": 0.27580372250423013, "grad_norm": 0.7397309353070088, "learning_rate": 8.497668565711702e-06, "loss": 0.1467, "step": 9454 }, { "epoch": 0.2758328957348737, "grad_norm": 1.0057359122021043, "learning_rate": 8.49733094884117e-06, "loss": 0.1493, "step": 9455 }, { "epoch": 0.27586206896551724, "grad_norm": 0.8780908547386509, "learning_rate": 8.496993300747302e-06, "loss": 0.1507, "step": 9456 }, { "epoch": 0.2758912421961608, "grad_norm": 0.9346196779829378, "learning_rate": 8.496655621433114e-06, "loss": 0.1378, "step": 9457 }, { "epoch": 0.27592041542680434, "grad_norm": 0.8424834810393838, "learning_rate": 8.496317910901619e-06, "loss": 0.1819, "step": 9458 }, { "epoch": 0.27594958865744795, "grad_norm": 0.8996576444638953, "learning_rate": 8.49598016915583e-06, "loss": 0.1397, "step": 9459 }, { "epoch": 0.2759787618880915, "grad_norm": 0.8663009147602474, "learning_rate": 8.495642396198767e-06, "loss": 0.1819, "step": 9460 }, { "epoch": 0.27600793511873506, "grad_norm": 0.8017268754536865, "learning_rate": 8.495304592033442e-06, "loss": 0.158, "step": 9461 }, { "epoch": 0.2760371083493786, "grad_norm": 0.7966559988575063, "learning_rate": 8.494966756662873e-06, "loss": 0.1544, "step": 9462 }, { "epoch": 0.27606628158002217, "grad_norm": 1.001757100621573, "learning_rate": 8.494628890090075e-06, "loss": 0.1584, "step": 9463 }, { "epoch": 0.2760954548106657, "grad_norm": 0.7751713493444756, "learning_rate": 8.494290992318063e-06, "loss": 0.1672, "step": 9464 }, { "epoch": 0.2761246280413093, "grad_norm": 0.6891467762655721, "learning_rate": 8.493953063349857e-06, "loss": 0.164, "step": 9465 }, { "epoch": 0.2761538012719529, "grad_norm": 0.7761812369257313, "learning_rate": 8.493615103188471e-06, "loss": 0.1585, "step": 9466 }, { "epoch": 0.27618297450259643, "grad_norm": 0.933854139528823, "learning_rate": 8.493277111836924e-06, "loss": 0.179, "step": 9467 }, { "epoch": 0.27621214773324, "grad_norm": 0.7236528190804553, "learning_rate": 8.492939089298233e-06, "loss": 0.1453, "step": 9468 }, { "epoch": 0.27624132096388354, "grad_norm": 0.7853520953354832, "learning_rate": 8.492601035575414e-06, "loss": 0.1698, "step": 9469 }, { "epoch": 0.2762704941945271, "grad_norm": 0.844320334934501, "learning_rate": 8.492262950671488e-06, "loss": 0.1439, "step": 9470 }, { "epoch": 0.27629966742517065, "grad_norm": 0.7426067971931045, "learning_rate": 8.491924834589472e-06, "loss": 0.14, "step": 9471 }, { "epoch": 0.2763288406558142, "grad_norm": 0.7512280644895644, "learning_rate": 8.491586687332385e-06, "loss": 0.1542, "step": 9472 }, { "epoch": 0.2763580138864578, "grad_norm": 0.7872685923164168, "learning_rate": 8.491248508903245e-06, "loss": 0.1692, "step": 9473 }, { "epoch": 0.27638718711710136, "grad_norm": 0.8758122269640476, "learning_rate": 8.490910299305073e-06, "loss": 0.2009, "step": 9474 }, { "epoch": 0.2764163603477449, "grad_norm": 0.9125559819894459, "learning_rate": 8.490572058540884e-06, "loss": 0.1255, "step": 9475 }, { "epoch": 0.27644553357838847, "grad_norm": 0.7770517431740911, "learning_rate": 8.490233786613703e-06, "loss": 0.1476, "step": 9476 }, { "epoch": 0.276474706809032, "grad_norm": 1.1274314402792818, "learning_rate": 8.489895483526548e-06, "loss": 0.1386, "step": 9477 }, { "epoch": 0.2765038800396756, "grad_norm": 1.9472034299099266, "learning_rate": 8.48955714928244e-06, "loss": 0.1423, "step": 9478 }, { "epoch": 0.27653305327031913, "grad_norm": 0.9428783171021343, "learning_rate": 8.489218783884399e-06, "loss": 0.1689, "step": 9479 }, { "epoch": 0.27656222650096274, "grad_norm": 1.1013294079397176, "learning_rate": 8.488880387335444e-06, "loss": 0.1655, "step": 9480 }, { "epoch": 0.2765913997316063, "grad_norm": 0.6931283174171184, "learning_rate": 8.488541959638599e-06, "loss": 0.1564, "step": 9481 }, { "epoch": 0.27662057296224984, "grad_norm": 1.1418914566979168, "learning_rate": 8.488203500796883e-06, "loss": 0.1539, "step": 9482 }, { "epoch": 0.2766497461928934, "grad_norm": 0.8650724531174553, "learning_rate": 8.48786501081332e-06, "loss": 0.1812, "step": 9483 }, { "epoch": 0.27667891942353695, "grad_norm": 0.8506173055726773, "learning_rate": 8.487526489690928e-06, "loss": 0.1628, "step": 9484 }, { "epoch": 0.2767080926541805, "grad_norm": 1.088489506144093, "learning_rate": 8.487187937432737e-06, "loss": 0.153, "step": 9485 }, { "epoch": 0.2767372658848241, "grad_norm": 0.9366524320763602, "learning_rate": 8.486849354041761e-06, "loss": 0.1483, "step": 9486 }, { "epoch": 0.27676643911546767, "grad_norm": 0.7796073498256102, "learning_rate": 8.486510739521027e-06, "loss": 0.1574, "step": 9487 }, { "epoch": 0.2767956123461112, "grad_norm": 0.806991179746802, "learning_rate": 8.486172093873557e-06, "loss": 0.1625, "step": 9488 }, { "epoch": 0.2768247855767548, "grad_norm": 0.8683205123133251, "learning_rate": 8.485833417102375e-06, "loss": 0.1441, "step": 9489 }, { "epoch": 0.2768539588073983, "grad_norm": 0.8616785742222477, "learning_rate": 8.485494709210506e-06, "loss": 0.1741, "step": 9490 }, { "epoch": 0.2768831320380419, "grad_norm": 0.8220876158014837, "learning_rate": 8.485155970200972e-06, "loss": 0.17, "step": 9491 }, { "epoch": 0.27691230526868543, "grad_norm": 0.7495882026570969, "learning_rate": 8.484817200076796e-06, "loss": 0.1583, "step": 9492 }, { "epoch": 0.27694147849932904, "grad_norm": 1.0705677075094195, "learning_rate": 8.484478398841003e-06, "loss": 0.1576, "step": 9493 }, { "epoch": 0.2769706517299726, "grad_norm": 0.5928858273713736, "learning_rate": 8.48413956649662e-06, "loss": 0.1551, "step": 9494 }, { "epoch": 0.27699982496061615, "grad_norm": 1.0627089887650565, "learning_rate": 8.483800703046672e-06, "loss": 0.1647, "step": 9495 }, { "epoch": 0.2770289981912597, "grad_norm": 0.7247697188724354, "learning_rate": 8.483461808494182e-06, "loss": 0.1297, "step": 9496 }, { "epoch": 0.27705817142190325, "grad_norm": 0.8934715469223729, "learning_rate": 8.483122882842177e-06, "loss": 0.1694, "step": 9497 }, { "epoch": 0.2770873446525468, "grad_norm": 0.9746300274126058, "learning_rate": 8.48278392609368e-06, "loss": 0.1727, "step": 9498 }, { "epoch": 0.27711651788319036, "grad_norm": 0.8096267207398115, "learning_rate": 8.482444938251722e-06, "loss": 0.146, "step": 9499 }, { "epoch": 0.27714569111383397, "grad_norm": 0.8776300149543106, "learning_rate": 8.482105919319325e-06, "loss": 0.1532, "step": 9500 }, { "epoch": 0.2771748643444775, "grad_norm": 0.9293347863312186, "learning_rate": 8.48176686929952e-06, "loss": 0.1595, "step": 9501 }, { "epoch": 0.2772040375751211, "grad_norm": 0.9528707708136305, "learning_rate": 8.481427788195329e-06, "loss": 0.1623, "step": 9502 }, { "epoch": 0.27723321080576463, "grad_norm": 0.9223367460630922, "learning_rate": 8.481088676009783e-06, "loss": 0.1412, "step": 9503 }, { "epoch": 0.2772623840364082, "grad_norm": 0.791567245986272, "learning_rate": 8.48074953274591e-06, "loss": 0.1523, "step": 9504 }, { "epoch": 0.27729155726705174, "grad_norm": 1.3407727074347742, "learning_rate": 8.480410358406735e-06, "loss": 0.1557, "step": 9505 }, { "epoch": 0.2773207304976953, "grad_norm": 0.706668075550768, "learning_rate": 8.480071152995285e-06, "loss": 0.1551, "step": 9506 }, { "epoch": 0.2773499037283389, "grad_norm": 0.8734217382710632, "learning_rate": 8.479731916514592e-06, "loss": 0.1492, "step": 9507 }, { "epoch": 0.27737907695898245, "grad_norm": 0.7561871450179262, "learning_rate": 8.479392648967684e-06, "loss": 0.1482, "step": 9508 }, { "epoch": 0.277408250189626, "grad_norm": 0.9958435262815148, "learning_rate": 8.479053350357587e-06, "loss": 0.1515, "step": 9509 }, { "epoch": 0.27743742342026956, "grad_norm": 1.005507379061439, "learning_rate": 8.478714020687334e-06, "loss": 0.1683, "step": 9510 }, { "epoch": 0.2774665966509131, "grad_norm": 0.8848887712016784, "learning_rate": 8.478374659959953e-06, "loss": 0.1549, "step": 9511 }, { "epoch": 0.27749576988155666, "grad_norm": 0.9422045957305167, "learning_rate": 8.478035268178473e-06, "loss": 0.1711, "step": 9512 }, { "epoch": 0.2775249431122002, "grad_norm": 0.7149755989431283, "learning_rate": 8.477695845345922e-06, "loss": 0.1752, "step": 9513 }, { "epoch": 0.2775541163428438, "grad_norm": 0.8667007312392415, "learning_rate": 8.477356391465336e-06, "loss": 0.1247, "step": 9514 }, { "epoch": 0.2775832895734874, "grad_norm": 0.9587353462019751, "learning_rate": 8.477016906539742e-06, "loss": 0.1755, "step": 9515 }, { "epoch": 0.27761246280413093, "grad_norm": 0.8329582723699501, "learning_rate": 8.476677390572167e-06, "loss": 0.171, "step": 9516 }, { "epoch": 0.2776416360347745, "grad_norm": 0.9834669720070736, "learning_rate": 8.47633784356565e-06, "loss": 0.1534, "step": 9517 }, { "epoch": 0.27767080926541804, "grad_norm": 0.6999872588764519, "learning_rate": 8.475998265523219e-06, "loss": 0.1417, "step": 9518 }, { "epoch": 0.2776999824960616, "grad_norm": 0.8368581948785472, "learning_rate": 8.475658656447903e-06, "loss": 0.1643, "step": 9519 }, { "epoch": 0.2777291557267052, "grad_norm": 0.8936896290158791, "learning_rate": 8.475319016342739e-06, "loss": 0.174, "step": 9520 }, { "epoch": 0.27775832895734875, "grad_norm": 0.9997248846685993, "learning_rate": 8.474979345210753e-06, "loss": 0.1411, "step": 9521 }, { "epoch": 0.2777875021879923, "grad_norm": 0.7923848027596828, "learning_rate": 8.474639643054983e-06, "loss": 0.1643, "step": 9522 }, { "epoch": 0.27781667541863586, "grad_norm": 0.7807511127005486, "learning_rate": 8.47429990987846e-06, "loss": 0.1337, "step": 9523 }, { "epoch": 0.2778458486492794, "grad_norm": 0.8125141038135897, "learning_rate": 8.473960145684217e-06, "loss": 0.1439, "step": 9524 }, { "epoch": 0.27787502187992297, "grad_norm": 0.8156162963513861, "learning_rate": 8.473620350475284e-06, "loss": 0.132, "step": 9525 }, { "epoch": 0.2779041951105665, "grad_norm": 0.6806743244156654, "learning_rate": 8.473280524254701e-06, "loss": 0.1529, "step": 9526 }, { "epoch": 0.27793336834121013, "grad_norm": 0.7297864166420638, "learning_rate": 8.472940667025497e-06, "loss": 0.1428, "step": 9527 }, { "epoch": 0.2779625415718537, "grad_norm": 0.91214494122196, "learning_rate": 8.472600778790709e-06, "loss": 0.1531, "step": 9528 }, { "epoch": 0.27799171480249724, "grad_norm": 0.8338650660903385, "learning_rate": 8.472260859553369e-06, "loss": 0.1624, "step": 9529 }, { "epoch": 0.2780208880331408, "grad_norm": 0.8308162285551156, "learning_rate": 8.471920909316514e-06, "loss": 0.1509, "step": 9530 }, { "epoch": 0.27805006126378434, "grad_norm": 0.8998302888977185, "learning_rate": 8.47158092808318e-06, "loss": 0.1563, "step": 9531 }, { "epoch": 0.2780792344944279, "grad_norm": 0.9354212032607615, "learning_rate": 8.471240915856396e-06, "loss": 0.1407, "step": 9532 }, { "epoch": 0.27810840772507145, "grad_norm": 0.7051327334938483, "learning_rate": 8.470900872639203e-06, "loss": 0.1387, "step": 9533 }, { "epoch": 0.27813758095571506, "grad_norm": 0.8094406200554851, "learning_rate": 8.470560798434636e-06, "loss": 0.174, "step": 9534 }, { "epoch": 0.2781667541863586, "grad_norm": 0.9467595326622934, "learning_rate": 8.47022069324573e-06, "loss": 0.175, "step": 9535 }, { "epoch": 0.27819592741700216, "grad_norm": 0.8008647499990702, "learning_rate": 8.469880557075525e-06, "loss": 0.174, "step": 9536 }, { "epoch": 0.2782251006476457, "grad_norm": 0.9201909051532778, "learning_rate": 8.469540389927052e-06, "loss": 0.1502, "step": 9537 }, { "epoch": 0.27825427387828927, "grad_norm": 0.7352171064832371, "learning_rate": 8.46920019180335e-06, "loss": 0.1471, "step": 9538 }, { "epoch": 0.2782834471089328, "grad_norm": 0.9244963989311303, "learning_rate": 8.468859962707459e-06, "loss": 0.1441, "step": 9539 }, { "epoch": 0.2783126203395764, "grad_norm": 0.839838171042912, "learning_rate": 8.468519702642413e-06, "loss": 0.152, "step": 9540 }, { "epoch": 0.27834179357022, "grad_norm": 0.7189624686847728, "learning_rate": 8.468179411611252e-06, "loss": 0.1429, "step": 9541 }, { "epoch": 0.27837096680086354, "grad_norm": 0.8024451840621278, "learning_rate": 8.467839089617011e-06, "loss": 0.1433, "step": 9542 }, { "epoch": 0.2784001400315071, "grad_norm": 0.9133265010862672, "learning_rate": 8.467498736662732e-06, "loss": 0.1524, "step": 9543 }, { "epoch": 0.27842931326215065, "grad_norm": 0.7505854001962619, "learning_rate": 8.467158352751453e-06, "loss": 0.1634, "step": 9544 }, { "epoch": 0.2784584864927942, "grad_norm": 0.8928417787444326, "learning_rate": 8.466817937886211e-06, "loss": 0.1807, "step": 9545 }, { "epoch": 0.27848765972343775, "grad_norm": 1.0277759705271292, "learning_rate": 8.466477492070046e-06, "loss": 0.1535, "step": 9546 }, { "epoch": 0.27851683295408136, "grad_norm": 0.7637001525940128, "learning_rate": 8.466137015305997e-06, "loss": 0.1444, "step": 9547 }, { "epoch": 0.2785460061847249, "grad_norm": 0.7501390401123026, "learning_rate": 8.465796507597106e-06, "loss": 0.1741, "step": 9548 }, { "epoch": 0.27857517941536847, "grad_norm": 0.9450607457332094, "learning_rate": 8.465455968946409e-06, "loss": 0.1468, "step": 9549 }, { "epoch": 0.278604352646012, "grad_norm": 0.8179128995100446, "learning_rate": 8.465115399356948e-06, "loss": 0.1581, "step": 9550 }, { "epoch": 0.2786335258766556, "grad_norm": 0.8206187225207779, "learning_rate": 8.464774798831766e-06, "loss": 0.1693, "step": 9551 }, { "epoch": 0.2786626991072991, "grad_norm": 1.0282827859906551, "learning_rate": 8.464434167373901e-06, "loss": 0.1726, "step": 9552 }, { "epoch": 0.2786918723379427, "grad_norm": 0.8731238924757797, "learning_rate": 8.464093504986395e-06, "loss": 0.1881, "step": 9553 }, { "epoch": 0.2787210455685863, "grad_norm": 0.9374981958347302, "learning_rate": 8.463752811672289e-06, "loss": 0.1401, "step": 9554 }, { "epoch": 0.27875021879922984, "grad_norm": 0.7063176768742483, "learning_rate": 8.463412087434624e-06, "loss": 0.1391, "step": 9555 }, { "epoch": 0.2787793920298734, "grad_norm": 0.8843815844052086, "learning_rate": 8.463071332276442e-06, "loss": 0.1707, "step": 9556 }, { "epoch": 0.27880856526051695, "grad_norm": 1.0458012025569847, "learning_rate": 8.462730546200788e-06, "loss": 0.1432, "step": 9557 }, { "epoch": 0.2788377384911605, "grad_norm": 0.8036876244199916, "learning_rate": 8.4623897292107e-06, "loss": 0.1602, "step": 9558 }, { "epoch": 0.27886691172180406, "grad_norm": 0.9752595131467424, "learning_rate": 8.462048881309226e-06, "loss": 0.1565, "step": 9559 }, { "epoch": 0.2788960849524476, "grad_norm": 1.0282166667261705, "learning_rate": 8.461708002499405e-06, "loss": 0.1741, "step": 9560 }, { "epoch": 0.2789252581830912, "grad_norm": 0.8807169214872331, "learning_rate": 8.46136709278428e-06, "loss": 0.1444, "step": 9561 }, { "epoch": 0.27895443141373477, "grad_norm": 0.8867295328036336, "learning_rate": 8.461026152166896e-06, "loss": 0.1609, "step": 9562 }, { "epoch": 0.2789836046443783, "grad_norm": 1.0239980761347185, "learning_rate": 8.460685180650297e-06, "loss": 0.1756, "step": 9563 }, { "epoch": 0.2790127778750219, "grad_norm": 0.8283350317446394, "learning_rate": 8.460344178237528e-06, "loss": 0.133, "step": 9564 }, { "epoch": 0.27904195110566543, "grad_norm": 0.9790011158432254, "learning_rate": 8.460003144931632e-06, "loss": 0.1457, "step": 9565 }, { "epoch": 0.279071124336309, "grad_norm": 0.9965715972386453, "learning_rate": 8.459662080735653e-06, "loss": 0.1542, "step": 9566 }, { "epoch": 0.27910029756695254, "grad_norm": 0.7819092938540751, "learning_rate": 8.459320985652635e-06, "loss": 0.1477, "step": 9567 }, { "epoch": 0.27912947079759615, "grad_norm": 0.8493448943608398, "learning_rate": 8.458979859685628e-06, "loss": 0.1371, "step": 9568 }, { "epoch": 0.2791586440282397, "grad_norm": 1.0223759261181469, "learning_rate": 8.458638702837673e-06, "loss": 0.1449, "step": 9569 }, { "epoch": 0.27918781725888325, "grad_norm": 0.9055193244735283, "learning_rate": 8.45829751511182e-06, "loss": 0.144, "step": 9570 }, { "epoch": 0.2792169904895268, "grad_norm": 0.9511476417475678, "learning_rate": 8.457956296511109e-06, "loss": 0.1478, "step": 9571 }, { "epoch": 0.27924616372017036, "grad_norm": 0.9198111942266042, "learning_rate": 8.457615047038592e-06, "loss": 0.1505, "step": 9572 }, { "epoch": 0.2792753369508139, "grad_norm": 0.698641892426292, "learning_rate": 8.45727376669731e-06, "loss": 0.1462, "step": 9573 }, { "epoch": 0.2793045101814575, "grad_norm": 0.9362354068875892, "learning_rate": 8.456932455490317e-06, "loss": 0.136, "step": 9574 }, { "epoch": 0.2793336834121011, "grad_norm": 0.982646526197458, "learning_rate": 8.456591113420656e-06, "loss": 0.161, "step": 9575 }, { "epoch": 0.27936285664274463, "grad_norm": 0.7905049599781792, "learning_rate": 8.45624974049137e-06, "loss": 0.1616, "step": 9576 }, { "epoch": 0.2793920298733882, "grad_norm": 0.7745858645449902, "learning_rate": 8.455908336705515e-06, "loss": 0.1476, "step": 9577 }, { "epoch": 0.27942120310403173, "grad_norm": 0.941160734241985, "learning_rate": 8.455566902066138e-06, "loss": 0.1436, "step": 9578 }, { "epoch": 0.2794503763346753, "grad_norm": 0.9515311332832856, "learning_rate": 8.45522543657628e-06, "loss": 0.1519, "step": 9579 }, { "epoch": 0.27947954956531884, "grad_norm": 0.7975605752727278, "learning_rate": 8.454883940238995e-06, "loss": 0.1649, "step": 9580 }, { "epoch": 0.27950872279596245, "grad_norm": 0.9336212618189144, "learning_rate": 8.454542413057335e-06, "loss": 0.1767, "step": 9581 }, { "epoch": 0.279537896026606, "grad_norm": 1.0130532601655777, "learning_rate": 8.45420085503434e-06, "loss": 0.1596, "step": 9582 }, { "epoch": 0.27956706925724956, "grad_norm": 1.213368749096683, "learning_rate": 8.453859266173065e-06, "loss": 0.1501, "step": 9583 }, { "epoch": 0.2795962424878931, "grad_norm": 0.8912051490678606, "learning_rate": 8.453517646476561e-06, "loss": 0.1771, "step": 9584 }, { "epoch": 0.27962541571853666, "grad_norm": 1.0348282245629992, "learning_rate": 8.453175995947876e-06, "loss": 0.1435, "step": 9585 }, { "epoch": 0.2796545889491802, "grad_norm": 0.9865785864347076, "learning_rate": 8.452834314590059e-06, "loss": 0.1538, "step": 9586 }, { "epoch": 0.27968376217982377, "grad_norm": 1.4387595409228264, "learning_rate": 8.452492602406162e-06, "loss": 0.1444, "step": 9587 }, { "epoch": 0.2797129354104674, "grad_norm": 0.9294671559167269, "learning_rate": 8.452150859399234e-06, "loss": 0.1581, "step": 9588 }, { "epoch": 0.27974210864111093, "grad_norm": 0.9480256468071755, "learning_rate": 8.451809085572327e-06, "loss": 0.1433, "step": 9589 }, { "epoch": 0.2797712818717545, "grad_norm": 0.8572738889356397, "learning_rate": 8.451467280928494e-06, "loss": 0.1634, "step": 9590 }, { "epoch": 0.27980045510239804, "grad_norm": 0.8235818251921146, "learning_rate": 8.451125445470784e-06, "loss": 0.1709, "step": 9591 }, { "epoch": 0.2798296283330416, "grad_norm": 0.9078272402439236, "learning_rate": 8.450783579202251e-06, "loss": 0.1835, "step": 9592 }, { "epoch": 0.27985880156368514, "grad_norm": 0.8456187301304922, "learning_rate": 8.450441682125944e-06, "loss": 0.1569, "step": 9593 }, { "epoch": 0.2798879747943287, "grad_norm": 0.8309217526061093, "learning_rate": 8.45009975424492e-06, "loss": 0.176, "step": 9594 }, { "epoch": 0.2799171480249723, "grad_norm": 0.9969650659314814, "learning_rate": 8.449757795562229e-06, "loss": 0.1537, "step": 9595 }, { "epoch": 0.27994632125561586, "grad_norm": 0.6570499741744189, "learning_rate": 8.44941580608092e-06, "loss": 0.1211, "step": 9596 }, { "epoch": 0.2799754944862594, "grad_norm": 0.9583072138632722, "learning_rate": 8.449073785804054e-06, "loss": 0.1848, "step": 9597 }, { "epoch": 0.28000466771690297, "grad_norm": 0.9519186905717256, "learning_rate": 8.448731734734678e-06, "loss": 0.1884, "step": 9598 }, { "epoch": 0.2800338409475465, "grad_norm": 0.8923949258864584, "learning_rate": 8.448389652875852e-06, "loss": 0.1632, "step": 9599 }, { "epoch": 0.2800630141781901, "grad_norm": 1.0578728689403207, "learning_rate": 8.448047540230624e-06, "loss": 0.1603, "step": 9600 }, { "epoch": 0.2800921874088337, "grad_norm": 0.7595943667885049, "learning_rate": 8.447705396802051e-06, "loss": 0.1552, "step": 9601 }, { "epoch": 0.28012136063947723, "grad_norm": 0.7514324199529084, "learning_rate": 8.447363222593186e-06, "loss": 0.1491, "step": 9602 }, { "epoch": 0.2801505338701208, "grad_norm": 0.978949857641314, "learning_rate": 8.447021017607087e-06, "loss": 0.1555, "step": 9603 }, { "epoch": 0.28017970710076434, "grad_norm": 0.8809412748670732, "learning_rate": 8.446678781846806e-06, "loss": 0.1505, "step": 9604 }, { "epoch": 0.2802088803314079, "grad_norm": 0.7514678066878479, "learning_rate": 8.4463365153154e-06, "loss": 0.1383, "step": 9605 }, { "epoch": 0.28023805356205145, "grad_norm": 0.7846690672438622, "learning_rate": 8.445994218015923e-06, "loss": 0.1549, "step": 9606 }, { "epoch": 0.280267226792695, "grad_norm": 0.9113320114314601, "learning_rate": 8.445651889951435e-06, "loss": 0.142, "step": 9607 }, { "epoch": 0.2802964000233386, "grad_norm": 0.7934733096175987, "learning_rate": 8.445309531124988e-06, "loss": 0.1468, "step": 9608 }, { "epoch": 0.28032557325398216, "grad_norm": 0.7543301037519203, "learning_rate": 8.44496714153964e-06, "loss": 0.1757, "step": 9609 }, { "epoch": 0.2803547464846257, "grad_norm": 0.8455391980296363, "learning_rate": 8.444624721198447e-06, "loss": 0.1423, "step": 9610 }, { "epoch": 0.28038391971526927, "grad_norm": 0.9840328904892477, "learning_rate": 8.444282270104467e-06, "loss": 0.1475, "step": 9611 }, { "epoch": 0.2804130929459128, "grad_norm": 0.7572645399233074, "learning_rate": 8.443939788260757e-06, "loss": 0.1761, "step": 9612 }, { "epoch": 0.2804422661765564, "grad_norm": 1.0025945055821288, "learning_rate": 8.443597275670376e-06, "loss": 0.1563, "step": 9613 }, { "epoch": 0.28047143940719993, "grad_norm": 0.9968358292689676, "learning_rate": 8.44325473233638e-06, "loss": 0.1589, "step": 9614 }, { "epoch": 0.28050061263784354, "grad_norm": 0.8747427905731093, "learning_rate": 8.442912158261828e-06, "loss": 0.1516, "step": 9615 }, { "epoch": 0.2805297858684871, "grad_norm": 0.8879882495231916, "learning_rate": 8.442569553449777e-06, "loss": 0.1297, "step": 9616 }, { "epoch": 0.28055895909913064, "grad_norm": 0.8453425011322785, "learning_rate": 8.442226917903287e-06, "loss": 0.1619, "step": 9617 }, { "epoch": 0.2805881323297742, "grad_norm": 0.9752282385155893, "learning_rate": 8.441884251625419e-06, "loss": 0.1631, "step": 9618 }, { "epoch": 0.28061730556041775, "grad_norm": 1.0441464487675278, "learning_rate": 8.441541554619228e-06, "loss": 0.1432, "step": 9619 }, { "epoch": 0.2806464787910613, "grad_norm": 0.838889876502053, "learning_rate": 8.441198826887776e-06, "loss": 0.1543, "step": 9620 }, { "epoch": 0.28067565202170486, "grad_norm": 0.9042981784842837, "learning_rate": 8.440856068434122e-06, "loss": 0.1393, "step": 9621 }, { "epoch": 0.28070482525234847, "grad_norm": 0.7590671300959784, "learning_rate": 8.440513279261327e-06, "loss": 0.1466, "step": 9622 }, { "epoch": 0.280733998482992, "grad_norm": 0.786209439269673, "learning_rate": 8.44017045937245e-06, "loss": 0.1441, "step": 9623 }, { "epoch": 0.2807631717136356, "grad_norm": 0.8337456741412318, "learning_rate": 8.439827608770552e-06, "loss": 0.1631, "step": 9624 }, { "epoch": 0.2807923449442791, "grad_norm": 0.699361095548608, "learning_rate": 8.439484727458696e-06, "loss": 0.1478, "step": 9625 }, { "epoch": 0.2808215181749227, "grad_norm": 0.7906992172976698, "learning_rate": 8.43914181543994e-06, "loss": 0.1605, "step": 9626 }, { "epoch": 0.28085069140556623, "grad_norm": 0.716958191351726, "learning_rate": 8.438798872717349e-06, "loss": 0.1668, "step": 9627 }, { "epoch": 0.2808798646362098, "grad_norm": 0.9336564764011152, "learning_rate": 8.43845589929398e-06, "loss": 0.1394, "step": 9628 }, { "epoch": 0.2809090378668534, "grad_norm": 0.921589429799974, "learning_rate": 8.438112895172899e-06, "loss": 0.1426, "step": 9629 }, { "epoch": 0.28093821109749695, "grad_norm": 0.722214393748142, "learning_rate": 8.437769860357166e-06, "loss": 0.1551, "step": 9630 }, { "epoch": 0.2809673843281405, "grad_norm": 0.7821484680067312, "learning_rate": 8.437426794849845e-06, "loss": 0.1826, "step": 9631 }, { "epoch": 0.28099655755878405, "grad_norm": 0.8398165397558471, "learning_rate": 8.437083698653998e-06, "loss": 0.1399, "step": 9632 }, { "epoch": 0.2810257307894276, "grad_norm": 1.0337390383108171, "learning_rate": 8.436740571772689e-06, "loss": 0.1588, "step": 9633 }, { "epoch": 0.28105490402007116, "grad_norm": 0.7992678715725394, "learning_rate": 8.436397414208979e-06, "loss": 0.1614, "step": 9634 }, { "epoch": 0.28108407725071477, "grad_norm": 0.7817699595069072, "learning_rate": 8.436054225965933e-06, "loss": 0.1623, "step": 9635 }, { "epoch": 0.2811132504813583, "grad_norm": 0.822464347563449, "learning_rate": 8.435711007046616e-06, "loss": 0.147, "step": 9636 }, { "epoch": 0.2811424237120019, "grad_norm": 0.8761331626179581, "learning_rate": 8.435367757454092e-06, "loss": 0.15, "step": 9637 }, { "epoch": 0.28117159694264543, "grad_norm": 0.8090730886916009, "learning_rate": 8.435024477191426e-06, "loss": 0.1492, "step": 9638 }, { "epoch": 0.281200770173289, "grad_norm": 0.832891485574726, "learning_rate": 8.434681166261679e-06, "loss": 0.1448, "step": 9639 }, { "epoch": 0.28122994340393254, "grad_norm": 0.8954288258009087, "learning_rate": 8.434337824667918e-06, "loss": 0.1782, "step": 9640 }, { "epoch": 0.2812591166345761, "grad_norm": 0.7956031882393465, "learning_rate": 8.43399445241321e-06, "loss": 0.1666, "step": 9641 }, { "epoch": 0.2812882898652197, "grad_norm": 1.387686999700781, "learning_rate": 8.433651049500619e-06, "loss": 0.1595, "step": 9642 }, { "epoch": 0.28131746309586325, "grad_norm": 0.804597063386392, "learning_rate": 8.433307615933211e-06, "loss": 0.1454, "step": 9643 }, { "epoch": 0.2813466363265068, "grad_norm": 0.7854458337786944, "learning_rate": 8.432964151714052e-06, "loss": 0.1516, "step": 9644 }, { "epoch": 0.28137580955715036, "grad_norm": 0.8839624480045271, "learning_rate": 8.43262065684621e-06, "loss": 0.193, "step": 9645 }, { "epoch": 0.2814049827877939, "grad_norm": 0.7793352517051193, "learning_rate": 8.432277131332749e-06, "loss": 0.1507, "step": 9646 }, { "epoch": 0.28143415601843746, "grad_norm": 0.9059971897460766, "learning_rate": 8.431933575176737e-06, "loss": 0.1532, "step": 9647 }, { "epoch": 0.281463329249081, "grad_norm": 0.9085606820922222, "learning_rate": 8.43158998838124e-06, "loss": 0.1561, "step": 9648 }, { "epoch": 0.2814925024797246, "grad_norm": 1.09575395504824, "learning_rate": 8.431246370949328e-06, "loss": 0.1497, "step": 9649 }, { "epoch": 0.2815216757103682, "grad_norm": 0.7822475216933066, "learning_rate": 8.430902722884068e-06, "loss": 0.1655, "step": 9650 }, { "epoch": 0.28155084894101173, "grad_norm": 0.971521671599174, "learning_rate": 8.43055904418853e-06, "loss": 0.1612, "step": 9651 }, { "epoch": 0.2815800221716553, "grad_norm": 0.7586145780635484, "learning_rate": 8.430215334865775e-06, "loss": 0.152, "step": 9652 }, { "epoch": 0.28160919540229884, "grad_norm": 0.8031869234102891, "learning_rate": 8.429871594918879e-06, "loss": 0.1523, "step": 9653 }, { "epoch": 0.2816383686329424, "grad_norm": 0.751065033773529, "learning_rate": 8.429527824350908e-06, "loss": 0.1598, "step": 9654 }, { "epoch": 0.28166754186358595, "grad_norm": 0.6919716421043798, "learning_rate": 8.429184023164932e-06, "loss": 0.1487, "step": 9655 }, { "epoch": 0.28169671509422956, "grad_norm": 1.2130296148788566, "learning_rate": 8.428840191364017e-06, "loss": 0.1484, "step": 9656 }, { "epoch": 0.2817258883248731, "grad_norm": 0.8612543317526583, "learning_rate": 8.428496328951237e-06, "loss": 0.1561, "step": 9657 }, { "epoch": 0.28175506155551666, "grad_norm": 0.8319945336881983, "learning_rate": 8.42815243592966e-06, "loss": 0.1498, "step": 9658 }, { "epoch": 0.2817842347861602, "grad_norm": 0.8900715989334957, "learning_rate": 8.427808512302358e-06, "loss": 0.1557, "step": 9659 }, { "epoch": 0.28181340801680377, "grad_norm": 0.9209050378328874, "learning_rate": 8.427464558072397e-06, "loss": 0.1565, "step": 9660 }, { "epoch": 0.2818425812474473, "grad_norm": 1.0024738651345637, "learning_rate": 8.427120573242853e-06, "loss": 0.1735, "step": 9661 }, { "epoch": 0.28187175447809093, "grad_norm": 0.904717257631177, "learning_rate": 8.426776557816793e-06, "loss": 0.1865, "step": 9662 }, { "epoch": 0.2819009277087345, "grad_norm": 0.9535330914955639, "learning_rate": 8.426432511797292e-06, "loss": 0.1484, "step": 9663 }, { "epoch": 0.28193010093937804, "grad_norm": 0.83583328816481, "learning_rate": 8.426088435187418e-06, "loss": 0.1493, "step": 9664 }, { "epoch": 0.2819592741700216, "grad_norm": 0.9611065676349803, "learning_rate": 8.425744327990244e-06, "loss": 0.163, "step": 9665 }, { "epoch": 0.28198844740066514, "grad_norm": 0.9520432319743514, "learning_rate": 8.425400190208842e-06, "loss": 0.1459, "step": 9666 }, { "epoch": 0.2820176206313087, "grad_norm": 0.891136038748808, "learning_rate": 8.425056021846285e-06, "loss": 0.1744, "step": 9667 }, { "epoch": 0.28204679386195225, "grad_norm": 0.8104828816106517, "learning_rate": 8.424711822905647e-06, "loss": 0.1303, "step": 9668 }, { "epoch": 0.28207596709259586, "grad_norm": 0.6664588376329665, "learning_rate": 8.42436759339e-06, "loss": 0.1399, "step": 9669 }, { "epoch": 0.2821051403232394, "grad_norm": 0.8861763069231229, "learning_rate": 8.424023333302414e-06, "loss": 0.1526, "step": 9670 }, { "epoch": 0.28213431355388296, "grad_norm": 0.7632870412812516, "learning_rate": 8.423679042645967e-06, "loss": 0.1447, "step": 9671 }, { "epoch": 0.2821634867845265, "grad_norm": 0.7678346797721908, "learning_rate": 8.423334721423729e-06, "loss": 0.1472, "step": 9672 }, { "epoch": 0.28219266001517007, "grad_norm": 0.8287153773092156, "learning_rate": 8.422990369638778e-06, "loss": 0.1729, "step": 9673 }, { "epoch": 0.2822218332458136, "grad_norm": 0.7733026353657988, "learning_rate": 8.422645987294184e-06, "loss": 0.1572, "step": 9674 }, { "epoch": 0.2822510064764572, "grad_norm": 0.9097808971524729, "learning_rate": 8.422301574393025e-06, "loss": 0.1423, "step": 9675 }, { "epoch": 0.2822801797071008, "grad_norm": 0.821824437294448, "learning_rate": 8.421957130938374e-06, "loss": 0.1319, "step": 9676 }, { "epoch": 0.28230935293774434, "grad_norm": 0.9400066976953318, "learning_rate": 8.421612656933306e-06, "loss": 0.1534, "step": 9677 }, { "epoch": 0.2823385261683879, "grad_norm": 0.9436382052241648, "learning_rate": 8.421268152380898e-06, "loss": 0.1781, "step": 9678 }, { "epoch": 0.28236769939903145, "grad_norm": 0.8874724888722365, "learning_rate": 8.420923617284224e-06, "loss": 0.1608, "step": 9679 }, { "epoch": 0.282396872629675, "grad_norm": 0.7266858505170246, "learning_rate": 8.420579051646363e-06, "loss": 0.1562, "step": 9680 }, { "epoch": 0.28242604586031855, "grad_norm": 0.7521659709929877, "learning_rate": 8.420234455470386e-06, "loss": 0.1501, "step": 9681 }, { "epoch": 0.2824552190909621, "grad_norm": 1.1471401215465622, "learning_rate": 8.419889828759374e-06, "loss": 0.1311, "step": 9682 }, { "epoch": 0.2824843923216057, "grad_norm": 0.688710553463838, "learning_rate": 8.419545171516399e-06, "loss": 0.1613, "step": 9683 }, { "epoch": 0.28251356555224927, "grad_norm": 0.786648071437947, "learning_rate": 8.419200483744544e-06, "loss": 0.1601, "step": 9684 }, { "epoch": 0.2825427387828928, "grad_norm": 0.7593508447780223, "learning_rate": 8.418855765446883e-06, "loss": 0.1436, "step": 9685 }, { "epoch": 0.2825719120135364, "grad_norm": 0.8258345204164949, "learning_rate": 8.418511016626492e-06, "loss": 0.1522, "step": 9686 }, { "epoch": 0.28260108524417993, "grad_norm": 0.6930969976952299, "learning_rate": 8.418166237286453e-06, "loss": 0.1307, "step": 9687 }, { "epoch": 0.2826302584748235, "grad_norm": 0.775159952248678, "learning_rate": 8.41782142742984e-06, "loss": 0.1471, "step": 9688 }, { "epoch": 0.2826594317054671, "grad_norm": 0.9026015798308387, "learning_rate": 8.417476587059735e-06, "loss": 0.1495, "step": 9689 }, { "epoch": 0.28268860493611064, "grad_norm": 0.8061822204561225, "learning_rate": 8.417131716179212e-06, "loss": 0.1628, "step": 9690 }, { "epoch": 0.2827177781667542, "grad_norm": 0.7828662242122386, "learning_rate": 8.416786814791355e-06, "loss": 0.1493, "step": 9691 }, { "epoch": 0.28274695139739775, "grad_norm": 0.929254933961137, "learning_rate": 8.416441882899241e-06, "loss": 0.1742, "step": 9692 }, { "epoch": 0.2827761246280413, "grad_norm": 0.7516171096257392, "learning_rate": 8.41609692050595e-06, "loss": 0.1509, "step": 9693 }, { "epoch": 0.28280529785868486, "grad_norm": 0.7647455961180056, "learning_rate": 8.415751927614559e-06, "loss": 0.1315, "step": 9694 }, { "epoch": 0.2828344710893284, "grad_norm": 0.8782312479853289, "learning_rate": 8.415406904228151e-06, "loss": 0.1624, "step": 9695 }, { "epoch": 0.282863644319972, "grad_norm": 1.0164220231613077, "learning_rate": 8.415061850349806e-06, "loss": 0.1498, "step": 9696 }, { "epoch": 0.28289281755061557, "grad_norm": 0.8547393999235816, "learning_rate": 8.414716765982604e-06, "loss": 0.1562, "step": 9697 }, { "epoch": 0.2829219907812591, "grad_norm": 1.264434929576917, "learning_rate": 8.414371651129627e-06, "loss": 0.1452, "step": 9698 }, { "epoch": 0.2829511640119027, "grad_norm": 1.0139116707185494, "learning_rate": 8.414026505793953e-06, "loss": 0.1603, "step": 9699 }, { "epoch": 0.28298033724254623, "grad_norm": 0.9295979925804307, "learning_rate": 8.413681329978666e-06, "loss": 0.1654, "step": 9700 }, { "epoch": 0.2830095104731898, "grad_norm": 1.2400910685047386, "learning_rate": 8.413336123686847e-06, "loss": 0.1797, "step": 9701 }, { "epoch": 0.28303868370383334, "grad_norm": 1.0754853612442141, "learning_rate": 8.412990886921579e-06, "loss": 0.1712, "step": 9702 }, { "epoch": 0.28306785693447695, "grad_norm": 0.9555839236093209, "learning_rate": 8.412645619685943e-06, "loss": 0.1592, "step": 9703 }, { "epoch": 0.2830970301651205, "grad_norm": 1.0459397894466103, "learning_rate": 8.41230032198302e-06, "loss": 0.157, "step": 9704 }, { "epoch": 0.28312620339576405, "grad_norm": 0.8341881986997727, "learning_rate": 8.411954993815894e-06, "loss": 0.1588, "step": 9705 }, { "epoch": 0.2831553766264076, "grad_norm": 0.8119137135313987, "learning_rate": 8.41160963518765e-06, "loss": 0.1536, "step": 9706 }, { "epoch": 0.28318454985705116, "grad_norm": 0.8152878389508309, "learning_rate": 8.411264246101369e-06, "loss": 0.1603, "step": 9707 }, { "epoch": 0.2832137230876947, "grad_norm": 0.8343559062998911, "learning_rate": 8.410918826560134e-06, "loss": 0.1478, "step": 9708 }, { "epoch": 0.28324289631833827, "grad_norm": 0.7597627365743722, "learning_rate": 8.410573376567031e-06, "loss": 0.1563, "step": 9709 }, { "epoch": 0.2832720695489819, "grad_norm": 0.903494677094035, "learning_rate": 8.410227896125142e-06, "loss": 0.1692, "step": 9710 }, { "epoch": 0.28330124277962543, "grad_norm": 1.3270150784866643, "learning_rate": 8.409882385237555e-06, "loss": 0.1397, "step": 9711 }, { "epoch": 0.283330416010269, "grad_norm": 0.8830790643507895, "learning_rate": 8.409536843907351e-06, "loss": 0.1489, "step": 9712 }, { "epoch": 0.28335958924091253, "grad_norm": 1.068664797549712, "learning_rate": 8.409191272137616e-06, "loss": 0.1576, "step": 9713 }, { "epoch": 0.2833887624715561, "grad_norm": 0.7253653285187763, "learning_rate": 8.408845669931434e-06, "loss": 0.1393, "step": 9714 }, { "epoch": 0.28341793570219964, "grad_norm": 0.7163191072519595, "learning_rate": 8.408500037291894e-06, "loss": 0.1533, "step": 9715 }, { "epoch": 0.28344710893284325, "grad_norm": 0.8467666868444809, "learning_rate": 8.408154374222076e-06, "loss": 0.1556, "step": 9716 }, { "epoch": 0.2834762821634868, "grad_norm": 0.8696130561027053, "learning_rate": 8.407808680725072e-06, "loss": 0.1469, "step": 9717 }, { "epoch": 0.28350545539413036, "grad_norm": 0.7964640245797752, "learning_rate": 8.407462956803965e-06, "loss": 0.151, "step": 9718 }, { "epoch": 0.2835346286247739, "grad_norm": 0.7264124165991117, "learning_rate": 8.407117202461841e-06, "loss": 0.1548, "step": 9719 }, { "epoch": 0.28356380185541746, "grad_norm": 0.7390467530436048, "learning_rate": 8.406771417701788e-06, "loss": 0.1956, "step": 9720 }, { "epoch": 0.283592975086061, "grad_norm": 0.9587720822857085, "learning_rate": 8.406425602526895e-06, "loss": 0.1286, "step": 9721 }, { "epoch": 0.28362214831670457, "grad_norm": 0.8603120053150856, "learning_rate": 8.406079756940246e-06, "loss": 0.1409, "step": 9722 }, { "epoch": 0.2836513215473482, "grad_norm": 1.0536283198004628, "learning_rate": 8.40573388094493e-06, "loss": 0.1568, "step": 9723 }, { "epoch": 0.28368049477799173, "grad_norm": 0.7891749487967007, "learning_rate": 8.405387974544036e-06, "loss": 0.139, "step": 9724 }, { "epoch": 0.2837096680086353, "grad_norm": 0.8603404080694517, "learning_rate": 8.405042037740649e-06, "loss": 0.1345, "step": 9725 }, { "epoch": 0.28373884123927884, "grad_norm": 1.4079981681800657, "learning_rate": 8.404696070537861e-06, "loss": 0.161, "step": 9726 }, { "epoch": 0.2837680144699224, "grad_norm": 0.79156746578909, "learning_rate": 8.404350072938758e-06, "loss": 0.142, "step": 9727 }, { "epoch": 0.28379718770056594, "grad_norm": 0.9428572683118923, "learning_rate": 8.404004044946432e-06, "loss": 0.1466, "step": 9728 }, { "epoch": 0.2838263609312095, "grad_norm": 0.6755371678286886, "learning_rate": 8.40365798656397e-06, "loss": 0.134, "step": 9729 }, { "epoch": 0.2838555341618531, "grad_norm": 0.8582491479733165, "learning_rate": 8.403311897794461e-06, "loss": 0.1629, "step": 9730 }, { "epoch": 0.28388470739249666, "grad_norm": 0.909304447218576, "learning_rate": 8.402965778640996e-06, "loss": 0.1608, "step": 9731 }, { "epoch": 0.2839138806231402, "grad_norm": 0.7001333232973245, "learning_rate": 8.402619629106667e-06, "loss": 0.1592, "step": 9732 }, { "epoch": 0.28394305385378377, "grad_norm": 0.8150591980831794, "learning_rate": 8.40227344919456e-06, "loss": 0.1535, "step": 9733 }, { "epoch": 0.2839722270844273, "grad_norm": 0.7028752977026115, "learning_rate": 8.401927238907768e-06, "loss": 0.1527, "step": 9734 }, { "epoch": 0.2840014003150709, "grad_norm": 0.7821393443659244, "learning_rate": 8.401580998249383e-06, "loss": 0.1499, "step": 9735 }, { "epoch": 0.2840305735457144, "grad_norm": 0.8229367088023415, "learning_rate": 8.401234727222495e-06, "loss": 0.1568, "step": 9736 }, { "epoch": 0.28405974677635804, "grad_norm": 0.8706227628537601, "learning_rate": 8.400888425830193e-06, "loss": 0.1495, "step": 9737 }, { "epoch": 0.2840889200070016, "grad_norm": 0.6997778137587798, "learning_rate": 8.400542094075572e-06, "loss": 0.1346, "step": 9738 }, { "epoch": 0.28411809323764514, "grad_norm": 0.7620811191556768, "learning_rate": 8.400195731961725e-06, "loss": 0.1352, "step": 9739 }, { "epoch": 0.2841472664682887, "grad_norm": 0.740916557473217, "learning_rate": 8.39984933949174e-06, "loss": 0.1481, "step": 9740 }, { "epoch": 0.28417643969893225, "grad_norm": 0.6581033953325057, "learning_rate": 8.399502916668712e-06, "loss": 0.1644, "step": 9741 }, { "epoch": 0.2842056129295758, "grad_norm": 0.8227219664659927, "learning_rate": 8.399156463495735e-06, "loss": 0.1415, "step": 9742 }, { "epoch": 0.2842347861602194, "grad_norm": 0.7457571557541653, "learning_rate": 8.398809979975898e-06, "loss": 0.1405, "step": 9743 }, { "epoch": 0.28426395939086296, "grad_norm": 0.6780960869226833, "learning_rate": 8.398463466112298e-06, "loss": 0.1503, "step": 9744 }, { "epoch": 0.2842931326215065, "grad_norm": 0.7994458526346818, "learning_rate": 8.398116921908028e-06, "loss": 0.1329, "step": 9745 }, { "epoch": 0.28432230585215007, "grad_norm": 0.9628546074911857, "learning_rate": 8.397770347366181e-06, "loss": 0.1415, "step": 9746 }, { "epoch": 0.2843514790827936, "grad_norm": 0.6365447065570922, "learning_rate": 8.397423742489852e-06, "loss": 0.1444, "step": 9747 }, { "epoch": 0.2843806523134372, "grad_norm": 0.7321654765354305, "learning_rate": 8.397077107282134e-06, "loss": 0.1412, "step": 9748 }, { "epoch": 0.28440982554408073, "grad_norm": 1.3495289834810458, "learning_rate": 8.396730441746121e-06, "loss": 0.1539, "step": 9749 }, { "epoch": 0.28443899877472434, "grad_norm": 0.7402820912285748, "learning_rate": 8.396383745884913e-06, "loss": 0.1613, "step": 9750 }, { "epoch": 0.2844681720053679, "grad_norm": 0.7990324437728387, "learning_rate": 8.3960370197016e-06, "loss": 0.1454, "step": 9751 }, { "epoch": 0.28449734523601145, "grad_norm": 0.8000174929224456, "learning_rate": 8.395690263199279e-06, "loss": 0.1382, "step": 9752 }, { "epoch": 0.284526518466655, "grad_norm": 0.8392983185148813, "learning_rate": 8.395343476381047e-06, "loss": 0.1462, "step": 9753 }, { "epoch": 0.28455569169729855, "grad_norm": 0.811664279287985, "learning_rate": 8.394996659249996e-06, "loss": 0.1662, "step": 9754 }, { "epoch": 0.2845848649279421, "grad_norm": 0.8364192688275434, "learning_rate": 8.394649811809228e-06, "loss": 0.1635, "step": 9755 }, { "epoch": 0.28461403815858566, "grad_norm": 0.8514969812664084, "learning_rate": 8.394302934061836e-06, "loss": 0.1534, "step": 9756 }, { "epoch": 0.28464321138922927, "grad_norm": 0.6980328385273792, "learning_rate": 8.393956026010917e-06, "loss": 0.1375, "step": 9757 }, { "epoch": 0.2846723846198728, "grad_norm": 0.890712888984573, "learning_rate": 8.39360908765957e-06, "loss": 0.1501, "step": 9758 }, { "epoch": 0.2847015578505164, "grad_norm": 0.7935256251004001, "learning_rate": 8.393262119010891e-06, "loss": 0.1417, "step": 9759 }, { "epoch": 0.2847307310811599, "grad_norm": 1.0583601653878323, "learning_rate": 8.392915120067979e-06, "loss": 0.139, "step": 9760 }, { "epoch": 0.2847599043118035, "grad_norm": 0.8763551162292266, "learning_rate": 8.392568090833928e-06, "loss": 0.1318, "step": 9761 }, { "epoch": 0.28478907754244703, "grad_norm": 0.910605598245978, "learning_rate": 8.392221031311842e-06, "loss": 0.1421, "step": 9762 }, { "epoch": 0.2848182507730906, "grad_norm": 1.0002623823839114, "learning_rate": 8.391873941504813e-06, "loss": 0.1672, "step": 9763 }, { "epoch": 0.2848474240037342, "grad_norm": 1.150826451710878, "learning_rate": 8.391526821415946e-06, "loss": 0.1459, "step": 9764 }, { "epoch": 0.28487659723437775, "grad_norm": 0.8333527000506046, "learning_rate": 8.391179671048335e-06, "loss": 0.1463, "step": 9765 }, { "epoch": 0.2849057704650213, "grad_norm": 0.8496365208020461, "learning_rate": 8.390832490405083e-06, "loss": 0.1778, "step": 9766 }, { "epoch": 0.28493494369566486, "grad_norm": 0.9566922452947795, "learning_rate": 8.390485279489288e-06, "loss": 0.1557, "step": 9767 }, { "epoch": 0.2849641169263084, "grad_norm": 0.9743776238793905, "learning_rate": 8.39013803830405e-06, "loss": 0.1963, "step": 9768 }, { "epoch": 0.28499329015695196, "grad_norm": 0.8105312690064024, "learning_rate": 8.389790766852468e-06, "loss": 0.1618, "step": 9769 }, { "epoch": 0.2850224633875955, "grad_norm": 0.9612419895376347, "learning_rate": 8.389443465137644e-06, "loss": 0.1372, "step": 9770 }, { "epoch": 0.2850516366182391, "grad_norm": 0.989031838983565, "learning_rate": 8.389096133162676e-06, "loss": 0.1557, "step": 9771 }, { "epoch": 0.2850808098488827, "grad_norm": 0.7003070634055556, "learning_rate": 8.388748770930668e-06, "loss": 0.1578, "step": 9772 }, { "epoch": 0.28510998307952623, "grad_norm": 0.6892256137104693, "learning_rate": 8.38840137844472e-06, "loss": 0.1542, "step": 9773 }, { "epoch": 0.2851391563101698, "grad_norm": 0.7613776122796025, "learning_rate": 8.388053955707933e-06, "loss": 0.1462, "step": 9774 }, { "epoch": 0.28516832954081334, "grad_norm": 0.8396255255344625, "learning_rate": 8.387706502723411e-06, "loss": 0.1711, "step": 9775 }, { "epoch": 0.2851975027714569, "grad_norm": 0.8393162253934431, "learning_rate": 8.387359019494253e-06, "loss": 0.1616, "step": 9776 }, { "epoch": 0.2852266760021005, "grad_norm": 0.9361006689228223, "learning_rate": 8.38701150602356e-06, "loss": 0.1436, "step": 9777 }, { "epoch": 0.28525584923274405, "grad_norm": 0.7737408120928901, "learning_rate": 8.386663962314439e-06, "loss": 0.1526, "step": 9778 }, { "epoch": 0.2852850224633876, "grad_norm": 1.0143515421180576, "learning_rate": 8.38631638836999e-06, "loss": 0.1591, "step": 9779 }, { "epoch": 0.28531419569403116, "grad_norm": 0.9423559033595247, "learning_rate": 8.385968784193318e-06, "loss": 0.1482, "step": 9780 }, { "epoch": 0.2853433689246747, "grad_norm": 0.8088790668770177, "learning_rate": 8.385621149787523e-06, "loss": 0.1648, "step": 9781 }, { "epoch": 0.28537254215531827, "grad_norm": 1.2837111217628376, "learning_rate": 8.385273485155712e-06, "loss": 0.1449, "step": 9782 }, { "epoch": 0.2854017153859618, "grad_norm": 0.7041231284703034, "learning_rate": 8.384925790300988e-06, "loss": 0.1561, "step": 9783 }, { "epoch": 0.2854308886166054, "grad_norm": 0.7609432455307122, "learning_rate": 8.384578065226452e-06, "loss": 0.1636, "step": 9784 }, { "epoch": 0.285460061847249, "grad_norm": 1.0170438604752867, "learning_rate": 8.384230309935212e-06, "loss": 0.1524, "step": 9785 }, { "epoch": 0.28548923507789253, "grad_norm": 0.6991345591064341, "learning_rate": 8.383882524430373e-06, "loss": 0.1635, "step": 9786 }, { "epoch": 0.2855184083085361, "grad_norm": 0.793002041694541, "learning_rate": 8.383534708715039e-06, "loss": 0.1455, "step": 9787 }, { "epoch": 0.28554758153917964, "grad_norm": 0.9108779665096352, "learning_rate": 8.383186862792313e-06, "loss": 0.1561, "step": 9788 }, { "epoch": 0.2855767547698232, "grad_norm": 0.7557158888877378, "learning_rate": 8.382838986665303e-06, "loss": 0.1413, "step": 9789 }, { "epoch": 0.28560592800046675, "grad_norm": 0.6168635431655549, "learning_rate": 8.382491080337114e-06, "loss": 0.1429, "step": 9790 }, { "epoch": 0.28563510123111036, "grad_norm": 0.8606945167870395, "learning_rate": 8.382143143810853e-06, "loss": 0.1843, "step": 9791 }, { "epoch": 0.2856642744617539, "grad_norm": 0.8543899192255275, "learning_rate": 8.381795177089625e-06, "loss": 0.1742, "step": 9792 }, { "epoch": 0.28569344769239746, "grad_norm": 0.8743916722210373, "learning_rate": 8.381447180176536e-06, "loss": 0.1625, "step": 9793 }, { "epoch": 0.285722620923041, "grad_norm": 0.8911906150549053, "learning_rate": 8.381099153074694e-06, "loss": 0.1668, "step": 9794 }, { "epoch": 0.28575179415368457, "grad_norm": 0.8898417053456381, "learning_rate": 8.380751095787206e-06, "loss": 0.1621, "step": 9795 }, { "epoch": 0.2857809673843281, "grad_norm": 0.8492672080647266, "learning_rate": 8.38040300831718e-06, "loss": 0.1547, "step": 9796 }, { "epoch": 0.2858101406149717, "grad_norm": 0.8231220735251121, "learning_rate": 8.380054890667721e-06, "loss": 0.1542, "step": 9797 }, { "epoch": 0.2858393138456153, "grad_norm": 0.9392926209405533, "learning_rate": 8.379706742841942e-06, "loss": 0.1501, "step": 9798 }, { "epoch": 0.28586848707625884, "grad_norm": 0.8668033463206155, "learning_rate": 8.379358564842945e-06, "loss": 0.1825, "step": 9799 }, { "epoch": 0.2858976603069024, "grad_norm": 0.782579551210596, "learning_rate": 8.379010356673842e-06, "loss": 0.1397, "step": 9800 }, { "epoch": 0.28592683353754594, "grad_norm": 0.8429579693804491, "learning_rate": 8.378662118337744e-06, "loss": 0.1711, "step": 9801 }, { "epoch": 0.2859560067681895, "grad_norm": 1.1811981160323473, "learning_rate": 8.378313849837754e-06, "loss": 0.1583, "step": 9802 }, { "epoch": 0.28598517999883305, "grad_norm": 0.7390792447280129, "learning_rate": 8.377965551176986e-06, "loss": 0.1611, "step": 9803 }, { "epoch": 0.28601435322947666, "grad_norm": 0.8515452839779818, "learning_rate": 8.377617222358547e-06, "loss": 0.1976, "step": 9804 }, { "epoch": 0.2860435264601202, "grad_norm": 1.138326602151265, "learning_rate": 8.377268863385548e-06, "loss": 0.1824, "step": 9805 }, { "epoch": 0.28607269969076377, "grad_norm": 0.6932803493196364, "learning_rate": 8.376920474261098e-06, "loss": 0.1539, "step": 9806 }, { "epoch": 0.2861018729214073, "grad_norm": 0.8042209766996735, "learning_rate": 8.37657205498831e-06, "loss": 0.1639, "step": 9807 }, { "epoch": 0.28613104615205087, "grad_norm": 0.9771992419589725, "learning_rate": 8.376223605570292e-06, "loss": 0.1563, "step": 9808 }, { "epoch": 0.2861602193826944, "grad_norm": 0.8513448143246213, "learning_rate": 8.375875126010156e-06, "loss": 0.1436, "step": 9809 }, { "epoch": 0.286189392613338, "grad_norm": 0.7190331498425138, "learning_rate": 8.375526616311012e-06, "loss": 0.157, "step": 9810 }, { "epoch": 0.2862185658439816, "grad_norm": 0.7185870328303054, "learning_rate": 8.375178076475971e-06, "loss": 0.1276, "step": 9811 }, { "epoch": 0.28624773907462514, "grad_norm": 1.1960626524080855, "learning_rate": 8.37482950650815e-06, "loss": 0.1703, "step": 9812 }, { "epoch": 0.2862769123052687, "grad_norm": 0.9473444616150966, "learning_rate": 8.374480906410651e-06, "loss": 0.1562, "step": 9813 }, { "epoch": 0.28630608553591225, "grad_norm": 1.5379768613731213, "learning_rate": 8.374132276186596e-06, "loss": 0.1624, "step": 9814 }, { "epoch": 0.2863352587665558, "grad_norm": 0.8579904917878909, "learning_rate": 8.373783615839093e-06, "loss": 0.1488, "step": 9815 }, { "epoch": 0.28636443199719935, "grad_norm": 0.8139606444233826, "learning_rate": 8.373434925371255e-06, "loss": 0.1467, "step": 9816 }, { "epoch": 0.2863936052278429, "grad_norm": 0.8252844186380475, "learning_rate": 8.373086204786195e-06, "loss": 0.1696, "step": 9817 }, { "epoch": 0.2864227784584865, "grad_norm": 0.846608937522948, "learning_rate": 8.372737454087026e-06, "loss": 0.1355, "step": 9818 }, { "epoch": 0.28645195168913007, "grad_norm": 0.6950788202532748, "learning_rate": 8.372388673276864e-06, "loss": 0.1526, "step": 9819 }, { "epoch": 0.2864811249197736, "grad_norm": 0.7839518495820436, "learning_rate": 8.37203986235882e-06, "loss": 0.1835, "step": 9820 }, { "epoch": 0.2865102981504172, "grad_norm": 0.8047945199413639, "learning_rate": 8.371691021336008e-06, "loss": 0.13, "step": 9821 }, { "epoch": 0.28653947138106073, "grad_norm": 0.6528806430649705, "learning_rate": 8.371342150211544e-06, "loss": 0.1504, "step": 9822 }, { "epoch": 0.2865686446117043, "grad_norm": 0.8877748140982329, "learning_rate": 8.370993248988543e-06, "loss": 0.1557, "step": 9823 }, { "epoch": 0.28659781784234784, "grad_norm": 1.015067989317165, "learning_rate": 8.370644317670118e-06, "loss": 0.1635, "step": 9824 }, { "epoch": 0.28662699107299144, "grad_norm": 0.9142522240636217, "learning_rate": 8.370295356259386e-06, "loss": 0.1652, "step": 9825 }, { "epoch": 0.286656164303635, "grad_norm": 0.8756461731394208, "learning_rate": 8.369946364759462e-06, "loss": 0.1646, "step": 9826 }, { "epoch": 0.28668533753427855, "grad_norm": 1.1130965289972723, "learning_rate": 8.36959734317346e-06, "loss": 0.1558, "step": 9827 }, { "epoch": 0.2867145107649221, "grad_norm": 0.8259424285503624, "learning_rate": 8.369248291504497e-06, "loss": 0.1576, "step": 9828 }, { "epoch": 0.28674368399556566, "grad_norm": 0.9562641163907196, "learning_rate": 8.368899209755691e-06, "loss": 0.1625, "step": 9829 }, { "epoch": 0.2867728572262092, "grad_norm": 1.2872070525831119, "learning_rate": 8.368550097930156e-06, "loss": 0.1291, "step": 9830 }, { "epoch": 0.2868020304568528, "grad_norm": 0.8581258318130291, "learning_rate": 8.368200956031011e-06, "loss": 0.145, "step": 9831 }, { "epoch": 0.28683120368749637, "grad_norm": 1.087307890910858, "learning_rate": 8.367851784061371e-06, "loss": 0.1416, "step": 9832 }, { "epoch": 0.2868603769181399, "grad_norm": 0.9742143876335019, "learning_rate": 8.367502582024354e-06, "loss": 0.1525, "step": 9833 }, { "epoch": 0.2868895501487835, "grad_norm": 1.0880570097246778, "learning_rate": 8.367153349923078e-06, "loss": 0.2012, "step": 9834 }, { "epoch": 0.28691872337942703, "grad_norm": 1.1997412533739402, "learning_rate": 8.366804087760662e-06, "loss": 0.1496, "step": 9835 }, { "epoch": 0.2869478966100706, "grad_norm": 0.8698108124035253, "learning_rate": 8.366454795540221e-06, "loss": 0.1756, "step": 9836 }, { "epoch": 0.28697706984071414, "grad_norm": 1.1371208532155463, "learning_rate": 8.366105473264877e-06, "loss": 0.1822, "step": 9837 }, { "epoch": 0.28700624307135775, "grad_norm": 1.349272065080481, "learning_rate": 8.365756120937746e-06, "loss": 0.1827, "step": 9838 }, { "epoch": 0.2870354163020013, "grad_norm": 1.159707127945847, "learning_rate": 8.365406738561948e-06, "loss": 0.1712, "step": 9839 }, { "epoch": 0.28706458953264485, "grad_norm": 0.7695853217531026, "learning_rate": 8.365057326140602e-06, "loss": 0.1624, "step": 9840 }, { "epoch": 0.2870937627632884, "grad_norm": 1.2176623436540903, "learning_rate": 8.364707883676826e-06, "loss": 0.1629, "step": 9841 }, { "epoch": 0.28712293599393196, "grad_norm": 0.7969760569180943, "learning_rate": 8.364358411173742e-06, "loss": 0.1564, "step": 9842 }, { "epoch": 0.2871521092245755, "grad_norm": 0.7393559798164078, "learning_rate": 8.36400890863447e-06, "loss": 0.1545, "step": 9843 }, { "epoch": 0.28718128245521907, "grad_norm": 0.7461860357689802, "learning_rate": 8.363659376062129e-06, "loss": 0.1378, "step": 9844 }, { "epoch": 0.2872104556858627, "grad_norm": 0.9308486159446256, "learning_rate": 8.36330981345984e-06, "loss": 0.1397, "step": 9845 }, { "epoch": 0.28723962891650623, "grad_norm": 0.8344962007898478, "learning_rate": 8.362960220830725e-06, "loss": 0.1647, "step": 9846 }, { "epoch": 0.2872688021471498, "grad_norm": 0.7315788043770404, "learning_rate": 8.362610598177904e-06, "loss": 0.1698, "step": 9847 }, { "epoch": 0.28729797537779334, "grad_norm": 0.9402750814257732, "learning_rate": 8.362260945504497e-06, "loss": 0.1553, "step": 9848 }, { "epoch": 0.2873271486084369, "grad_norm": 0.8061549441004859, "learning_rate": 8.361911262813628e-06, "loss": 0.1546, "step": 9849 }, { "epoch": 0.28735632183908044, "grad_norm": 0.847045834650828, "learning_rate": 8.361561550108417e-06, "loss": 0.1336, "step": 9850 }, { "epoch": 0.287385495069724, "grad_norm": 0.8061753933894321, "learning_rate": 8.361211807391987e-06, "loss": 0.1699, "step": 9851 }, { "epoch": 0.2874146683003676, "grad_norm": 0.8444227995422279, "learning_rate": 8.36086203466746e-06, "loss": 0.15, "step": 9852 }, { "epoch": 0.28744384153101116, "grad_norm": 0.8212005765713866, "learning_rate": 8.36051223193796e-06, "loss": 0.167, "step": 9853 }, { "epoch": 0.2874730147616547, "grad_norm": 0.7896960182652282, "learning_rate": 8.360162399206609e-06, "loss": 0.1541, "step": 9854 }, { "epoch": 0.28750218799229826, "grad_norm": 0.7054874119732321, "learning_rate": 8.35981253647653e-06, "loss": 0.156, "step": 9855 }, { "epoch": 0.2875313612229418, "grad_norm": 0.794441548546251, "learning_rate": 8.359462643750847e-06, "loss": 0.1455, "step": 9856 }, { "epoch": 0.28756053445358537, "grad_norm": 0.7243266466759475, "learning_rate": 8.359112721032682e-06, "loss": 0.1522, "step": 9857 }, { "epoch": 0.287589707684229, "grad_norm": 0.854715048031843, "learning_rate": 8.358762768325162e-06, "loss": 0.1588, "step": 9858 }, { "epoch": 0.28761888091487253, "grad_norm": 0.7871523200061623, "learning_rate": 8.35841278563141e-06, "loss": 0.1573, "step": 9859 }, { "epoch": 0.2876480541455161, "grad_norm": 0.9531408083364188, "learning_rate": 8.358062772954549e-06, "loss": 0.1682, "step": 9860 }, { "epoch": 0.28767722737615964, "grad_norm": 0.6812932180373655, "learning_rate": 8.357712730297707e-06, "loss": 0.147, "step": 9861 }, { "epoch": 0.2877064006068032, "grad_norm": 0.7838031131738635, "learning_rate": 8.357362657664005e-06, "loss": 0.1545, "step": 9862 }, { "epoch": 0.28773557383744675, "grad_norm": 1.1692794571002676, "learning_rate": 8.357012555056571e-06, "loss": 0.1631, "step": 9863 }, { "epoch": 0.2877647470680903, "grad_norm": 0.759695566501756, "learning_rate": 8.356662422478532e-06, "loss": 0.1505, "step": 9864 }, { "epoch": 0.2877939202987339, "grad_norm": 0.8668270726138102, "learning_rate": 8.356312259933013e-06, "loss": 0.179, "step": 9865 }, { "epoch": 0.28782309352937746, "grad_norm": 1.2673528714091171, "learning_rate": 8.355962067423135e-06, "loss": 0.1636, "step": 9866 }, { "epoch": 0.287852266760021, "grad_norm": 0.8678672769322022, "learning_rate": 8.355611844952033e-06, "loss": 0.1697, "step": 9867 }, { "epoch": 0.28788143999066457, "grad_norm": 0.8174206486742935, "learning_rate": 8.355261592522828e-06, "loss": 0.1515, "step": 9868 }, { "epoch": 0.2879106132213081, "grad_norm": 0.7988968203734829, "learning_rate": 8.354911310138647e-06, "loss": 0.1491, "step": 9869 }, { "epoch": 0.2879397864519517, "grad_norm": 0.6804670031915753, "learning_rate": 8.354560997802622e-06, "loss": 0.1627, "step": 9870 }, { "epoch": 0.2879689596825952, "grad_norm": 0.7502123958402307, "learning_rate": 8.354210655517876e-06, "loss": 0.1439, "step": 9871 }, { "epoch": 0.28799813291323884, "grad_norm": 0.9163857814759543, "learning_rate": 8.353860283287535e-06, "loss": 0.1749, "step": 9872 }, { "epoch": 0.2880273061438824, "grad_norm": 0.7503514897510583, "learning_rate": 8.353509881114734e-06, "loss": 0.1671, "step": 9873 }, { "epoch": 0.28805647937452594, "grad_norm": 0.9986993786625777, "learning_rate": 8.353159449002595e-06, "loss": 0.1709, "step": 9874 }, { "epoch": 0.2880856526051695, "grad_norm": 0.7901623980860604, "learning_rate": 8.352808986954251e-06, "loss": 0.1723, "step": 9875 }, { "epoch": 0.28811482583581305, "grad_norm": 0.6467608429051405, "learning_rate": 8.352458494972825e-06, "loss": 0.1469, "step": 9876 }, { "epoch": 0.2881439990664566, "grad_norm": 0.7686440350854351, "learning_rate": 8.352107973061455e-06, "loss": 0.1693, "step": 9877 }, { "epoch": 0.28817317229710016, "grad_norm": 0.7673670580493841, "learning_rate": 8.351757421223262e-06, "loss": 0.145, "step": 9878 }, { "epoch": 0.28820234552774376, "grad_norm": 0.7527342040107813, "learning_rate": 8.351406839461378e-06, "loss": 0.1473, "step": 9879 }, { "epoch": 0.2882315187583873, "grad_norm": 0.9030520454763455, "learning_rate": 8.351056227778935e-06, "loss": 0.1328, "step": 9880 }, { "epoch": 0.28826069198903087, "grad_norm": 0.826526670234512, "learning_rate": 8.350705586179063e-06, "loss": 0.1554, "step": 9881 }, { "epoch": 0.2882898652196744, "grad_norm": 0.9628114119687834, "learning_rate": 8.35035491466489e-06, "loss": 0.1413, "step": 9882 }, { "epoch": 0.288319038450318, "grad_norm": 0.7046764569947076, "learning_rate": 8.350004213239549e-06, "loss": 0.1451, "step": 9883 }, { "epoch": 0.28834821168096153, "grad_norm": 0.7790342088529321, "learning_rate": 8.349653481906169e-06, "loss": 0.1656, "step": 9884 }, { "epoch": 0.2883773849116051, "grad_norm": 0.9868218322854813, "learning_rate": 8.349302720667883e-06, "loss": 0.1703, "step": 9885 }, { "epoch": 0.2884065581422487, "grad_norm": 0.7137767194711725, "learning_rate": 8.348951929527822e-06, "loss": 0.137, "step": 9886 }, { "epoch": 0.28843573137289225, "grad_norm": 0.8227141535054573, "learning_rate": 8.348601108489118e-06, "loss": 0.1455, "step": 9887 }, { "epoch": 0.2884649046035358, "grad_norm": 0.8436155558799002, "learning_rate": 8.348250257554902e-06, "loss": 0.1618, "step": 9888 }, { "epoch": 0.28849407783417935, "grad_norm": 1.0102542206494416, "learning_rate": 8.347899376728307e-06, "loss": 0.1252, "step": 9889 }, { "epoch": 0.2885232510648229, "grad_norm": 0.6229948350671283, "learning_rate": 8.347548466012464e-06, "loss": 0.1235, "step": 9890 }, { "epoch": 0.28855242429546646, "grad_norm": 0.7487960006338579, "learning_rate": 8.34719752541051e-06, "loss": 0.154, "step": 9891 }, { "epoch": 0.28858159752611007, "grad_norm": 0.8203898754336488, "learning_rate": 8.346846554925577e-06, "loss": 0.1546, "step": 9892 }, { "epoch": 0.2886107707567536, "grad_norm": 0.741354464163945, "learning_rate": 8.346495554560794e-06, "loss": 0.1654, "step": 9893 }, { "epoch": 0.2886399439873972, "grad_norm": 0.928620964154592, "learning_rate": 8.346144524319298e-06, "loss": 0.1563, "step": 9894 }, { "epoch": 0.2886691172180407, "grad_norm": 0.7041305464807207, "learning_rate": 8.345793464204221e-06, "loss": 0.1405, "step": 9895 }, { "epoch": 0.2886982904486843, "grad_norm": 0.7809452957929224, "learning_rate": 8.345442374218702e-06, "loss": 0.1347, "step": 9896 }, { "epoch": 0.28872746367932783, "grad_norm": 0.7657322015755201, "learning_rate": 8.34509125436587e-06, "loss": 0.139, "step": 9897 }, { "epoch": 0.2887566369099714, "grad_norm": 0.7689742927200562, "learning_rate": 8.344740104648862e-06, "loss": 0.1622, "step": 9898 }, { "epoch": 0.288785810140615, "grad_norm": 0.9481928855239199, "learning_rate": 8.344388925070812e-06, "loss": 0.1568, "step": 9899 }, { "epoch": 0.28881498337125855, "grad_norm": 0.7098603340041709, "learning_rate": 8.344037715634859e-06, "loss": 0.1382, "step": 9900 }, { "epoch": 0.2888441566019021, "grad_norm": 1.4781048055873522, "learning_rate": 8.343686476344132e-06, "loss": 0.1657, "step": 9901 }, { "epoch": 0.28887332983254566, "grad_norm": 0.9229871341863847, "learning_rate": 8.343335207201773e-06, "loss": 0.1345, "step": 9902 }, { "epoch": 0.2889025030631892, "grad_norm": 0.7753778008196123, "learning_rate": 8.342983908210915e-06, "loss": 0.1233, "step": 9903 }, { "epoch": 0.28893167629383276, "grad_norm": 0.8450474439015068, "learning_rate": 8.342632579374693e-06, "loss": 0.1416, "step": 9904 }, { "epoch": 0.2889608495244763, "grad_norm": 0.7643050333338456, "learning_rate": 8.342281220696246e-06, "loss": 0.1737, "step": 9905 }, { "epoch": 0.2889900227551199, "grad_norm": 0.8086534856495711, "learning_rate": 8.341929832178712e-06, "loss": 0.1298, "step": 9906 }, { "epoch": 0.2890191959857635, "grad_norm": 1.0202434340116664, "learning_rate": 8.341578413825224e-06, "loss": 0.1546, "step": 9907 }, { "epoch": 0.28904836921640703, "grad_norm": 0.8856129755694068, "learning_rate": 8.341226965638922e-06, "loss": 0.1483, "step": 9908 }, { "epoch": 0.2890775424470506, "grad_norm": 1.0051947137453312, "learning_rate": 8.340875487622944e-06, "loss": 0.1575, "step": 9909 }, { "epoch": 0.28910671567769414, "grad_norm": 0.8923354587071173, "learning_rate": 8.340523979780426e-06, "loss": 0.1697, "step": 9910 }, { "epoch": 0.2891358889083377, "grad_norm": 0.7912794953994675, "learning_rate": 8.340172442114509e-06, "loss": 0.138, "step": 9911 }, { "epoch": 0.28916506213898124, "grad_norm": 0.7861918709653906, "learning_rate": 8.33982087462833e-06, "loss": 0.1494, "step": 9912 }, { "epoch": 0.28919423536962485, "grad_norm": 0.9112625258963118, "learning_rate": 8.339469277325025e-06, "loss": 0.1475, "step": 9913 }, { "epoch": 0.2892234086002684, "grad_norm": 0.8893112375319981, "learning_rate": 8.339117650207738e-06, "loss": 0.1506, "step": 9914 }, { "epoch": 0.28925258183091196, "grad_norm": 0.9022641279176083, "learning_rate": 8.338765993279604e-06, "loss": 0.1437, "step": 9915 }, { "epoch": 0.2892817550615555, "grad_norm": 0.9962674200604096, "learning_rate": 8.338414306543764e-06, "loss": 0.1698, "step": 9916 }, { "epoch": 0.28931092829219907, "grad_norm": 0.8840193722246299, "learning_rate": 8.33806259000336e-06, "loss": 0.1334, "step": 9917 }, { "epoch": 0.2893401015228426, "grad_norm": 0.8890595472257062, "learning_rate": 8.337710843661528e-06, "loss": 0.1557, "step": 9918 }, { "epoch": 0.2893692747534862, "grad_norm": 0.8783307524752315, "learning_rate": 8.337359067521411e-06, "loss": 0.1654, "step": 9919 }, { "epoch": 0.2893984479841298, "grad_norm": 0.7667862558799786, "learning_rate": 8.33700726158615e-06, "loss": 0.1601, "step": 9920 }, { "epoch": 0.28942762121477333, "grad_norm": 1.0607707908253314, "learning_rate": 8.336655425858885e-06, "loss": 0.1531, "step": 9921 }, { "epoch": 0.2894567944454169, "grad_norm": 0.8227189793116355, "learning_rate": 8.336303560342756e-06, "loss": 0.1605, "step": 9922 }, { "epoch": 0.28948596767606044, "grad_norm": 0.8533704546216876, "learning_rate": 8.335951665040904e-06, "loss": 0.1693, "step": 9923 }, { "epoch": 0.289515140906704, "grad_norm": 0.8029944727248988, "learning_rate": 8.335599739956474e-06, "loss": 0.1656, "step": 9924 }, { "epoch": 0.28954431413734755, "grad_norm": 0.8797297804836803, "learning_rate": 8.335247785092604e-06, "loss": 0.1341, "step": 9925 }, { "epoch": 0.28957348736799116, "grad_norm": 0.8583529494083441, "learning_rate": 8.33489580045244e-06, "loss": 0.1602, "step": 9926 }, { "epoch": 0.2896026605986347, "grad_norm": 0.7376563991148708, "learning_rate": 8.334543786039122e-06, "loss": 0.1736, "step": 9927 }, { "epoch": 0.28963183382927826, "grad_norm": 0.8326224275218932, "learning_rate": 8.33419174185579e-06, "loss": 0.1708, "step": 9928 }, { "epoch": 0.2896610070599218, "grad_norm": 0.7862764218821267, "learning_rate": 8.333839667905594e-06, "loss": 0.1532, "step": 9929 }, { "epoch": 0.28969018029056537, "grad_norm": 0.6965774250218687, "learning_rate": 8.333487564191672e-06, "loss": 0.1631, "step": 9930 }, { "epoch": 0.2897193535212089, "grad_norm": 0.7734103847833171, "learning_rate": 8.333135430717167e-06, "loss": 0.1421, "step": 9931 }, { "epoch": 0.2897485267518525, "grad_norm": 0.7402186181779997, "learning_rate": 8.332783267485227e-06, "loss": 0.1544, "step": 9932 }, { "epoch": 0.2897776999824961, "grad_norm": 0.8362642679456843, "learning_rate": 8.332431074498992e-06, "loss": 0.1569, "step": 9933 }, { "epoch": 0.28980687321313964, "grad_norm": 0.8623462724671603, "learning_rate": 8.33207885176161e-06, "loss": 0.14, "step": 9934 }, { "epoch": 0.2898360464437832, "grad_norm": 0.8445223702334346, "learning_rate": 8.331726599276221e-06, "loss": 0.1729, "step": 9935 }, { "epoch": 0.28986521967442674, "grad_norm": 0.885988204287692, "learning_rate": 8.331374317045974e-06, "loss": 0.1449, "step": 9936 }, { "epoch": 0.2898943929050703, "grad_norm": 0.7388917651173174, "learning_rate": 8.33102200507401e-06, "loss": 0.1503, "step": 9937 }, { "epoch": 0.28992356613571385, "grad_norm": 0.7618862505541767, "learning_rate": 8.33066966336348e-06, "loss": 0.158, "step": 9938 }, { "epoch": 0.2899527393663574, "grad_norm": 0.7039575963698365, "learning_rate": 8.330317291917525e-06, "loss": 0.1407, "step": 9939 }, { "epoch": 0.289981912597001, "grad_norm": 0.9483758954090866, "learning_rate": 8.32996489073929e-06, "loss": 0.1624, "step": 9940 }, { "epoch": 0.29001108582764457, "grad_norm": 0.7212525909556974, "learning_rate": 8.329612459831926e-06, "loss": 0.1492, "step": 9941 }, { "epoch": 0.2900402590582881, "grad_norm": 0.8573451154135282, "learning_rate": 8.329259999198577e-06, "loss": 0.1365, "step": 9942 }, { "epoch": 0.2900694322889317, "grad_norm": 0.9432659078236447, "learning_rate": 8.328907508842388e-06, "loss": 0.1544, "step": 9943 }, { "epoch": 0.2900986055195752, "grad_norm": 0.7315033120337017, "learning_rate": 8.328554988766509e-06, "loss": 0.1493, "step": 9944 }, { "epoch": 0.2901277787502188, "grad_norm": 1.167455282089146, "learning_rate": 8.328202438974083e-06, "loss": 0.1396, "step": 9945 }, { "epoch": 0.2901569519808624, "grad_norm": 1.1215715354094669, "learning_rate": 8.327849859468263e-06, "loss": 0.1559, "step": 9946 }, { "epoch": 0.29018612521150594, "grad_norm": 0.8945220203736282, "learning_rate": 8.327497250252192e-06, "loss": 0.1417, "step": 9947 }, { "epoch": 0.2902152984421495, "grad_norm": 1.0190752539257046, "learning_rate": 8.327144611329022e-06, "loss": 0.1674, "step": 9948 }, { "epoch": 0.29024447167279305, "grad_norm": 0.933557154708929, "learning_rate": 8.326791942701895e-06, "loss": 0.1578, "step": 9949 }, { "epoch": 0.2902736449034366, "grad_norm": 0.9863857215165585, "learning_rate": 8.326439244373968e-06, "loss": 0.1636, "step": 9950 }, { "epoch": 0.29030281813408015, "grad_norm": 1.4159947206301777, "learning_rate": 8.326086516348384e-06, "loss": 0.1558, "step": 9951 }, { "epoch": 0.2903319913647237, "grad_norm": 0.8933563458370728, "learning_rate": 8.325733758628292e-06, "loss": 0.1575, "step": 9952 }, { "epoch": 0.2903611645953673, "grad_norm": 1.0723245638303787, "learning_rate": 8.325380971216846e-06, "loss": 0.1594, "step": 9953 }, { "epoch": 0.29039033782601087, "grad_norm": 0.9162685575737721, "learning_rate": 8.325028154117191e-06, "loss": 0.1939, "step": 9954 }, { "epoch": 0.2904195110566544, "grad_norm": 0.7659311648089028, "learning_rate": 8.324675307332478e-06, "loss": 0.1496, "step": 9955 }, { "epoch": 0.290448684287298, "grad_norm": 0.9303261328242991, "learning_rate": 8.324322430865858e-06, "loss": 0.1618, "step": 9956 }, { "epoch": 0.29047785751794153, "grad_norm": 0.8380924897880033, "learning_rate": 8.32396952472048e-06, "loss": 0.137, "step": 9957 }, { "epoch": 0.2905070307485851, "grad_norm": 1.0817912252194481, "learning_rate": 8.323616588899497e-06, "loss": 0.1697, "step": 9958 }, { "epoch": 0.29053620397922864, "grad_norm": 0.7405987750868062, "learning_rate": 8.323263623406057e-06, "loss": 0.1583, "step": 9959 }, { "epoch": 0.29056537720987224, "grad_norm": 1.1149496128661358, "learning_rate": 8.322910628243314e-06, "loss": 0.1495, "step": 9960 }, { "epoch": 0.2905945504405158, "grad_norm": 0.8458534683375639, "learning_rate": 8.322557603414418e-06, "loss": 0.1503, "step": 9961 }, { "epoch": 0.29062372367115935, "grad_norm": 0.9372275666730926, "learning_rate": 8.322204548922521e-06, "loss": 0.1401, "step": 9962 }, { "epoch": 0.2906528969018029, "grad_norm": 0.7629056453068565, "learning_rate": 8.321851464770775e-06, "loss": 0.1817, "step": 9963 }, { "epoch": 0.29068207013244646, "grad_norm": 1.1246679773874921, "learning_rate": 8.321498350962331e-06, "loss": 0.1607, "step": 9964 }, { "epoch": 0.29071124336309, "grad_norm": 0.8833611714238749, "learning_rate": 8.321145207500343e-06, "loss": 0.1478, "step": 9965 }, { "epoch": 0.29074041659373356, "grad_norm": 0.8184608900826866, "learning_rate": 8.320792034387964e-06, "loss": 0.1447, "step": 9966 }, { "epoch": 0.2907695898243772, "grad_norm": 0.9607555983475776, "learning_rate": 8.320438831628345e-06, "loss": 0.164, "step": 9967 }, { "epoch": 0.2907987630550207, "grad_norm": 0.888310835837436, "learning_rate": 8.320085599224642e-06, "loss": 0.1523, "step": 9968 }, { "epoch": 0.2908279362856643, "grad_norm": 0.8757880741813339, "learning_rate": 8.319732337180008e-06, "loss": 0.1589, "step": 9969 }, { "epoch": 0.29085710951630783, "grad_norm": 0.8857833465418704, "learning_rate": 8.319379045497595e-06, "loss": 0.1686, "step": 9970 }, { "epoch": 0.2908862827469514, "grad_norm": 1.0051605070308964, "learning_rate": 8.319025724180559e-06, "loss": 0.1522, "step": 9971 }, { "epoch": 0.29091545597759494, "grad_norm": 0.8221400217366555, "learning_rate": 8.318672373232053e-06, "loss": 0.1533, "step": 9972 }, { "epoch": 0.29094462920823855, "grad_norm": 0.8176771708565067, "learning_rate": 8.318318992655232e-06, "loss": 0.1668, "step": 9973 }, { "epoch": 0.2909738024388821, "grad_norm": 0.8854928575385405, "learning_rate": 8.317965582453251e-06, "loss": 0.1623, "step": 9974 }, { "epoch": 0.29100297566952565, "grad_norm": 0.8171128503970739, "learning_rate": 8.317612142629268e-06, "loss": 0.1705, "step": 9975 }, { "epoch": 0.2910321489001692, "grad_norm": 0.9079090036597574, "learning_rate": 8.317258673186432e-06, "loss": 0.1538, "step": 9976 }, { "epoch": 0.29106132213081276, "grad_norm": 0.7968587160417797, "learning_rate": 8.316905174127906e-06, "loss": 0.1505, "step": 9977 }, { "epoch": 0.2910904953614563, "grad_norm": 0.8164199405906625, "learning_rate": 8.31655164545684e-06, "loss": 0.1415, "step": 9978 }, { "epoch": 0.29111966859209987, "grad_norm": 0.8377358511468971, "learning_rate": 8.316198087176393e-06, "loss": 0.1704, "step": 9979 }, { "epoch": 0.2911488418227435, "grad_norm": 0.6789145992788284, "learning_rate": 8.315844499289722e-06, "loss": 0.161, "step": 9980 }, { "epoch": 0.29117801505338703, "grad_norm": 0.9878176562475328, "learning_rate": 8.315490881799982e-06, "loss": 0.1962, "step": 9981 }, { "epoch": 0.2912071882840306, "grad_norm": 0.7551533792819878, "learning_rate": 8.315137234710332e-06, "loss": 0.1341, "step": 9982 }, { "epoch": 0.29123636151467414, "grad_norm": 1.044286035733997, "learning_rate": 8.314783558023927e-06, "loss": 0.1564, "step": 9983 }, { "epoch": 0.2912655347453177, "grad_norm": 0.7874113543302953, "learning_rate": 8.314429851743927e-06, "loss": 0.1464, "step": 9984 }, { "epoch": 0.29129470797596124, "grad_norm": 0.9118496172043951, "learning_rate": 8.314076115873485e-06, "loss": 0.1596, "step": 9985 }, { "epoch": 0.2913238812066048, "grad_norm": 0.966762188743754, "learning_rate": 8.313722350415767e-06, "loss": 0.1763, "step": 9986 }, { "epoch": 0.2913530544372484, "grad_norm": 0.8326195256442548, "learning_rate": 8.313368555373925e-06, "loss": 0.1515, "step": 9987 }, { "epoch": 0.29138222766789196, "grad_norm": 0.9867023969084737, "learning_rate": 8.313014730751119e-06, "loss": 0.1868, "step": 9988 }, { "epoch": 0.2914114008985355, "grad_norm": 0.9944314437123973, "learning_rate": 8.312660876550509e-06, "loss": 0.1834, "step": 9989 }, { "epoch": 0.29144057412917906, "grad_norm": 0.7193657700096948, "learning_rate": 8.312306992775254e-06, "loss": 0.1385, "step": 9990 }, { "epoch": 0.2914697473598226, "grad_norm": 0.7827293906850198, "learning_rate": 8.311953079428511e-06, "loss": 0.1443, "step": 9991 }, { "epoch": 0.29149892059046617, "grad_norm": 0.775521931058426, "learning_rate": 8.311599136513443e-06, "loss": 0.1345, "step": 9992 }, { "epoch": 0.2915280938211097, "grad_norm": 0.7918526820206877, "learning_rate": 8.311245164033208e-06, "loss": 0.1337, "step": 9993 }, { "epoch": 0.29155726705175333, "grad_norm": 0.9449828723661597, "learning_rate": 8.310891161990967e-06, "loss": 0.14, "step": 9994 }, { "epoch": 0.2915864402823969, "grad_norm": 0.8535034442896857, "learning_rate": 8.31053713038988e-06, "loss": 0.1706, "step": 9995 }, { "epoch": 0.29161561351304044, "grad_norm": 0.9759757025382537, "learning_rate": 8.31018306923311e-06, "loss": 0.1319, "step": 9996 }, { "epoch": 0.291644786743684, "grad_norm": 0.7421867198696321, "learning_rate": 8.30982897852381e-06, "loss": 0.1462, "step": 9997 }, { "epoch": 0.29167395997432755, "grad_norm": 0.8340210160364849, "learning_rate": 8.309474858265153e-06, "loss": 0.1441, "step": 9998 }, { "epoch": 0.2917031332049711, "grad_norm": 0.9196255388910509, "learning_rate": 8.309120708460291e-06, "loss": 0.1611, "step": 9999 }, { "epoch": 0.29173230643561465, "grad_norm": 0.7995903341022464, "learning_rate": 8.30876652911239e-06, "loss": 0.1376, "step": 10000 }, { "epoch": 0.29176147966625826, "grad_norm": 0.5775818656754369, "learning_rate": 8.308412320224612e-06, "loss": 0.1346, "step": 10001 }, { "epoch": 0.2917906528969018, "grad_norm": 0.7585769294015465, "learning_rate": 8.30805808180012e-06, "loss": 0.1564, "step": 10002 }, { "epoch": 0.29181982612754537, "grad_norm": 0.9220451686588651, "learning_rate": 8.307703813842071e-06, "loss": 0.1724, "step": 10003 }, { "epoch": 0.2918489993581889, "grad_norm": 0.7012222953328154, "learning_rate": 8.307349516353634e-06, "loss": 0.1286, "step": 10004 }, { "epoch": 0.2918781725888325, "grad_norm": 0.7923834248307075, "learning_rate": 8.306995189337973e-06, "loss": 0.1679, "step": 10005 }, { "epoch": 0.291907345819476, "grad_norm": 0.946432255528313, "learning_rate": 8.306640832798242e-06, "loss": 0.1706, "step": 10006 }, { "epoch": 0.29193651905011964, "grad_norm": 0.8277375669619255, "learning_rate": 8.306286446737616e-06, "loss": 0.1492, "step": 10007 }, { "epoch": 0.2919656922807632, "grad_norm": 0.9954771463628386, "learning_rate": 8.305932031159253e-06, "loss": 0.1566, "step": 10008 }, { "epoch": 0.29199486551140674, "grad_norm": 0.8562541686694175, "learning_rate": 8.305577586066317e-06, "loss": 0.1625, "step": 10009 }, { "epoch": 0.2920240387420503, "grad_norm": 1.2764191456792593, "learning_rate": 8.305223111461975e-06, "loss": 0.1524, "step": 10010 }, { "epoch": 0.29205321197269385, "grad_norm": 1.1348108065448188, "learning_rate": 8.30486860734939e-06, "loss": 0.1657, "step": 10011 }, { "epoch": 0.2920823852033374, "grad_norm": 0.6247148365727904, "learning_rate": 8.304514073731724e-06, "loss": 0.1542, "step": 10012 }, { "epoch": 0.29211155843398096, "grad_norm": 0.8604647433551958, "learning_rate": 8.304159510612149e-06, "loss": 0.1557, "step": 10013 }, { "epoch": 0.29214073166462456, "grad_norm": 1.0468673531886143, "learning_rate": 8.303804917993825e-06, "loss": 0.1381, "step": 10014 }, { "epoch": 0.2921699048952681, "grad_norm": 0.9221026240612864, "learning_rate": 8.303450295879917e-06, "loss": 0.1417, "step": 10015 }, { "epoch": 0.29219907812591167, "grad_norm": 0.9779188760797921, "learning_rate": 8.303095644273598e-06, "loss": 0.1761, "step": 10016 }, { "epoch": 0.2922282513565552, "grad_norm": 0.95628535233947, "learning_rate": 8.302740963178026e-06, "loss": 0.1462, "step": 10017 }, { "epoch": 0.2922574245871988, "grad_norm": 0.8146698359549647, "learning_rate": 8.302386252596372e-06, "loss": 0.1274, "step": 10018 }, { "epoch": 0.29228659781784233, "grad_norm": 1.0327793173177984, "learning_rate": 8.302031512531802e-06, "loss": 0.1451, "step": 10019 }, { "epoch": 0.2923157710484859, "grad_norm": 0.783726058461353, "learning_rate": 8.301676742987484e-06, "loss": 0.1518, "step": 10020 }, { "epoch": 0.2923449442791295, "grad_norm": 0.7114796690993442, "learning_rate": 8.301321943966583e-06, "loss": 0.1537, "step": 10021 }, { "epoch": 0.29237411750977305, "grad_norm": 0.8343504034340079, "learning_rate": 8.30096711547227e-06, "loss": 0.1378, "step": 10022 }, { "epoch": 0.2924032907404166, "grad_norm": 1.0848139303363526, "learning_rate": 8.300612257507707e-06, "loss": 0.1599, "step": 10023 }, { "epoch": 0.29243246397106015, "grad_norm": 0.7767597599214419, "learning_rate": 8.300257370076069e-06, "loss": 0.1409, "step": 10024 }, { "epoch": 0.2924616372017037, "grad_norm": 0.8318580216756729, "learning_rate": 8.29990245318052e-06, "loss": 0.1302, "step": 10025 }, { "epoch": 0.29249081043234726, "grad_norm": 0.9190345106170331, "learning_rate": 8.299547506824228e-06, "loss": 0.1367, "step": 10026 }, { "epoch": 0.2925199836629908, "grad_norm": 0.8454636319416081, "learning_rate": 8.299192531010365e-06, "loss": 0.141, "step": 10027 }, { "epoch": 0.2925491568936344, "grad_norm": 0.9496611352931372, "learning_rate": 8.298837525742099e-06, "loss": 0.1216, "step": 10028 }, { "epoch": 0.292578330124278, "grad_norm": 1.0058824718148227, "learning_rate": 8.298482491022597e-06, "loss": 0.1559, "step": 10029 }, { "epoch": 0.2926075033549215, "grad_norm": 0.7105909931539826, "learning_rate": 8.298127426855032e-06, "loss": 0.1563, "step": 10030 }, { "epoch": 0.2926366765855651, "grad_norm": 0.8594947440795527, "learning_rate": 8.297772333242572e-06, "loss": 0.1431, "step": 10031 }, { "epoch": 0.29266584981620863, "grad_norm": 0.9180978214471736, "learning_rate": 8.29741721018839e-06, "loss": 0.1788, "step": 10032 }, { "epoch": 0.2926950230468522, "grad_norm": 0.8572780249185475, "learning_rate": 8.297062057695653e-06, "loss": 0.1425, "step": 10033 }, { "epoch": 0.2927241962774958, "grad_norm": 0.8718833186795538, "learning_rate": 8.296706875767533e-06, "loss": 0.1757, "step": 10034 }, { "epoch": 0.29275336950813935, "grad_norm": 1.0267974594728448, "learning_rate": 8.2963516644072e-06, "loss": 0.1799, "step": 10035 }, { "epoch": 0.2927825427387829, "grad_norm": 0.9630029145702377, "learning_rate": 8.295996423617828e-06, "loss": 0.1456, "step": 10036 }, { "epoch": 0.29281171596942646, "grad_norm": 0.9891852313316698, "learning_rate": 8.295641153402586e-06, "loss": 0.1544, "step": 10037 }, { "epoch": 0.29284088920007, "grad_norm": 0.8636462159642979, "learning_rate": 8.295285853764647e-06, "loss": 0.1688, "step": 10038 }, { "epoch": 0.29287006243071356, "grad_norm": 0.9216727137755006, "learning_rate": 8.294930524707181e-06, "loss": 0.1628, "step": 10039 }, { "epoch": 0.2928992356613571, "grad_norm": 1.1656075453781285, "learning_rate": 8.294575166233364e-06, "loss": 0.16, "step": 10040 }, { "epoch": 0.2929284088920007, "grad_norm": 0.8517564097130488, "learning_rate": 8.294219778346366e-06, "loss": 0.1477, "step": 10041 }, { "epoch": 0.2929575821226443, "grad_norm": 0.8107151679766771, "learning_rate": 8.293864361049358e-06, "loss": 0.1482, "step": 10042 }, { "epoch": 0.29298675535328783, "grad_norm": 0.7619916270717272, "learning_rate": 8.293508914345517e-06, "loss": 0.1508, "step": 10043 }, { "epoch": 0.2930159285839314, "grad_norm": 0.7420486994078926, "learning_rate": 8.293153438238015e-06, "loss": 0.1631, "step": 10044 }, { "epoch": 0.29304510181457494, "grad_norm": 0.7447317873850063, "learning_rate": 8.292797932730023e-06, "loss": 0.1728, "step": 10045 }, { "epoch": 0.2930742750452185, "grad_norm": 0.7478213728555458, "learning_rate": 8.292442397824721e-06, "loss": 0.1699, "step": 10046 }, { "epoch": 0.29310344827586204, "grad_norm": 0.6485024024547189, "learning_rate": 8.292086833525277e-06, "loss": 0.1519, "step": 10047 }, { "epoch": 0.29313262150650565, "grad_norm": 0.7590094261702236, "learning_rate": 8.291731239834865e-06, "loss": 0.1584, "step": 10048 }, { "epoch": 0.2931617947371492, "grad_norm": 0.8811821365401096, "learning_rate": 8.291375616756666e-06, "loss": 0.1418, "step": 10049 }, { "epoch": 0.29319096796779276, "grad_norm": 0.8239299166996248, "learning_rate": 8.291019964293852e-06, "loss": 0.2009, "step": 10050 }, { "epoch": 0.2932201411984363, "grad_norm": 0.6186205133828201, "learning_rate": 8.290664282449594e-06, "loss": 0.1417, "step": 10051 }, { "epoch": 0.29324931442907987, "grad_norm": 0.6901163615293291, "learning_rate": 8.290308571227073e-06, "loss": 0.138, "step": 10052 }, { "epoch": 0.2932784876597234, "grad_norm": 0.8567262364175189, "learning_rate": 8.289952830629462e-06, "loss": 0.1611, "step": 10053 }, { "epoch": 0.293307660890367, "grad_norm": 0.826080934820219, "learning_rate": 8.289597060659937e-06, "loss": 0.1556, "step": 10054 }, { "epoch": 0.2933368341210106, "grad_norm": 0.741746620216231, "learning_rate": 8.289241261321674e-06, "loss": 0.1523, "step": 10055 }, { "epoch": 0.29336600735165413, "grad_norm": 0.8285011445736965, "learning_rate": 8.288885432617853e-06, "loss": 0.1785, "step": 10056 }, { "epoch": 0.2933951805822977, "grad_norm": 0.7628317405006118, "learning_rate": 8.288529574551645e-06, "loss": 0.1475, "step": 10057 }, { "epoch": 0.29342435381294124, "grad_norm": 0.7375996428106932, "learning_rate": 8.288173687126231e-06, "loss": 0.1475, "step": 10058 }, { "epoch": 0.2934535270435848, "grad_norm": 0.9312608079105043, "learning_rate": 8.287817770344789e-06, "loss": 0.1627, "step": 10059 }, { "epoch": 0.29348270027422835, "grad_norm": 0.6759824880556607, "learning_rate": 8.287461824210491e-06, "loss": 0.1598, "step": 10060 }, { "epoch": 0.29351187350487196, "grad_norm": 1.141226546046676, "learning_rate": 8.287105848726523e-06, "loss": 0.1404, "step": 10061 }, { "epoch": 0.2935410467355155, "grad_norm": 1.0168099581722219, "learning_rate": 8.286749843896058e-06, "loss": 0.1657, "step": 10062 }, { "epoch": 0.29357021996615906, "grad_norm": 1.096694261391748, "learning_rate": 8.286393809722272e-06, "loss": 0.1248, "step": 10063 }, { "epoch": 0.2935993931968026, "grad_norm": 1.1444504946474283, "learning_rate": 8.286037746208348e-06, "loss": 0.1403, "step": 10064 }, { "epoch": 0.29362856642744617, "grad_norm": 0.8336631442716201, "learning_rate": 8.285681653357465e-06, "loss": 0.1317, "step": 10065 }, { "epoch": 0.2936577396580897, "grad_norm": 1.1462786308024369, "learning_rate": 8.2853255311728e-06, "loss": 0.1669, "step": 10066 }, { "epoch": 0.2936869128887333, "grad_norm": 0.6630550088012315, "learning_rate": 8.28496937965753e-06, "loss": 0.1295, "step": 10067 }, { "epoch": 0.2937160861193769, "grad_norm": 0.7847929484569912, "learning_rate": 8.28461319881484e-06, "loss": 0.136, "step": 10068 }, { "epoch": 0.29374525935002044, "grad_norm": 0.800683760933707, "learning_rate": 8.284256988647907e-06, "loss": 0.1539, "step": 10069 }, { "epoch": 0.293774432580664, "grad_norm": 0.8101367953085461, "learning_rate": 8.283900749159912e-06, "loss": 0.1429, "step": 10070 }, { "epoch": 0.29380360581130754, "grad_norm": 0.8707948290695925, "learning_rate": 8.283544480354036e-06, "loss": 0.1364, "step": 10071 }, { "epoch": 0.2938327790419511, "grad_norm": 1.0013094993900706, "learning_rate": 8.283188182233458e-06, "loss": 0.1401, "step": 10072 }, { "epoch": 0.29386195227259465, "grad_norm": 0.810950696245934, "learning_rate": 8.282831854801359e-06, "loss": 0.1635, "step": 10073 }, { "epoch": 0.2938911255032382, "grad_norm": 0.9939931738019238, "learning_rate": 8.28247549806092e-06, "loss": 0.1494, "step": 10074 }, { "epoch": 0.2939202987338818, "grad_norm": 0.9989568642233154, "learning_rate": 8.282119112015325e-06, "loss": 0.1496, "step": 10075 }, { "epoch": 0.29394947196452537, "grad_norm": 0.6881224454052522, "learning_rate": 8.281762696667755e-06, "loss": 0.1391, "step": 10076 }, { "epoch": 0.2939786451951689, "grad_norm": 1.1326747256186758, "learning_rate": 8.281406252021389e-06, "loss": 0.1647, "step": 10077 }, { "epoch": 0.2940078184258125, "grad_norm": 0.9086346742634053, "learning_rate": 8.28104977807941e-06, "loss": 0.195, "step": 10078 }, { "epoch": 0.294036991656456, "grad_norm": 1.705925428776988, "learning_rate": 8.280693274845006e-06, "loss": 0.137, "step": 10079 }, { "epoch": 0.2940661648870996, "grad_norm": 1.0773313951320238, "learning_rate": 8.280336742321351e-06, "loss": 0.1713, "step": 10080 }, { "epoch": 0.29409533811774313, "grad_norm": 1.0134830466090594, "learning_rate": 8.279980180511636e-06, "loss": 0.1641, "step": 10081 }, { "epoch": 0.29412451134838674, "grad_norm": 0.7687904917679984, "learning_rate": 8.279623589419041e-06, "loss": 0.1658, "step": 10082 }, { "epoch": 0.2941536845790303, "grad_norm": 1.1714835304382243, "learning_rate": 8.279266969046748e-06, "loss": 0.1585, "step": 10083 }, { "epoch": 0.29418285780967385, "grad_norm": 0.7753132212968409, "learning_rate": 8.278910319397944e-06, "loss": 0.1488, "step": 10084 }, { "epoch": 0.2942120310403174, "grad_norm": 0.7890368342165006, "learning_rate": 8.27855364047581e-06, "loss": 0.1437, "step": 10085 }, { "epoch": 0.29424120427096095, "grad_norm": 0.998941095158542, "learning_rate": 8.27819693228353e-06, "loss": 0.1393, "step": 10086 }, { "epoch": 0.2942703775016045, "grad_norm": 1.1003955530389602, "learning_rate": 8.277840194824293e-06, "loss": 0.1545, "step": 10087 }, { "epoch": 0.2942995507322481, "grad_norm": 0.8417741980652979, "learning_rate": 8.277483428101282e-06, "loss": 0.1558, "step": 10088 }, { "epoch": 0.29432872396289167, "grad_norm": 0.8643247738015186, "learning_rate": 8.277126632117678e-06, "loss": 0.1614, "step": 10089 }, { "epoch": 0.2943578971935352, "grad_norm": 0.8045667629341287, "learning_rate": 8.276769806876672e-06, "loss": 0.1687, "step": 10090 }, { "epoch": 0.2943870704241788, "grad_norm": 0.7557136704949229, "learning_rate": 8.276412952381447e-06, "loss": 0.1519, "step": 10091 }, { "epoch": 0.29441624365482233, "grad_norm": 0.8300803948164209, "learning_rate": 8.27605606863519e-06, "loss": 0.1488, "step": 10092 }, { "epoch": 0.2944454168854659, "grad_norm": 0.8716545199341675, "learning_rate": 8.275699155641086e-06, "loss": 0.1232, "step": 10093 }, { "epoch": 0.29447459011610944, "grad_norm": 0.9740788443599583, "learning_rate": 8.275342213402323e-06, "loss": 0.1553, "step": 10094 }, { "epoch": 0.29450376334675304, "grad_norm": 0.908314983029271, "learning_rate": 8.274985241922085e-06, "loss": 0.159, "step": 10095 }, { "epoch": 0.2945329365773966, "grad_norm": 0.7413311019259263, "learning_rate": 8.274628241203559e-06, "loss": 0.1329, "step": 10096 }, { "epoch": 0.29456210980804015, "grad_norm": 0.9126331962160001, "learning_rate": 8.274271211249936e-06, "loss": 0.1348, "step": 10097 }, { "epoch": 0.2945912830386837, "grad_norm": 0.735422309302129, "learning_rate": 8.273914152064402e-06, "loss": 0.1381, "step": 10098 }, { "epoch": 0.29462045626932726, "grad_norm": 0.9076706463326716, "learning_rate": 8.273557063650142e-06, "loss": 0.1653, "step": 10099 }, { "epoch": 0.2946496294999708, "grad_norm": 0.8381643215388135, "learning_rate": 8.27319994601035e-06, "loss": 0.1624, "step": 10100 }, { "epoch": 0.29467880273061436, "grad_norm": 0.7525793137297849, "learning_rate": 8.272842799148204e-06, "loss": 0.1559, "step": 10101 }, { "epoch": 0.294707975961258, "grad_norm": 0.845378689064027, "learning_rate": 8.272485623066902e-06, "loss": 0.1535, "step": 10102 }, { "epoch": 0.2947371491919015, "grad_norm": 0.7426295645551895, "learning_rate": 8.272128417769631e-06, "loss": 0.1551, "step": 10103 }, { "epoch": 0.2947663224225451, "grad_norm": 0.7030265053522543, "learning_rate": 8.271771183259576e-06, "loss": 0.1632, "step": 10104 }, { "epoch": 0.29479549565318863, "grad_norm": 0.8667857278370785, "learning_rate": 8.27141391953993e-06, "loss": 0.1812, "step": 10105 }, { "epoch": 0.2948246688838322, "grad_norm": 0.757826397612349, "learning_rate": 8.271056626613882e-06, "loss": 0.1428, "step": 10106 }, { "epoch": 0.29485384211447574, "grad_norm": 0.7010888562714828, "learning_rate": 8.27069930448462e-06, "loss": 0.1496, "step": 10107 }, { "epoch": 0.2948830153451193, "grad_norm": 0.8489102107716576, "learning_rate": 8.270341953155337e-06, "loss": 0.1667, "step": 10108 }, { "epoch": 0.2949121885757629, "grad_norm": 0.7182762903971774, "learning_rate": 8.269984572629221e-06, "loss": 0.1508, "step": 10109 }, { "epoch": 0.29494136180640645, "grad_norm": 0.9205830169267184, "learning_rate": 8.269627162909464e-06, "loss": 0.1374, "step": 10110 }, { "epoch": 0.29497053503705, "grad_norm": 0.9013683714979126, "learning_rate": 8.269269723999254e-06, "loss": 0.1667, "step": 10111 }, { "epoch": 0.29499970826769356, "grad_norm": 0.9022818034859249, "learning_rate": 8.268912255901787e-06, "loss": 0.1521, "step": 10112 }, { "epoch": 0.2950288814983371, "grad_norm": 0.8998209867337773, "learning_rate": 8.268554758620251e-06, "loss": 0.1366, "step": 10113 }, { "epoch": 0.29505805472898067, "grad_norm": 0.9093486643780566, "learning_rate": 8.268197232157838e-06, "loss": 0.1428, "step": 10114 }, { "epoch": 0.2950872279596243, "grad_norm": 0.8043344415927015, "learning_rate": 8.26783967651774e-06, "loss": 0.1538, "step": 10115 }, { "epoch": 0.29511640119026783, "grad_norm": 0.708856084485211, "learning_rate": 8.267482091703149e-06, "loss": 0.1394, "step": 10116 }, { "epoch": 0.2951455744209114, "grad_norm": 1.2013644115603497, "learning_rate": 8.26712447771726e-06, "loss": 0.1527, "step": 10117 }, { "epoch": 0.29517474765155494, "grad_norm": 0.9668843632885805, "learning_rate": 8.266766834563262e-06, "loss": 0.16, "step": 10118 }, { "epoch": 0.2952039208821985, "grad_norm": 0.82648255757909, "learning_rate": 8.266409162244349e-06, "loss": 0.167, "step": 10119 }, { "epoch": 0.29523309411284204, "grad_norm": 0.8733450903456376, "learning_rate": 8.266051460763715e-06, "loss": 0.1487, "step": 10120 }, { "epoch": 0.2952622673434856, "grad_norm": 1.0592545546496737, "learning_rate": 8.265693730124554e-06, "loss": 0.1856, "step": 10121 }, { "epoch": 0.2952914405741292, "grad_norm": 0.8973038592028657, "learning_rate": 8.26533597033006e-06, "loss": 0.1414, "step": 10122 }, { "epoch": 0.29532061380477276, "grad_norm": 0.9415657241169062, "learning_rate": 8.264978181383423e-06, "loss": 0.167, "step": 10123 }, { "epoch": 0.2953497870354163, "grad_norm": 1.0662482061733354, "learning_rate": 8.264620363287844e-06, "loss": 0.1469, "step": 10124 }, { "epoch": 0.29537896026605986, "grad_norm": 0.954724171184433, "learning_rate": 8.26426251604651e-06, "loss": 0.1817, "step": 10125 }, { "epoch": 0.2954081334967034, "grad_norm": 0.9343136494408995, "learning_rate": 8.26390463966262e-06, "loss": 0.1478, "step": 10126 }, { "epoch": 0.29543730672734697, "grad_norm": 0.9635716826314182, "learning_rate": 8.263546734139372e-06, "loss": 0.1444, "step": 10127 }, { "epoch": 0.2954664799579905, "grad_norm": 0.7939738464961386, "learning_rate": 8.263188799479955e-06, "loss": 0.1521, "step": 10128 }, { "epoch": 0.29549565318863413, "grad_norm": 0.9325713030748594, "learning_rate": 8.262830835687568e-06, "loss": 0.1633, "step": 10129 }, { "epoch": 0.2955248264192777, "grad_norm": 1.0552677541169195, "learning_rate": 8.262472842765405e-06, "loss": 0.1472, "step": 10130 }, { "epoch": 0.29555399964992124, "grad_norm": 0.9015243805482912, "learning_rate": 8.262114820716665e-06, "loss": 0.1473, "step": 10131 }, { "epoch": 0.2955831728805648, "grad_norm": 0.7980512752168968, "learning_rate": 8.261756769544541e-06, "loss": 0.1392, "step": 10132 }, { "epoch": 0.29561234611120835, "grad_norm": 1.1291745660181034, "learning_rate": 8.261398689252234e-06, "loss": 0.1683, "step": 10133 }, { "epoch": 0.2956415193418519, "grad_norm": 1.0890608310415952, "learning_rate": 8.261040579842933e-06, "loss": 0.161, "step": 10134 }, { "epoch": 0.29567069257249545, "grad_norm": 0.8316305480572167, "learning_rate": 8.260682441319845e-06, "loss": 0.1539, "step": 10135 }, { "epoch": 0.29569986580313906, "grad_norm": 0.9928843511492947, "learning_rate": 8.26032427368616e-06, "loss": 0.1514, "step": 10136 }, { "epoch": 0.2957290390337826, "grad_norm": 0.8259319623286105, "learning_rate": 8.25996607694508e-06, "loss": 0.1602, "step": 10137 }, { "epoch": 0.29575821226442617, "grad_norm": 0.9239118835587717, "learning_rate": 8.2596078510998e-06, "loss": 0.1593, "step": 10138 }, { "epoch": 0.2957873854950697, "grad_norm": 0.8069166392881194, "learning_rate": 8.259249596153521e-06, "loss": 0.1392, "step": 10139 }, { "epoch": 0.2958165587257133, "grad_norm": 0.9033003788543029, "learning_rate": 8.258891312109435e-06, "loss": 0.1503, "step": 10140 }, { "epoch": 0.2958457319563568, "grad_norm": 0.6946109693948403, "learning_rate": 8.25853299897075e-06, "loss": 0.1545, "step": 10141 }, { "epoch": 0.2958749051870004, "grad_norm": 0.9662825112047134, "learning_rate": 8.258174656740659e-06, "loss": 0.1629, "step": 10142 }, { "epoch": 0.295904078417644, "grad_norm": 0.7973506026688649, "learning_rate": 8.257816285422362e-06, "loss": 0.1412, "step": 10143 }, { "epoch": 0.29593325164828754, "grad_norm": 0.7062174155878412, "learning_rate": 8.257457885019059e-06, "loss": 0.1593, "step": 10144 }, { "epoch": 0.2959624248789311, "grad_norm": 0.8000916472159385, "learning_rate": 8.25709945553395e-06, "loss": 0.1429, "step": 10145 }, { "epoch": 0.29599159810957465, "grad_norm": 0.9658574160510581, "learning_rate": 8.256740996970233e-06, "loss": 0.1582, "step": 10146 }, { "epoch": 0.2960207713402182, "grad_norm": 0.6404408032298828, "learning_rate": 8.256382509331111e-06, "loss": 0.1401, "step": 10147 }, { "epoch": 0.29604994457086176, "grad_norm": 0.7047901170856019, "learning_rate": 8.256023992619784e-06, "loss": 0.1565, "step": 10148 }, { "epoch": 0.29607911780150536, "grad_norm": 0.9099826941267748, "learning_rate": 8.255665446839452e-06, "loss": 0.1507, "step": 10149 }, { "epoch": 0.2961082910321489, "grad_norm": 0.7380877626178057, "learning_rate": 8.255306871993314e-06, "loss": 0.1369, "step": 10150 }, { "epoch": 0.29613746426279247, "grad_norm": 0.7092539338751792, "learning_rate": 8.254948268084577e-06, "loss": 0.1561, "step": 10151 }, { "epoch": 0.296166637493436, "grad_norm": 0.8490075569884373, "learning_rate": 8.254589635116436e-06, "loss": 0.1482, "step": 10152 }, { "epoch": 0.2961958107240796, "grad_norm": 0.7187741221837821, "learning_rate": 8.254230973092097e-06, "loss": 0.1334, "step": 10153 }, { "epoch": 0.29622498395472313, "grad_norm": 0.7570635529022741, "learning_rate": 8.253872282014759e-06, "loss": 0.1869, "step": 10154 }, { "epoch": 0.2962541571853667, "grad_norm": 0.7745372825322814, "learning_rate": 8.253513561887627e-06, "loss": 0.148, "step": 10155 }, { "epoch": 0.2962833304160103, "grad_norm": 0.9402534676032971, "learning_rate": 8.253154812713903e-06, "loss": 0.1736, "step": 10156 }, { "epoch": 0.29631250364665385, "grad_norm": 0.9180733339274976, "learning_rate": 8.252796034496791e-06, "loss": 0.1718, "step": 10157 }, { "epoch": 0.2963416768772974, "grad_norm": 0.8470308732823799, "learning_rate": 8.252437227239489e-06, "loss": 0.1631, "step": 10158 }, { "epoch": 0.29637085010794095, "grad_norm": 0.7592252529268745, "learning_rate": 8.252078390945206e-06, "loss": 0.1559, "step": 10159 }, { "epoch": 0.2964000233385845, "grad_norm": 0.8559338330101544, "learning_rate": 8.251719525617144e-06, "loss": 0.1336, "step": 10160 }, { "epoch": 0.29642919656922806, "grad_norm": 0.8125242378958871, "learning_rate": 8.251360631258503e-06, "loss": 0.1435, "step": 10161 }, { "epoch": 0.2964583697998716, "grad_norm": 0.657284100177316, "learning_rate": 8.251001707872495e-06, "loss": 0.1442, "step": 10162 }, { "epoch": 0.2964875430305152, "grad_norm": 0.6370550522371218, "learning_rate": 8.250642755462318e-06, "loss": 0.1543, "step": 10163 }, { "epoch": 0.2965167162611588, "grad_norm": 0.7726338232353873, "learning_rate": 8.250283774031175e-06, "loss": 0.1427, "step": 10164 }, { "epoch": 0.29654588949180233, "grad_norm": 0.8420372492225615, "learning_rate": 8.249924763582278e-06, "loss": 0.175, "step": 10165 }, { "epoch": 0.2965750627224459, "grad_norm": 0.8813049245433957, "learning_rate": 8.249565724118828e-06, "loss": 0.1377, "step": 10166 }, { "epoch": 0.29660423595308943, "grad_norm": 0.7851547264469557, "learning_rate": 8.249206655644032e-06, "loss": 0.1731, "step": 10167 }, { "epoch": 0.296633409183733, "grad_norm": 0.9119747534119117, "learning_rate": 8.248847558161093e-06, "loss": 0.1742, "step": 10168 }, { "epoch": 0.29666258241437654, "grad_norm": 0.9045248243802394, "learning_rate": 8.248488431673221e-06, "loss": 0.1656, "step": 10169 }, { "epoch": 0.29669175564502015, "grad_norm": 0.8469130071152402, "learning_rate": 8.248129276183616e-06, "loss": 0.1624, "step": 10170 }, { "epoch": 0.2967209288756637, "grad_norm": 0.7706688757352926, "learning_rate": 8.247770091695491e-06, "loss": 0.1475, "step": 10171 }, { "epoch": 0.29675010210630726, "grad_norm": 0.9874774742372416, "learning_rate": 8.24741087821205e-06, "loss": 0.152, "step": 10172 }, { "epoch": 0.2967792753369508, "grad_norm": 0.9054971824276945, "learning_rate": 8.247051635736498e-06, "loss": 0.1824, "step": 10173 }, { "epoch": 0.29680844856759436, "grad_norm": 0.9128203976783008, "learning_rate": 8.246692364272045e-06, "loss": 0.1541, "step": 10174 }, { "epoch": 0.2968376217982379, "grad_norm": 1.1635042957392343, "learning_rate": 8.246333063821898e-06, "loss": 0.1382, "step": 10175 }, { "epoch": 0.2968667950288815, "grad_norm": 0.9255205883852237, "learning_rate": 8.245973734389263e-06, "loss": 0.18, "step": 10176 }, { "epoch": 0.2968959682595251, "grad_norm": 0.7999807333850611, "learning_rate": 8.24561437597735e-06, "loss": 0.1883, "step": 10177 }, { "epoch": 0.29692514149016863, "grad_norm": 0.9396477424366436, "learning_rate": 8.24525498858937e-06, "loss": 0.1387, "step": 10178 }, { "epoch": 0.2969543147208122, "grad_norm": 1.026522258031758, "learning_rate": 8.244895572228524e-06, "loss": 0.1718, "step": 10179 }, { "epoch": 0.29698348795145574, "grad_norm": 0.8259889089290392, "learning_rate": 8.244536126898025e-06, "loss": 0.177, "step": 10180 }, { "epoch": 0.2970126611820993, "grad_norm": 0.9117572925297596, "learning_rate": 8.244176652601084e-06, "loss": 0.1825, "step": 10181 }, { "epoch": 0.29704183441274284, "grad_norm": 0.8321216196138452, "learning_rate": 8.243817149340906e-06, "loss": 0.1501, "step": 10182 }, { "epoch": 0.29707100764338645, "grad_norm": 0.7396207053227941, "learning_rate": 8.243457617120705e-06, "loss": 0.1601, "step": 10183 }, { "epoch": 0.29710018087403, "grad_norm": 0.9472317680086649, "learning_rate": 8.243098055943687e-06, "loss": 0.1638, "step": 10184 }, { "epoch": 0.29712935410467356, "grad_norm": 0.7518333084253055, "learning_rate": 8.242738465813066e-06, "loss": 0.1533, "step": 10185 }, { "epoch": 0.2971585273353171, "grad_norm": 0.7751418699107506, "learning_rate": 8.242378846732048e-06, "loss": 0.1578, "step": 10186 }, { "epoch": 0.29718770056596067, "grad_norm": 0.7512033072350223, "learning_rate": 8.242019198703848e-06, "loss": 0.1619, "step": 10187 }, { "epoch": 0.2972168737966042, "grad_norm": 0.8138719776250573, "learning_rate": 8.241659521731672e-06, "loss": 0.1805, "step": 10188 }, { "epoch": 0.2972460470272478, "grad_norm": 0.705947187282989, "learning_rate": 8.241299815818735e-06, "loss": 0.1452, "step": 10189 }, { "epoch": 0.2972752202578914, "grad_norm": 0.8837392994603281, "learning_rate": 8.240940080968247e-06, "loss": 0.1459, "step": 10190 }, { "epoch": 0.29730439348853493, "grad_norm": 0.7240890297328977, "learning_rate": 8.240580317183419e-06, "loss": 0.1457, "step": 10191 }, { "epoch": 0.2973335667191785, "grad_norm": 0.7813744532725917, "learning_rate": 8.240220524467464e-06, "loss": 0.1622, "step": 10192 }, { "epoch": 0.29736273994982204, "grad_norm": 0.9073252697563148, "learning_rate": 8.239860702823595e-06, "loss": 0.1398, "step": 10193 }, { "epoch": 0.2973919131804656, "grad_norm": 0.7897438605174113, "learning_rate": 8.23950085225502e-06, "loss": 0.1438, "step": 10194 }, { "epoch": 0.29742108641110915, "grad_norm": 1.2624818808897202, "learning_rate": 8.239140972764956e-06, "loss": 0.1853, "step": 10195 }, { "epoch": 0.2974502596417527, "grad_norm": 1.672777151134189, "learning_rate": 8.238781064356616e-06, "loss": 0.1582, "step": 10196 }, { "epoch": 0.2974794328723963, "grad_norm": 0.8680324500789193, "learning_rate": 8.238421127033209e-06, "loss": 0.1537, "step": 10197 }, { "epoch": 0.29750860610303986, "grad_norm": 0.7200645621115024, "learning_rate": 8.238061160797955e-06, "loss": 0.1411, "step": 10198 }, { "epoch": 0.2975377793336834, "grad_norm": 0.9405584044150864, "learning_rate": 8.237701165654061e-06, "loss": 0.143, "step": 10199 }, { "epoch": 0.29756695256432697, "grad_norm": 0.904492235212471, "learning_rate": 8.237341141604744e-06, "loss": 0.1489, "step": 10200 }, { "epoch": 0.2975961257949705, "grad_norm": 0.8956774962792353, "learning_rate": 8.23698108865322e-06, "loss": 0.147, "step": 10201 }, { "epoch": 0.2976252990256141, "grad_norm": 0.8081462348216187, "learning_rate": 8.2366210068027e-06, "loss": 0.1521, "step": 10202 }, { "epoch": 0.2976544722562577, "grad_norm": 0.8383484266872394, "learning_rate": 8.2362608960564e-06, "loss": 0.138, "step": 10203 }, { "epoch": 0.29768364548690124, "grad_norm": 0.7245719642499691, "learning_rate": 8.235900756417536e-06, "loss": 0.141, "step": 10204 }, { "epoch": 0.2977128187175448, "grad_norm": 0.8899043513103961, "learning_rate": 8.235540587889323e-06, "loss": 0.1451, "step": 10205 }, { "epoch": 0.29774199194818834, "grad_norm": 0.8104554293482947, "learning_rate": 8.235180390474974e-06, "loss": 0.169, "step": 10206 }, { "epoch": 0.2977711651788319, "grad_norm": 0.9087468555306801, "learning_rate": 8.23482016417771e-06, "loss": 0.1293, "step": 10207 }, { "epoch": 0.29780033840947545, "grad_norm": 0.7162043882960849, "learning_rate": 8.234459909000743e-06, "loss": 0.1351, "step": 10208 }, { "epoch": 0.297829511640119, "grad_norm": 0.7840134994351502, "learning_rate": 8.234099624947289e-06, "loss": 0.1674, "step": 10209 }, { "epoch": 0.2978586848707626, "grad_norm": 1.0159502554130149, "learning_rate": 8.233739312020565e-06, "loss": 0.1701, "step": 10210 }, { "epoch": 0.29788785810140617, "grad_norm": 0.8907157936985476, "learning_rate": 8.233378970223789e-06, "loss": 0.1721, "step": 10211 }, { "epoch": 0.2979170313320497, "grad_norm": 0.6892916911517295, "learning_rate": 8.23301859956018e-06, "loss": 0.1455, "step": 10212 }, { "epoch": 0.2979462045626933, "grad_norm": 0.7885020635472038, "learning_rate": 8.232658200032948e-06, "loss": 0.1685, "step": 10213 }, { "epoch": 0.2979753777933368, "grad_norm": 0.9534180734690023, "learning_rate": 8.232297771645318e-06, "loss": 0.1571, "step": 10214 }, { "epoch": 0.2980045510239804, "grad_norm": 0.7569478004758581, "learning_rate": 8.231937314400505e-06, "loss": 0.1542, "step": 10215 }, { "epoch": 0.29803372425462393, "grad_norm": 0.805664931552756, "learning_rate": 8.231576828301725e-06, "loss": 0.1614, "step": 10216 }, { "epoch": 0.29806289748526754, "grad_norm": 0.7313367021495826, "learning_rate": 8.2312163133522e-06, "loss": 0.1556, "step": 10217 }, { "epoch": 0.2980920707159111, "grad_norm": 0.7169676352550582, "learning_rate": 8.23085576955515e-06, "loss": 0.1415, "step": 10218 }, { "epoch": 0.29812124394655465, "grad_norm": 0.6631959426055198, "learning_rate": 8.230495196913788e-06, "loss": 0.1578, "step": 10219 }, { "epoch": 0.2981504171771982, "grad_norm": 0.813108070470013, "learning_rate": 8.230134595431337e-06, "loss": 0.1515, "step": 10220 }, { "epoch": 0.29817959040784175, "grad_norm": 0.8102880711543592, "learning_rate": 8.229773965111014e-06, "loss": 0.1363, "step": 10221 }, { "epoch": 0.2982087636384853, "grad_norm": 0.7438666778322964, "learning_rate": 8.229413305956043e-06, "loss": 0.1604, "step": 10222 }, { "epoch": 0.29823793686912886, "grad_norm": 0.8293960771238174, "learning_rate": 8.229052617969637e-06, "loss": 0.1505, "step": 10223 }, { "epoch": 0.29826711009977247, "grad_norm": 0.7914743911608186, "learning_rate": 8.228691901155022e-06, "loss": 0.1632, "step": 10224 }, { "epoch": 0.298296283330416, "grad_norm": 0.7882176192556702, "learning_rate": 8.228331155515417e-06, "loss": 0.146, "step": 10225 }, { "epoch": 0.2983254565610596, "grad_norm": 0.7575265575180946, "learning_rate": 8.227970381054042e-06, "loss": 0.1801, "step": 10226 }, { "epoch": 0.29835462979170313, "grad_norm": 0.6871519850408472, "learning_rate": 8.227609577774116e-06, "loss": 0.125, "step": 10227 }, { "epoch": 0.2983838030223467, "grad_norm": 1.143200474102427, "learning_rate": 8.227248745678865e-06, "loss": 0.1453, "step": 10228 }, { "epoch": 0.29841297625299024, "grad_norm": 0.8561203769958857, "learning_rate": 8.226887884771506e-06, "loss": 0.1404, "step": 10229 }, { "epoch": 0.29844214948363385, "grad_norm": 0.8693956702410475, "learning_rate": 8.226526995055263e-06, "loss": 0.1463, "step": 10230 }, { "epoch": 0.2984713227142774, "grad_norm": 0.8539535376655861, "learning_rate": 8.226166076533357e-06, "loss": 0.1588, "step": 10231 }, { "epoch": 0.29850049594492095, "grad_norm": 0.73328261929883, "learning_rate": 8.22580512920901e-06, "loss": 0.1264, "step": 10232 }, { "epoch": 0.2985296691755645, "grad_norm": 0.9856024289609955, "learning_rate": 8.225444153085445e-06, "loss": 0.1547, "step": 10233 }, { "epoch": 0.29855884240620806, "grad_norm": 0.6253266718298031, "learning_rate": 8.225083148165885e-06, "loss": 0.1387, "step": 10234 }, { "epoch": 0.2985880156368516, "grad_norm": 0.8751337165851122, "learning_rate": 8.224722114453553e-06, "loss": 0.1614, "step": 10235 }, { "epoch": 0.29861718886749516, "grad_norm": 0.7518899303442966, "learning_rate": 8.22436105195167e-06, "loss": 0.1718, "step": 10236 }, { "epoch": 0.2986463620981388, "grad_norm": 0.7498274503018532, "learning_rate": 8.223999960663463e-06, "loss": 0.1501, "step": 10237 }, { "epoch": 0.2986755353287823, "grad_norm": 0.7797355591447336, "learning_rate": 8.223638840592154e-06, "loss": 0.1387, "step": 10238 }, { "epoch": 0.2987047085594259, "grad_norm": 0.9011564981490462, "learning_rate": 8.223277691740966e-06, "loss": 0.1606, "step": 10239 }, { "epoch": 0.29873388179006943, "grad_norm": 1.04411022699535, "learning_rate": 8.222916514113125e-06, "loss": 0.1498, "step": 10240 }, { "epoch": 0.298763055020713, "grad_norm": 0.7183070537426233, "learning_rate": 8.222555307711852e-06, "loss": 0.1487, "step": 10241 }, { "epoch": 0.29879222825135654, "grad_norm": 0.8366805257443651, "learning_rate": 8.222194072540377e-06, "loss": 0.1476, "step": 10242 }, { "epoch": 0.2988214014820001, "grad_norm": 0.774981856596977, "learning_rate": 8.221832808601925e-06, "loss": 0.1492, "step": 10243 }, { "epoch": 0.2988505747126437, "grad_norm": 0.9555939794967316, "learning_rate": 8.221471515899714e-06, "loss": 0.1579, "step": 10244 }, { "epoch": 0.29887974794328726, "grad_norm": 0.8608671731454439, "learning_rate": 8.221110194436976e-06, "loss": 0.1329, "step": 10245 }, { "epoch": 0.2989089211739308, "grad_norm": 0.6972785003151737, "learning_rate": 8.220748844216936e-06, "loss": 0.1436, "step": 10246 }, { "epoch": 0.29893809440457436, "grad_norm": 0.7262080941899789, "learning_rate": 8.220387465242819e-06, "loss": 0.1566, "step": 10247 }, { "epoch": 0.2989672676352179, "grad_norm": 0.8696697212962222, "learning_rate": 8.22002605751785e-06, "loss": 0.1439, "step": 10248 }, { "epoch": 0.29899644086586147, "grad_norm": 0.8963960824103208, "learning_rate": 8.219664621045258e-06, "loss": 0.1463, "step": 10249 }, { "epoch": 0.299025614096505, "grad_norm": 0.9134689373427836, "learning_rate": 8.21930315582827e-06, "loss": 0.1756, "step": 10250 }, { "epoch": 0.29905478732714863, "grad_norm": 0.791892052304756, "learning_rate": 8.21894166187011e-06, "loss": 0.1523, "step": 10251 }, { "epoch": 0.2990839605577922, "grad_norm": 0.8090637785611028, "learning_rate": 8.21858013917401e-06, "loss": 0.1562, "step": 10252 }, { "epoch": 0.29911313378843574, "grad_norm": 1.021161273179451, "learning_rate": 8.218218587743192e-06, "loss": 0.1435, "step": 10253 }, { "epoch": 0.2991423070190793, "grad_norm": 0.7197261281266153, "learning_rate": 8.217857007580888e-06, "loss": 0.1464, "step": 10254 }, { "epoch": 0.29917148024972284, "grad_norm": 0.9457110521780498, "learning_rate": 8.217495398690324e-06, "loss": 0.1701, "step": 10255 }, { "epoch": 0.2992006534803664, "grad_norm": 0.9163208966796982, "learning_rate": 8.21713376107473e-06, "loss": 0.1465, "step": 10256 }, { "epoch": 0.29922982671100995, "grad_norm": 0.8249674709456052, "learning_rate": 8.216772094737332e-06, "loss": 0.1586, "step": 10257 }, { "epoch": 0.29925899994165356, "grad_norm": 0.8585279812762839, "learning_rate": 8.216410399681365e-06, "loss": 0.1451, "step": 10258 }, { "epoch": 0.2992881731722971, "grad_norm": 0.6816281737593278, "learning_rate": 8.21604867591005e-06, "loss": 0.1662, "step": 10259 }, { "epoch": 0.29931734640294066, "grad_norm": 0.9843714814859522, "learning_rate": 8.215686923426622e-06, "loss": 0.1723, "step": 10260 }, { "epoch": 0.2993465196335842, "grad_norm": 0.7630391592359387, "learning_rate": 8.215325142234307e-06, "loss": 0.1229, "step": 10261 }, { "epoch": 0.29937569286422777, "grad_norm": 0.9016404250362807, "learning_rate": 8.214963332336339e-06, "loss": 0.1385, "step": 10262 }, { "epoch": 0.2994048660948713, "grad_norm": 0.8493932257954774, "learning_rate": 8.214601493735942e-06, "loss": 0.1648, "step": 10263 }, { "epoch": 0.29943403932551493, "grad_norm": 0.9210046561471733, "learning_rate": 8.214239626436354e-06, "loss": 0.1739, "step": 10264 }, { "epoch": 0.2994632125561585, "grad_norm": 0.7523427517724842, "learning_rate": 8.2138777304408e-06, "loss": 0.1584, "step": 10265 }, { "epoch": 0.29949238578680204, "grad_norm": 0.8284269398140005, "learning_rate": 8.213515805752513e-06, "loss": 0.1504, "step": 10266 }, { "epoch": 0.2995215590174456, "grad_norm": 0.7850184968983689, "learning_rate": 8.213153852374726e-06, "loss": 0.1479, "step": 10267 }, { "epoch": 0.29955073224808915, "grad_norm": 0.9958689700645952, "learning_rate": 8.212791870310665e-06, "loss": 0.1479, "step": 10268 }, { "epoch": 0.2995799054787327, "grad_norm": 0.7823202795779842, "learning_rate": 8.212429859563569e-06, "loss": 0.1367, "step": 10269 }, { "epoch": 0.29960907870937625, "grad_norm": 0.9517726455881705, "learning_rate": 8.212067820136663e-06, "loss": 0.1482, "step": 10270 }, { "epoch": 0.29963825194001986, "grad_norm": 0.747457871674271, "learning_rate": 8.211705752033183e-06, "loss": 0.1494, "step": 10271 }, { "epoch": 0.2996674251706634, "grad_norm": 0.6483071425870026, "learning_rate": 8.211343655256361e-06, "loss": 0.1229, "step": 10272 }, { "epoch": 0.29969659840130697, "grad_norm": 0.9479736087280959, "learning_rate": 8.210981529809432e-06, "loss": 0.1444, "step": 10273 }, { "epoch": 0.2997257716319505, "grad_norm": 0.6231169873050983, "learning_rate": 8.210619375695622e-06, "loss": 0.15, "step": 10274 }, { "epoch": 0.2997549448625941, "grad_norm": 0.8844057892049493, "learning_rate": 8.210257192918172e-06, "loss": 0.1386, "step": 10275 }, { "epoch": 0.29978411809323763, "grad_norm": 0.7838326925631378, "learning_rate": 8.20989498148031e-06, "loss": 0.1394, "step": 10276 }, { "epoch": 0.2998132913238812, "grad_norm": 0.7596321222748872, "learning_rate": 8.209532741385273e-06, "loss": 0.14, "step": 10277 }, { "epoch": 0.2998424645545248, "grad_norm": 0.7995005177917673, "learning_rate": 8.209170472636293e-06, "loss": 0.1774, "step": 10278 }, { "epoch": 0.29987163778516834, "grad_norm": 0.8847720040873738, "learning_rate": 8.208808175236607e-06, "loss": 0.1642, "step": 10279 }, { "epoch": 0.2999008110158119, "grad_norm": 0.7847791299400547, "learning_rate": 8.208445849189445e-06, "loss": 0.1556, "step": 10280 }, { "epoch": 0.29992998424645545, "grad_norm": 0.8683120041271374, "learning_rate": 8.208083494498045e-06, "loss": 0.1385, "step": 10281 }, { "epoch": 0.299959157477099, "grad_norm": 0.9240785679067222, "learning_rate": 8.207721111165643e-06, "loss": 0.1565, "step": 10282 }, { "epoch": 0.29998833070774256, "grad_norm": 0.7920139812191519, "learning_rate": 8.207358699195471e-06, "loss": 0.1437, "step": 10283 }, { "epoch": 0.3000175039383861, "grad_norm": 1.0384454371715393, "learning_rate": 8.206996258590767e-06, "loss": 0.1624, "step": 10284 }, { "epoch": 0.3000466771690297, "grad_norm": 1.1790725332448648, "learning_rate": 8.206633789354766e-06, "loss": 0.147, "step": 10285 }, { "epoch": 0.30007585039967327, "grad_norm": 0.7010278425790094, "learning_rate": 8.206271291490704e-06, "loss": 0.1624, "step": 10286 }, { "epoch": 0.3001050236303168, "grad_norm": 1.2912617849744015, "learning_rate": 8.205908765001817e-06, "loss": 0.1483, "step": 10287 }, { "epoch": 0.3001341968609604, "grad_norm": 1.1345470992794995, "learning_rate": 8.205546209891341e-06, "loss": 0.1336, "step": 10288 }, { "epoch": 0.30016337009160393, "grad_norm": 0.7795416089869779, "learning_rate": 8.205183626162515e-06, "loss": 0.1351, "step": 10289 }, { "epoch": 0.3001925433222475, "grad_norm": 0.9744407377689432, "learning_rate": 8.204821013818576e-06, "loss": 0.1624, "step": 10290 }, { "epoch": 0.3002217165528911, "grad_norm": 1.1623602615663815, "learning_rate": 8.204458372862757e-06, "loss": 0.1511, "step": 10291 }, { "epoch": 0.30025088978353465, "grad_norm": 1.5405276564005022, "learning_rate": 8.2040957032983e-06, "loss": 0.1634, "step": 10292 }, { "epoch": 0.3002800630141782, "grad_norm": 0.8384976869936589, "learning_rate": 8.203733005128443e-06, "loss": 0.1395, "step": 10293 }, { "epoch": 0.30030923624482175, "grad_norm": 1.538099235954295, "learning_rate": 8.203370278356422e-06, "loss": 0.1606, "step": 10294 }, { "epoch": 0.3003384094754653, "grad_norm": 0.9786164778485515, "learning_rate": 8.203007522985474e-06, "loss": 0.1685, "step": 10295 }, { "epoch": 0.30036758270610886, "grad_norm": 0.9661251732423624, "learning_rate": 8.202644739018839e-06, "loss": 0.168, "step": 10296 }, { "epoch": 0.3003967559367524, "grad_norm": 0.6613973036691084, "learning_rate": 8.20228192645976e-06, "loss": 0.1399, "step": 10297 }, { "epoch": 0.300425929167396, "grad_norm": 0.8107857653177807, "learning_rate": 8.201919085311468e-06, "loss": 0.1361, "step": 10298 }, { "epoch": 0.3004551023980396, "grad_norm": 0.9302113463508935, "learning_rate": 8.20155621557721e-06, "loss": 0.154, "step": 10299 }, { "epoch": 0.30048427562868313, "grad_norm": 0.725640049113617, "learning_rate": 8.20119331726022e-06, "loss": 0.1506, "step": 10300 }, { "epoch": 0.3005134488593267, "grad_norm": 1.125724549815784, "learning_rate": 8.200830390363741e-06, "loss": 0.1611, "step": 10301 }, { "epoch": 0.30054262208997023, "grad_norm": 0.987039406035594, "learning_rate": 8.200467434891013e-06, "loss": 0.1486, "step": 10302 }, { "epoch": 0.3005717953206138, "grad_norm": 0.7619418183972609, "learning_rate": 8.200104450845276e-06, "loss": 0.1359, "step": 10303 }, { "epoch": 0.30060096855125734, "grad_norm": 0.9464439971922779, "learning_rate": 8.19974143822977e-06, "loss": 0.1679, "step": 10304 }, { "epoch": 0.30063014178190095, "grad_norm": 0.9102843331183711, "learning_rate": 8.199378397047737e-06, "loss": 0.146, "step": 10305 }, { "epoch": 0.3006593150125445, "grad_norm": 0.8757140850972511, "learning_rate": 8.199015327302416e-06, "loss": 0.1658, "step": 10306 }, { "epoch": 0.30068848824318806, "grad_norm": 1.0102006920627888, "learning_rate": 8.19865222899705e-06, "loss": 0.2044, "step": 10307 }, { "epoch": 0.3007176614738316, "grad_norm": 1.0609644563273757, "learning_rate": 8.198289102134883e-06, "loss": 0.1744, "step": 10308 }, { "epoch": 0.30074683470447516, "grad_norm": 0.938656871725238, "learning_rate": 8.197925946719152e-06, "loss": 0.171, "step": 10309 }, { "epoch": 0.3007760079351187, "grad_norm": 0.8870932026683985, "learning_rate": 8.197562762753102e-06, "loss": 0.1357, "step": 10310 }, { "epoch": 0.30080518116576227, "grad_norm": 0.8816972483532031, "learning_rate": 8.197199550239974e-06, "loss": 0.1605, "step": 10311 }, { "epoch": 0.3008343543964059, "grad_norm": 0.9581892569146573, "learning_rate": 8.196836309183014e-06, "loss": 0.1447, "step": 10312 }, { "epoch": 0.30086352762704943, "grad_norm": 0.8903482159640526, "learning_rate": 8.19647303958546e-06, "loss": 0.1508, "step": 10313 }, { "epoch": 0.300892700857693, "grad_norm": 1.0125581683334, "learning_rate": 8.19610974145056e-06, "loss": 0.1396, "step": 10314 }, { "epoch": 0.30092187408833654, "grad_norm": 0.9137559289052214, "learning_rate": 8.195746414781554e-06, "loss": 0.1712, "step": 10315 }, { "epoch": 0.3009510473189801, "grad_norm": 0.9293851882221098, "learning_rate": 8.195383059581685e-06, "loss": 0.1784, "step": 10316 }, { "epoch": 0.30098022054962364, "grad_norm": 0.7644480786179568, "learning_rate": 8.195019675854201e-06, "loss": 0.1461, "step": 10317 }, { "epoch": 0.30100939378026725, "grad_norm": 0.954902379280209, "learning_rate": 8.194656263602345e-06, "loss": 0.1467, "step": 10318 }, { "epoch": 0.3010385670109108, "grad_norm": 0.9365848295917715, "learning_rate": 8.194292822829359e-06, "loss": 0.1612, "step": 10319 }, { "epoch": 0.30106774024155436, "grad_norm": 0.889638080171425, "learning_rate": 8.19392935353849e-06, "loss": 0.1601, "step": 10320 }, { "epoch": 0.3010969134721979, "grad_norm": 0.900540095228079, "learning_rate": 8.193565855732982e-06, "loss": 0.1455, "step": 10321 }, { "epoch": 0.30112608670284147, "grad_norm": 0.6722095940519582, "learning_rate": 8.193202329416079e-06, "loss": 0.162, "step": 10322 }, { "epoch": 0.301155259933485, "grad_norm": 1.0135786558021573, "learning_rate": 8.19283877459103e-06, "loss": 0.1422, "step": 10323 }, { "epoch": 0.3011844331641286, "grad_norm": 0.9075304908466894, "learning_rate": 8.192475191261078e-06, "loss": 0.1362, "step": 10324 }, { "epoch": 0.3012136063947722, "grad_norm": 0.9742175270809147, "learning_rate": 8.19211157942947e-06, "loss": 0.168, "step": 10325 }, { "epoch": 0.30124277962541574, "grad_norm": 0.8020497956000485, "learning_rate": 8.19174793909945e-06, "loss": 0.1618, "step": 10326 }, { "epoch": 0.3012719528560593, "grad_norm": 0.8269449779623378, "learning_rate": 8.191384270274267e-06, "loss": 0.1741, "step": 10327 }, { "epoch": 0.30130112608670284, "grad_norm": 0.8019645339230919, "learning_rate": 8.191020572957168e-06, "loss": 0.1693, "step": 10328 }, { "epoch": 0.3013302993173464, "grad_norm": 0.8147885163158093, "learning_rate": 8.190656847151399e-06, "loss": 0.1385, "step": 10329 }, { "epoch": 0.30135947254798995, "grad_norm": 0.621541687023796, "learning_rate": 8.190293092860206e-06, "loss": 0.1617, "step": 10330 }, { "epoch": 0.3013886457786335, "grad_norm": 0.7998469469603753, "learning_rate": 8.18992931008684e-06, "loss": 0.171, "step": 10331 }, { "epoch": 0.3014178190092771, "grad_norm": 0.6410746273269085, "learning_rate": 8.189565498834545e-06, "loss": 0.148, "step": 10332 }, { "epoch": 0.30144699223992066, "grad_norm": 0.7768540062680194, "learning_rate": 8.18920165910657e-06, "loss": 0.1833, "step": 10333 }, { "epoch": 0.3014761654705642, "grad_norm": 0.7328658348242143, "learning_rate": 8.188837790906166e-06, "loss": 0.1354, "step": 10334 }, { "epoch": 0.30150533870120777, "grad_norm": 0.6725912272901395, "learning_rate": 8.18847389423658e-06, "loss": 0.1334, "step": 10335 }, { "epoch": 0.3015345119318513, "grad_norm": 0.6057666850636925, "learning_rate": 8.188109969101057e-06, "loss": 0.1394, "step": 10336 }, { "epoch": 0.3015636851624949, "grad_norm": 0.9733823505793422, "learning_rate": 8.187746015502851e-06, "loss": 0.161, "step": 10337 }, { "epoch": 0.30159285839313843, "grad_norm": 0.7831182117923976, "learning_rate": 8.187382033445209e-06, "loss": 0.1712, "step": 10338 }, { "epoch": 0.30162203162378204, "grad_norm": 0.9387900721378101, "learning_rate": 8.187018022931383e-06, "loss": 0.1333, "step": 10339 }, { "epoch": 0.3016512048544256, "grad_norm": 0.882741465257527, "learning_rate": 8.18665398396462e-06, "loss": 0.1591, "step": 10340 }, { "epoch": 0.30168037808506915, "grad_norm": 0.7772272118956491, "learning_rate": 8.186289916548169e-06, "loss": 0.1591, "step": 10341 }, { "epoch": 0.3017095513157127, "grad_norm": 0.7726661431253278, "learning_rate": 8.185925820685283e-06, "loss": 0.152, "step": 10342 }, { "epoch": 0.30173872454635625, "grad_norm": 0.8159915507142284, "learning_rate": 8.185561696379213e-06, "loss": 0.167, "step": 10343 }, { "epoch": 0.3017678977769998, "grad_norm": 0.7668837526204888, "learning_rate": 8.185197543633207e-06, "loss": 0.1474, "step": 10344 }, { "epoch": 0.3017970710076434, "grad_norm": 0.7665248750590504, "learning_rate": 8.18483336245052e-06, "loss": 0.1538, "step": 10345 }, { "epoch": 0.30182624423828697, "grad_norm": 0.8241660718574091, "learning_rate": 8.1844691528344e-06, "loss": 0.1423, "step": 10346 }, { "epoch": 0.3018554174689305, "grad_norm": 0.7919856246679993, "learning_rate": 8.1841049147881e-06, "loss": 0.1321, "step": 10347 }, { "epoch": 0.3018845906995741, "grad_norm": 0.742251352204128, "learning_rate": 8.183740648314871e-06, "loss": 0.1747, "step": 10348 }, { "epoch": 0.3019137639302176, "grad_norm": 0.8004211233653133, "learning_rate": 8.183376353417965e-06, "loss": 0.1432, "step": 10349 }, { "epoch": 0.3019429371608612, "grad_norm": 0.9220228761361982, "learning_rate": 8.183012030100634e-06, "loss": 0.1413, "step": 10350 }, { "epoch": 0.30197211039150473, "grad_norm": 0.656116739552237, "learning_rate": 8.182647678366133e-06, "loss": 0.1532, "step": 10351 }, { "epoch": 0.30200128362214834, "grad_norm": 0.8473274892563499, "learning_rate": 8.182283298217712e-06, "loss": 0.1712, "step": 10352 }, { "epoch": 0.3020304568527919, "grad_norm": 0.9386840779750911, "learning_rate": 8.181918889658626e-06, "loss": 0.175, "step": 10353 }, { "epoch": 0.30205963008343545, "grad_norm": 0.7958658354844564, "learning_rate": 8.18155445269213e-06, "loss": 0.1527, "step": 10354 }, { "epoch": 0.302088803314079, "grad_norm": 0.8519466938766371, "learning_rate": 8.181189987321472e-06, "loss": 0.1483, "step": 10355 }, { "epoch": 0.30211797654472256, "grad_norm": 0.847514887919152, "learning_rate": 8.180825493549911e-06, "loss": 0.1497, "step": 10356 }, { "epoch": 0.3021471497753661, "grad_norm": 0.7475612548723383, "learning_rate": 8.180460971380699e-06, "loss": 0.1508, "step": 10357 }, { "epoch": 0.30217632300600966, "grad_norm": 0.838551162543459, "learning_rate": 8.18009642081709e-06, "loss": 0.1632, "step": 10358 }, { "epoch": 0.30220549623665327, "grad_norm": 0.9474378218034406, "learning_rate": 8.17973184186234e-06, "loss": 0.1561, "step": 10359 }, { "epoch": 0.3022346694672968, "grad_norm": 0.9161082185613947, "learning_rate": 8.179367234519704e-06, "loss": 0.1462, "step": 10360 }, { "epoch": 0.3022638426979404, "grad_norm": 0.8657332818263342, "learning_rate": 8.179002598792435e-06, "loss": 0.1411, "step": 10361 }, { "epoch": 0.30229301592858393, "grad_norm": 0.9058986082532352, "learning_rate": 8.17863793468379e-06, "loss": 0.1604, "step": 10362 }, { "epoch": 0.3023221891592275, "grad_norm": 0.7833653167162915, "learning_rate": 8.178273242197025e-06, "loss": 0.1455, "step": 10363 }, { "epoch": 0.30235136238987104, "grad_norm": 0.752484372612084, "learning_rate": 8.177908521335395e-06, "loss": 0.1505, "step": 10364 }, { "epoch": 0.3023805356205146, "grad_norm": 0.8873062199169135, "learning_rate": 8.177543772102155e-06, "loss": 0.1355, "step": 10365 }, { "epoch": 0.3024097088511582, "grad_norm": 0.8772193812904566, "learning_rate": 8.177178994500564e-06, "loss": 0.1776, "step": 10366 }, { "epoch": 0.30243888208180175, "grad_norm": 0.7696517234416018, "learning_rate": 8.176814188533877e-06, "loss": 0.1451, "step": 10367 }, { "epoch": 0.3024680553124453, "grad_norm": 0.8426114226823732, "learning_rate": 8.17644935420535e-06, "loss": 0.1495, "step": 10368 }, { "epoch": 0.30249722854308886, "grad_norm": 0.8623379632947579, "learning_rate": 8.176084491518245e-06, "loss": 0.1502, "step": 10369 }, { "epoch": 0.3025264017737324, "grad_norm": 0.7152243468324411, "learning_rate": 8.175719600475813e-06, "loss": 0.1406, "step": 10370 }, { "epoch": 0.30255557500437597, "grad_norm": 0.9131510402685786, "learning_rate": 8.175354681081316e-06, "loss": 0.1512, "step": 10371 }, { "epoch": 0.3025847482350195, "grad_norm": 0.8318978801392292, "learning_rate": 8.174989733338009e-06, "loss": 0.1558, "step": 10372 }, { "epoch": 0.3026139214656631, "grad_norm": 0.7242443691616607, "learning_rate": 8.174624757249153e-06, "loss": 0.1456, "step": 10373 }, { "epoch": 0.3026430946963067, "grad_norm": 0.8414540738241345, "learning_rate": 8.174259752818003e-06, "loss": 0.1537, "step": 10374 }, { "epoch": 0.30267226792695023, "grad_norm": 0.7253542838346363, "learning_rate": 8.173894720047821e-06, "loss": 0.1369, "step": 10375 }, { "epoch": 0.3027014411575938, "grad_norm": 0.7510016927922919, "learning_rate": 8.173529658941865e-06, "loss": 0.1605, "step": 10376 }, { "epoch": 0.30273061438823734, "grad_norm": 0.7228815788623432, "learning_rate": 8.173164569503393e-06, "loss": 0.1385, "step": 10377 }, { "epoch": 0.3027597876188809, "grad_norm": 0.947579151274672, "learning_rate": 8.172799451735666e-06, "loss": 0.2099, "step": 10378 }, { "epoch": 0.3027889608495245, "grad_norm": 0.9877209902427233, "learning_rate": 8.17243430564194e-06, "loss": 0.1683, "step": 10379 }, { "epoch": 0.30281813408016806, "grad_norm": 0.8461806348773305, "learning_rate": 8.172069131225481e-06, "loss": 0.1508, "step": 10380 }, { "epoch": 0.3028473073108116, "grad_norm": 1.0047050747683886, "learning_rate": 8.171703928489548e-06, "loss": 0.1471, "step": 10381 }, { "epoch": 0.30287648054145516, "grad_norm": 0.7623453747136857, "learning_rate": 8.171338697437394e-06, "loss": 0.1327, "step": 10382 }, { "epoch": 0.3029056537720987, "grad_norm": 0.7533753878572584, "learning_rate": 8.170973438072289e-06, "loss": 0.1624, "step": 10383 }, { "epoch": 0.30293482700274227, "grad_norm": 0.8229887691219726, "learning_rate": 8.170608150397489e-06, "loss": 0.1401, "step": 10384 }, { "epoch": 0.3029640002333858, "grad_norm": 0.6903285197247508, "learning_rate": 8.170242834416256e-06, "loss": 0.1565, "step": 10385 }, { "epoch": 0.30299317346402943, "grad_norm": 0.8827581146553132, "learning_rate": 8.169877490131852e-06, "loss": 0.1414, "step": 10386 }, { "epoch": 0.303022346694673, "grad_norm": 0.8311994245631472, "learning_rate": 8.16951211754754e-06, "loss": 0.1255, "step": 10387 }, { "epoch": 0.30305151992531654, "grad_norm": 0.9045715324433118, "learning_rate": 8.169146716666578e-06, "loss": 0.1647, "step": 10388 }, { "epoch": 0.3030806931559601, "grad_norm": 0.7350612393133354, "learning_rate": 8.168781287492232e-06, "loss": 0.1418, "step": 10389 }, { "epoch": 0.30310986638660364, "grad_norm": 0.898199394934361, "learning_rate": 8.168415830027762e-06, "loss": 0.1561, "step": 10390 }, { "epoch": 0.3031390396172472, "grad_norm": 0.8829793775558368, "learning_rate": 8.168050344276434e-06, "loss": 0.1499, "step": 10391 }, { "epoch": 0.30316821284789075, "grad_norm": 0.8110120390463106, "learning_rate": 8.167684830241506e-06, "loss": 0.1282, "step": 10392 }, { "epoch": 0.30319738607853436, "grad_norm": 0.7874125730460129, "learning_rate": 8.167319287926247e-06, "loss": 0.1314, "step": 10393 }, { "epoch": 0.3032265593091779, "grad_norm": 0.8854226381926515, "learning_rate": 8.166953717333915e-06, "loss": 0.1629, "step": 10394 }, { "epoch": 0.30325573253982147, "grad_norm": 0.9192712835689084, "learning_rate": 8.166588118467778e-06, "loss": 0.1611, "step": 10395 }, { "epoch": 0.303284905770465, "grad_norm": 1.1209985968149403, "learning_rate": 8.166222491331097e-06, "loss": 0.1592, "step": 10396 }, { "epoch": 0.30331407900110857, "grad_norm": 0.8058659321648536, "learning_rate": 8.165856835927138e-06, "loss": 0.143, "step": 10397 }, { "epoch": 0.3033432522317521, "grad_norm": 0.8609479958453766, "learning_rate": 8.165491152259163e-06, "loss": 0.1882, "step": 10398 }, { "epoch": 0.3033724254623957, "grad_norm": 0.71334878740112, "learning_rate": 8.165125440330443e-06, "loss": 0.1528, "step": 10399 }, { "epoch": 0.3034015986930393, "grad_norm": 0.8731792207689436, "learning_rate": 8.164759700144235e-06, "loss": 0.1399, "step": 10400 }, { "epoch": 0.30343077192368284, "grad_norm": 0.7985744200953458, "learning_rate": 8.16439393170381e-06, "loss": 0.1398, "step": 10401 }, { "epoch": 0.3034599451543264, "grad_norm": 0.8038517488074439, "learning_rate": 8.164028135012429e-06, "loss": 0.1645, "step": 10402 }, { "epoch": 0.30348911838496995, "grad_norm": 0.8762013050995305, "learning_rate": 8.163662310073362e-06, "loss": 0.1447, "step": 10403 }, { "epoch": 0.3035182916156135, "grad_norm": 0.8191903536601476, "learning_rate": 8.163296456889873e-06, "loss": 0.1365, "step": 10404 }, { "epoch": 0.30354746484625705, "grad_norm": 0.7931625614152237, "learning_rate": 8.162930575465228e-06, "loss": 0.1434, "step": 10405 }, { "epoch": 0.30357663807690066, "grad_norm": 0.742726212924904, "learning_rate": 8.162564665802693e-06, "loss": 0.1678, "step": 10406 }, { "epoch": 0.3036058113075442, "grad_norm": 0.8429872191417145, "learning_rate": 8.162198727905536e-06, "loss": 0.1287, "step": 10407 }, { "epoch": 0.30363498453818777, "grad_norm": 0.7898703474677157, "learning_rate": 8.161832761777024e-06, "loss": 0.1505, "step": 10408 }, { "epoch": 0.3036641577688313, "grad_norm": 0.7891222648082646, "learning_rate": 8.161466767420426e-06, "loss": 0.1725, "step": 10409 }, { "epoch": 0.3036933309994749, "grad_norm": 0.8572627779806057, "learning_rate": 8.161100744839004e-06, "loss": 0.1568, "step": 10410 }, { "epoch": 0.30372250423011843, "grad_norm": 0.8907723320209793, "learning_rate": 8.160734694036031e-06, "loss": 0.1539, "step": 10411 }, { "epoch": 0.303751677460762, "grad_norm": 0.8754776835040876, "learning_rate": 8.160368615014771e-06, "loss": 0.1557, "step": 10412 }, { "epoch": 0.3037808506914056, "grad_norm": 0.7988711441501878, "learning_rate": 8.160002507778497e-06, "loss": 0.1639, "step": 10413 }, { "epoch": 0.30381002392204914, "grad_norm": 1.0929980824225913, "learning_rate": 8.159636372330475e-06, "loss": 0.167, "step": 10414 }, { "epoch": 0.3038391971526927, "grad_norm": 1.0097993171818347, "learning_rate": 8.159270208673973e-06, "loss": 0.1495, "step": 10415 }, { "epoch": 0.30386837038333625, "grad_norm": 0.802350305488741, "learning_rate": 8.15890401681226e-06, "loss": 0.1539, "step": 10416 }, { "epoch": 0.3038975436139798, "grad_norm": 0.8516468241319436, "learning_rate": 8.158537796748607e-06, "loss": 0.1612, "step": 10417 }, { "epoch": 0.30392671684462336, "grad_norm": 0.8920408566504613, "learning_rate": 8.158171548486281e-06, "loss": 0.1745, "step": 10418 }, { "epoch": 0.3039558900752669, "grad_norm": 0.7488362337002084, "learning_rate": 8.157805272028557e-06, "loss": 0.1687, "step": 10419 }, { "epoch": 0.3039850633059105, "grad_norm": 0.6807176760151589, "learning_rate": 8.157438967378697e-06, "loss": 0.1696, "step": 10420 }, { "epoch": 0.30401423653655407, "grad_norm": 0.8600650742579082, "learning_rate": 8.157072634539977e-06, "loss": 0.1498, "step": 10421 }, { "epoch": 0.3040434097671976, "grad_norm": 0.7670456470979753, "learning_rate": 8.156706273515667e-06, "loss": 0.1672, "step": 10422 }, { "epoch": 0.3040725829978412, "grad_norm": 0.7617168129050363, "learning_rate": 8.156339884309038e-06, "loss": 0.1683, "step": 10423 }, { "epoch": 0.30410175622848473, "grad_norm": 1.0888207429500198, "learning_rate": 8.155973466923359e-06, "loss": 0.1537, "step": 10424 }, { "epoch": 0.3041309294591283, "grad_norm": 0.9988337315375342, "learning_rate": 8.155607021361903e-06, "loss": 0.1798, "step": 10425 }, { "epoch": 0.30416010268977184, "grad_norm": 0.7287563782009567, "learning_rate": 8.155240547627938e-06, "loss": 0.1542, "step": 10426 }, { "epoch": 0.30418927592041545, "grad_norm": 0.755339563244242, "learning_rate": 8.15487404572474e-06, "loss": 0.1574, "step": 10427 }, { "epoch": 0.304218449151059, "grad_norm": 0.8623245463594157, "learning_rate": 8.154507515655581e-06, "loss": 0.1942, "step": 10428 }, { "epoch": 0.30424762238170255, "grad_norm": 0.7249600682792448, "learning_rate": 8.15414095742373e-06, "loss": 0.1322, "step": 10429 }, { "epoch": 0.3042767956123461, "grad_norm": 0.7004999686107767, "learning_rate": 8.153774371032464e-06, "loss": 0.1464, "step": 10430 }, { "epoch": 0.30430596884298966, "grad_norm": 0.8112538713363775, "learning_rate": 8.15340775648505e-06, "loss": 0.161, "step": 10431 }, { "epoch": 0.3043351420736332, "grad_norm": 0.8591419625633835, "learning_rate": 8.153041113784767e-06, "loss": 0.1521, "step": 10432 }, { "epoch": 0.3043643153042768, "grad_norm": 0.8700572414342698, "learning_rate": 8.152674442934885e-06, "loss": 0.155, "step": 10433 }, { "epoch": 0.3043934885349204, "grad_norm": 0.7908035965519012, "learning_rate": 8.152307743938677e-06, "loss": 0.1467, "step": 10434 }, { "epoch": 0.30442266176556393, "grad_norm": 0.8839608715563377, "learning_rate": 8.151941016799419e-06, "loss": 0.173, "step": 10435 }, { "epoch": 0.3044518349962075, "grad_norm": 0.7360087090301753, "learning_rate": 8.151574261520383e-06, "loss": 0.1415, "step": 10436 }, { "epoch": 0.30448100822685104, "grad_norm": 0.6797613757354378, "learning_rate": 8.151207478104845e-06, "loss": 0.1459, "step": 10437 }, { "epoch": 0.3045101814574946, "grad_norm": 0.9030865435243874, "learning_rate": 8.15084066655608e-06, "loss": 0.1582, "step": 10438 }, { "epoch": 0.30453935468813814, "grad_norm": 0.6909089962920174, "learning_rate": 8.150473826877362e-06, "loss": 0.1622, "step": 10439 }, { "epoch": 0.30456852791878175, "grad_norm": 0.6740257320563846, "learning_rate": 8.150106959071964e-06, "loss": 0.1495, "step": 10440 }, { "epoch": 0.3045977011494253, "grad_norm": 0.9058561789803754, "learning_rate": 8.149740063143164e-06, "loss": 0.1684, "step": 10441 }, { "epoch": 0.30462687438006886, "grad_norm": 0.7488456272778348, "learning_rate": 8.149373139094234e-06, "loss": 0.1491, "step": 10442 }, { "epoch": 0.3046560476107124, "grad_norm": 0.8715300170098201, "learning_rate": 8.149006186928456e-06, "loss": 0.1731, "step": 10443 }, { "epoch": 0.30468522084135596, "grad_norm": 0.6919558722174817, "learning_rate": 8.148639206649102e-06, "loss": 0.1624, "step": 10444 }, { "epoch": 0.3047143940719995, "grad_norm": 0.7923866396440833, "learning_rate": 8.148272198259447e-06, "loss": 0.1586, "step": 10445 }, { "epoch": 0.30474356730264307, "grad_norm": 0.8144296132624399, "learning_rate": 8.14790516176277e-06, "loss": 0.126, "step": 10446 }, { "epoch": 0.3047727405332867, "grad_norm": 0.7364901169217135, "learning_rate": 8.147538097162348e-06, "loss": 0.1407, "step": 10447 }, { "epoch": 0.30480191376393023, "grad_norm": 0.9646854588622465, "learning_rate": 8.147171004461456e-06, "loss": 0.157, "step": 10448 }, { "epoch": 0.3048310869945738, "grad_norm": 0.7595725187358502, "learning_rate": 8.146803883663374e-06, "loss": 0.1316, "step": 10449 }, { "epoch": 0.30486026022521734, "grad_norm": 0.8946622909608126, "learning_rate": 8.146436734771377e-06, "loss": 0.1509, "step": 10450 }, { "epoch": 0.3048894334558609, "grad_norm": 1.0007198457280269, "learning_rate": 8.146069557788745e-06, "loss": 0.1618, "step": 10451 }, { "epoch": 0.30491860668650445, "grad_norm": 0.9129991902060763, "learning_rate": 8.145702352718754e-06, "loss": 0.1501, "step": 10452 }, { "epoch": 0.304947779917148, "grad_norm": 0.8097005749941277, "learning_rate": 8.145335119564683e-06, "loss": 0.1555, "step": 10453 }, { "epoch": 0.3049769531477916, "grad_norm": 0.9170231266424669, "learning_rate": 8.144967858329813e-06, "loss": 0.1869, "step": 10454 }, { "epoch": 0.30500612637843516, "grad_norm": 0.9509365711142003, "learning_rate": 8.14460056901742e-06, "loss": 0.1622, "step": 10455 }, { "epoch": 0.3050352996090787, "grad_norm": 0.9542346725135301, "learning_rate": 8.144233251630782e-06, "loss": 0.1453, "step": 10456 }, { "epoch": 0.30506447283972227, "grad_norm": 0.7056139469173172, "learning_rate": 8.14386590617318e-06, "loss": 0.1483, "step": 10457 }, { "epoch": 0.3050936460703658, "grad_norm": 0.8549857988433522, "learning_rate": 8.143498532647897e-06, "loss": 0.1475, "step": 10458 }, { "epoch": 0.3051228193010094, "grad_norm": 1.0269668522883968, "learning_rate": 8.143131131058208e-06, "loss": 0.16, "step": 10459 }, { "epoch": 0.305151992531653, "grad_norm": 0.7689087023102948, "learning_rate": 8.142763701407392e-06, "loss": 0.1167, "step": 10460 }, { "epoch": 0.30518116576229654, "grad_norm": 1.0460279979785334, "learning_rate": 8.142396243698735e-06, "loss": 0.1608, "step": 10461 }, { "epoch": 0.3052103389929401, "grad_norm": 0.9350770804843989, "learning_rate": 8.142028757935512e-06, "loss": 0.1536, "step": 10462 }, { "epoch": 0.30523951222358364, "grad_norm": 1.0405166274056603, "learning_rate": 8.141661244121008e-06, "loss": 0.1754, "step": 10463 }, { "epoch": 0.3052686854542272, "grad_norm": 0.6683346329780173, "learning_rate": 8.141293702258503e-06, "loss": 0.1338, "step": 10464 }, { "epoch": 0.30529785868487075, "grad_norm": 0.6545227732087646, "learning_rate": 8.140926132351276e-06, "loss": 0.1357, "step": 10465 }, { "epoch": 0.3053270319155143, "grad_norm": 0.8560348701733025, "learning_rate": 8.140558534402612e-06, "loss": 0.1387, "step": 10466 }, { "epoch": 0.3053562051461579, "grad_norm": 0.6970725379754402, "learning_rate": 8.14019090841579e-06, "loss": 0.1431, "step": 10467 }, { "epoch": 0.30538537837680146, "grad_norm": 0.7710512787846989, "learning_rate": 8.139823254394093e-06, "loss": 0.1352, "step": 10468 }, { "epoch": 0.305414551607445, "grad_norm": 0.8962857019792468, "learning_rate": 8.139455572340805e-06, "loss": 0.1635, "step": 10469 }, { "epoch": 0.30544372483808857, "grad_norm": 0.905799204557261, "learning_rate": 8.139087862259207e-06, "loss": 0.1646, "step": 10470 }, { "epoch": 0.3054728980687321, "grad_norm": 0.8407657457303579, "learning_rate": 8.138720124152579e-06, "loss": 0.1542, "step": 10471 }, { "epoch": 0.3055020712993757, "grad_norm": 0.8183441760590756, "learning_rate": 8.13835235802421e-06, "loss": 0.1626, "step": 10472 }, { "epoch": 0.30553124453001923, "grad_norm": 0.763458797159557, "learning_rate": 8.137984563877379e-06, "loss": 0.1529, "step": 10473 }, { "epoch": 0.30556041776066284, "grad_norm": 0.937601898474738, "learning_rate": 8.137616741715371e-06, "loss": 0.1529, "step": 10474 }, { "epoch": 0.3055895909913064, "grad_norm": 0.9490012727975684, "learning_rate": 8.137248891541471e-06, "loss": 0.1453, "step": 10475 }, { "epoch": 0.30561876422194995, "grad_norm": 0.8531084592155752, "learning_rate": 8.136881013358961e-06, "loss": 0.1378, "step": 10476 }, { "epoch": 0.3056479374525935, "grad_norm": 0.8723797164563764, "learning_rate": 8.136513107171125e-06, "loss": 0.1579, "step": 10477 }, { "epoch": 0.30567711068323705, "grad_norm": 0.8086968325029635, "learning_rate": 8.13614517298125e-06, "loss": 0.161, "step": 10478 }, { "epoch": 0.3057062839138806, "grad_norm": 0.8715856525622632, "learning_rate": 8.13577721079262e-06, "loss": 0.146, "step": 10479 }, { "epoch": 0.30573545714452416, "grad_norm": 0.7591362339179039, "learning_rate": 8.13540922060852e-06, "loss": 0.1557, "step": 10480 }, { "epoch": 0.30576463037516777, "grad_norm": 0.8213478670060186, "learning_rate": 8.135041202432233e-06, "loss": 0.127, "step": 10481 }, { "epoch": 0.3057938036058113, "grad_norm": 0.8292046129731733, "learning_rate": 8.134673156267048e-06, "loss": 0.1474, "step": 10482 }, { "epoch": 0.3058229768364549, "grad_norm": 0.7640714590123188, "learning_rate": 8.134305082116247e-06, "loss": 0.1731, "step": 10483 }, { "epoch": 0.3058521500670984, "grad_norm": 0.7413570950614466, "learning_rate": 8.133936979983122e-06, "loss": 0.1592, "step": 10484 }, { "epoch": 0.305881323297742, "grad_norm": 0.9120628833778192, "learning_rate": 8.133568849870953e-06, "loss": 0.1588, "step": 10485 }, { "epoch": 0.30591049652838553, "grad_norm": 0.7695217672867839, "learning_rate": 8.13320069178303e-06, "loss": 0.1248, "step": 10486 }, { "epoch": 0.30593966975902914, "grad_norm": 0.999547933643516, "learning_rate": 8.13283250572264e-06, "loss": 0.1553, "step": 10487 }, { "epoch": 0.3059688429896727, "grad_norm": 0.8916076244179395, "learning_rate": 8.132464291693068e-06, "loss": 0.1727, "step": 10488 }, { "epoch": 0.30599801622031625, "grad_norm": 0.6844763225824567, "learning_rate": 8.132096049697604e-06, "loss": 0.1527, "step": 10489 }, { "epoch": 0.3060271894509598, "grad_norm": 0.9559178165337375, "learning_rate": 8.131727779739533e-06, "loss": 0.159, "step": 10490 }, { "epoch": 0.30605636268160336, "grad_norm": 0.9284463156598932, "learning_rate": 8.131359481822145e-06, "loss": 0.181, "step": 10491 }, { "epoch": 0.3060855359122469, "grad_norm": 0.781265055697093, "learning_rate": 8.130991155948726e-06, "loss": 0.1523, "step": 10492 }, { "epoch": 0.30611470914289046, "grad_norm": 0.7559726874149129, "learning_rate": 8.130622802122566e-06, "loss": 0.13, "step": 10493 }, { "epoch": 0.30614388237353407, "grad_norm": 0.9472351681259725, "learning_rate": 8.130254420346954e-06, "loss": 0.1477, "step": 10494 }, { "epoch": 0.3061730556041776, "grad_norm": 0.8739245150334625, "learning_rate": 8.129886010625176e-06, "loss": 0.1725, "step": 10495 }, { "epoch": 0.3062022288348212, "grad_norm": 0.9484586419264308, "learning_rate": 8.129517572960523e-06, "loss": 0.1354, "step": 10496 }, { "epoch": 0.30623140206546473, "grad_norm": 1.0090227898368496, "learning_rate": 8.129149107356285e-06, "loss": 0.1495, "step": 10497 }, { "epoch": 0.3062605752961083, "grad_norm": 0.9908528918921414, "learning_rate": 8.12878061381575e-06, "loss": 0.1388, "step": 10498 }, { "epoch": 0.30628974852675184, "grad_norm": 1.005759873694505, "learning_rate": 8.12841209234221e-06, "loss": 0.1626, "step": 10499 }, { "epoch": 0.3063189217573954, "grad_norm": 0.977183583589827, "learning_rate": 8.128043542938953e-06, "loss": 0.155, "step": 10500 }, { "epoch": 0.306348094988039, "grad_norm": 0.9762998814957362, "learning_rate": 8.12767496560927e-06, "loss": 0.1639, "step": 10501 }, { "epoch": 0.30637726821868255, "grad_norm": 0.8543228431308588, "learning_rate": 8.127306360356451e-06, "loss": 0.1751, "step": 10502 }, { "epoch": 0.3064064414493261, "grad_norm": 0.9352274648423903, "learning_rate": 8.126937727183789e-06, "loss": 0.1697, "step": 10503 }, { "epoch": 0.30643561467996966, "grad_norm": 1.011660987303486, "learning_rate": 8.12656906609457e-06, "loss": 0.1798, "step": 10504 }, { "epoch": 0.3064647879106132, "grad_norm": 0.8967067042704103, "learning_rate": 8.12620037709209e-06, "loss": 0.1365, "step": 10505 }, { "epoch": 0.30649396114125677, "grad_norm": 1.0385156300922904, "learning_rate": 8.125831660179642e-06, "loss": 0.171, "step": 10506 }, { "epoch": 0.3065231343719003, "grad_norm": 1.5670291613771283, "learning_rate": 8.125462915360511e-06, "loss": 0.18, "step": 10507 }, { "epoch": 0.3065523076025439, "grad_norm": 0.9472429438047364, "learning_rate": 8.125094142637997e-06, "loss": 0.163, "step": 10508 }, { "epoch": 0.3065814808331875, "grad_norm": 0.764220114055355, "learning_rate": 8.124725342015387e-06, "loss": 0.1461, "step": 10509 }, { "epoch": 0.30661065406383103, "grad_norm": 0.9179438054200285, "learning_rate": 8.124356513495975e-06, "loss": 0.1493, "step": 10510 }, { "epoch": 0.3066398272944746, "grad_norm": 0.8480395922489338, "learning_rate": 8.123987657083054e-06, "loss": 0.165, "step": 10511 }, { "epoch": 0.30666900052511814, "grad_norm": 0.6859944965232568, "learning_rate": 8.123618772779917e-06, "loss": 0.1409, "step": 10512 }, { "epoch": 0.3066981737557617, "grad_norm": 0.7930331392558869, "learning_rate": 8.123249860589856e-06, "loss": 0.1642, "step": 10513 }, { "epoch": 0.30672734698640525, "grad_norm": 0.8710376750806804, "learning_rate": 8.122880920516167e-06, "loss": 0.1315, "step": 10514 }, { "epoch": 0.30675652021704886, "grad_norm": 0.8011444341675579, "learning_rate": 8.122511952562143e-06, "loss": 0.1536, "step": 10515 }, { "epoch": 0.3067856934476924, "grad_norm": 1.2824621487969918, "learning_rate": 8.122142956731078e-06, "loss": 0.1644, "step": 10516 }, { "epoch": 0.30681486667833596, "grad_norm": 0.9464360122593795, "learning_rate": 8.121773933026265e-06, "loss": 0.1442, "step": 10517 }, { "epoch": 0.3068440399089795, "grad_norm": 0.9458128911218686, "learning_rate": 8.121404881451e-06, "loss": 0.1433, "step": 10518 }, { "epoch": 0.30687321313962307, "grad_norm": 0.9214215518317815, "learning_rate": 8.121035802008577e-06, "loss": 0.1413, "step": 10519 }, { "epoch": 0.3069023863702666, "grad_norm": 1.0435395184292795, "learning_rate": 8.120666694702292e-06, "loss": 0.1555, "step": 10520 }, { "epoch": 0.30693155960091023, "grad_norm": 0.9804105724680121, "learning_rate": 8.12029755953544e-06, "loss": 0.1479, "step": 10521 }, { "epoch": 0.3069607328315538, "grad_norm": 0.8337310791436562, "learning_rate": 8.119928396511315e-06, "loss": 0.1397, "step": 10522 }, { "epoch": 0.30698990606219734, "grad_norm": 0.8634544296137432, "learning_rate": 8.119559205633213e-06, "loss": 0.1714, "step": 10523 }, { "epoch": 0.3070190792928409, "grad_norm": 1.511782194331731, "learning_rate": 8.119189986904435e-06, "loss": 0.1663, "step": 10524 }, { "epoch": 0.30704825252348444, "grad_norm": 0.9658302254436144, "learning_rate": 8.11882074032827e-06, "loss": 0.1587, "step": 10525 }, { "epoch": 0.307077425754128, "grad_norm": 0.8059627139149879, "learning_rate": 8.11845146590802e-06, "loss": 0.155, "step": 10526 }, { "epoch": 0.30710659898477155, "grad_norm": 0.7921105237179331, "learning_rate": 8.118082163646979e-06, "loss": 0.1771, "step": 10527 }, { "epoch": 0.30713577221541516, "grad_norm": 0.8326608499445625, "learning_rate": 8.117712833548443e-06, "loss": 0.1532, "step": 10528 }, { "epoch": 0.3071649454460587, "grad_norm": 0.9911650912599619, "learning_rate": 8.117343475615714e-06, "loss": 0.179, "step": 10529 }, { "epoch": 0.30719411867670227, "grad_norm": 0.7040185556638828, "learning_rate": 8.116974089852085e-06, "loss": 0.1483, "step": 10530 }, { "epoch": 0.3072232919073458, "grad_norm": 1.1280098054724013, "learning_rate": 8.116604676260855e-06, "loss": 0.1892, "step": 10531 }, { "epoch": 0.3072524651379894, "grad_norm": 0.6794934403780349, "learning_rate": 8.116235234845324e-06, "loss": 0.1301, "step": 10532 }, { "epoch": 0.3072816383686329, "grad_norm": 0.7077044569622198, "learning_rate": 8.115865765608789e-06, "loss": 0.1644, "step": 10533 }, { "epoch": 0.3073108115992765, "grad_norm": 0.7466181617760245, "learning_rate": 8.115496268554545e-06, "loss": 0.1698, "step": 10534 }, { "epoch": 0.3073399848299201, "grad_norm": 0.6420098664136437, "learning_rate": 8.115126743685897e-06, "loss": 0.1364, "step": 10535 }, { "epoch": 0.30736915806056364, "grad_norm": 0.8882542711176362, "learning_rate": 8.114757191006141e-06, "loss": 0.1362, "step": 10536 }, { "epoch": 0.3073983312912072, "grad_norm": 1.1086496996804374, "learning_rate": 8.114387610518574e-06, "loss": 0.1501, "step": 10537 }, { "epoch": 0.30742750452185075, "grad_norm": 0.7485858463299588, "learning_rate": 8.1140180022265e-06, "loss": 0.1461, "step": 10538 }, { "epoch": 0.3074566777524943, "grad_norm": 1.022310161568353, "learning_rate": 8.113648366133218e-06, "loss": 0.1257, "step": 10539 }, { "epoch": 0.30748585098313785, "grad_norm": 0.8740042421782029, "learning_rate": 8.113278702242025e-06, "loss": 0.1386, "step": 10540 }, { "epoch": 0.3075150242137814, "grad_norm": 1.071707657078678, "learning_rate": 8.112909010556222e-06, "loss": 0.1383, "step": 10541 }, { "epoch": 0.307544197444425, "grad_norm": 1.1158674356285905, "learning_rate": 8.11253929107911e-06, "loss": 0.16, "step": 10542 }, { "epoch": 0.30757337067506857, "grad_norm": 0.8078489878682861, "learning_rate": 8.112169543813992e-06, "loss": 0.1356, "step": 10543 }, { "epoch": 0.3076025439057121, "grad_norm": 0.8692041404439562, "learning_rate": 8.111799768764169e-06, "loss": 0.1359, "step": 10544 }, { "epoch": 0.3076317171363557, "grad_norm": 1.1023630639028916, "learning_rate": 8.111429965932938e-06, "loss": 0.1697, "step": 10545 }, { "epoch": 0.30766089036699923, "grad_norm": 0.8951416247908781, "learning_rate": 8.111060135323601e-06, "loss": 0.1631, "step": 10546 }, { "epoch": 0.3076900635976428, "grad_norm": 1.138681929210952, "learning_rate": 8.110690276939466e-06, "loss": 0.1515, "step": 10547 }, { "epoch": 0.3077192368282864, "grad_norm": 1.1975022017271477, "learning_rate": 8.110320390783828e-06, "loss": 0.1498, "step": 10548 }, { "epoch": 0.30774841005892994, "grad_norm": 0.780744692482144, "learning_rate": 8.109950476859993e-06, "loss": 0.1731, "step": 10549 }, { "epoch": 0.3077775832895735, "grad_norm": 0.7934698933211289, "learning_rate": 8.109580535171262e-06, "loss": 0.137, "step": 10550 }, { "epoch": 0.30780675652021705, "grad_norm": 1.0296175217764811, "learning_rate": 8.10921056572094e-06, "loss": 0.1326, "step": 10551 }, { "epoch": 0.3078359297508606, "grad_norm": 0.7560402079317322, "learning_rate": 8.108840568512326e-06, "loss": 0.1429, "step": 10552 }, { "epoch": 0.30786510298150416, "grad_norm": 0.6237040766499032, "learning_rate": 8.108470543548728e-06, "loss": 0.1237, "step": 10553 }, { "epoch": 0.3078942762121477, "grad_norm": 1.0722082215091353, "learning_rate": 8.108100490833444e-06, "loss": 0.1441, "step": 10554 }, { "epoch": 0.3079234494427913, "grad_norm": 0.8508576037764862, "learning_rate": 8.107730410369783e-06, "loss": 0.1475, "step": 10555 }, { "epoch": 0.3079526226734349, "grad_norm": 0.7939273351996611, "learning_rate": 8.107360302161047e-06, "loss": 0.1687, "step": 10556 }, { "epoch": 0.3079817959040784, "grad_norm": 0.7899107972686913, "learning_rate": 8.106990166210539e-06, "loss": 0.1441, "step": 10557 }, { "epoch": 0.308010969134722, "grad_norm": 0.902306511065696, "learning_rate": 8.106620002521564e-06, "loss": 0.1463, "step": 10558 }, { "epoch": 0.30804014236536553, "grad_norm": 0.8155122241241145, "learning_rate": 8.106249811097428e-06, "loss": 0.1644, "step": 10559 }, { "epoch": 0.3080693155960091, "grad_norm": 0.8296850751204471, "learning_rate": 8.105879591941436e-06, "loss": 0.1453, "step": 10560 }, { "epoch": 0.30809848882665264, "grad_norm": 0.8270095028586417, "learning_rate": 8.10550934505689e-06, "loss": 0.181, "step": 10561 }, { "epoch": 0.30812766205729625, "grad_norm": 0.8338867825945535, "learning_rate": 8.1051390704471e-06, "loss": 0.1396, "step": 10562 }, { "epoch": 0.3081568352879398, "grad_norm": 0.8411356780114958, "learning_rate": 8.10476876811537e-06, "loss": 0.1719, "step": 10563 }, { "epoch": 0.30818600851858335, "grad_norm": 0.8384720164246909, "learning_rate": 8.104398438065004e-06, "loss": 0.1436, "step": 10564 }, { "epoch": 0.3082151817492269, "grad_norm": 1.586304716249349, "learning_rate": 8.10402808029931e-06, "loss": 0.1643, "step": 10565 }, { "epoch": 0.30824435497987046, "grad_norm": 0.7091876904890907, "learning_rate": 8.103657694821597e-06, "loss": 0.1438, "step": 10566 }, { "epoch": 0.308273528210514, "grad_norm": 0.6861675408497042, "learning_rate": 8.103287281635165e-06, "loss": 0.1282, "step": 10567 }, { "epoch": 0.30830270144115757, "grad_norm": 0.8024224508177042, "learning_rate": 8.102916840743327e-06, "loss": 0.142, "step": 10568 }, { "epoch": 0.3083318746718012, "grad_norm": 0.739856121913459, "learning_rate": 8.102546372149389e-06, "loss": 0.1523, "step": 10569 }, { "epoch": 0.30836104790244473, "grad_norm": 0.7441723765909202, "learning_rate": 8.102175875856655e-06, "loss": 0.156, "step": 10570 }, { "epoch": 0.3083902211330883, "grad_norm": 0.8616164805014292, "learning_rate": 8.101805351868438e-06, "loss": 0.1452, "step": 10571 }, { "epoch": 0.30841939436373184, "grad_norm": 0.7507438067787525, "learning_rate": 8.101434800188042e-06, "loss": 0.1618, "step": 10572 }, { "epoch": 0.3084485675943754, "grad_norm": 0.6780369471077534, "learning_rate": 8.101064220818776e-06, "loss": 0.1563, "step": 10573 }, { "epoch": 0.30847774082501894, "grad_norm": 0.6803403032015081, "learning_rate": 8.10069361376395e-06, "loss": 0.1392, "step": 10574 }, { "epoch": 0.30850691405566255, "grad_norm": 0.7246904738957859, "learning_rate": 8.100322979026872e-06, "loss": 0.1521, "step": 10575 }, { "epoch": 0.3085360872863061, "grad_norm": 0.781055731214032, "learning_rate": 8.099952316610849e-06, "loss": 0.1332, "step": 10576 }, { "epoch": 0.30856526051694966, "grad_norm": 0.7652344949530506, "learning_rate": 8.099581626519193e-06, "loss": 0.1525, "step": 10577 }, { "epoch": 0.3085944337475932, "grad_norm": 0.7697622437224937, "learning_rate": 8.099210908755213e-06, "loss": 0.1789, "step": 10578 }, { "epoch": 0.30862360697823676, "grad_norm": 1.137822249189753, "learning_rate": 8.098840163322215e-06, "loss": 0.1519, "step": 10579 }, { "epoch": 0.3086527802088803, "grad_norm": 0.9379822407843574, "learning_rate": 8.098469390223514e-06, "loss": 0.1594, "step": 10580 }, { "epoch": 0.30868195343952387, "grad_norm": 0.7880585471282775, "learning_rate": 8.098098589462416e-06, "loss": 0.1458, "step": 10581 }, { "epoch": 0.3087111266701675, "grad_norm": 1.0291747194947232, "learning_rate": 8.097727761042236e-06, "loss": 0.1373, "step": 10582 }, { "epoch": 0.30874029990081103, "grad_norm": 0.7812957602030183, "learning_rate": 8.09735690496628e-06, "loss": 0.1317, "step": 10583 }, { "epoch": 0.3087694731314546, "grad_norm": 0.8718213140293359, "learning_rate": 8.096986021237863e-06, "loss": 0.1727, "step": 10584 }, { "epoch": 0.30879864636209814, "grad_norm": 0.9414986040552127, "learning_rate": 8.096615109860291e-06, "loss": 0.1537, "step": 10585 }, { "epoch": 0.3088278195927417, "grad_norm": 0.7669372121470045, "learning_rate": 8.09624417083688e-06, "loss": 0.122, "step": 10586 }, { "epoch": 0.30885699282338525, "grad_norm": 0.762580240102507, "learning_rate": 8.09587320417094e-06, "loss": 0.166, "step": 10587 }, { "epoch": 0.3088861660540288, "grad_norm": 0.6681375948337287, "learning_rate": 8.095502209865785e-06, "loss": 0.1337, "step": 10588 }, { "epoch": 0.3089153392846724, "grad_norm": 0.8626854000460653, "learning_rate": 8.095131187924723e-06, "loss": 0.1658, "step": 10589 }, { "epoch": 0.30894451251531596, "grad_norm": 0.7997351499456614, "learning_rate": 8.09476013835107e-06, "loss": 0.1515, "step": 10590 }, { "epoch": 0.3089736857459595, "grad_norm": 0.9376171703942249, "learning_rate": 8.094389061148135e-06, "loss": 0.1401, "step": 10591 }, { "epoch": 0.30900285897660307, "grad_norm": 0.7558847608908864, "learning_rate": 8.094017956319236e-06, "loss": 0.1524, "step": 10592 }, { "epoch": 0.3090320322072466, "grad_norm": 0.8543530620818583, "learning_rate": 8.093646823867683e-06, "loss": 0.1326, "step": 10593 }, { "epoch": 0.3090612054378902, "grad_norm": 0.7740183128550507, "learning_rate": 8.093275663796787e-06, "loss": 0.1584, "step": 10594 }, { "epoch": 0.3090903786685337, "grad_norm": 0.8451521187831812, "learning_rate": 8.092904476109867e-06, "loss": 0.1485, "step": 10595 }, { "epoch": 0.30911955189917734, "grad_norm": 0.7527218799395663, "learning_rate": 8.092533260810234e-06, "loss": 0.1547, "step": 10596 }, { "epoch": 0.3091487251298209, "grad_norm": 0.8702787459635344, "learning_rate": 8.0921620179012e-06, "loss": 0.1334, "step": 10597 }, { "epoch": 0.30917789836046444, "grad_norm": 0.8224779270059287, "learning_rate": 8.091790747386084e-06, "loss": 0.1384, "step": 10598 }, { "epoch": 0.309207071591108, "grad_norm": 0.9221937293844968, "learning_rate": 8.091419449268197e-06, "loss": 0.1703, "step": 10599 }, { "epoch": 0.30923624482175155, "grad_norm": 0.9491475623337832, "learning_rate": 8.091048123550855e-06, "loss": 0.136, "step": 10600 }, { "epoch": 0.3092654180523951, "grad_norm": 0.7995688228938687, "learning_rate": 8.090676770237374e-06, "loss": 0.1482, "step": 10601 }, { "epoch": 0.3092945912830387, "grad_norm": 0.9582166128890751, "learning_rate": 8.090305389331069e-06, "loss": 0.1453, "step": 10602 }, { "epoch": 0.30932376451368226, "grad_norm": 0.9879063390849203, "learning_rate": 8.089933980835254e-06, "loss": 0.1476, "step": 10603 }, { "epoch": 0.3093529377443258, "grad_norm": 0.9051906040954101, "learning_rate": 8.089562544753247e-06, "loss": 0.1451, "step": 10604 }, { "epoch": 0.30938211097496937, "grad_norm": 1.0179094349743891, "learning_rate": 8.089191081088364e-06, "loss": 0.1574, "step": 10605 }, { "epoch": 0.3094112842056129, "grad_norm": 0.8367263083938471, "learning_rate": 8.088819589843919e-06, "loss": 0.1905, "step": 10606 }, { "epoch": 0.3094404574362565, "grad_norm": 1.0329858362778113, "learning_rate": 8.08844807102323e-06, "loss": 0.1341, "step": 10607 }, { "epoch": 0.30946963066690003, "grad_norm": 1.1484767834595642, "learning_rate": 8.088076524629613e-06, "loss": 0.1603, "step": 10608 }, { "epoch": 0.30949880389754364, "grad_norm": 1.0068689908140545, "learning_rate": 8.087704950666388e-06, "loss": 0.1458, "step": 10609 }, { "epoch": 0.3095279771281872, "grad_norm": 1.0035871570590194, "learning_rate": 8.08733334913687e-06, "loss": 0.1787, "step": 10610 }, { "epoch": 0.30955715035883075, "grad_norm": 0.841565240388496, "learning_rate": 8.086961720044374e-06, "loss": 0.1485, "step": 10611 }, { "epoch": 0.3095863235894743, "grad_norm": 1.01173294811488, "learning_rate": 8.086590063392224e-06, "loss": 0.1374, "step": 10612 }, { "epoch": 0.30961549682011785, "grad_norm": 1.083260824888384, "learning_rate": 8.086218379183735e-06, "loss": 0.1599, "step": 10613 }, { "epoch": 0.3096446700507614, "grad_norm": 0.8371162877497406, "learning_rate": 8.085846667422224e-06, "loss": 0.1554, "step": 10614 }, { "epoch": 0.30967384328140496, "grad_norm": 0.9618314941000575, "learning_rate": 8.08547492811101e-06, "loss": 0.1601, "step": 10615 }, { "epoch": 0.30970301651204857, "grad_norm": 0.8562001405810296, "learning_rate": 8.085103161253413e-06, "loss": 0.1595, "step": 10616 }, { "epoch": 0.3097321897426921, "grad_norm": 0.7229896698756507, "learning_rate": 8.084731366852752e-06, "loss": 0.1459, "step": 10617 }, { "epoch": 0.3097613629733357, "grad_norm": 0.7145254810818518, "learning_rate": 8.084359544912344e-06, "loss": 0.1332, "step": 10618 }, { "epoch": 0.3097905362039792, "grad_norm": 0.7985107534143561, "learning_rate": 8.08398769543551e-06, "loss": 0.1589, "step": 10619 }, { "epoch": 0.3098197094346228, "grad_norm": 0.8866769673396308, "learning_rate": 8.083615818425573e-06, "loss": 0.1632, "step": 10620 }, { "epoch": 0.30984888266526633, "grad_norm": 0.9127343915015304, "learning_rate": 8.083243913885848e-06, "loss": 0.1417, "step": 10621 }, { "epoch": 0.3098780558959099, "grad_norm": 0.6578977562893924, "learning_rate": 8.082871981819658e-06, "loss": 0.1552, "step": 10622 }, { "epoch": 0.3099072291265535, "grad_norm": 1.1127677504484195, "learning_rate": 8.082500022230323e-06, "loss": 0.1483, "step": 10623 }, { "epoch": 0.30993640235719705, "grad_norm": 0.8847830017787616, "learning_rate": 8.082128035121162e-06, "loss": 0.186, "step": 10624 }, { "epoch": 0.3099655755878406, "grad_norm": 0.6800510056270985, "learning_rate": 8.081756020495501e-06, "loss": 0.1247, "step": 10625 }, { "epoch": 0.30999474881848416, "grad_norm": 0.7799657725364644, "learning_rate": 8.081383978356655e-06, "loss": 0.1543, "step": 10626 }, { "epoch": 0.3100239220491277, "grad_norm": 0.8811745190164654, "learning_rate": 8.08101190870795e-06, "loss": 0.153, "step": 10627 }, { "epoch": 0.31005309527977126, "grad_norm": 0.7482189825762555, "learning_rate": 8.080639811552704e-06, "loss": 0.1504, "step": 10628 }, { "epoch": 0.3100822685104148, "grad_norm": 0.7595443605436023, "learning_rate": 8.080267686894244e-06, "loss": 0.1514, "step": 10629 }, { "epoch": 0.3101114417410584, "grad_norm": 0.7780154206427352, "learning_rate": 8.079895534735887e-06, "loss": 0.1475, "step": 10630 }, { "epoch": 0.310140614971702, "grad_norm": 0.7048107343833214, "learning_rate": 8.07952335508096e-06, "loss": 0.1656, "step": 10631 }, { "epoch": 0.31016978820234553, "grad_norm": 0.7852511955892731, "learning_rate": 8.079151147932783e-06, "loss": 0.1604, "step": 10632 }, { "epoch": 0.3101989614329891, "grad_norm": 0.8862456259772208, "learning_rate": 8.078778913294677e-06, "loss": 0.1579, "step": 10633 }, { "epoch": 0.31022813466363264, "grad_norm": 0.859633153948346, "learning_rate": 8.078406651169972e-06, "loss": 0.1588, "step": 10634 }, { "epoch": 0.3102573078942762, "grad_norm": 0.7830934120308404, "learning_rate": 8.078034361561986e-06, "loss": 0.1706, "step": 10635 }, { "epoch": 0.3102864811249198, "grad_norm": 0.6885264756111578, "learning_rate": 8.077662044474043e-06, "loss": 0.1322, "step": 10636 }, { "epoch": 0.31031565435556335, "grad_norm": 0.9129535942341924, "learning_rate": 8.077289699909467e-06, "loss": 0.1569, "step": 10637 }, { "epoch": 0.3103448275862069, "grad_norm": 1.5824290449546556, "learning_rate": 8.076917327871585e-06, "loss": 0.1634, "step": 10638 }, { "epoch": 0.31037400081685046, "grad_norm": 0.6993724461831868, "learning_rate": 8.07654492836372e-06, "loss": 0.1311, "step": 10639 }, { "epoch": 0.310403174047494, "grad_norm": 0.7090772943109829, "learning_rate": 8.076172501389194e-06, "loss": 0.1416, "step": 10640 }, { "epoch": 0.31043234727813757, "grad_norm": 0.7689088325897413, "learning_rate": 8.075800046951336e-06, "loss": 0.1214, "step": 10641 }, { "epoch": 0.3104615205087811, "grad_norm": 0.9255989646444487, "learning_rate": 8.075427565053471e-06, "loss": 0.1551, "step": 10642 }, { "epoch": 0.31049069373942473, "grad_norm": 0.9407014546893899, "learning_rate": 8.07505505569892e-06, "loss": 0.1643, "step": 10643 }, { "epoch": 0.3105198669700683, "grad_norm": 0.6631045486438311, "learning_rate": 8.074682518891013e-06, "loss": 0.1692, "step": 10644 }, { "epoch": 0.31054904020071183, "grad_norm": 0.7748464554000842, "learning_rate": 8.074309954633074e-06, "loss": 0.1507, "step": 10645 }, { "epoch": 0.3105782134313554, "grad_norm": 0.7765330736455465, "learning_rate": 8.07393736292843e-06, "loss": 0.1413, "step": 10646 }, { "epoch": 0.31060738666199894, "grad_norm": 0.8579947591245294, "learning_rate": 8.073564743780407e-06, "loss": 0.1758, "step": 10647 }, { "epoch": 0.3106365598926425, "grad_norm": 0.7410292246732326, "learning_rate": 8.07319209719233e-06, "loss": 0.137, "step": 10648 }, { "epoch": 0.31066573312328605, "grad_norm": 0.7330946753608799, "learning_rate": 8.072819423167529e-06, "loss": 0.1572, "step": 10649 }, { "epoch": 0.31069490635392966, "grad_norm": 0.9172111121028844, "learning_rate": 8.07244672170933e-06, "loss": 0.144, "step": 10650 }, { "epoch": 0.3107240795845732, "grad_norm": 0.8347018719207745, "learning_rate": 8.07207399282106e-06, "loss": 0.165, "step": 10651 }, { "epoch": 0.31075325281521676, "grad_norm": 0.9026895241321442, "learning_rate": 8.071701236506046e-06, "loss": 0.1516, "step": 10652 }, { "epoch": 0.3107824260458603, "grad_norm": 0.7653364453481548, "learning_rate": 8.071328452767616e-06, "loss": 0.1541, "step": 10653 }, { "epoch": 0.31081159927650387, "grad_norm": 0.842937097174414, "learning_rate": 8.0709556416091e-06, "loss": 0.1536, "step": 10654 }, { "epoch": 0.3108407725071474, "grad_norm": 0.7441741293056079, "learning_rate": 8.070582803033827e-06, "loss": 0.1593, "step": 10655 }, { "epoch": 0.310869945737791, "grad_norm": 0.8379730241885758, "learning_rate": 8.07020993704512e-06, "loss": 0.1361, "step": 10656 }, { "epoch": 0.3108991189684346, "grad_norm": 0.8639332739638844, "learning_rate": 8.069837043646313e-06, "loss": 0.1453, "step": 10657 }, { "epoch": 0.31092829219907814, "grad_norm": 0.8095227989886883, "learning_rate": 8.069464122840736e-06, "loss": 0.1624, "step": 10658 }, { "epoch": 0.3109574654297217, "grad_norm": 0.8009859399076495, "learning_rate": 8.069091174631713e-06, "loss": 0.1314, "step": 10659 }, { "epoch": 0.31098663866036524, "grad_norm": 0.7809940192959718, "learning_rate": 8.068718199022578e-06, "loss": 0.1241, "step": 10660 }, { "epoch": 0.3110158118910088, "grad_norm": 0.8787555741360006, "learning_rate": 8.06834519601666e-06, "loss": 0.1445, "step": 10661 }, { "epoch": 0.31104498512165235, "grad_norm": 0.8156662472298689, "learning_rate": 8.067972165617287e-06, "loss": 0.1576, "step": 10662 }, { "epoch": 0.31107415835229596, "grad_norm": 0.8065762398079646, "learning_rate": 8.067599107827793e-06, "loss": 0.1546, "step": 10663 }, { "epoch": 0.3111033315829395, "grad_norm": 0.7447673881203032, "learning_rate": 8.067226022651505e-06, "loss": 0.1406, "step": 10664 }, { "epoch": 0.31113250481358307, "grad_norm": 0.8428734686193561, "learning_rate": 8.066852910091754e-06, "loss": 0.1521, "step": 10665 }, { "epoch": 0.3111616780442266, "grad_norm": 0.7604425710000424, "learning_rate": 8.066479770151875e-06, "loss": 0.1539, "step": 10666 }, { "epoch": 0.3111908512748702, "grad_norm": 0.8009054114318241, "learning_rate": 8.066106602835195e-06, "loss": 0.1547, "step": 10667 }, { "epoch": 0.3112200245055137, "grad_norm": 1.0157049129411353, "learning_rate": 8.065733408145047e-06, "loss": 0.1528, "step": 10668 }, { "epoch": 0.3112491977361573, "grad_norm": 0.970607898681344, "learning_rate": 8.065360186084764e-06, "loss": 0.1535, "step": 10669 }, { "epoch": 0.3112783709668009, "grad_norm": 0.692521856646399, "learning_rate": 8.064986936657678e-06, "loss": 0.1637, "step": 10670 }, { "epoch": 0.31130754419744444, "grad_norm": 0.9277228764146348, "learning_rate": 8.064613659867117e-06, "loss": 0.18, "step": 10671 }, { "epoch": 0.311336717428088, "grad_norm": 1.0267082516333221, "learning_rate": 8.06424035571642e-06, "loss": 0.1336, "step": 10672 }, { "epoch": 0.31136589065873155, "grad_norm": 0.7567316283677745, "learning_rate": 8.063867024208915e-06, "loss": 0.1424, "step": 10673 }, { "epoch": 0.3113950638893751, "grad_norm": 0.8372580592986668, "learning_rate": 8.063493665347937e-06, "loss": 0.1521, "step": 10674 }, { "epoch": 0.31142423712001865, "grad_norm": 0.8257915263991669, "learning_rate": 8.063120279136818e-06, "loss": 0.1568, "step": 10675 }, { "epoch": 0.3114534103506622, "grad_norm": 0.6840641979015937, "learning_rate": 8.062746865578894e-06, "loss": 0.1512, "step": 10676 }, { "epoch": 0.3114825835813058, "grad_norm": 0.956027467205039, "learning_rate": 8.062373424677497e-06, "loss": 0.1588, "step": 10677 }, { "epoch": 0.31151175681194937, "grad_norm": 0.74587657090834, "learning_rate": 8.061999956435959e-06, "loss": 0.132, "step": 10678 }, { "epoch": 0.3115409300425929, "grad_norm": 0.7686836235919776, "learning_rate": 8.061626460857618e-06, "loss": 0.1306, "step": 10679 }, { "epoch": 0.3115701032732365, "grad_norm": 0.6952941128726813, "learning_rate": 8.061252937945807e-06, "loss": 0.1406, "step": 10680 }, { "epoch": 0.31159927650388003, "grad_norm": 0.7551025388799266, "learning_rate": 8.06087938770386e-06, "loss": 0.1355, "step": 10681 }, { "epoch": 0.3116284497345236, "grad_norm": 0.7968590949508415, "learning_rate": 8.060505810135113e-06, "loss": 0.123, "step": 10682 }, { "epoch": 0.31165762296516714, "grad_norm": 0.7104227466698682, "learning_rate": 8.0601322052429e-06, "loss": 0.1587, "step": 10683 }, { "epoch": 0.31168679619581074, "grad_norm": 0.770712191753551, "learning_rate": 8.059758573030559e-06, "loss": 0.137, "step": 10684 }, { "epoch": 0.3117159694264543, "grad_norm": 1.0207202104356439, "learning_rate": 8.059384913501422e-06, "loss": 0.1379, "step": 10685 }, { "epoch": 0.31174514265709785, "grad_norm": 0.9498034214744107, "learning_rate": 8.059011226658826e-06, "loss": 0.1337, "step": 10686 }, { "epoch": 0.3117743158877414, "grad_norm": 0.7442823311245105, "learning_rate": 8.05863751250611e-06, "loss": 0.1565, "step": 10687 }, { "epoch": 0.31180348911838496, "grad_norm": 0.8086803621633196, "learning_rate": 8.058263771046608e-06, "loss": 0.1645, "step": 10688 }, { "epoch": 0.3118326623490285, "grad_norm": 0.7835500579927944, "learning_rate": 8.057890002283657e-06, "loss": 0.13, "step": 10689 }, { "epoch": 0.3118618355796721, "grad_norm": 0.8949264288312289, "learning_rate": 8.057516206220594e-06, "loss": 0.1867, "step": 10690 }, { "epoch": 0.3118910088103157, "grad_norm": 0.763094435902557, "learning_rate": 8.057142382860757e-06, "loss": 0.1439, "step": 10691 }, { "epoch": 0.3119201820409592, "grad_norm": 1.1047382814335238, "learning_rate": 8.05676853220748e-06, "loss": 0.1707, "step": 10692 }, { "epoch": 0.3119493552716028, "grad_norm": 1.0420450594740067, "learning_rate": 8.056394654264107e-06, "loss": 0.1338, "step": 10693 }, { "epoch": 0.31197852850224633, "grad_norm": 1.1649076445090332, "learning_rate": 8.056020749033968e-06, "loss": 0.1528, "step": 10694 }, { "epoch": 0.3120077017328899, "grad_norm": 0.9895356237269789, "learning_rate": 8.055646816520409e-06, "loss": 0.1424, "step": 10695 }, { "epoch": 0.31203687496353344, "grad_norm": 1.155915915417647, "learning_rate": 8.05527285672676e-06, "loss": 0.1454, "step": 10696 }, { "epoch": 0.31206604819417705, "grad_norm": 0.8550922229798301, "learning_rate": 8.05489886965637e-06, "loss": 0.1343, "step": 10697 }, { "epoch": 0.3120952214248206, "grad_norm": 1.0372996907065362, "learning_rate": 8.054524855312568e-06, "loss": 0.1417, "step": 10698 }, { "epoch": 0.31212439465546415, "grad_norm": 0.8225030740209992, "learning_rate": 8.0541508136987e-06, "loss": 0.149, "step": 10699 }, { "epoch": 0.3121535678861077, "grad_norm": 1.0389846090648762, "learning_rate": 8.053776744818102e-06, "loss": 0.1408, "step": 10700 }, { "epoch": 0.31218274111675126, "grad_norm": 0.8972509564498101, "learning_rate": 8.053402648674113e-06, "loss": 0.1606, "step": 10701 }, { "epoch": 0.3122119143473948, "grad_norm": 0.7994333451254089, "learning_rate": 8.053028525270075e-06, "loss": 0.1475, "step": 10702 }, { "epoch": 0.31224108757803837, "grad_norm": 1.2633577308984363, "learning_rate": 8.052654374609326e-06, "loss": 0.1556, "step": 10703 }, { "epoch": 0.312270260808682, "grad_norm": 1.0440090002034648, "learning_rate": 8.052280196695209e-06, "loss": 0.1432, "step": 10704 }, { "epoch": 0.31229943403932553, "grad_norm": 0.7322160840033916, "learning_rate": 8.051905991531061e-06, "loss": 0.1416, "step": 10705 }, { "epoch": 0.3123286072699691, "grad_norm": 0.9900805434997059, "learning_rate": 8.051531759120228e-06, "loss": 0.1613, "step": 10706 }, { "epoch": 0.31235778050061264, "grad_norm": 0.9174698172214244, "learning_rate": 8.051157499466044e-06, "loss": 0.131, "step": 10707 }, { "epoch": 0.3123869537312562, "grad_norm": 0.7010178794851797, "learning_rate": 8.050783212571857e-06, "loss": 0.1499, "step": 10708 }, { "epoch": 0.31241612696189974, "grad_norm": 0.9298159267560002, "learning_rate": 8.050408898441005e-06, "loss": 0.1613, "step": 10709 }, { "epoch": 0.3124453001925433, "grad_norm": 0.8912738723340679, "learning_rate": 8.050034557076831e-06, "loss": 0.1572, "step": 10710 }, { "epoch": 0.3124744734231869, "grad_norm": 0.7617971180421135, "learning_rate": 8.049660188482677e-06, "loss": 0.1328, "step": 10711 }, { "epoch": 0.31250364665383046, "grad_norm": 0.6849817819796101, "learning_rate": 8.049285792661882e-06, "loss": 0.1429, "step": 10712 }, { "epoch": 0.312532819884474, "grad_norm": 0.912788931517718, "learning_rate": 8.048911369617794e-06, "loss": 0.1484, "step": 10713 }, { "epoch": 0.31256199311511756, "grad_norm": 0.8351000231911664, "learning_rate": 8.048536919353753e-06, "loss": 0.1559, "step": 10714 }, { "epoch": 0.3125911663457611, "grad_norm": 0.7026278300710169, "learning_rate": 8.048162441873102e-06, "loss": 0.1551, "step": 10715 }, { "epoch": 0.31262033957640467, "grad_norm": 0.6711410878079361, "learning_rate": 8.047787937179183e-06, "loss": 0.1316, "step": 10716 }, { "epoch": 0.3126495128070483, "grad_norm": 0.7666660355141117, "learning_rate": 8.047413405275344e-06, "loss": 0.1463, "step": 10717 }, { "epoch": 0.31267868603769183, "grad_norm": 0.8413731982089626, "learning_rate": 8.047038846164923e-06, "loss": 0.1822, "step": 10718 }, { "epoch": 0.3127078592683354, "grad_norm": 0.8177612017088078, "learning_rate": 8.046664259851267e-06, "loss": 0.1576, "step": 10719 }, { "epoch": 0.31273703249897894, "grad_norm": 0.9037697769111159, "learning_rate": 8.046289646337719e-06, "loss": 0.1275, "step": 10720 }, { "epoch": 0.3127662057296225, "grad_norm": 0.8146306173853757, "learning_rate": 8.045915005627626e-06, "loss": 0.1588, "step": 10721 }, { "epoch": 0.31279537896026605, "grad_norm": 0.8398277328802198, "learning_rate": 8.045540337724329e-06, "loss": 0.1623, "step": 10722 }, { "epoch": 0.3128245521909096, "grad_norm": 0.6977706410600809, "learning_rate": 8.045165642631176e-06, "loss": 0.1473, "step": 10723 }, { "epoch": 0.3128537254215532, "grad_norm": 0.8347512600324639, "learning_rate": 8.044790920351512e-06, "loss": 0.1282, "step": 10724 }, { "epoch": 0.31288289865219676, "grad_norm": 1.0121807184248806, "learning_rate": 8.044416170888681e-06, "loss": 0.1446, "step": 10725 }, { "epoch": 0.3129120718828403, "grad_norm": 0.6374140196195595, "learning_rate": 8.044041394246027e-06, "loss": 0.1518, "step": 10726 }, { "epoch": 0.31294124511348387, "grad_norm": 1.0939177543901855, "learning_rate": 8.0436665904269e-06, "loss": 0.1559, "step": 10727 }, { "epoch": 0.3129704183441274, "grad_norm": 1.0342650290269129, "learning_rate": 8.043291759434643e-06, "loss": 0.1361, "step": 10728 }, { "epoch": 0.312999591574771, "grad_norm": 0.906841026379668, "learning_rate": 8.042916901272606e-06, "loss": 0.16, "step": 10729 }, { "epoch": 0.3130287648054145, "grad_norm": 0.7827500777722924, "learning_rate": 8.042542015944133e-06, "loss": 0.1565, "step": 10730 }, { "epoch": 0.31305793803605814, "grad_norm": 0.9417677569325281, "learning_rate": 8.04216710345257e-06, "loss": 0.1701, "step": 10731 }, { "epoch": 0.3130871112667017, "grad_norm": 0.6544675059020303, "learning_rate": 8.041792163801266e-06, "loss": 0.1428, "step": 10732 }, { "epoch": 0.31311628449734524, "grad_norm": 0.7878177342915902, "learning_rate": 8.041417196993565e-06, "loss": 0.1716, "step": 10733 }, { "epoch": 0.3131454577279888, "grad_norm": 0.8376969242944549, "learning_rate": 8.041042203032821e-06, "loss": 0.1472, "step": 10734 }, { "epoch": 0.31317463095863235, "grad_norm": 0.9388497991176934, "learning_rate": 8.040667181922378e-06, "loss": 0.1382, "step": 10735 }, { "epoch": 0.3132038041892759, "grad_norm": 0.7996431337234259, "learning_rate": 8.040292133665582e-06, "loss": 0.1428, "step": 10736 }, { "epoch": 0.31323297741991946, "grad_norm": 0.9414314353418634, "learning_rate": 8.039917058265784e-06, "loss": 0.1388, "step": 10737 }, { "epoch": 0.31326215065056306, "grad_norm": 1.0338513178501352, "learning_rate": 8.039541955726333e-06, "loss": 0.1438, "step": 10738 }, { "epoch": 0.3132913238812066, "grad_norm": 0.9033452820449029, "learning_rate": 8.039166826050577e-06, "loss": 0.146, "step": 10739 }, { "epoch": 0.31332049711185017, "grad_norm": 0.9429621006855641, "learning_rate": 8.038791669241865e-06, "loss": 0.1303, "step": 10740 }, { "epoch": 0.3133496703424937, "grad_norm": 0.7545239946383687, "learning_rate": 8.038416485303546e-06, "loss": 0.1469, "step": 10741 }, { "epoch": 0.3133788435731373, "grad_norm": 0.8022933400590296, "learning_rate": 8.03804127423897e-06, "loss": 0.1571, "step": 10742 }, { "epoch": 0.31340801680378083, "grad_norm": 0.9467000448849283, "learning_rate": 8.037666036051489e-06, "loss": 0.19, "step": 10743 }, { "epoch": 0.3134371900344244, "grad_norm": 0.7750292485174162, "learning_rate": 8.037290770744448e-06, "loss": 0.1495, "step": 10744 }, { "epoch": 0.313466363265068, "grad_norm": 1.0258822155581384, "learning_rate": 8.036915478321201e-06, "loss": 0.1524, "step": 10745 }, { "epoch": 0.31349553649571155, "grad_norm": 0.8124866017833455, "learning_rate": 8.036540158785097e-06, "loss": 0.1536, "step": 10746 }, { "epoch": 0.3135247097263551, "grad_norm": 1.0427948302415047, "learning_rate": 8.036164812139487e-06, "loss": 0.132, "step": 10747 }, { "epoch": 0.31355388295699865, "grad_norm": 0.8445958957902322, "learning_rate": 8.035789438387724e-06, "loss": 0.1376, "step": 10748 }, { "epoch": 0.3135830561876422, "grad_norm": 1.2067884629560839, "learning_rate": 8.035414037533156e-06, "loss": 0.1911, "step": 10749 }, { "epoch": 0.31361222941828576, "grad_norm": 1.1192234183737655, "learning_rate": 8.035038609579138e-06, "loss": 0.1509, "step": 10750 }, { "epoch": 0.31364140264892937, "grad_norm": 0.8019585745421046, "learning_rate": 8.034663154529018e-06, "loss": 0.1631, "step": 10751 }, { "epoch": 0.3136705758795729, "grad_norm": 1.1192644625980521, "learning_rate": 8.03428767238615e-06, "loss": 0.1314, "step": 10752 }, { "epoch": 0.3136997491102165, "grad_norm": 0.8762285213604124, "learning_rate": 8.033912163153886e-06, "loss": 0.1622, "step": 10753 }, { "epoch": 0.31372892234086003, "grad_norm": 0.8767256040478457, "learning_rate": 8.03353662683558e-06, "loss": 0.1578, "step": 10754 }, { "epoch": 0.3137580955715036, "grad_norm": 0.999344346633624, "learning_rate": 8.033161063434582e-06, "loss": 0.1516, "step": 10755 }, { "epoch": 0.31378726880214713, "grad_norm": 0.7808600078335992, "learning_rate": 8.032785472954246e-06, "loss": 0.143, "step": 10756 }, { "epoch": 0.3138164420327907, "grad_norm": 0.8899712922793491, "learning_rate": 8.032409855397925e-06, "loss": 0.1265, "step": 10757 }, { "epoch": 0.3138456152634343, "grad_norm": 0.8781754079412691, "learning_rate": 8.032034210768973e-06, "loss": 0.1348, "step": 10758 }, { "epoch": 0.31387478849407785, "grad_norm": 0.9344278429889267, "learning_rate": 8.031658539070744e-06, "loss": 0.1515, "step": 10759 }, { "epoch": 0.3139039617247214, "grad_norm": 0.7558407745735243, "learning_rate": 8.03128284030659e-06, "loss": 0.1305, "step": 10760 }, { "epoch": 0.31393313495536496, "grad_norm": 0.9094098942735137, "learning_rate": 8.030907114479866e-06, "loss": 0.1572, "step": 10761 }, { "epoch": 0.3139623081860085, "grad_norm": 0.8515731402323798, "learning_rate": 8.03053136159393e-06, "loss": 0.1671, "step": 10762 }, { "epoch": 0.31399148141665206, "grad_norm": 0.8192516539648291, "learning_rate": 8.030155581652131e-06, "loss": 0.1429, "step": 10763 }, { "epoch": 0.3140206546472956, "grad_norm": 0.7003987349239982, "learning_rate": 8.029779774657827e-06, "loss": 0.1488, "step": 10764 }, { "epoch": 0.3140498278779392, "grad_norm": 0.6648401101509428, "learning_rate": 8.029403940614372e-06, "loss": 0.1438, "step": 10765 }, { "epoch": 0.3140790011085828, "grad_norm": 1.1170959064644317, "learning_rate": 8.029028079525124e-06, "loss": 0.1357, "step": 10766 }, { "epoch": 0.31410817433922633, "grad_norm": 0.8448134227537687, "learning_rate": 8.028652191393432e-06, "loss": 0.1645, "step": 10767 }, { "epoch": 0.3141373475698699, "grad_norm": 0.7307092293862033, "learning_rate": 8.028276276222658e-06, "loss": 0.1488, "step": 10768 }, { "epoch": 0.31416652080051344, "grad_norm": 0.9418361270201677, "learning_rate": 8.027900334016158e-06, "loss": 0.1589, "step": 10769 }, { "epoch": 0.314195694031157, "grad_norm": 0.9507383502112244, "learning_rate": 8.027524364777285e-06, "loss": 0.1523, "step": 10770 }, { "epoch": 0.31422486726180054, "grad_norm": 0.8114521425027372, "learning_rate": 8.027148368509398e-06, "loss": 0.1177, "step": 10771 }, { "epoch": 0.31425404049244415, "grad_norm": 0.9654237897383231, "learning_rate": 8.026772345215853e-06, "loss": 0.1436, "step": 10772 }, { "epoch": 0.3142832137230877, "grad_norm": 0.7326560639180407, "learning_rate": 8.026396294900007e-06, "loss": 0.1462, "step": 10773 }, { "epoch": 0.31431238695373126, "grad_norm": 0.9610973891969635, "learning_rate": 8.026020217565217e-06, "loss": 0.1469, "step": 10774 }, { "epoch": 0.3143415601843748, "grad_norm": 0.7081117753533893, "learning_rate": 8.02564411321484e-06, "loss": 0.1193, "step": 10775 }, { "epoch": 0.31437073341501837, "grad_norm": 0.8779128326353343, "learning_rate": 8.025267981852236e-06, "loss": 0.1724, "step": 10776 }, { "epoch": 0.3143999066456619, "grad_norm": 0.7002203640783967, "learning_rate": 8.024891823480763e-06, "loss": 0.139, "step": 10777 }, { "epoch": 0.31442907987630553, "grad_norm": 0.8890647628148157, "learning_rate": 8.024515638103775e-06, "loss": 0.1699, "step": 10778 }, { "epoch": 0.3144582531069491, "grad_norm": 0.7395894130367539, "learning_rate": 8.024139425724636e-06, "loss": 0.1491, "step": 10779 }, { "epoch": 0.31448742633759263, "grad_norm": 0.7167755306461832, "learning_rate": 8.023763186346701e-06, "loss": 0.1569, "step": 10780 }, { "epoch": 0.3145165995682362, "grad_norm": 0.8959477083050045, "learning_rate": 8.023386919973328e-06, "loss": 0.1428, "step": 10781 }, { "epoch": 0.31454577279887974, "grad_norm": 0.7926966857231422, "learning_rate": 8.023010626607881e-06, "loss": 0.1534, "step": 10782 }, { "epoch": 0.3145749460295233, "grad_norm": 0.7519276625120004, "learning_rate": 8.022634306253717e-06, "loss": 0.1517, "step": 10783 }, { "epoch": 0.31460411926016685, "grad_norm": 0.7797314713469649, "learning_rate": 8.022257958914194e-06, "loss": 0.1564, "step": 10784 }, { "epoch": 0.31463329249081046, "grad_norm": 1.3093424623422203, "learning_rate": 8.021881584592672e-06, "loss": 0.1461, "step": 10785 }, { "epoch": 0.314662465721454, "grad_norm": 1.0210827254759813, "learning_rate": 8.021505183292515e-06, "loss": 0.1674, "step": 10786 }, { "epoch": 0.31469163895209756, "grad_norm": 0.8662462238752835, "learning_rate": 8.02112875501708e-06, "loss": 0.1486, "step": 10787 }, { "epoch": 0.3147208121827411, "grad_norm": 0.8316937972387971, "learning_rate": 8.02075229976973e-06, "loss": 0.1325, "step": 10788 }, { "epoch": 0.31474998541338467, "grad_norm": 1.1354955370376583, "learning_rate": 8.020375817553824e-06, "loss": 0.1685, "step": 10789 }, { "epoch": 0.3147791586440282, "grad_norm": 0.9533332775851375, "learning_rate": 8.019999308372724e-06, "loss": 0.1638, "step": 10790 }, { "epoch": 0.3148083318746718, "grad_norm": 0.9078033304961471, "learning_rate": 8.01962277222979e-06, "loss": 0.1686, "step": 10791 }, { "epoch": 0.3148375051053154, "grad_norm": 0.9491410550554914, "learning_rate": 8.019246209128384e-06, "loss": 0.1622, "step": 10792 }, { "epoch": 0.31486667833595894, "grad_norm": 0.8494628393581969, "learning_rate": 8.01886961907187e-06, "loss": 0.1465, "step": 10793 }, { "epoch": 0.3148958515666025, "grad_norm": 1.2820199107453916, "learning_rate": 8.018493002063608e-06, "loss": 0.145, "step": 10794 }, { "epoch": 0.31492502479724604, "grad_norm": 1.037733081029843, "learning_rate": 8.018116358106962e-06, "loss": 0.1601, "step": 10795 }, { "epoch": 0.3149541980278896, "grad_norm": 0.9043489129817143, "learning_rate": 8.017739687205295e-06, "loss": 0.1668, "step": 10796 }, { "epoch": 0.31498337125853315, "grad_norm": 1.2169740694511075, "learning_rate": 8.017362989361965e-06, "loss": 0.1394, "step": 10797 }, { "epoch": 0.3150125444891767, "grad_norm": 0.875495406598159, "learning_rate": 8.016986264580341e-06, "loss": 0.141, "step": 10798 }, { "epoch": 0.3150417177198203, "grad_norm": 0.9965183835394941, "learning_rate": 8.016609512863784e-06, "loss": 0.1465, "step": 10799 }, { "epoch": 0.31507089095046387, "grad_norm": 0.7638952406978357, "learning_rate": 8.016232734215655e-06, "loss": 0.1268, "step": 10800 }, { "epoch": 0.3151000641811074, "grad_norm": 0.8975328949139931, "learning_rate": 8.015855928639323e-06, "loss": 0.1742, "step": 10801 }, { "epoch": 0.315129237411751, "grad_norm": 0.6719977806764448, "learning_rate": 8.015479096138149e-06, "loss": 0.1366, "step": 10802 }, { "epoch": 0.3151584106423945, "grad_norm": 0.9731258658017219, "learning_rate": 8.015102236715494e-06, "loss": 0.1734, "step": 10803 }, { "epoch": 0.3151875838730381, "grad_norm": 0.8365768415108314, "learning_rate": 8.01472535037473e-06, "loss": 0.1477, "step": 10804 }, { "epoch": 0.3152167571036817, "grad_norm": 0.8668622645569349, "learning_rate": 8.014348437119215e-06, "loss": 0.1504, "step": 10805 }, { "epoch": 0.31524593033432524, "grad_norm": 0.8114028199900455, "learning_rate": 8.013971496952318e-06, "loss": 0.1433, "step": 10806 }, { "epoch": 0.3152751035649688, "grad_norm": 0.7153840424955196, "learning_rate": 8.013594529877402e-06, "loss": 0.1324, "step": 10807 }, { "epoch": 0.31530427679561235, "grad_norm": 0.7751645304753552, "learning_rate": 8.013217535897835e-06, "loss": 0.1318, "step": 10808 }, { "epoch": 0.3153334500262559, "grad_norm": 1.0414413983644775, "learning_rate": 8.012840515016979e-06, "loss": 0.1358, "step": 10809 }, { "epoch": 0.31536262325689945, "grad_norm": 1.9637889379868687, "learning_rate": 8.012463467238206e-06, "loss": 0.1597, "step": 10810 }, { "epoch": 0.315391796487543, "grad_norm": 0.7796162263142161, "learning_rate": 8.012086392564876e-06, "loss": 0.1422, "step": 10811 }, { "epoch": 0.3154209697181866, "grad_norm": 0.8997929953235346, "learning_rate": 8.011709291000356e-06, "loss": 0.1832, "step": 10812 }, { "epoch": 0.31545014294883017, "grad_norm": 0.874184850708513, "learning_rate": 8.011332162548016e-06, "loss": 0.1351, "step": 10813 }, { "epoch": 0.3154793161794737, "grad_norm": 0.8213953251088818, "learning_rate": 8.01095500721122e-06, "loss": 0.1437, "step": 10814 }, { "epoch": 0.3155084894101173, "grad_norm": 0.7463334749308627, "learning_rate": 8.01057782499334e-06, "loss": 0.1755, "step": 10815 }, { "epoch": 0.31553766264076083, "grad_norm": 0.8353718007235376, "learning_rate": 8.010200615897736e-06, "loss": 0.1573, "step": 10816 }, { "epoch": 0.3155668358714044, "grad_norm": 0.7838193827414881, "learning_rate": 8.00982337992778e-06, "loss": 0.1535, "step": 10817 }, { "epoch": 0.31559600910204794, "grad_norm": 0.9705819795490795, "learning_rate": 8.009446117086842e-06, "loss": 0.1528, "step": 10818 }, { "epoch": 0.31562518233269155, "grad_norm": 0.6172604382681017, "learning_rate": 8.009068827378286e-06, "loss": 0.143, "step": 10819 }, { "epoch": 0.3156543555633351, "grad_norm": 0.9071173585735542, "learning_rate": 8.008691510805483e-06, "loss": 0.1779, "step": 10820 }, { "epoch": 0.31568352879397865, "grad_norm": 0.8501037531554967, "learning_rate": 8.008314167371799e-06, "loss": 0.1423, "step": 10821 }, { "epoch": 0.3157127020246222, "grad_norm": 0.783255751313327, "learning_rate": 8.007936797080604e-06, "loss": 0.1383, "step": 10822 }, { "epoch": 0.31574187525526576, "grad_norm": 0.7944849478688778, "learning_rate": 8.007559399935267e-06, "loss": 0.1503, "step": 10823 }, { "epoch": 0.3157710484859093, "grad_norm": 3.6894205757231644, "learning_rate": 8.007181975939158e-06, "loss": 0.2021, "step": 10824 }, { "epoch": 0.31580022171655286, "grad_norm": 0.8167854693751999, "learning_rate": 8.006804525095646e-06, "loss": 0.146, "step": 10825 }, { "epoch": 0.3158293949471965, "grad_norm": 1.3318246565123562, "learning_rate": 8.006427047408103e-06, "loss": 0.1619, "step": 10826 }, { "epoch": 0.31585856817784, "grad_norm": 0.8930266059831007, "learning_rate": 8.006049542879894e-06, "loss": 0.1658, "step": 10827 }, { "epoch": 0.3158877414084836, "grad_norm": 0.8459288008179908, "learning_rate": 8.005672011514395e-06, "loss": 0.151, "step": 10828 }, { "epoch": 0.31591691463912713, "grad_norm": 1.0865249739569145, "learning_rate": 8.005294453314974e-06, "loss": 0.1411, "step": 10829 }, { "epoch": 0.3159460878697707, "grad_norm": 1.2482569102171208, "learning_rate": 8.004916868285e-06, "loss": 0.1595, "step": 10830 }, { "epoch": 0.31597526110041424, "grad_norm": 0.7735840041017777, "learning_rate": 8.004539256427845e-06, "loss": 0.1571, "step": 10831 }, { "epoch": 0.31600443433105785, "grad_norm": 0.8111414793263528, "learning_rate": 8.004161617746882e-06, "loss": 0.1326, "step": 10832 }, { "epoch": 0.3160336075617014, "grad_norm": 0.6201908759795967, "learning_rate": 8.003783952245481e-06, "loss": 0.1562, "step": 10833 }, { "epoch": 0.31606278079234496, "grad_norm": 0.7690556915432534, "learning_rate": 8.003406259927012e-06, "loss": 0.1743, "step": 10834 }, { "epoch": 0.3160919540229885, "grad_norm": 0.8998700133027496, "learning_rate": 8.003028540794852e-06, "loss": 0.1518, "step": 10835 }, { "epoch": 0.31612112725363206, "grad_norm": 0.7703325383165246, "learning_rate": 8.002650794852367e-06, "loss": 0.1296, "step": 10836 }, { "epoch": 0.3161503004842756, "grad_norm": 0.8496010461891292, "learning_rate": 8.002273022102936e-06, "loss": 0.1336, "step": 10837 }, { "epoch": 0.31617947371491917, "grad_norm": 0.9303915219740783, "learning_rate": 8.001895222549925e-06, "loss": 0.124, "step": 10838 }, { "epoch": 0.3162086469455628, "grad_norm": 0.8372252034605704, "learning_rate": 8.001517396196711e-06, "loss": 0.1697, "step": 10839 }, { "epoch": 0.31623782017620633, "grad_norm": 0.7688007130811148, "learning_rate": 8.001139543046668e-06, "loss": 0.1374, "step": 10840 }, { "epoch": 0.3162669934068499, "grad_norm": 0.9295937439114904, "learning_rate": 8.000761663103164e-06, "loss": 0.1521, "step": 10841 }, { "epoch": 0.31629616663749344, "grad_norm": 0.9722704463867623, "learning_rate": 8.00038375636958e-06, "loss": 0.1632, "step": 10842 }, { "epoch": 0.316325339868137, "grad_norm": 0.9706299166019182, "learning_rate": 8.000005822849284e-06, "loss": 0.1528, "step": 10843 }, { "epoch": 0.31635451309878054, "grad_norm": 0.7881230146869718, "learning_rate": 7.999627862545652e-06, "loss": 0.153, "step": 10844 }, { "epoch": 0.3163836863294241, "grad_norm": 1.3278552700245558, "learning_rate": 7.999249875462058e-06, "loss": 0.1779, "step": 10845 }, { "epoch": 0.3164128595600677, "grad_norm": 0.7948740939947869, "learning_rate": 7.99887186160188e-06, "loss": 0.1446, "step": 10846 }, { "epoch": 0.31644203279071126, "grad_norm": 1.0025482089247173, "learning_rate": 7.998493820968487e-06, "loss": 0.1516, "step": 10847 }, { "epoch": 0.3164712060213548, "grad_norm": 0.8067955914748581, "learning_rate": 7.998115753565259e-06, "loss": 0.1374, "step": 10848 }, { "epoch": 0.31650037925199836, "grad_norm": 1.141590069966231, "learning_rate": 7.997737659395569e-06, "loss": 0.1324, "step": 10849 }, { "epoch": 0.3165295524826419, "grad_norm": 0.6753486870475083, "learning_rate": 7.99735953846279e-06, "loss": 0.1698, "step": 10850 }, { "epoch": 0.31655872571328547, "grad_norm": 0.6681018223927342, "learning_rate": 7.996981390770305e-06, "loss": 0.1406, "step": 10851 }, { "epoch": 0.316587898943929, "grad_norm": 0.9748489643663281, "learning_rate": 7.996603216321482e-06, "loss": 0.1493, "step": 10852 }, { "epoch": 0.31661707217457263, "grad_norm": 0.8698199547853437, "learning_rate": 7.996225015119702e-06, "loss": 0.1403, "step": 10853 }, { "epoch": 0.3166462454052162, "grad_norm": 0.810940875301132, "learning_rate": 7.99584678716834e-06, "loss": 0.1348, "step": 10854 }, { "epoch": 0.31667541863585974, "grad_norm": 0.9992900550347783, "learning_rate": 7.995468532470773e-06, "loss": 0.1532, "step": 10855 }, { "epoch": 0.3167045918665033, "grad_norm": 0.8919525298022424, "learning_rate": 7.995090251030379e-06, "loss": 0.1314, "step": 10856 }, { "epoch": 0.31673376509714685, "grad_norm": 1.0945965505220643, "learning_rate": 7.994711942850536e-06, "loss": 0.1277, "step": 10857 }, { "epoch": 0.3167629383277904, "grad_norm": 1.0945924397721185, "learning_rate": 7.994333607934616e-06, "loss": 0.1483, "step": 10858 }, { "epoch": 0.316792111558434, "grad_norm": 0.8033755381283553, "learning_rate": 7.993955246286e-06, "loss": 0.1609, "step": 10859 }, { "epoch": 0.31682128478907756, "grad_norm": 0.8190977917699773, "learning_rate": 7.99357685790807e-06, "loss": 0.1388, "step": 10860 }, { "epoch": 0.3168504580197211, "grad_norm": 0.8652544400655409, "learning_rate": 7.993198442804198e-06, "loss": 0.1426, "step": 10861 }, { "epoch": 0.31687963125036467, "grad_norm": 1.0621468836251697, "learning_rate": 7.992820000977765e-06, "loss": 0.1746, "step": 10862 }, { "epoch": 0.3169088044810082, "grad_norm": 0.7581573525477618, "learning_rate": 7.99244153243215e-06, "loss": 0.1535, "step": 10863 }, { "epoch": 0.3169379777116518, "grad_norm": 1.0495793236692208, "learning_rate": 7.992063037170731e-06, "loss": 0.143, "step": 10864 }, { "epoch": 0.31696715094229533, "grad_norm": 0.9951663002642008, "learning_rate": 7.991684515196887e-06, "loss": 0.1601, "step": 10865 }, { "epoch": 0.31699632417293894, "grad_norm": 0.8261304615673515, "learning_rate": 7.991305966513998e-06, "loss": 0.151, "step": 10866 }, { "epoch": 0.3170254974035825, "grad_norm": 0.9458205934801588, "learning_rate": 7.990927391125445e-06, "loss": 0.1427, "step": 10867 }, { "epoch": 0.31705467063422604, "grad_norm": 0.8106575000502098, "learning_rate": 7.990548789034605e-06, "loss": 0.1373, "step": 10868 }, { "epoch": 0.3170838438648696, "grad_norm": 0.7383692640181932, "learning_rate": 7.990170160244859e-06, "loss": 0.1622, "step": 10869 }, { "epoch": 0.31711301709551315, "grad_norm": 0.8348757818591909, "learning_rate": 7.989791504759588e-06, "loss": 0.1498, "step": 10870 }, { "epoch": 0.3171421903261567, "grad_norm": 0.8478373700925567, "learning_rate": 7.989412822582172e-06, "loss": 0.1322, "step": 10871 }, { "epoch": 0.31717136355680026, "grad_norm": 0.7925508960137072, "learning_rate": 7.989034113715991e-06, "loss": 0.1785, "step": 10872 }, { "epoch": 0.31720053678744387, "grad_norm": 0.9072265128149225, "learning_rate": 7.98865537816443e-06, "loss": 0.1493, "step": 10873 }, { "epoch": 0.3172297100180874, "grad_norm": 0.6618899521697119, "learning_rate": 7.988276615930864e-06, "loss": 0.1317, "step": 10874 }, { "epoch": 0.31725888324873097, "grad_norm": 0.856390210165339, "learning_rate": 7.98789782701868e-06, "loss": 0.14, "step": 10875 }, { "epoch": 0.3172880564793745, "grad_norm": 0.8282340027579668, "learning_rate": 7.987519011431256e-06, "loss": 0.1585, "step": 10876 }, { "epoch": 0.3173172297100181, "grad_norm": 0.8133317569681802, "learning_rate": 7.987140169171976e-06, "loss": 0.1446, "step": 10877 }, { "epoch": 0.31734640294066163, "grad_norm": 0.6426067356846086, "learning_rate": 7.986761300244221e-06, "loss": 0.1391, "step": 10878 }, { "epoch": 0.3173755761713052, "grad_norm": 0.7135164368604657, "learning_rate": 7.986382404651376e-06, "loss": 0.1456, "step": 10879 }, { "epoch": 0.3174047494019488, "grad_norm": 0.8220779592727057, "learning_rate": 7.98600348239682e-06, "loss": 0.1581, "step": 10880 }, { "epoch": 0.31743392263259235, "grad_norm": 0.8985655432169064, "learning_rate": 7.985624533483938e-06, "loss": 0.1603, "step": 10881 }, { "epoch": 0.3174630958632359, "grad_norm": 0.8243899362161067, "learning_rate": 7.985245557916114e-06, "loss": 0.1499, "step": 10882 }, { "epoch": 0.31749226909387945, "grad_norm": 0.7845523521009368, "learning_rate": 7.984866555696728e-06, "loss": 0.1355, "step": 10883 }, { "epoch": 0.317521442324523, "grad_norm": 0.9071479875801252, "learning_rate": 7.984487526829168e-06, "loss": 0.1446, "step": 10884 }, { "epoch": 0.31755061555516656, "grad_norm": 1.0093951628565614, "learning_rate": 7.984108471316815e-06, "loss": 0.1612, "step": 10885 }, { "epoch": 0.3175797887858101, "grad_norm": 0.6823686103470579, "learning_rate": 7.983729389163054e-06, "loss": 0.1421, "step": 10886 }, { "epoch": 0.3176089620164537, "grad_norm": 0.8161962542510086, "learning_rate": 7.983350280371269e-06, "loss": 0.1542, "step": 10887 }, { "epoch": 0.3176381352470973, "grad_norm": 0.7319529789211147, "learning_rate": 7.982971144944846e-06, "loss": 0.1255, "step": 10888 }, { "epoch": 0.31766730847774083, "grad_norm": 0.6978920309349856, "learning_rate": 7.982591982887168e-06, "loss": 0.162, "step": 10889 }, { "epoch": 0.3176964817083844, "grad_norm": 0.7974316615751017, "learning_rate": 7.982212794201621e-06, "loss": 0.1428, "step": 10890 }, { "epoch": 0.31772565493902793, "grad_norm": 0.7324167704909141, "learning_rate": 7.98183357889159e-06, "loss": 0.1363, "step": 10891 }, { "epoch": 0.3177548281696715, "grad_norm": 0.9590530925832134, "learning_rate": 7.98145433696046e-06, "loss": 0.1514, "step": 10892 }, { "epoch": 0.3177840014003151, "grad_norm": 0.8461320732724353, "learning_rate": 7.98107506841162e-06, "loss": 0.161, "step": 10893 }, { "epoch": 0.31781317463095865, "grad_norm": 0.8229166627709099, "learning_rate": 7.98069577324845e-06, "loss": 0.1564, "step": 10894 }, { "epoch": 0.3178423478616022, "grad_norm": 0.8833468948109152, "learning_rate": 7.980316451474339e-06, "loss": 0.1361, "step": 10895 }, { "epoch": 0.31787152109224576, "grad_norm": 1.0419301433172863, "learning_rate": 7.979937103092677e-06, "loss": 0.153, "step": 10896 }, { "epoch": 0.3179006943228893, "grad_norm": 0.7309515740337175, "learning_rate": 7.979557728106848e-06, "loss": 0.168, "step": 10897 }, { "epoch": 0.31792986755353286, "grad_norm": 1.0051947532936936, "learning_rate": 7.979178326520238e-06, "loss": 0.1501, "step": 10898 }, { "epoch": 0.3179590407841764, "grad_norm": 0.8832778360594972, "learning_rate": 7.978798898336235e-06, "loss": 0.1376, "step": 10899 }, { "epoch": 0.31798821401482, "grad_norm": 0.8412014254184449, "learning_rate": 7.978419443558227e-06, "loss": 0.1861, "step": 10900 }, { "epoch": 0.3180173872454636, "grad_norm": 0.9142482863988516, "learning_rate": 7.9780399621896e-06, "loss": 0.1645, "step": 10901 }, { "epoch": 0.31804656047610713, "grad_norm": 1.1139117786825425, "learning_rate": 7.977660454233744e-06, "loss": 0.1618, "step": 10902 }, { "epoch": 0.3180757337067507, "grad_norm": 0.8952417141100726, "learning_rate": 7.977280919694047e-06, "loss": 0.1453, "step": 10903 }, { "epoch": 0.31810490693739424, "grad_norm": 1.2342890758949616, "learning_rate": 7.976901358573896e-06, "loss": 0.1479, "step": 10904 }, { "epoch": 0.3181340801680378, "grad_norm": 1.2172665966323095, "learning_rate": 7.976521770876679e-06, "loss": 0.1504, "step": 10905 }, { "epoch": 0.31816325339868134, "grad_norm": 0.9820515451969672, "learning_rate": 7.976142156605788e-06, "loss": 0.1514, "step": 10906 }, { "epoch": 0.31819242662932495, "grad_norm": 0.7379730837343433, "learning_rate": 7.97576251576461e-06, "loss": 0.1243, "step": 10907 }, { "epoch": 0.3182215998599685, "grad_norm": 1.2874446005764328, "learning_rate": 7.975382848356533e-06, "loss": 0.1566, "step": 10908 }, { "epoch": 0.31825077309061206, "grad_norm": 0.9823168690057406, "learning_rate": 7.97500315438495e-06, "loss": 0.145, "step": 10909 }, { "epoch": 0.3182799463212556, "grad_norm": 0.976866401389679, "learning_rate": 7.974623433853248e-06, "loss": 0.1529, "step": 10910 }, { "epoch": 0.31830911955189917, "grad_norm": 1.0329106099311451, "learning_rate": 7.97424368676482e-06, "loss": 0.1701, "step": 10911 }, { "epoch": 0.3183382927825427, "grad_norm": 1.238083789297518, "learning_rate": 7.973863913123053e-06, "loss": 0.1494, "step": 10912 }, { "epoch": 0.3183674660131863, "grad_norm": 0.8916058755722975, "learning_rate": 7.973484112931337e-06, "loss": 0.1447, "step": 10913 }, { "epoch": 0.3183966392438299, "grad_norm": 0.7613436905631191, "learning_rate": 7.973104286193067e-06, "loss": 0.1718, "step": 10914 }, { "epoch": 0.31842581247447344, "grad_norm": 1.0944601201705713, "learning_rate": 7.972724432911632e-06, "loss": 0.1562, "step": 10915 }, { "epoch": 0.318454985705117, "grad_norm": 0.8782577023184184, "learning_rate": 7.972344553090422e-06, "loss": 0.1608, "step": 10916 }, { "epoch": 0.31848415893576054, "grad_norm": 0.8529555948164445, "learning_rate": 7.97196464673283e-06, "loss": 0.1446, "step": 10917 }, { "epoch": 0.3185133321664041, "grad_norm": 0.8768972001228, "learning_rate": 7.971584713842247e-06, "loss": 0.141, "step": 10918 }, { "epoch": 0.31854250539704765, "grad_norm": 0.8389996599149626, "learning_rate": 7.971204754422063e-06, "loss": 0.1521, "step": 10919 }, { "epoch": 0.31857167862769126, "grad_norm": 0.939397250400976, "learning_rate": 7.970824768475675e-06, "loss": 0.1667, "step": 10920 }, { "epoch": 0.3186008518583348, "grad_norm": 0.7930986520373468, "learning_rate": 7.970444756006473e-06, "loss": 0.1543, "step": 10921 }, { "epoch": 0.31863002508897836, "grad_norm": 0.8159388536947406, "learning_rate": 7.970064717017847e-06, "loss": 0.141, "step": 10922 }, { "epoch": 0.3186591983196219, "grad_norm": 0.766377693268765, "learning_rate": 7.969684651513193e-06, "loss": 0.1422, "step": 10923 }, { "epoch": 0.31868837155026547, "grad_norm": 0.8639135229695858, "learning_rate": 7.969304559495903e-06, "loss": 0.1842, "step": 10924 }, { "epoch": 0.318717544780909, "grad_norm": 0.7901964245547788, "learning_rate": 7.968924440969372e-06, "loss": 0.1442, "step": 10925 }, { "epoch": 0.3187467180115526, "grad_norm": 0.7547189508705513, "learning_rate": 7.968544295936992e-06, "loss": 0.1261, "step": 10926 }, { "epoch": 0.3187758912421962, "grad_norm": 0.8062241964346836, "learning_rate": 7.968164124402156e-06, "loss": 0.1627, "step": 10927 }, { "epoch": 0.31880506447283974, "grad_norm": 0.7902701470212061, "learning_rate": 7.967783926368259e-06, "loss": 0.1312, "step": 10928 }, { "epoch": 0.3188342377034833, "grad_norm": 0.7790390263454149, "learning_rate": 7.967403701838697e-06, "loss": 0.1509, "step": 10929 }, { "epoch": 0.31886341093412685, "grad_norm": 0.8192497541244029, "learning_rate": 7.967023450816864e-06, "loss": 0.1816, "step": 10930 }, { "epoch": 0.3188925841647704, "grad_norm": 0.7206749785485109, "learning_rate": 7.966643173306151e-06, "loss": 0.1573, "step": 10931 }, { "epoch": 0.31892175739541395, "grad_norm": 0.7553026242956746, "learning_rate": 7.96626286930996e-06, "loss": 0.1279, "step": 10932 }, { "epoch": 0.3189509306260575, "grad_norm": 0.7900128280464674, "learning_rate": 7.965882538831678e-06, "loss": 0.1378, "step": 10933 }, { "epoch": 0.3189801038567011, "grad_norm": 0.781617927870265, "learning_rate": 7.965502181874707e-06, "loss": 0.1466, "step": 10934 }, { "epoch": 0.31900927708734467, "grad_norm": 0.7226488317673317, "learning_rate": 7.965121798442438e-06, "loss": 0.1346, "step": 10935 }, { "epoch": 0.3190384503179882, "grad_norm": 0.6723334210407697, "learning_rate": 7.964741388538272e-06, "loss": 0.1321, "step": 10936 }, { "epoch": 0.3190676235486318, "grad_norm": 0.8684507089063188, "learning_rate": 7.964360952165603e-06, "loss": 0.1424, "step": 10937 }, { "epoch": 0.3190967967792753, "grad_norm": 0.9239067511311433, "learning_rate": 7.963980489327826e-06, "loss": 0.1736, "step": 10938 }, { "epoch": 0.3191259700099189, "grad_norm": 0.8309033312700427, "learning_rate": 7.96360000002834e-06, "loss": 0.1342, "step": 10939 }, { "epoch": 0.31915514324056243, "grad_norm": 0.8864879593290103, "learning_rate": 7.963219484270537e-06, "loss": 0.151, "step": 10940 }, { "epoch": 0.31918431647120604, "grad_norm": 0.9543105795961997, "learning_rate": 7.962838942057821e-06, "loss": 0.1625, "step": 10941 }, { "epoch": 0.3192134897018496, "grad_norm": 0.7911350191859794, "learning_rate": 7.962458373393587e-06, "loss": 0.1401, "step": 10942 }, { "epoch": 0.31924266293249315, "grad_norm": 0.8265873061121952, "learning_rate": 7.96207777828123e-06, "loss": 0.1403, "step": 10943 }, { "epoch": 0.3192718361631367, "grad_norm": 1.0898136522483184, "learning_rate": 7.961697156724149e-06, "loss": 0.1652, "step": 10944 }, { "epoch": 0.31930100939378026, "grad_norm": 0.7436830795595055, "learning_rate": 7.961316508725745e-06, "loss": 0.1411, "step": 10945 }, { "epoch": 0.3193301826244238, "grad_norm": 1.0525989627328995, "learning_rate": 7.960935834289412e-06, "loss": 0.1467, "step": 10946 }, { "epoch": 0.3193593558550674, "grad_norm": 0.6856700623005294, "learning_rate": 7.960555133418551e-06, "loss": 0.1375, "step": 10947 }, { "epoch": 0.31938852908571097, "grad_norm": 0.7745108663757383, "learning_rate": 7.960174406116561e-06, "loss": 0.1351, "step": 10948 }, { "epoch": 0.3194177023163545, "grad_norm": 0.8947273049284142, "learning_rate": 7.959793652386841e-06, "loss": 0.1486, "step": 10949 }, { "epoch": 0.3194468755469981, "grad_norm": 0.8237067717890412, "learning_rate": 7.95941287223279e-06, "loss": 0.1483, "step": 10950 }, { "epoch": 0.31947604877764163, "grad_norm": 0.9335537446322002, "learning_rate": 7.959032065657807e-06, "loss": 0.1458, "step": 10951 }, { "epoch": 0.3195052220082852, "grad_norm": 0.7338829389501422, "learning_rate": 7.958651232665292e-06, "loss": 0.1611, "step": 10952 }, { "epoch": 0.31953439523892874, "grad_norm": 0.7767700317831092, "learning_rate": 7.958270373258645e-06, "loss": 0.1398, "step": 10953 }, { "epoch": 0.31956356846957235, "grad_norm": 0.6514863483326759, "learning_rate": 7.957889487441266e-06, "loss": 0.1501, "step": 10954 }, { "epoch": 0.3195927417002159, "grad_norm": 0.8655125459821137, "learning_rate": 7.957508575216556e-06, "loss": 0.1396, "step": 10955 }, { "epoch": 0.31962191493085945, "grad_norm": 0.7380382683302142, "learning_rate": 7.957127636587916e-06, "loss": 0.1596, "step": 10956 }, { "epoch": 0.319651088161503, "grad_norm": 0.832772327199341, "learning_rate": 7.956746671558746e-06, "loss": 0.1354, "step": 10957 }, { "epoch": 0.31968026139214656, "grad_norm": 0.8846179502107682, "learning_rate": 7.956365680132447e-06, "loss": 0.1505, "step": 10958 }, { "epoch": 0.3197094346227901, "grad_norm": 0.766310363080684, "learning_rate": 7.955984662312422e-06, "loss": 0.1433, "step": 10959 }, { "epoch": 0.31973860785343367, "grad_norm": 0.8378369436549159, "learning_rate": 7.955603618102072e-06, "loss": 0.1391, "step": 10960 }, { "epoch": 0.3197677810840773, "grad_norm": 0.9344404355154655, "learning_rate": 7.955222547504795e-06, "loss": 0.1541, "step": 10961 }, { "epoch": 0.3197969543147208, "grad_norm": 0.8942332460010611, "learning_rate": 7.954841450524e-06, "loss": 0.1649, "step": 10962 }, { "epoch": 0.3198261275453644, "grad_norm": 0.9588201479934079, "learning_rate": 7.954460327163085e-06, "loss": 0.1615, "step": 10963 }, { "epoch": 0.31985530077600793, "grad_norm": 1.065150453822113, "learning_rate": 7.954079177425454e-06, "loss": 0.1623, "step": 10964 }, { "epoch": 0.3198844740066515, "grad_norm": 1.104971740385175, "learning_rate": 7.953698001314508e-06, "loss": 0.1605, "step": 10965 }, { "epoch": 0.31991364723729504, "grad_norm": 0.8466155555025482, "learning_rate": 7.953316798833653e-06, "loss": 0.1789, "step": 10966 }, { "epoch": 0.3199428204679386, "grad_norm": 0.8382534225784651, "learning_rate": 7.952935569986289e-06, "loss": 0.1475, "step": 10967 }, { "epoch": 0.3199719936985822, "grad_norm": 0.8249852671925271, "learning_rate": 7.952554314775822e-06, "loss": 0.1221, "step": 10968 }, { "epoch": 0.32000116692922576, "grad_norm": 0.8597897758081215, "learning_rate": 7.952173033205654e-06, "loss": 0.1617, "step": 10969 }, { "epoch": 0.3200303401598693, "grad_norm": 0.7663886683609561, "learning_rate": 7.951791725279192e-06, "loss": 0.1851, "step": 10970 }, { "epoch": 0.32005951339051286, "grad_norm": 0.7510502612254416, "learning_rate": 7.951410390999836e-06, "loss": 0.1285, "step": 10971 }, { "epoch": 0.3200886866211564, "grad_norm": 0.84640212145857, "learning_rate": 7.951029030370993e-06, "loss": 0.1336, "step": 10972 }, { "epoch": 0.32011785985179997, "grad_norm": 0.6752485779770158, "learning_rate": 7.950647643396069e-06, "loss": 0.1377, "step": 10973 }, { "epoch": 0.3201470330824436, "grad_norm": 0.8721891486643623, "learning_rate": 7.950266230078465e-06, "loss": 0.157, "step": 10974 }, { "epoch": 0.32017620631308713, "grad_norm": 0.8238194749835336, "learning_rate": 7.949884790421591e-06, "loss": 0.1556, "step": 10975 }, { "epoch": 0.3202053795437307, "grad_norm": 0.6238059548766347, "learning_rate": 7.949503324428847e-06, "loss": 0.1603, "step": 10976 }, { "epoch": 0.32023455277437424, "grad_norm": 0.8611891416617158, "learning_rate": 7.949121832103643e-06, "loss": 0.1675, "step": 10977 }, { "epoch": 0.3202637260050178, "grad_norm": 0.8696323812718035, "learning_rate": 7.948740313449382e-06, "loss": 0.1566, "step": 10978 }, { "epoch": 0.32029289923566134, "grad_norm": 0.7918960502139011, "learning_rate": 7.948358768469473e-06, "loss": 0.1565, "step": 10979 }, { "epoch": 0.3203220724663049, "grad_norm": 0.8701276574822827, "learning_rate": 7.94797719716732e-06, "loss": 0.1347, "step": 10980 }, { "epoch": 0.3203512456969485, "grad_norm": 0.9816902745359031, "learning_rate": 7.94759559954633e-06, "loss": 0.1339, "step": 10981 }, { "epoch": 0.32038041892759206, "grad_norm": 0.9397271221685616, "learning_rate": 7.94721397560991e-06, "loss": 0.1261, "step": 10982 }, { "epoch": 0.3204095921582356, "grad_norm": 1.0055992255206205, "learning_rate": 7.946832325361468e-06, "loss": 0.1323, "step": 10983 }, { "epoch": 0.32043876538887917, "grad_norm": 0.7657455244757513, "learning_rate": 7.94645064880441e-06, "loss": 0.1447, "step": 10984 }, { "epoch": 0.3204679386195227, "grad_norm": 0.879737735234662, "learning_rate": 7.946068945942144e-06, "loss": 0.1293, "step": 10985 }, { "epoch": 0.32049711185016627, "grad_norm": 0.8606244572622053, "learning_rate": 7.945687216778078e-06, "loss": 0.1403, "step": 10986 }, { "epoch": 0.3205262850808098, "grad_norm": 0.8821171060557782, "learning_rate": 7.94530546131562e-06, "loss": 0.1705, "step": 10987 }, { "epoch": 0.32055545831145343, "grad_norm": 0.9948303760975354, "learning_rate": 7.944923679558176e-06, "loss": 0.1404, "step": 10988 }, { "epoch": 0.320584631542097, "grad_norm": 0.8341498066454957, "learning_rate": 7.944541871509159e-06, "loss": 0.1322, "step": 10989 }, { "epoch": 0.32061380477274054, "grad_norm": 0.7838082232009056, "learning_rate": 7.944160037171973e-06, "loss": 0.1305, "step": 10990 }, { "epoch": 0.3206429780033841, "grad_norm": 0.7141231750828113, "learning_rate": 7.94377817655003e-06, "loss": 0.1544, "step": 10991 }, { "epoch": 0.32067215123402765, "grad_norm": 0.8018060602628841, "learning_rate": 7.943396289646738e-06, "loss": 0.1526, "step": 10992 }, { "epoch": 0.3207013244646712, "grad_norm": 0.7334660975972801, "learning_rate": 7.943014376465508e-06, "loss": 0.1424, "step": 10993 }, { "epoch": 0.32073049769531475, "grad_norm": 0.704839933028303, "learning_rate": 7.942632437009746e-06, "loss": 0.1454, "step": 10994 }, { "epoch": 0.32075967092595836, "grad_norm": 0.8012705308577012, "learning_rate": 7.942250471282864e-06, "loss": 0.1636, "step": 10995 }, { "epoch": 0.3207888441566019, "grad_norm": 0.7911693022338174, "learning_rate": 7.941868479288276e-06, "loss": 0.1471, "step": 10996 }, { "epoch": 0.32081801738724547, "grad_norm": 0.7842991048808973, "learning_rate": 7.941486461029384e-06, "loss": 0.1414, "step": 10997 }, { "epoch": 0.320847190617889, "grad_norm": 0.719470077422427, "learning_rate": 7.941104416509604e-06, "loss": 0.158, "step": 10998 }, { "epoch": 0.3208763638485326, "grad_norm": 0.8242210322970973, "learning_rate": 7.940722345732347e-06, "loss": 0.1439, "step": 10999 }, { "epoch": 0.32090553707917613, "grad_norm": 0.8489609518286017, "learning_rate": 7.940340248701022e-06, "loss": 0.1579, "step": 11000 }, { "epoch": 0.3209347103098197, "grad_norm": 0.8551238030790933, "learning_rate": 7.939958125419042e-06, "loss": 0.1464, "step": 11001 }, { "epoch": 0.3209638835404633, "grad_norm": 0.7940226839705632, "learning_rate": 7.939575975889817e-06, "loss": 0.1482, "step": 11002 }, { "epoch": 0.32099305677110684, "grad_norm": 1.1013519935521343, "learning_rate": 7.93919380011676e-06, "loss": 0.126, "step": 11003 }, { "epoch": 0.3210222300017504, "grad_norm": 0.9228344579150133, "learning_rate": 7.938811598103282e-06, "loss": 0.1517, "step": 11004 }, { "epoch": 0.32105140323239395, "grad_norm": 1.227069686520391, "learning_rate": 7.938429369852796e-06, "loss": 0.1824, "step": 11005 }, { "epoch": 0.3210805764630375, "grad_norm": 0.761631095083584, "learning_rate": 7.938047115368713e-06, "loss": 0.1245, "step": 11006 }, { "epoch": 0.32110974969368106, "grad_norm": 1.0703644652778053, "learning_rate": 7.937664834654449e-06, "loss": 0.1436, "step": 11007 }, { "epoch": 0.32113892292432467, "grad_norm": 0.9210100666132759, "learning_rate": 7.937282527713412e-06, "loss": 0.1511, "step": 11008 }, { "epoch": 0.3211680961549682, "grad_norm": 0.8369537948127395, "learning_rate": 7.93690019454902e-06, "loss": 0.1563, "step": 11009 }, { "epoch": 0.32119726938561177, "grad_norm": 1.1750987802114254, "learning_rate": 7.936517835164682e-06, "loss": 0.1387, "step": 11010 }, { "epoch": 0.3212264426162553, "grad_norm": 1.078128425755581, "learning_rate": 7.936135449563815e-06, "loss": 0.1705, "step": 11011 }, { "epoch": 0.3212556158468989, "grad_norm": 1.0341634095727026, "learning_rate": 7.935753037749832e-06, "loss": 0.1482, "step": 11012 }, { "epoch": 0.32128478907754243, "grad_norm": 1.003273278719217, "learning_rate": 7.935370599726147e-06, "loss": 0.165, "step": 11013 }, { "epoch": 0.321313962308186, "grad_norm": 0.8501567233419036, "learning_rate": 7.93498813549617e-06, "loss": 0.1592, "step": 11014 }, { "epoch": 0.3213431355388296, "grad_norm": 1.1143881601142083, "learning_rate": 7.934605645063325e-06, "loss": 0.1757, "step": 11015 }, { "epoch": 0.32137230876947315, "grad_norm": 0.7427794374614133, "learning_rate": 7.934223128431017e-06, "loss": 0.1674, "step": 11016 }, { "epoch": 0.3214014820001167, "grad_norm": 0.9468726487888137, "learning_rate": 7.93384058560267e-06, "loss": 0.144, "step": 11017 }, { "epoch": 0.32143065523076025, "grad_norm": 0.7587061259930971, "learning_rate": 7.933458016581691e-06, "loss": 0.1369, "step": 11018 }, { "epoch": 0.3214598284614038, "grad_norm": 0.8365927342007075, "learning_rate": 7.9330754213715e-06, "loss": 0.161, "step": 11019 }, { "epoch": 0.32148900169204736, "grad_norm": 0.7843770152117566, "learning_rate": 7.932692799975513e-06, "loss": 0.1702, "step": 11020 }, { "epoch": 0.3215181749226909, "grad_norm": 0.8703596720960927, "learning_rate": 7.932310152397142e-06, "loss": 0.1456, "step": 11021 }, { "epoch": 0.3215473481533345, "grad_norm": 1.0118251000382694, "learning_rate": 7.931927478639809e-06, "loss": 0.156, "step": 11022 }, { "epoch": 0.3215765213839781, "grad_norm": 0.6723312241330154, "learning_rate": 7.931544778706925e-06, "loss": 0.1544, "step": 11023 }, { "epoch": 0.32160569461462163, "grad_norm": 0.6346271035640576, "learning_rate": 7.93116205260191e-06, "loss": 0.157, "step": 11024 }, { "epoch": 0.3216348678452652, "grad_norm": 0.6450277752550774, "learning_rate": 7.93077930032818e-06, "loss": 0.1293, "step": 11025 }, { "epoch": 0.32166404107590874, "grad_norm": 0.7727609392974565, "learning_rate": 7.930396521889152e-06, "loss": 0.164, "step": 11026 }, { "epoch": 0.3216932143065523, "grad_norm": 0.8947902710093486, "learning_rate": 7.930013717288244e-06, "loss": 0.1368, "step": 11027 }, { "epoch": 0.32172238753719584, "grad_norm": 0.7523660713209434, "learning_rate": 7.929630886528874e-06, "loss": 0.1491, "step": 11028 }, { "epoch": 0.32175156076783945, "grad_norm": 1.0094263864221134, "learning_rate": 7.929248029614455e-06, "loss": 0.1517, "step": 11029 }, { "epoch": 0.321780733998483, "grad_norm": 0.8769132684330191, "learning_rate": 7.928865146548411e-06, "loss": 0.156, "step": 11030 }, { "epoch": 0.32180990722912656, "grad_norm": 0.8737246949168511, "learning_rate": 7.928482237334159e-06, "loss": 0.1731, "step": 11031 }, { "epoch": 0.3218390804597701, "grad_norm": 0.8785322342147993, "learning_rate": 7.928099301975116e-06, "loss": 0.1587, "step": 11032 }, { "epoch": 0.32186825369041366, "grad_norm": 0.9112884315348686, "learning_rate": 7.927716340474701e-06, "loss": 0.1502, "step": 11033 }, { "epoch": 0.3218974269210572, "grad_norm": 0.7559231869687969, "learning_rate": 7.927333352836334e-06, "loss": 0.1507, "step": 11034 }, { "epoch": 0.3219266001517008, "grad_norm": 0.8276737793134261, "learning_rate": 7.926950339063435e-06, "loss": 0.134, "step": 11035 }, { "epoch": 0.3219557733823444, "grad_norm": 1.1149869744579006, "learning_rate": 7.92656729915942e-06, "loss": 0.1249, "step": 11036 }, { "epoch": 0.32198494661298793, "grad_norm": 0.9181834269609487, "learning_rate": 7.926184233127711e-06, "loss": 0.1415, "step": 11037 }, { "epoch": 0.3220141198436315, "grad_norm": 0.7895080741571071, "learning_rate": 7.925801140971728e-06, "loss": 0.1581, "step": 11038 }, { "epoch": 0.32204329307427504, "grad_norm": 0.8239369710506671, "learning_rate": 7.92541802269489e-06, "loss": 0.1574, "step": 11039 }, { "epoch": 0.3220724663049186, "grad_norm": 1.0015644202281127, "learning_rate": 7.925034878300619e-06, "loss": 0.1439, "step": 11040 }, { "epoch": 0.32210163953556215, "grad_norm": 0.8838377900826695, "learning_rate": 7.924651707792337e-06, "loss": 0.1629, "step": 11041 }, { "epoch": 0.32213081276620575, "grad_norm": 0.9723878533223169, "learning_rate": 7.924268511173459e-06, "loss": 0.1552, "step": 11042 }, { "epoch": 0.3221599859968493, "grad_norm": 0.8082210243148918, "learning_rate": 7.923885288447413e-06, "loss": 0.1437, "step": 11043 }, { "epoch": 0.32218915922749286, "grad_norm": 1.0104745462052036, "learning_rate": 7.923502039617615e-06, "loss": 0.1501, "step": 11044 }, { "epoch": 0.3222183324581364, "grad_norm": 0.9943546720506817, "learning_rate": 7.923118764687489e-06, "loss": 0.1871, "step": 11045 }, { "epoch": 0.32224750568877997, "grad_norm": 0.9721145965729276, "learning_rate": 7.922735463660455e-06, "loss": 0.1912, "step": 11046 }, { "epoch": 0.3222766789194235, "grad_norm": 0.7190358049891815, "learning_rate": 7.922352136539938e-06, "loss": 0.1494, "step": 11047 }, { "epoch": 0.3223058521500671, "grad_norm": 0.9486864584518475, "learning_rate": 7.921968783329362e-06, "loss": 0.1492, "step": 11048 }, { "epoch": 0.3223350253807107, "grad_norm": 1.1308214754558508, "learning_rate": 7.921585404032142e-06, "loss": 0.1556, "step": 11049 }, { "epoch": 0.32236419861135424, "grad_norm": 0.8976733890933533, "learning_rate": 7.921201998651707e-06, "loss": 0.1849, "step": 11050 }, { "epoch": 0.3223933718419978, "grad_norm": 0.7042217237944887, "learning_rate": 7.920818567191476e-06, "loss": 0.163, "step": 11051 }, { "epoch": 0.32242254507264134, "grad_norm": 0.9241326609308739, "learning_rate": 7.920435109654877e-06, "loss": 0.1505, "step": 11052 }, { "epoch": 0.3224517183032849, "grad_norm": 0.7560781844894718, "learning_rate": 7.920051626045326e-06, "loss": 0.1374, "step": 11053 }, { "epoch": 0.32248089153392845, "grad_norm": 0.8588511064051029, "learning_rate": 7.919668116366254e-06, "loss": 0.1693, "step": 11054 }, { "epoch": 0.322510064764572, "grad_norm": 0.7864607662435174, "learning_rate": 7.919284580621082e-06, "loss": 0.1531, "step": 11055 }, { "epoch": 0.3225392379952156, "grad_norm": 0.7873508435241552, "learning_rate": 7.918901018813234e-06, "loss": 0.1386, "step": 11056 }, { "epoch": 0.32256841122585916, "grad_norm": 0.7088069197804802, "learning_rate": 7.918517430946135e-06, "loss": 0.1156, "step": 11057 }, { "epoch": 0.3225975844565027, "grad_norm": 0.7059647387036411, "learning_rate": 7.91813381702321e-06, "loss": 0.1333, "step": 11058 }, { "epoch": 0.32262675768714627, "grad_norm": 0.666454605594955, "learning_rate": 7.917750177047881e-06, "loss": 0.1361, "step": 11059 }, { "epoch": 0.3226559309177898, "grad_norm": 1.2113628974280057, "learning_rate": 7.917366511023575e-06, "loss": 0.1457, "step": 11060 }, { "epoch": 0.3226851041484334, "grad_norm": 0.842500261956832, "learning_rate": 7.916982818953718e-06, "loss": 0.1524, "step": 11061 }, { "epoch": 0.322714277379077, "grad_norm": 0.8034266029849304, "learning_rate": 7.916599100841734e-06, "loss": 0.1482, "step": 11062 }, { "epoch": 0.32274345060972054, "grad_norm": 0.9418898273074237, "learning_rate": 7.916215356691051e-06, "loss": 0.1457, "step": 11063 }, { "epoch": 0.3227726238403641, "grad_norm": 0.8991728969743075, "learning_rate": 7.915831586505092e-06, "loss": 0.1658, "step": 11064 }, { "epoch": 0.32280179707100765, "grad_norm": 0.7920795098995894, "learning_rate": 7.915447790287285e-06, "loss": 0.1569, "step": 11065 }, { "epoch": 0.3228309703016512, "grad_norm": 0.8132999221667033, "learning_rate": 7.915063968041055e-06, "loss": 0.1798, "step": 11066 }, { "epoch": 0.32286014353229475, "grad_norm": 1.0027852714766794, "learning_rate": 7.914680119769831e-06, "loss": 0.1545, "step": 11067 }, { "epoch": 0.3228893167629383, "grad_norm": 0.6407065559864056, "learning_rate": 7.91429624547704e-06, "loss": 0.1309, "step": 11068 }, { "epoch": 0.3229184899935819, "grad_norm": 0.8188800605209654, "learning_rate": 7.913912345166106e-06, "loss": 0.1461, "step": 11069 }, { "epoch": 0.32294766322422547, "grad_norm": 0.8840368480843117, "learning_rate": 7.91352841884046e-06, "loss": 0.1815, "step": 11070 }, { "epoch": 0.322976836454869, "grad_norm": 0.963373660572464, "learning_rate": 7.913144466503524e-06, "loss": 0.1514, "step": 11071 }, { "epoch": 0.3230060096855126, "grad_norm": 0.847454837134214, "learning_rate": 7.912760488158732e-06, "loss": 0.1659, "step": 11072 }, { "epoch": 0.3230351829161561, "grad_norm": 0.8394342933315185, "learning_rate": 7.91237648380951e-06, "loss": 0.1512, "step": 11073 }, { "epoch": 0.3230643561467997, "grad_norm": 0.988287441046759, "learning_rate": 7.911992453459286e-06, "loss": 0.1424, "step": 11074 }, { "epoch": 0.32309352937744323, "grad_norm": 0.8230831543372418, "learning_rate": 7.911608397111488e-06, "loss": 0.1503, "step": 11075 }, { "epoch": 0.32312270260808684, "grad_norm": 0.8519448947315523, "learning_rate": 7.911224314769546e-06, "loss": 0.1641, "step": 11076 }, { "epoch": 0.3231518758387304, "grad_norm": 0.8377550743800041, "learning_rate": 7.910840206436888e-06, "loss": 0.1523, "step": 11077 }, { "epoch": 0.32318104906937395, "grad_norm": 0.610019788807047, "learning_rate": 7.910456072116944e-06, "loss": 0.1445, "step": 11078 }, { "epoch": 0.3232102223000175, "grad_norm": 0.8807595995263549, "learning_rate": 7.910071911813142e-06, "loss": 0.1551, "step": 11079 }, { "epoch": 0.32323939553066106, "grad_norm": 0.7677249527011757, "learning_rate": 7.909687725528911e-06, "loss": 0.1506, "step": 11080 }, { "epoch": 0.3232685687613046, "grad_norm": 0.7463840512551806, "learning_rate": 7.909303513267685e-06, "loss": 0.1361, "step": 11081 }, { "epoch": 0.32329774199194816, "grad_norm": 1.12408880891437, "learning_rate": 7.908919275032892e-06, "loss": 0.1363, "step": 11082 }, { "epoch": 0.32332691522259177, "grad_norm": 0.7552353405072342, "learning_rate": 7.90853501082796e-06, "loss": 0.1705, "step": 11083 }, { "epoch": 0.3233560884532353, "grad_norm": 0.7364333369784888, "learning_rate": 7.908150720656324e-06, "loss": 0.1795, "step": 11084 }, { "epoch": 0.3233852616838789, "grad_norm": 0.9176463827324679, "learning_rate": 7.907766404521414e-06, "loss": 0.1496, "step": 11085 }, { "epoch": 0.32341443491452243, "grad_norm": 0.8070896589297977, "learning_rate": 7.907382062426656e-06, "loss": 0.1337, "step": 11086 }, { "epoch": 0.323443608145166, "grad_norm": 0.8071384790599688, "learning_rate": 7.906997694375486e-06, "loss": 0.1577, "step": 11087 }, { "epoch": 0.32347278137580954, "grad_norm": 0.8882272638805864, "learning_rate": 7.906613300371336e-06, "loss": 0.1642, "step": 11088 }, { "epoch": 0.32350195460645315, "grad_norm": 0.9243385594022651, "learning_rate": 7.906228880417635e-06, "loss": 0.1372, "step": 11089 }, { "epoch": 0.3235311278370967, "grad_norm": 0.7258597190739321, "learning_rate": 7.905844434517816e-06, "loss": 0.1654, "step": 11090 }, { "epoch": 0.32356030106774025, "grad_norm": 0.7576725544155756, "learning_rate": 7.905459962675313e-06, "loss": 0.1519, "step": 11091 }, { "epoch": 0.3235894742983838, "grad_norm": 0.8156263374114935, "learning_rate": 7.905075464893555e-06, "loss": 0.1644, "step": 11092 }, { "epoch": 0.32361864752902736, "grad_norm": 0.9372645601270756, "learning_rate": 7.904690941175979e-06, "loss": 0.1293, "step": 11093 }, { "epoch": 0.3236478207596709, "grad_norm": 0.6610553053673265, "learning_rate": 7.904306391526012e-06, "loss": 0.125, "step": 11094 }, { "epoch": 0.32367699399031447, "grad_norm": 0.7163195473998811, "learning_rate": 7.903921815947095e-06, "loss": 0.1426, "step": 11095 }, { "epoch": 0.3237061672209581, "grad_norm": 1.1733843832401145, "learning_rate": 7.903537214442656e-06, "loss": 0.1623, "step": 11096 }, { "epoch": 0.3237353404516016, "grad_norm": 1.0320280774661348, "learning_rate": 7.90315258701613e-06, "loss": 0.1539, "step": 11097 }, { "epoch": 0.3237645136822452, "grad_norm": 1.084036003611658, "learning_rate": 7.90276793367095e-06, "loss": 0.1355, "step": 11098 }, { "epoch": 0.32379368691288873, "grad_norm": 0.8556990140036362, "learning_rate": 7.902383254410551e-06, "loss": 0.164, "step": 11099 }, { "epoch": 0.3238228601435323, "grad_norm": 0.8520628457765326, "learning_rate": 7.901998549238368e-06, "loss": 0.1372, "step": 11100 }, { "epoch": 0.32385203337417584, "grad_norm": 1.0005421369998007, "learning_rate": 7.901613818157834e-06, "loss": 0.1467, "step": 11101 }, { "epoch": 0.3238812066048194, "grad_norm": 1.1171458073610518, "learning_rate": 7.901229061172385e-06, "loss": 0.1362, "step": 11102 }, { "epoch": 0.323910379835463, "grad_norm": 0.8278324943556908, "learning_rate": 7.900844278285456e-06, "loss": 0.1517, "step": 11103 }, { "epoch": 0.32393955306610656, "grad_norm": 0.8315488441333372, "learning_rate": 7.900459469500479e-06, "loss": 0.1347, "step": 11104 }, { "epoch": 0.3239687262967501, "grad_norm": 1.2264931770295593, "learning_rate": 7.900074634820895e-06, "loss": 0.159, "step": 11105 }, { "epoch": 0.32399789952739366, "grad_norm": 0.8233679230374752, "learning_rate": 7.899689774250135e-06, "loss": 0.1328, "step": 11106 }, { "epoch": 0.3240270727580372, "grad_norm": 0.8688961977315816, "learning_rate": 7.899304887791639e-06, "loss": 0.1586, "step": 11107 }, { "epoch": 0.32405624598868077, "grad_norm": 0.9345347455118921, "learning_rate": 7.89891997544884e-06, "loss": 0.1552, "step": 11108 }, { "epoch": 0.3240854192193243, "grad_norm": 0.9791120219413809, "learning_rate": 7.898535037225175e-06, "loss": 0.1317, "step": 11109 }, { "epoch": 0.32411459244996793, "grad_norm": 0.9586664401398748, "learning_rate": 7.898150073124082e-06, "loss": 0.1489, "step": 11110 }, { "epoch": 0.3241437656806115, "grad_norm": 0.9522432142225266, "learning_rate": 7.897765083148996e-06, "loss": 0.1444, "step": 11111 }, { "epoch": 0.32417293891125504, "grad_norm": 0.8064721428674649, "learning_rate": 7.897380067303358e-06, "loss": 0.1707, "step": 11112 }, { "epoch": 0.3242021121418986, "grad_norm": 1.0066213801822907, "learning_rate": 7.896995025590599e-06, "loss": 0.1688, "step": 11113 }, { "epoch": 0.32423128537254214, "grad_norm": 0.998585894294648, "learning_rate": 7.896609958014161e-06, "loss": 0.1603, "step": 11114 }, { "epoch": 0.3242604586031857, "grad_norm": 0.7110054043003348, "learning_rate": 7.896224864577481e-06, "loss": 0.1472, "step": 11115 }, { "epoch": 0.32428963183382925, "grad_norm": 1.0472696116758133, "learning_rate": 7.895839745283995e-06, "loss": 0.159, "step": 11116 }, { "epoch": 0.32431880506447286, "grad_norm": 1.0111319698013854, "learning_rate": 7.895454600137146e-06, "loss": 0.1261, "step": 11117 }, { "epoch": 0.3243479782951164, "grad_norm": 0.9057512398346269, "learning_rate": 7.895069429140368e-06, "loss": 0.1294, "step": 11118 }, { "epoch": 0.32437715152575997, "grad_norm": 0.7968962884674599, "learning_rate": 7.894684232297102e-06, "loss": 0.1548, "step": 11119 }, { "epoch": 0.3244063247564035, "grad_norm": 0.9322557724599413, "learning_rate": 7.894299009610785e-06, "loss": 0.146, "step": 11120 }, { "epoch": 0.3244354979870471, "grad_norm": 0.8162066814814293, "learning_rate": 7.89391376108486e-06, "loss": 0.1448, "step": 11121 }, { "epoch": 0.3244646712176906, "grad_norm": 0.880392781658322, "learning_rate": 7.89352848672276e-06, "loss": 0.1897, "step": 11122 }, { "epoch": 0.32449384444833423, "grad_norm": 0.9233284810847567, "learning_rate": 7.893143186527932e-06, "loss": 0.1256, "step": 11123 }, { "epoch": 0.3245230176789778, "grad_norm": 0.7914260869194623, "learning_rate": 7.892757860503811e-06, "loss": 0.1541, "step": 11124 }, { "epoch": 0.32455219090962134, "grad_norm": 1.0147958123374983, "learning_rate": 7.892372508653836e-06, "loss": 0.1603, "step": 11125 }, { "epoch": 0.3245813641402649, "grad_norm": 0.778283953114621, "learning_rate": 7.891987130981453e-06, "loss": 0.1393, "step": 11126 }, { "epoch": 0.32461053737090845, "grad_norm": 0.9710467584953915, "learning_rate": 7.891601727490097e-06, "loss": 0.1487, "step": 11127 }, { "epoch": 0.324639710601552, "grad_norm": 0.7689397872836617, "learning_rate": 7.891216298183211e-06, "loss": 0.1341, "step": 11128 }, { "epoch": 0.32466888383219555, "grad_norm": 0.8023403353438116, "learning_rate": 7.890830843064238e-06, "loss": 0.1435, "step": 11129 }, { "epoch": 0.32469805706283916, "grad_norm": 0.8413398113707, "learning_rate": 7.890445362136617e-06, "loss": 0.1591, "step": 11130 }, { "epoch": 0.3247272302934827, "grad_norm": 0.6539052901783666, "learning_rate": 7.890059855403788e-06, "loss": 0.1716, "step": 11131 }, { "epoch": 0.32475640352412627, "grad_norm": 0.963041856513202, "learning_rate": 7.889674322869197e-06, "loss": 0.14, "step": 11132 }, { "epoch": 0.3247855767547698, "grad_norm": 0.7747670818321699, "learning_rate": 7.889288764536283e-06, "loss": 0.1558, "step": 11133 }, { "epoch": 0.3248147499854134, "grad_norm": 0.7081080602780688, "learning_rate": 7.888903180408487e-06, "loss": 0.1487, "step": 11134 }, { "epoch": 0.32484392321605693, "grad_norm": 0.7767953386113995, "learning_rate": 7.888517570489254e-06, "loss": 0.1484, "step": 11135 }, { "epoch": 0.3248730964467005, "grad_norm": 0.9086030933297429, "learning_rate": 7.888131934782025e-06, "loss": 0.16, "step": 11136 }, { "epoch": 0.3249022696773441, "grad_norm": 0.8442980583402525, "learning_rate": 7.887746273290244e-06, "loss": 0.1743, "step": 11137 }, { "epoch": 0.32493144290798764, "grad_norm": 0.781528325486156, "learning_rate": 7.887360586017355e-06, "loss": 0.1633, "step": 11138 }, { "epoch": 0.3249606161386312, "grad_norm": 0.8827801295303677, "learning_rate": 7.886974872966797e-06, "loss": 0.1407, "step": 11139 }, { "epoch": 0.32498978936927475, "grad_norm": 0.861913956074121, "learning_rate": 7.88658913414202e-06, "loss": 0.152, "step": 11140 }, { "epoch": 0.3250189625999183, "grad_norm": 0.7879894827965718, "learning_rate": 7.88620336954646e-06, "loss": 0.1577, "step": 11141 }, { "epoch": 0.32504813583056186, "grad_norm": 0.9862164150006236, "learning_rate": 7.885817579183568e-06, "loss": 0.1499, "step": 11142 }, { "epoch": 0.3250773090612054, "grad_norm": 0.7817851699096838, "learning_rate": 7.885431763056785e-06, "loss": 0.1492, "step": 11143 }, { "epoch": 0.325106482291849, "grad_norm": 0.7994504919131414, "learning_rate": 7.885045921169558e-06, "loss": 0.1532, "step": 11144 }, { "epoch": 0.3251356555224926, "grad_norm": 0.8545913647459151, "learning_rate": 7.884660053525328e-06, "loss": 0.1661, "step": 11145 }, { "epoch": 0.3251648287531361, "grad_norm": 0.7178098624736637, "learning_rate": 7.88427416012754e-06, "loss": 0.1312, "step": 11146 }, { "epoch": 0.3251940019837797, "grad_norm": 0.9187691784701346, "learning_rate": 7.883888240979645e-06, "loss": 0.1677, "step": 11147 }, { "epoch": 0.32522317521442323, "grad_norm": 0.8330749505255329, "learning_rate": 7.883502296085082e-06, "loss": 0.1505, "step": 11148 }, { "epoch": 0.3252523484450668, "grad_norm": 0.9117442196393197, "learning_rate": 7.883116325447297e-06, "loss": 0.1354, "step": 11149 }, { "epoch": 0.3252815216757104, "grad_norm": 0.8664809256906031, "learning_rate": 7.88273032906974e-06, "loss": 0.1235, "step": 11150 }, { "epoch": 0.32531069490635395, "grad_norm": 0.8885645752805146, "learning_rate": 7.882344306955854e-06, "loss": 0.1585, "step": 11151 }, { "epoch": 0.3253398681369975, "grad_norm": 1.043108426199016, "learning_rate": 7.881958259109086e-06, "loss": 0.1667, "step": 11152 }, { "epoch": 0.32536904136764105, "grad_norm": 0.797157519242401, "learning_rate": 7.881572185532883e-06, "loss": 0.1454, "step": 11153 }, { "epoch": 0.3253982145982846, "grad_norm": 0.7372472441294787, "learning_rate": 7.881186086230692e-06, "loss": 0.1498, "step": 11154 }, { "epoch": 0.32542738782892816, "grad_norm": 0.8927634683470496, "learning_rate": 7.880799961205958e-06, "loss": 0.1338, "step": 11155 }, { "epoch": 0.3254565610595717, "grad_norm": 0.9679172359122158, "learning_rate": 7.880413810462131e-06, "loss": 0.1573, "step": 11156 }, { "epoch": 0.3254857342902153, "grad_norm": 0.8530429571627943, "learning_rate": 7.880027634002656e-06, "loss": 0.1401, "step": 11157 }, { "epoch": 0.3255149075208589, "grad_norm": 1.0792902672644435, "learning_rate": 7.879641431830982e-06, "loss": 0.1489, "step": 11158 }, { "epoch": 0.32554408075150243, "grad_norm": 0.8090864583869836, "learning_rate": 7.879255203950558e-06, "loss": 0.151, "step": 11159 }, { "epoch": 0.325573253982146, "grad_norm": 0.8242460238232276, "learning_rate": 7.87886895036483e-06, "loss": 0.1628, "step": 11160 }, { "epoch": 0.32560242721278954, "grad_norm": 0.955244334399679, "learning_rate": 7.878482671077245e-06, "loss": 0.1436, "step": 11161 }, { "epoch": 0.3256316004434331, "grad_norm": 0.8717935363950352, "learning_rate": 7.878096366091257e-06, "loss": 0.1548, "step": 11162 }, { "epoch": 0.32566077367407664, "grad_norm": 0.7861828283605923, "learning_rate": 7.87771003541031e-06, "loss": 0.1532, "step": 11163 }, { "epoch": 0.32568994690472025, "grad_norm": 1.0991238720235803, "learning_rate": 7.877323679037856e-06, "loss": 0.1376, "step": 11164 }, { "epoch": 0.3257191201353638, "grad_norm": 0.8782449589718162, "learning_rate": 7.876937296977343e-06, "loss": 0.1605, "step": 11165 }, { "epoch": 0.32574829336600736, "grad_norm": 0.9904516402225627, "learning_rate": 7.87655088923222e-06, "loss": 0.1545, "step": 11166 }, { "epoch": 0.3257774665966509, "grad_norm": 0.926925223891559, "learning_rate": 7.876164455805936e-06, "loss": 0.1526, "step": 11167 }, { "epoch": 0.32580663982729446, "grad_norm": 1.0697371220281324, "learning_rate": 7.875777996701945e-06, "loss": 0.1456, "step": 11168 }, { "epoch": 0.325835813057938, "grad_norm": 0.8192916929295959, "learning_rate": 7.875391511923694e-06, "loss": 0.1539, "step": 11169 }, { "epoch": 0.32586498628858157, "grad_norm": 0.9150330513863495, "learning_rate": 7.875005001474634e-06, "loss": 0.1766, "step": 11170 }, { "epoch": 0.3258941595192252, "grad_norm": 0.9828826048360307, "learning_rate": 7.874618465358214e-06, "loss": 0.1422, "step": 11171 }, { "epoch": 0.32592333274986873, "grad_norm": 0.6383145733164322, "learning_rate": 7.874231903577888e-06, "loss": 0.1409, "step": 11172 }, { "epoch": 0.3259525059805123, "grad_norm": 0.7695161636554368, "learning_rate": 7.873845316137105e-06, "loss": 0.1356, "step": 11173 }, { "epoch": 0.32598167921115584, "grad_norm": 1.0856298287608799, "learning_rate": 7.873458703039318e-06, "loss": 0.1471, "step": 11174 }, { "epoch": 0.3260108524417994, "grad_norm": 0.9055813711090364, "learning_rate": 7.873072064287977e-06, "loss": 0.1394, "step": 11175 }, { "epoch": 0.32604002567244295, "grad_norm": 0.7847444403890237, "learning_rate": 7.872685399886534e-06, "loss": 0.1183, "step": 11176 }, { "epoch": 0.32606919890308655, "grad_norm": 0.906607351781194, "learning_rate": 7.872298709838442e-06, "loss": 0.1475, "step": 11177 }, { "epoch": 0.3260983721337301, "grad_norm": 0.7337407212504137, "learning_rate": 7.871911994147153e-06, "loss": 0.1605, "step": 11178 }, { "epoch": 0.32612754536437366, "grad_norm": 0.9478752233020101, "learning_rate": 7.871525252816118e-06, "loss": 0.1473, "step": 11179 }, { "epoch": 0.3261567185950172, "grad_norm": 1.0254850132373712, "learning_rate": 7.871138485848792e-06, "loss": 0.1634, "step": 11180 }, { "epoch": 0.32618589182566077, "grad_norm": 0.8695701462858811, "learning_rate": 7.870751693248629e-06, "loss": 0.1402, "step": 11181 }, { "epoch": 0.3262150650563043, "grad_norm": 0.8789685242400097, "learning_rate": 7.870364875019077e-06, "loss": 0.1551, "step": 11182 }, { "epoch": 0.3262442382869479, "grad_norm": 1.056578533196887, "learning_rate": 7.869978031163595e-06, "loss": 0.1532, "step": 11183 }, { "epoch": 0.3262734115175915, "grad_norm": 0.8102537330696241, "learning_rate": 7.869591161685632e-06, "loss": 0.1528, "step": 11184 }, { "epoch": 0.32630258474823504, "grad_norm": 0.676497318379484, "learning_rate": 7.869204266588646e-06, "loss": 0.137, "step": 11185 }, { "epoch": 0.3263317579788786, "grad_norm": 0.7706067624671409, "learning_rate": 7.868817345876087e-06, "loss": 0.1326, "step": 11186 }, { "epoch": 0.32636093120952214, "grad_norm": 0.9601753302633754, "learning_rate": 7.868430399551414e-06, "loss": 0.1532, "step": 11187 }, { "epoch": 0.3263901044401657, "grad_norm": 0.8017299868137221, "learning_rate": 7.868043427618079e-06, "loss": 0.1731, "step": 11188 }, { "epoch": 0.32641927767080925, "grad_norm": 0.9979207894961692, "learning_rate": 7.867656430079536e-06, "loss": 0.1564, "step": 11189 }, { "epoch": 0.3264484509014528, "grad_norm": 0.8489733284107609, "learning_rate": 7.867269406939241e-06, "loss": 0.1531, "step": 11190 }, { "epoch": 0.3264776241320964, "grad_norm": 1.2871244862954452, "learning_rate": 7.86688235820065e-06, "loss": 0.1477, "step": 11191 }, { "epoch": 0.32650679736273996, "grad_norm": 0.8434115513925409, "learning_rate": 7.866495283867217e-06, "loss": 0.1592, "step": 11192 }, { "epoch": 0.3265359705933835, "grad_norm": 0.7868800112584352, "learning_rate": 7.866108183942398e-06, "loss": 0.1339, "step": 11193 }, { "epoch": 0.32656514382402707, "grad_norm": 0.7322802973463202, "learning_rate": 7.86572105842965e-06, "loss": 0.145, "step": 11194 }, { "epoch": 0.3265943170546706, "grad_norm": 0.7045795705883792, "learning_rate": 7.865333907332428e-06, "loss": 0.1633, "step": 11195 }, { "epoch": 0.3266234902853142, "grad_norm": 0.8515529749835823, "learning_rate": 7.864946730654189e-06, "loss": 0.1372, "step": 11196 }, { "epoch": 0.32665266351595773, "grad_norm": 0.8143587191431548, "learning_rate": 7.864559528398389e-06, "loss": 0.1445, "step": 11197 }, { "epoch": 0.32668183674660134, "grad_norm": 0.7803727200147857, "learning_rate": 7.864172300568486e-06, "loss": 0.1749, "step": 11198 }, { "epoch": 0.3267110099772449, "grad_norm": 0.8890237745265267, "learning_rate": 7.863785047167937e-06, "loss": 0.1546, "step": 11199 }, { "epoch": 0.32674018320788845, "grad_norm": 0.8192458116294992, "learning_rate": 7.863397768200199e-06, "loss": 0.1536, "step": 11200 }, { "epoch": 0.326769356438532, "grad_norm": 0.8109583909799909, "learning_rate": 7.863010463668727e-06, "loss": 0.166, "step": 11201 }, { "epoch": 0.32679852966917555, "grad_norm": 0.7512780401103101, "learning_rate": 7.862623133576983e-06, "loss": 0.1344, "step": 11202 }, { "epoch": 0.3268277028998191, "grad_norm": 0.8751608725724604, "learning_rate": 7.862235777928421e-06, "loss": 0.1426, "step": 11203 }, { "epoch": 0.3268568761304627, "grad_norm": 0.7561860223338784, "learning_rate": 7.861848396726503e-06, "loss": 0.1424, "step": 11204 }, { "epoch": 0.32688604936110627, "grad_norm": 1.0115884453789517, "learning_rate": 7.861460989974687e-06, "loss": 0.1652, "step": 11205 }, { "epoch": 0.3269152225917498, "grad_norm": 0.7903954517382313, "learning_rate": 7.86107355767643e-06, "loss": 0.1373, "step": 11206 }, { "epoch": 0.3269443958223934, "grad_norm": 0.7381217575098457, "learning_rate": 7.860686099835189e-06, "loss": 0.1402, "step": 11207 }, { "epoch": 0.3269735690530369, "grad_norm": 0.8546309919055587, "learning_rate": 7.860298616454427e-06, "loss": 0.1529, "step": 11208 }, { "epoch": 0.3270027422836805, "grad_norm": 0.7078441242209368, "learning_rate": 7.8599111075376e-06, "loss": 0.154, "step": 11209 }, { "epoch": 0.32703191551432403, "grad_norm": 1.0809135431051031, "learning_rate": 7.85952357308817e-06, "loss": 0.1312, "step": 11210 }, { "epoch": 0.32706108874496764, "grad_norm": 0.9314678237368279, "learning_rate": 7.8591360131096e-06, "loss": 0.1228, "step": 11211 }, { "epoch": 0.3270902619756112, "grad_norm": 0.9749281045999717, "learning_rate": 7.85874842760534e-06, "loss": 0.1342, "step": 11212 }, { "epoch": 0.32711943520625475, "grad_norm": 0.8503717148202816, "learning_rate": 7.85836081657886e-06, "loss": 0.14, "step": 11213 }, { "epoch": 0.3271486084368983, "grad_norm": 0.7811678816801917, "learning_rate": 7.857973180033615e-06, "loss": 0.1725, "step": 11214 }, { "epoch": 0.32717778166754186, "grad_norm": 0.8289429545831913, "learning_rate": 7.85758551797307e-06, "loss": 0.1597, "step": 11215 }, { "epoch": 0.3272069548981854, "grad_norm": 0.8870308805479863, "learning_rate": 7.857197830400683e-06, "loss": 0.1655, "step": 11216 }, { "epoch": 0.32723612812882896, "grad_norm": 0.6846381544502131, "learning_rate": 7.856810117319916e-06, "loss": 0.1552, "step": 11217 }, { "epoch": 0.32726530135947257, "grad_norm": 0.778537156230549, "learning_rate": 7.85642237873423e-06, "loss": 0.1317, "step": 11218 }, { "epoch": 0.3272944745901161, "grad_norm": 0.741098038821696, "learning_rate": 7.856034614647087e-06, "loss": 0.1648, "step": 11219 }, { "epoch": 0.3273236478207597, "grad_norm": 0.7653311032549058, "learning_rate": 7.855646825061948e-06, "loss": 0.1472, "step": 11220 }, { "epoch": 0.32735282105140323, "grad_norm": 0.5955863209548142, "learning_rate": 7.855259009982275e-06, "loss": 0.1336, "step": 11221 }, { "epoch": 0.3273819942820468, "grad_norm": 0.7148012467713774, "learning_rate": 7.854871169411533e-06, "loss": 0.1552, "step": 11222 }, { "epoch": 0.32741116751269034, "grad_norm": 0.8127493975852811, "learning_rate": 7.854483303353182e-06, "loss": 0.1856, "step": 11223 }, { "epoch": 0.3274403407433339, "grad_norm": 0.6241468024838582, "learning_rate": 7.854095411810688e-06, "loss": 0.1233, "step": 11224 }, { "epoch": 0.3274695139739775, "grad_norm": 0.8904881057328751, "learning_rate": 7.853707494787508e-06, "loss": 0.1457, "step": 11225 }, { "epoch": 0.32749868720462105, "grad_norm": 0.9114078043253493, "learning_rate": 7.85331955228711e-06, "loss": 0.1591, "step": 11226 }, { "epoch": 0.3275278604352646, "grad_norm": 0.8735414792196101, "learning_rate": 7.852931584312955e-06, "loss": 0.1543, "step": 11227 }, { "epoch": 0.32755703366590816, "grad_norm": 1.187150524167911, "learning_rate": 7.85254359086851e-06, "loss": 0.1556, "step": 11228 }, { "epoch": 0.3275862068965517, "grad_norm": 0.896711636972266, "learning_rate": 7.852155571957237e-06, "loss": 0.1389, "step": 11229 }, { "epoch": 0.32761538012719527, "grad_norm": 1.2077028882700518, "learning_rate": 7.851767527582597e-06, "loss": 0.1527, "step": 11230 }, { "epoch": 0.3276445533578389, "grad_norm": 0.7263188814374241, "learning_rate": 7.851379457748058e-06, "loss": 0.1503, "step": 11231 }, { "epoch": 0.32767372658848243, "grad_norm": 0.7322433282618662, "learning_rate": 7.850991362457086e-06, "loss": 0.1095, "step": 11232 }, { "epoch": 0.327702899819126, "grad_norm": 0.8758994807569604, "learning_rate": 7.850603241713143e-06, "loss": 0.1504, "step": 11233 }, { "epoch": 0.32773207304976953, "grad_norm": 0.8231619583819102, "learning_rate": 7.850215095519693e-06, "loss": 0.1395, "step": 11234 }, { "epoch": 0.3277612462804131, "grad_norm": 0.7710888808662312, "learning_rate": 7.849826923880205e-06, "loss": 0.1651, "step": 11235 }, { "epoch": 0.32779041951105664, "grad_norm": 0.8255779225335644, "learning_rate": 7.849438726798142e-06, "loss": 0.1653, "step": 11236 }, { "epoch": 0.3278195927417002, "grad_norm": 1.2851151130354712, "learning_rate": 7.84905050427697e-06, "loss": 0.1373, "step": 11237 }, { "epoch": 0.3278487659723438, "grad_norm": 0.9665058607869698, "learning_rate": 7.848662256320155e-06, "loss": 0.1725, "step": 11238 }, { "epoch": 0.32787793920298736, "grad_norm": 0.7286947726251944, "learning_rate": 7.848273982931164e-06, "loss": 0.1347, "step": 11239 }, { "epoch": 0.3279071124336309, "grad_norm": 0.9619457127205155, "learning_rate": 7.847885684113463e-06, "loss": 0.1782, "step": 11240 }, { "epoch": 0.32793628566427446, "grad_norm": 1.3098646305027817, "learning_rate": 7.847497359870517e-06, "loss": 0.1584, "step": 11241 }, { "epoch": 0.327965458894918, "grad_norm": 0.7374484823036807, "learning_rate": 7.847109010205796e-06, "loss": 0.1514, "step": 11242 }, { "epoch": 0.32799463212556157, "grad_norm": 0.847237314622174, "learning_rate": 7.846720635122765e-06, "loss": 0.1322, "step": 11243 }, { "epoch": 0.3280238053562051, "grad_norm": 0.9292241527720175, "learning_rate": 7.84633223462489e-06, "loss": 0.16, "step": 11244 }, { "epoch": 0.32805297858684873, "grad_norm": 0.8562620323172302, "learning_rate": 7.845943808715643e-06, "loss": 0.1466, "step": 11245 }, { "epoch": 0.3280821518174923, "grad_norm": 0.7034178594607732, "learning_rate": 7.845555357398488e-06, "loss": 0.1357, "step": 11246 }, { "epoch": 0.32811132504813584, "grad_norm": 0.8738043470643679, "learning_rate": 7.845166880676894e-06, "loss": 0.1457, "step": 11247 }, { "epoch": 0.3281404982787794, "grad_norm": 0.7889693949055656, "learning_rate": 7.844778378554328e-06, "loss": 0.1391, "step": 11248 }, { "epoch": 0.32816967150942294, "grad_norm": 0.9570973099032345, "learning_rate": 7.844389851034262e-06, "loss": 0.1621, "step": 11249 }, { "epoch": 0.3281988447400665, "grad_norm": 1.0272368292421759, "learning_rate": 7.84400129812016e-06, "loss": 0.1403, "step": 11250 }, { "epoch": 0.32822801797071005, "grad_norm": 0.7281150320970144, "learning_rate": 7.843612719815495e-06, "loss": 0.1505, "step": 11251 }, { "epoch": 0.32825719120135366, "grad_norm": 0.9004117672538315, "learning_rate": 7.843224116123735e-06, "loss": 0.1709, "step": 11252 }, { "epoch": 0.3282863644319972, "grad_norm": 0.9160051166247537, "learning_rate": 7.842835487048347e-06, "loss": 0.1388, "step": 11253 }, { "epoch": 0.32831553766264077, "grad_norm": 0.6360534837243493, "learning_rate": 7.842446832592805e-06, "loss": 0.1462, "step": 11254 }, { "epoch": 0.3283447108932843, "grad_norm": 0.8936577309356724, "learning_rate": 7.842058152760573e-06, "loss": 0.1816, "step": 11255 }, { "epoch": 0.3283738841239279, "grad_norm": 0.8079007857039887, "learning_rate": 7.841669447555126e-06, "loss": 0.1429, "step": 11256 }, { "epoch": 0.3284030573545714, "grad_norm": 0.8079294421546867, "learning_rate": 7.841280716979933e-06, "loss": 0.1311, "step": 11257 }, { "epoch": 0.328432230585215, "grad_norm": 0.6798051594740071, "learning_rate": 7.840891961038464e-06, "loss": 0.1484, "step": 11258 }, { "epoch": 0.3284614038158586, "grad_norm": 0.8981734630181988, "learning_rate": 7.840503179734188e-06, "loss": 0.1597, "step": 11259 }, { "epoch": 0.32849057704650214, "grad_norm": 0.79375380572684, "learning_rate": 7.840114373070579e-06, "loss": 0.1344, "step": 11260 }, { "epoch": 0.3285197502771457, "grad_norm": 0.8088605455674179, "learning_rate": 7.839725541051106e-06, "loss": 0.1217, "step": 11261 }, { "epoch": 0.32854892350778925, "grad_norm": 0.7509650738130004, "learning_rate": 7.839336683679241e-06, "loss": 0.1379, "step": 11262 }, { "epoch": 0.3285780967384328, "grad_norm": 0.9784735787095662, "learning_rate": 7.838947800958459e-06, "loss": 0.1506, "step": 11263 }, { "epoch": 0.32860726996907635, "grad_norm": 0.9860942217770315, "learning_rate": 7.838558892892226e-06, "loss": 0.1491, "step": 11264 }, { "epoch": 0.32863644319971996, "grad_norm": 0.8076819378761262, "learning_rate": 7.838169959484017e-06, "loss": 0.1514, "step": 11265 }, { "epoch": 0.3286656164303635, "grad_norm": 0.9269881626634534, "learning_rate": 7.837781000737306e-06, "loss": 0.1586, "step": 11266 }, { "epoch": 0.32869478966100707, "grad_norm": 0.7327365327819917, "learning_rate": 7.837392016655562e-06, "loss": 0.1646, "step": 11267 }, { "epoch": 0.3287239628916506, "grad_norm": 0.831318890265991, "learning_rate": 7.837003007242258e-06, "loss": 0.1245, "step": 11268 }, { "epoch": 0.3287531361222942, "grad_norm": 0.8241302883260383, "learning_rate": 7.83661397250087e-06, "loss": 0.1414, "step": 11269 }, { "epoch": 0.32878230935293773, "grad_norm": 0.9117858533668711, "learning_rate": 7.83622491243487e-06, "loss": 0.1463, "step": 11270 }, { "epoch": 0.3288114825835813, "grad_norm": 0.9879726116733071, "learning_rate": 7.835835827047731e-06, "loss": 0.1549, "step": 11271 }, { "epoch": 0.3288406558142249, "grad_norm": 0.8261101970410129, "learning_rate": 7.835446716342926e-06, "loss": 0.1554, "step": 11272 }, { "epoch": 0.32886982904486844, "grad_norm": 0.6414598425966459, "learning_rate": 7.83505758032393e-06, "loss": 0.1415, "step": 11273 }, { "epoch": 0.328899002275512, "grad_norm": 0.7659185417449016, "learning_rate": 7.834668418994216e-06, "loss": 0.1532, "step": 11274 }, { "epoch": 0.32892817550615555, "grad_norm": 1.0189263475884969, "learning_rate": 7.834279232357261e-06, "loss": 0.1509, "step": 11275 }, { "epoch": 0.3289573487367991, "grad_norm": 0.6077407312814666, "learning_rate": 7.833890020416537e-06, "loss": 0.1503, "step": 11276 }, { "epoch": 0.32898652196744266, "grad_norm": 0.8050501152685532, "learning_rate": 7.833500783175518e-06, "loss": 0.1685, "step": 11277 }, { "epoch": 0.3290156951980862, "grad_norm": 0.7442021567327869, "learning_rate": 7.833111520637681e-06, "loss": 0.135, "step": 11278 }, { "epoch": 0.3290448684287298, "grad_norm": 0.7083448176879351, "learning_rate": 7.832722232806503e-06, "loss": 0.1499, "step": 11279 }, { "epoch": 0.3290740416593734, "grad_norm": 0.715504077425142, "learning_rate": 7.832332919685452e-06, "loss": 0.1408, "step": 11280 }, { "epoch": 0.3291032148900169, "grad_norm": 0.9422272669677965, "learning_rate": 7.831943581278011e-06, "loss": 0.142, "step": 11281 }, { "epoch": 0.3291323881206605, "grad_norm": 0.704431642332087, "learning_rate": 7.831554217587655e-06, "loss": 0.1437, "step": 11282 }, { "epoch": 0.32916156135130403, "grad_norm": 0.6947801321932113, "learning_rate": 7.831164828617858e-06, "loss": 0.1325, "step": 11283 }, { "epoch": 0.3291907345819476, "grad_norm": 0.7415241606652632, "learning_rate": 7.830775414372099e-06, "loss": 0.1795, "step": 11284 }, { "epoch": 0.32921990781259114, "grad_norm": 0.8856096077059094, "learning_rate": 7.830385974853852e-06, "loss": 0.1756, "step": 11285 }, { "epoch": 0.32924908104323475, "grad_norm": 1.024118628830476, "learning_rate": 7.829996510066594e-06, "loss": 0.1392, "step": 11286 }, { "epoch": 0.3292782542738783, "grad_norm": 0.9095232382551075, "learning_rate": 7.829607020013802e-06, "loss": 0.1477, "step": 11287 }, { "epoch": 0.32930742750452185, "grad_norm": 0.9476127453849916, "learning_rate": 7.829217504698957e-06, "loss": 0.1559, "step": 11288 }, { "epoch": 0.3293366007351654, "grad_norm": 0.8061722285929731, "learning_rate": 7.82882796412553e-06, "loss": 0.1477, "step": 11289 }, { "epoch": 0.32936577396580896, "grad_norm": 0.6870770038574902, "learning_rate": 7.828438398297005e-06, "loss": 0.1429, "step": 11290 }, { "epoch": 0.3293949471964525, "grad_norm": 0.9215153764572268, "learning_rate": 7.828048807216854e-06, "loss": 0.1488, "step": 11291 }, { "epoch": 0.3294241204270961, "grad_norm": 0.8951236392736993, "learning_rate": 7.827659190888562e-06, "loss": 0.1558, "step": 11292 }, { "epoch": 0.3294532936577397, "grad_norm": 0.9029770747599283, "learning_rate": 7.827269549315602e-06, "loss": 0.1737, "step": 11293 }, { "epoch": 0.32948246688838323, "grad_norm": 0.926310766870844, "learning_rate": 7.826879882501455e-06, "loss": 0.134, "step": 11294 }, { "epoch": 0.3295116401190268, "grad_norm": 0.8409863761331066, "learning_rate": 7.826490190449596e-06, "loss": 0.1479, "step": 11295 }, { "epoch": 0.32954081334967034, "grad_norm": 0.8437637679566358, "learning_rate": 7.826100473163512e-06, "loss": 0.1388, "step": 11296 }, { "epoch": 0.3295699865803139, "grad_norm": 0.891714974199195, "learning_rate": 7.825710730646676e-06, "loss": 0.156, "step": 11297 }, { "epoch": 0.32959915981095744, "grad_norm": 0.897468335129447, "learning_rate": 7.825320962902568e-06, "loss": 0.1492, "step": 11298 }, { "epoch": 0.32962833304160105, "grad_norm": 0.6932552940129606, "learning_rate": 7.82493116993467e-06, "loss": 0.126, "step": 11299 }, { "epoch": 0.3296575062722446, "grad_norm": 0.8449570914458303, "learning_rate": 7.82454135174646e-06, "loss": 0.144, "step": 11300 }, { "epoch": 0.32968667950288816, "grad_norm": 0.9087588850523538, "learning_rate": 7.82415150834142e-06, "loss": 0.1432, "step": 11301 }, { "epoch": 0.3297158527335317, "grad_norm": 0.7557663548751011, "learning_rate": 7.823761639723029e-06, "loss": 0.1441, "step": 11302 }, { "epoch": 0.32974502596417526, "grad_norm": 0.887443115216238, "learning_rate": 7.823371745894768e-06, "loss": 0.143, "step": 11303 }, { "epoch": 0.3297741991948188, "grad_norm": 0.7834573258451816, "learning_rate": 7.822981826860118e-06, "loss": 0.1434, "step": 11304 }, { "epoch": 0.32980337242546237, "grad_norm": 0.8623294560083001, "learning_rate": 7.822591882622562e-06, "loss": 0.1396, "step": 11305 }, { "epoch": 0.329832545656106, "grad_norm": 0.9454631376020142, "learning_rate": 7.822201913185577e-06, "loss": 0.1551, "step": 11306 }, { "epoch": 0.32986171888674953, "grad_norm": 0.8654563307718793, "learning_rate": 7.821811918552647e-06, "loss": 0.1896, "step": 11307 }, { "epoch": 0.3298908921173931, "grad_norm": 0.953560744709995, "learning_rate": 7.821421898727255e-06, "loss": 0.1817, "step": 11308 }, { "epoch": 0.32992006534803664, "grad_norm": 0.8154254191760685, "learning_rate": 7.821031853712881e-06, "loss": 0.133, "step": 11309 }, { "epoch": 0.3299492385786802, "grad_norm": 0.9257272792921443, "learning_rate": 7.82064178351301e-06, "loss": 0.1518, "step": 11310 }, { "epoch": 0.32997841180932375, "grad_norm": 0.803301284297927, "learning_rate": 7.820251688131121e-06, "loss": 0.1299, "step": 11311 }, { "epoch": 0.3300075850399673, "grad_norm": 0.818206556440267, "learning_rate": 7.819861567570699e-06, "loss": 0.1424, "step": 11312 }, { "epoch": 0.3300367582706109, "grad_norm": 0.9667275436284948, "learning_rate": 7.819471421835224e-06, "loss": 0.1454, "step": 11313 }, { "epoch": 0.33006593150125446, "grad_norm": 0.9182018915794946, "learning_rate": 7.819081250928184e-06, "loss": 0.132, "step": 11314 }, { "epoch": 0.330095104731898, "grad_norm": 0.9555021564657732, "learning_rate": 7.818691054853056e-06, "loss": 0.1572, "step": 11315 }, { "epoch": 0.33012427796254157, "grad_norm": 0.7809252659139025, "learning_rate": 7.81830083361333e-06, "loss": 0.1366, "step": 11316 }, { "epoch": 0.3301534511931851, "grad_norm": 0.9081021460098465, "learning_rate": 7.817910587212486e-06, "loss": 0.1532, "step": 11317 }, { "epoch": 0.3301826244238287, "grad_norm": 0.8547658909787655, "learning_rate": 7.81752031565401e-06, "loss": 0.1382, "step": 11318 }, { "epoch": 0.3302117976544723, "grad_norm": 0.8022174461139985, "learning_rate": 7.817130018941383e-06, "loss": 0.1544, "step": 11319 }, { "epoch": 0.33024097088511584, "grad_norm": 0.97159226609784, "learning_rate": 7.816739697078094e-06, "loss": 0.127, "step": 11320 }, { "epoch": 0.3302701441157594, "grad_norm": 0.853486364731287, "learning_rate": 7.816349350067625e-06, "loss": 0.1342, "step": 11321 }, { "epoch": 0.33029931734640294, "grad_norm": 1.0613319647337767, "learning_rate": 7.81595897791346e-06, "loss": 0.1628, "step": 11322 }, { "epoch": 0.3303284905770465, "grad_norm": 0.9607553484038386, "learning_rate": 7.815568580619087e-06, "loss": 0.1689, "step": 11323 }, { "epoch": 0.33035766380769005, "grad_norm": 0.9772394208745946, "learning_rate": 7.815178158187991e-06, "loss": 0.1398, "step": 11324 }, { "epoch": 0.3303868370383336, "grad_norm": 1.2193309788058466, "learning_rate": 7.814787710623652e-06, "loss": 0.1434, "step": 11325 }, { "epoch": 0.3304160102689772, "grad_norm": 1.0430780770787516, "learning_rate": 7.814397237929564e-06, "loss": 0.1672, "step": 11326 }, { "epoch": 0.33044518349962076, "grad_norm": 0.9829767299141772, "learning_rate": 7.814006740109208e-06, "loss": 0.14, "step": 11327 }, { "epoch": 0.3304743567302643, "grad_norm": 1.1205465339136425, "learning_rate": 7.813616217166071e-06, "loss": 0.1486, "step": 11328 }, { "epoch": 0.33050352996090787, "grad_norm": 0.8423887911422739, "learning_rate": 7.813225669103641e-06, "loss": 0.1601, "step": 11329 }, { "epoch": 0.3305327031915514, "grad_norm": 0.7399347113271597, "learning_rate": 7.812835095925404e-06, "loss": 0.1799, "step": 11330 }, { "epoch": 0.330561876422195, "grad_norm": 1.0475515746343944, "learning_rate": 7.812444497634847e-06, "loss": 0.1327, "step": 11331 }, { "epoch": 0.33059104965283853, "grad_norm": 0.7769497032823693, "learning_rate": 7.812053874235455e-06, "loss": 0.1253, "step": 11332 }, { "epoch": 0.33062022288348214, "grad_norm": 0.8137021183200754, "learning_rate": 7.811663225730718e-06, "loss": 0.1522, "step": 11333 }, { "epoch": 0.3306493961141257, "grad_norm": 0.6840079985260334, "learning_rate": 7.811272552124125e-06, "loss": 0.1333, "step": 11334 }, { "epoch": 0.33067856934476925, "grad_norm": 0.8757888851495554, "learning_rate": 7.81088185341916e-06, "loss": 0.1583, "step": 11335 }, { "epoch": 0.3307077425754128, "grad_norm": 0.9502181614649482, "learning_rate": 7.810491129619314e-06, "loss": 0.152, "step": 11336 }, { "epoch": 0.33073691580605635, "grad_norm": 1.0521417441197338, "learning_rate": 7.810100380728072e-06, "loss": 0.1318, "step": 11337 }, { "epoch": 0.3307660890366999, "grad_norm": 0.7521275234108433, "learning_rate": 7.809709606748926e-06, "loss": 0.159, "step": 11338 }, { "epoch": 0.33079526226734346, "grad_norm": 0.9068589984650683, "learning_rate": 7.809318807685364e-06, "loss": 0.1591, "step": 11339 }, { "epoch": 0.33082443549798707, "grad_norm": 0.9063226841119603, "learning_rate": 7.808927983540873e-06, "loss": 0.141, "step": 11340 }, { "epoch": 0.3308536087286306, "grad_norm": 1.023090238082688, "learning_rate": 7.808537134318944e-06, "loss": 0.1406, "step": 11341 }, { "epoch": 0.3308827819592742, "grad_norm": 0.9189415234848043, "learning_rate": 7.808146260023067e-06, "loss": 0.1472, "step": 11342 }, { "epoch": 0.33091195518991773, "grad_norm": 0.9280011152793862, "learning_rate": 7.807755360656727e-06, "loss": 0.1763, "step": 11343 }, { "epoch": 0.3309411284205613, "grad_norm": 1.0233840700040582, "learning_rate": 7.807364436223422e-06, "loss": 0.1477, "step": 11344 }, { "epoch": 0.33097030165120483, "grad_norm": 1.161617052132787, "learning_rate": 7.806973486726634e-06, "loss": 0.1357, "step": 11345 }, { "epoch": 0.33099947488184844, "grad_norm": 1.083374635455969, "learning_rate": 7.806582512169859e-06, "loss": 0.1408, "step": 11346 }, { "epoch": 0.331028648112492, "grad_norm": 0.9179876388683236, "learning_rate": 7.806191512556584e-06, "loss": 0.1467, "step": 11347 }, { "epoch": 0.33105782134313555, "grad_norm": 0.9674249558014094, "learning_rate": 7.805800487890302e-06, "loss": 0.1539, "step": 11348 }, { "epoch": 0.3310869945737791, "grad_norm": 1.1368088176756828, "learning_rate": 7.805409438174502e-06, "loss": 0.1404, "step": 11349 }, { "epoch": 0.33111616780442266, "grad_norm": 0.8900375496043085, "learning_rate": 7.805018363412677e-06, "loss": 0.1312, "step": 11350 }, { "epoch": 0.3311453410350662, "grad_norm": 0.7199386376933307, "learning_rate": 7.804627263608317e-06, "loss": 0.155, "step": 11351 }, { "epoch": 0.33117451426570976, "grad_norm": 1.0488604656936606, "learning_rate": 7.804236138764916e-06, "loss": 0.1585, "step": 11352 }, { "epoch": 0.33120368749635337, "grad_norm": 0.9974434078440045, "learning_rate": 7.803844988885962e-06, "loss": 0.1404, "step": 11353 }, { "epoch": 0.3312328607269969, "grad_norm": 0.8802585061595912, "learning_rate": 7.803453813974951e-06, "loss": 0.1821, "step": 11354 }, { "epoch": 0.3312620339576405, "grad_norm": 0.9447214851208002, "learning_rate": 7.803062614035372e-06, "loss": 0.1803, "step": 11355 }, { "epoch": 0.33129120718828403, "grad_norm": 1.341498285531634, "learning_rate": 7.802671389070721e-06, "loss": 0.1629, "step": 11356 }, { "epoch": 0.3313203804189276, "grad_norm": 0.8542320307106278, "learning_rate": 7.802280139084489e-06, "loss": 0.1138, "step": 11357 }, { "epoch": 0.33134955364957114, "grad_norm": 0.8783499068564741, "learning_rate": 7.801888864080166e-06, "loss": 0.1682, "step": 11358 }, { "epoch": 0.3313787268802147, "grad_norm": 0.9882950313021468, "learning_rate": 7.80149756406125e-06, "loss": 0.1627, "step": 11359 }, { "epoch": 0.3314079001108583, "grad_norm": 0.7591211774130825, "learning_rate": 7.801106239031233e-06, "loss": 0.1412, "step": 11360 }, { "epoch": 0.33143707334150185, "grad_norm": 0.7556389361556377, "learning_rate": 7.800714888993607e-06, "loss": 0.1457, "step": 11361 }, { "epoch": 0.3314662465721454, "grad_norm": 0.9666078241016921, "learning_rate": 7.800323513951867e-06, "loss": 0.1707, "step": 11362 }, { "epoch": 0.33149541980278896, "grad_norm": 0.8416880894335402, "learning_rate": 7.799932113909508e-06, "loss": 0.1347, "step": 11363 }, { "epoch": 0.3315245930334325, "grad_norm": 0.7060471191911536, "learning_rate": 7.799540688870024e-06, "loss": 0.1154, "step": 11364 }, { "epoch": 0.33155376626407607, "grad_norm": 0.9235554422842465, "learning_rate": 7.799149238836908e-06, "loss": 0.1428, "step": 11365 }, { "epoch": 0.3315829394947196, "grad_norm": 0.794240013814919, "learning_rate": 7.798757763813656e-06, "loss": 0.1502, "step": 11366 }, { "epoch": 0.33161211272536323, "grad_norm": 0.7457371350575628, "learning_rate": 7.798366263803763e-06, "loss": 0.1691, "step": 11367 }, { "epoch": 0.3316412859560068, "grad_norm": 0.8171198498176778, "learning_rate": 7.797974738810723e-06, "loss": 0.1499, "step": 11368 }, { "epoch": 0.33167045918665033, "grad_norm": 0.7518992320645509, "learning_rate": 7.797583188838033e-06, "loss": 0.1799, "step": 11369 }, { "epoch": 0.3316996324172939, "grad_norm": 0.9249122309323343, "learning_rate": 7.79719161388919e-06, "loss": 0.1528, "step": 11370 }, { "epoch": 0.33172880564793744, "grad_norm": 0.908845387575393, "learning_rate": 7.796800013967685e-06, "loss": 0.1468, "step": 11371 }, { "epoch": 0.331757978878581, "grad_norm": 0.7230178464134541, "learning_rate": 7.79640838907702e-06, "loss": 0.164, "step": 11372 }, { "epoch": 0.33178715210922455, "grad_norm": 0.8837016122612601, "learning_rate": 7.796016739220686e-06, "loss": 0.1611, "step": 11373 }, { "epoch": 0.33181632533986816, "grad_norm": 1.1638407280343497, "learning_rate": 7.795625064402184e-06, "loss": 0.1473, "step": 11374 }, { "epoch": 0.3318454985705117, "grad_norm": 1.0006773533680604, "learning_rate": 7.795233364625008e-06, "loss": 0.1458, "step": 11375 }, { "epoch": 0.33187467180115526, "grad_norm": 0.87323873384313, "learning_rate": 7.794841639892655e-06, "loss": 0.1576, "step": 11376 }, { "epoch": 0.3319038450317988, "grad_norm": 0.7583736870201176, "learning_rate": 7.794449890208624e-06, "loss": 0.1535, "step": 11377 }, { "epoch": 0.33193301826244237, "grad_norm": 0.907097595137542, "learning_rate": 7.794058115576411e-06, "loss": 0.1434, "step": 11378 }, { "epoch": 0.3319621914930859, "grad_norm": 1.078460085956634, "learning_rate": 7.793666315999514e-06, "loss": 0.1501, "step": 11379 }, { "epoch": 0.33199136472372953, "grad_norm": 0.7874561281958534, "learning_rate": 7.793274491481431e-06, "loss": 0.1457, "step": 11380 }, { "epoch": 0.3320205379543731, "grad_norm": 0.9717033244524153, "learning_rate": 7.792882642025662e-06, "loss": 0.1608, "step": 11381 }, { "epoch": 0.33204971118501664, "grad_norm": 1.1234856164189455, "learning_rate": 7.7924907676357e-06, "loss": 0.1338, "step": 11382 }, { "epoch": 0.3320788844156602, "grad_norm": 0.6348800428920081, "learning_rate": 7.79209886831505e-06, "loss": 0.1479, "step": 11383 }, { "epoch": 0.33210805764630374, "grad_norm": 0.9411558584280161, "learning_rate": 7.791706944067207e-06, "loss": 0.1586, "step": 11384 }, { "epoch": 0.3321372308769473, "grad_norm": 0.9933133911246549, "learning_rate": 7.79131499489567e-06, "loss": 0.1839, "step": 11385 }, { "epoch": 0.33216640410759085, "grad_norm": 0.8544700110182338, "learning_rate": 7.79092302080394e-06, "loss": 0.1702, "step": 11386 }, { "epoch": 0.33219557733823446, "grad_norm": 0.8343955580679631, "learning_rate": 7.790531021795516e-06, "loss": 0.1392, "step": 11387 }, { "epoch": 0.332224750568878, "grad_norm": 0.8935637225075622, "learning_rate": 7.790138997873895e-06, "loss": 0.1613, "step": 11388 }, { "epoch": 0.33225392379952157, "grad_norm": 0.7621996166956655, "learning_rate": 7.789746949042582e-06, "loss": 0.1307, "step": 11389 }, { "epoch": 0.3322830970301651, "grad_norm": 0.8073744013821377, "learning_rate": 7.789354875305074e-06, "loss": 0.1692, "step": 11390 }, { "epoch": 0.3323122702608087, "grad_norm": 0.8965316595742587, "learning_rate": 7.788962776664867e-06, "loss": 0.165, "step": 11391 }, { "epoch": 0.3323414434914522, "grad_norm": 0.7934239369806525, "learning_rate": 7.78857065312547e-06, "loss": 0.1411, "step": 11392 }, { "epoch": 0.3323706167220958, "grad_norm": 0.8277492835564576, "learning_rate": 7.78817850469038e-06, "loss": 0.1432, "step": 11393 }, { "epoch": 0.3323997899527394, "grad_norm": 0.7552515074734151, "learning_rate": 7.787786331363097e-06, "loss": 0.1502, "step": 11394 }, { "epoch": 0.33242896318338294, "grad_norm": 0.9938117671261162, "learning_rate": 7.787394133147125e-06, "loss": 0.1521, "step": 11395 }, { "epoch": 0.3324581364140265, "grad_norm": 0.8481254081862278, "learning_rate": 7.787001910045962e-06, "loss": 0.1471, "step": 11396 }, { "epoch": 0.33248730964467005, "grad_norm": 0.8684717639587272, "learning_rate": 7.786609662063109e-06, "loss": 0.1685, "step": 11397 }, { "epoch": 0.3325164828753136, "grad_norm": 0.9166406583040055, "learning_rate": 7.786217389202073e-06, "loss": 0.1614, "step": 11398 }, { "epoch": 0.33254565610595715, "grad_norm": 0.7848013886076809, "learning_rate": 7.785825091466352e-06, "loss": 0.1466, "step": 11399 }, { "epoch": 0.3325748293366007, "grad_norm": 0.6776690801310096, "learning_rate": 7.78543276885945e-06, "loss": 0.1657, "step": 11400 }, { "epoch": 0.3326040025672443, "grad_norm": 1.0952363863083787, "learning_rate": 7.785040421384871e-06, "loss": 0.1528, "step": 11401 }, { "epoch": 0.33263317579788787, "grad_norm": 0.7588286368887359, "learning_rate": 7.784648049046114e-06, "loss": 0.1511, "step": 11402 }, { "epoch": 0.3326623490285314, "grad_norm": 0.8805903717398548, "learning_rate": 7.784255651846684e-06, "loss": 0.1444, "step": 11403 }, { "epoch": 0.332691522259175, "grad_norm": 0.7726353599224017, "learning_rate": 7.783863229790085e-06, "loss": 0.134, "step": 11404 }, { "epoch": 0.33272069548981853, "grad_norm": 0.7227797656123466, "learning_rate": 7.783470782879818e-06, "loss": 0.137, "step": 11405 }, { "epoch": 0.3327498687204621, "grad_norm": 0.6219612835543467, "learning_rate": 7.783078311119389e-06, "loss": 0.1406, "step": 11406 }, { "epoch": 0.3327790419511057, "grad_norm": 0.7399841750316177, "learning_rate": 7.782685814512303e-06, "loss": 0.1339, "step": 11407 }, { "epoch": 0.33280821518174925, "grad_norm": 0.5929710896739792, "learning_rate": 7.782293293062062e-06, "loss": 0.1421, "step": 11408 }, { "epoch": 0.3328373884123928, "grad_norm": 0.6908336840206548, "learning_rate": 7.781900746772169e-06, "loss": 0.1606, "step": 11409 }, { "epoch": 0.33286656164303635, "grad_norm": 0.7495708806481434, "learning_rate": 7.78150817564613e-06, "loss": 0.1692, "step": 11410 }, { "epoch": 0.3328957348736799, "grad_norm": 0.8252047910093573, "learning_rate": 7.781115579687452e-06, "loss": 0.1226, "step": 11411 }, { "epoch": 0.33292490810432346, "grad_norm": 0.8055027083945296, "learning_rate": 7.780722958899637e-06, "loss": 0.168, "step": 11412 }, { "epoch": 0.332954081334967, "grad_norm": 0.7067676823503098, "learning_rate": 7.78033031328619e-06, "loss": 0.1467, "step": 11413 }, { "epoch": 0.3329832545656106, "grad_norm": 0.9506405613626454, "learning_rate": 7.779937642850618e-06, "loss": 0.1352, "step": 11414 }, { "epoch": 0.3330124277962542, "grad_norm": 1.0165299854068452, "learning_rate": 7.779544947596428e-06, "loss": 0.1617, "step": 11415 }, { "epoch": 0.3330416010268977, "grad_norm": 0.7459070939394024, "learning_rate": 7.779152227527124e-06, "loss": 0.1506, "step": 11416 }, { "epoch": 0.3330707742575413, "grad_norm": 0.9564467771215571, "learning_rate": 7.778759482646213e-06, "loss": 0.1498, "step": 11417 }, { "epoch": 0.33309994748818483, "grad_norm": 0.8113753157089846, "learning_rate": 7.778366712957198e-06, "loss": 0.1232, "step": 11418 }, { "epoch": 0.3331291207188284, "grad_norm": 0.8962795501339426, "learning_rate": 7.77797391846359e-06, "loss": 0.1415, "step": 11419 }, { "epoch": 0.33315829394947194, "grad_norm": 0.9959166871234717, "learning_rate": 7.777581099168894e-06, "loss": 0.1635, "step": 11420 }, { "epoch": 0.33318746718011555, "grad_norm": 0.8844159556240272, "learning_rate": 7.777188255076616e-06, "loss": 0.1437, "step": 11421 }, { "epoch": 0.3332166404107591, "grad_norm": 0.8218999750495313, "learning_rate": 7.776795386190265e-06, "loss": 0.1365, "step": 11422 }, { "epoch": 0.33324581364140266, "grad_norm": 0.8954471981358914, "learning_rate": 7.77640249251335e-06, "loss": 0.1316, "step": 11423 }, { "epoch": 0.3332749868720462, "grad_norm": 0.8534036207605681, "learning_rate": 7.776009574049373e-06, "loss": 0.1469, "step": 11424 }, { "epoch": 0.33330416010268976, "grad_norm": 0.8490377272865366, "learning_rate": 7.775616630801846e-06, "loss": 0.1331, "step": 11425 }, { "epoch": 0.3333333333333333, "grad_norm": 0.888002051237779, "learning_rate": 7.775223662774276e-06, "loss": 0.1599, "step": 11426 }, { "epoch": 0.33336250656397687, "grad_norm": 0.7401795024990379, "learning_rate": 7.774830669970172e-06, "loss": 0.1525, "step": 11427 }, { "epoch": 0.3333916797946205, "grad_norm": 1.0355651212089865, "learning_rate": 7.774437652393042e-06, "loss": 0.1568, "step": 11428 }, { "epoch": 0.33342085302526403, "grad_norm": 0.7684071493659119, "learning_rate": 7.774044610046396e-06, "loss": 0.1581, "step": 11429 }, { "epoch": 0.3334500262559076, "grad_norm": 0.7654692111072509, "learning_rate": 7.77365154293374e-06, "loss": 0.1529, "step": 11430 }, { "epoch": 0.33347919948655114, "grad_norm": 0.8563814345870975, "learning_rate": 7.773258451058587e-06, "loss": 0.1444, "step": 11431 }, { "epoch": 0.3335083727171947, "grad_norm": 0.7146638452884608, "learning_rate": 7.772865334424444e-06, "loss": 0.1467, "step": 11432 }, { "epoch": 0.33353754594783824, "grad_norm": 0.8164867364796478, "learning_rate": 7.772472193034821e-06, "loss": 0.1572, "step": 11433 }, { "epoch": 0.33356671917848185, "grad_norm": 0.9721989059492175, "learning_rate": 7.772079026893229e-06, "loss": 0.1618, "step": 11434 }, { "epoch": 0.3335958924091254, "grad_norm": 0.9092877986747195, "learning_rate": 7.771685836003175e-06, "loss": 0.123, "step": 11435 }, { "epoch": 0.33362506563976896, "grad_norm": 0.8869602588924006, "learning_rate": 7.771292620368173e-06, "loss": 0.1313, "step": 11436 }, { "epoch": 0.3336542388704125, "grad_norm": 0.8444640841614083, "learning_rate": 7.770899379991732e-06, "loss": 0.1567, "step": 11437 }, { "epoch": 0.33368341210105606, "grad_norm": 1.1219937013782346, "learning_rate": 7.770506114877364e-06, "loss": 0.181, "step": 11438 }, { "epoch": 0.3337125853316996, "grad_norm": 0.9832192985802704, "learning_rate": 7.770112825028578e-06, "loss": 0.1154, "step": 11439 }, { "epoch": 0.33374175856234317, "grad_norm": 0.7641647964742511, "learning_rate": 7.769719510448886e-06, "loss": 0.1601, "step": 11440 }, { "epoch": 0.3337709317929868, "grad_norm": 1.0673713528525672, "learning_rate": 7.769326171141797e-06, "loss": 0.147, "step": 11441 }, { "epoch": 0.33380010502363033, "grad_norm": 1.0112107621321438, "learning_rate": 7.768932807110828e-06, "loss": 0.1354, "step": 11442 }, { "epoch": 0.3338292782542739, "grad_norm": 0.5999351158126687, "learning_rate": 7.768539418359487e-06, "loss": 0.1398, "step": 11443 }, { "epoch": 0.33385845148491744, "grad_norm": 0.7862293484002059, "learning_rate": 7.768146004891287e-06, "loss": 0.1294, "step": 11444 }, { "epoch": 0.333887624715561, "grad_norm": 0.7798420221815294, "learning_rate": 7.767752566709739e-06, "loss": 0.1478, "step": 11445 }, { "epoch": 0.33391679794620455, "grad_norm": 1.0460569583065815, "learning_rate": 7.767359103818357e-06, "loss": 0.1767, "step": 11446 }, { "epoch": 0.3339459711768481, "grad_norm": 0.7185436296585129, "learning_rate": 7.766965616220655e-06, "loss": 0.1661, "step": 11447 }, { "epoch": 0.3339751444074917, "grad_norm": 1.0710920071104186, "learning_rate": 7.766572103920144e-06, "loss": 0.1636, "step": 11448 }, { "epoch": 0.33400431763813526, "grad_norm": 0.8645450446415387, "learning_rate": 7.766178566920338e-06, "loss": 0.1599, "step": 11449 }, { "epoch": 0.3340334908687788, "grad_norm": 0.8817345169230677, "learning_rate": 7.76578500522475e-06, "loss": 0.1291, "step": 11450 }, { "epoch": 0.33406266409942237, "grad_norm": 0.9304675892587817, "learning_rate": 7.765391418836893e-06, "loss": 0.1468, "step": 11451 }, { "epoch": 0.3340918373300659, "grad_norm": 0.7462799203420152, "learning_rate": 7.764997807760283e-06, "loss": 0.1332, "step": 11452 }, { "epoch": 0.3341210105607095, "grad_norm": 0.8722627404862797, "learning_rate": 7.764604171998432e-06, "loss": 0.1615, "step": 11453 }, { "epoch": 0.33415018379135303, "grad_norm": 0.937752211240302, "learning_rate": 7.764210511554854e-06, "loss": 0.1536, "step": 11454 }, { "epoch": 0.33417935702199664, "grad_norm": 0.777923375392146, "learning_rate": 7.763816826433066e-06, "loss": 0.1522, "step": 11455 }, { "epoch": 0.3342085302526402, "grad_norm": 1.0454405741657025, "learning_rate": 7.76342311663658e-06, "loss": 0.155, "step": 11456 }, { "epoch": 0.33423770348328374, "grad_norm": 0.9664911830553712, "learning_rate": 7.763029382168912e-06, "loss": 0.1381, "step": 11457 }, { "epoch": 0.3342668767139273, "grad_norm": 0.9646019848903161, "learning_rate": 7.762635623033577e-06, "loss": 0.1464, "step": 11458 }, { "epoch": 0.33429604994457085, "grad_norm": 0.959057745452574, "learning_rate": 7.76224183923409e-06, "loss": 0.1415, "step": 11459 }, { "epoch": 0.3343252231752144, "grad_norm": 0.815788890676571, "learning_rate": 7.76184803077397e-06, "loss": 0.1317, "step": 11460 }, { "epoch": 0.334354396405858, "grad_norm": 0.6938665614662191, "learning_rate": 7.761454197656728e-06, "loss": 0.1191, "step": 11461 }, { "epoch": 0.33438356963650157, "grad_norm": 0.8584014957548999, "learning_rate": 7.761060339885882e-06, "loss": 0.1425, "step": 11462 }, { "epoch": 0.3344127428671451, "grad_norm": 0.9439635147918899, "learning_rate": 7.76066645746495e-06, "loss": 0.1584, "step": 11463 }, { "epoch": 0.33444191609778867, "grad_norm": 0.7275435427161401, "learning_rate": 7.760272550397446e-06, "loss": 0.1487, "step": 11464 }, { "epoch": 0.3344710893284322, "grad_norm": 0.8284264484764152, "learning_rate": 7.759878618686886e-06, "loss": 0.1596, "step": 11465 }, { "epoch": 0.3345002625590758, "grad_norm": 0.9728561120807099, "learning_rate": 7.759484662336792e-06, "loss": 0.1761, "step": 11466 }, { "epoch": 0.33452943578971933, "grad_norm": 0.7633814891351184, "learning_rate": 7.759090681350676e-06, "loss": 0.1312, "step": 11467 }, { "epoch": 0.33455860902036294, "grad_norm": 0.6563076696473864, "learning_rate": 7.758696675732057e-06, "loss": 0.153, "step": 11468 }, { "epoch": 0.3345877822510065, "grad_norm": 0.907111826757, "learning_rate": 7.758302645484451e-06, "loss": 0.1594, "step": 11469 }, { "epoch": 0.33461695548165005, "grad_norm": 0.7887719592653158, "learning_rate": 7.75790859061138e-06, "loss": 0.1394, "step": 11470 }, { "epoch": 0.3346461287122936, "grad_norm": 0.6941328921783568, "learning_rate": 7.757514511116358e-06, "loss": 0.1366, "step": 11471 }, { "epoch": 0.33467530194293715, "grad_norm": 0.7424400834815343, "learning_rate": 7.757120407002904e-06, "loss": 0.1467, "step": 11472 }, { "epoch": 0.3347044751735807, "grad_norm": 0.7666029824850172, "learning_rate": 7.75672627827454e-06, "loss": 0.1444, "step": 11473 }, { "epoch": 0.33473364840422426, "grad_norm": 0.8542415574993383, "learning_rate": 7.75633212493478e-06, "loss": 0.1442, "step": 11474 }, { "epoch": 0.33476282163486787, "grad_norm": 0.8128021005906819, "learning_rate": 7.755937946987144e-06, "loss": 0.1456, "step": 11475 }, { "epoch": 0.3347919948655114, "grad_norm": 0.882984283684404, "learning_rate": 7.755543744435153e-06, "loss": 0.1631, "step": 11476 }, { "epoch": 0.334821168096155, "grad_norm": 0.8129933914221654, "learning_rate": 7.755149517282325e-06, "loss": 0.1695, "step": 11477 }, { "epoch": 0.33485034132679853, "grad_norm": 0.8860049986178933, "learning_rate": 7.75475526553218e-06, "loss": 0.144, "step": 11478 }, { "epoch": 0.3348795145574421, "grad_norm": 0.8283335356209518, "learning_rate": 7.754360989188237e-06, "loss": 0.1469, "step": 11479 }, { "epoch": 0.33490868778808563, "grad_norm": 0.635544225279093, "learning_rate": 7.753966688254018e-06, "loss": 0.1456, "step": 11480 }, { "epoch": 0.3349378610187292, "grad_norm": 1.1415888434866515, "learning_rate": 7.75357236273304e-06, "loss": 0.1827, "step": 11481 }, { "epoch": 0.3349670342493728, "grad_norm": 0.9347864709686792, "learning_rate": 7.753178012628826e-06, "loss": 0.1437, "step": 11482 }, { "epoch": 0.33499620748001635, "grad_norm": 0.6804694142483877, "learning_rate": 7.752783637944897e-06, "loss": 0.1429, "step": 11483 }, { "epoch": 0.3350253807106599, "grad_norm": 1.253721368214493, "learning_rate": 7.752389238684773e-06, "loss": 0.1703, "step": 11484 }, { "epoch": 0.33505455394130346, "grad_norm": 1.0616431022335604, "learning_rate": 7.751994814851973e-06, "loss": 0.1159, "step": 11485 }, { "epoch": 0.335083727171947, "grad_norm": 0.8984131766963803, "learning_rate": 7.751600366450021e-06, "loss": 0.158, "step": 11486 }, { "epoch": 0.33511290040259056, "grad_norm": 0.9184852145861464, "learning_rate": 7.751205893482438e-06, "loss": 0.1545, "step": 11487 }, { "epoch": 0.33514207363323417, "grad_norm": 0.7671535794109193, "learning_rate": 7.750811395952745e-06, "loss": 0.1354, "step": 11488 }, { "epoch": 0.3351712468638777, "grad_norm": 0.8323228137742448, "learning_rate": 7.750416873864464e-06, "loss": 0.1406, "step": 11489 }, { "epoch": 0.3352004200945213, "grad_norm": 0.777114115614653, "learning_rate": 7.75002232722112e-06, "loss": 0.1619, "step": 11490 }, { "epoch": 0.33522959332516483, "grad_norm": 0.8828507213690756, "learning_rate": 7.749627756026232e-06, "loss": 0.1614, "step": 11491 }, { "epoch": 0.3352587665558084, "grad_norm": 0.790047056780413, "learning_rate": 7.749233160283323e-06, "loss": 0.1747, "step": 11492 }, { "epoch": 0.33528793978645194, "grad_norm": 0.9592747218110751, "learning_rate": 7.748838539995918e-06, "loss": 0.1744, "step": 11493 }, { "epoch": 0.3353171130170955, "grad_norm": 0.7547276464424753, "learning_rate": 7.748443895167539e-06, "loss": 0.1469, "step": 11494 }, { "epoch": 0.3353462862477391, "grad_norm": 0.7952485905164294, "learning_rate": 7.748049225801706e-06, "loss": 0.1523, "step": 11495 }, { "epoch": 0.33537545947838265, "grad_norm": 1.1703727583077554, "learning_rate": 7.747654531901949e-06, "loss": 0.1455, "step": 11496 }, { "epoch": 0.3354046327090262, "grad_norm": 0.7693174535264024, "learning_rate": 7.747259813471786e-06, "loss": 0.1323, "step": 11497 }, { "epoch": 0.33543380593966976, "grad_norm": 0.7369482111349072, "learning_rate": 7.746865070514744e-06, "loss": 0.1522, "step": 11498 }, { "epoch": 0.3354629791703133, "grad_norm": 0.9564162516945413, "learning_rate": 7.746470303034347e-06, "loss": 0.1771, "step": 11499 }, { "epoch": 0.33549215240095687, "grad_norm": 0.8108868752021353, "learning_rate": 7.746075511034119e-06, "loss": 0.1578, "step": 11500 }, { "epoch": 0.3355213256316004, "grad_norm": 0.7785537166136959, "learning_rate": 7.745680694517582e-06, "loss": 0.1497, "step": 11501 }, { "epoch": 0.33555049886224403, "grad_norm": 0.958205389114564, "learning_rate": 7.745285853488264e-06, "loss": 0.1813, "step": 11502 }, { "epoch": 0.3355796720928876, "grad_norm": 0.9537185309746866, "learning_rate": 7.74489098794969e-06, "loss": 0.1475, "step": 11503 }, { "epoch": 0.33560884532353114, "grad_norm": 0.6389980035757383, "learning_rate": 7.744496097905385e-06, "loss": 0.1361, "step": 11504 }, { "epoch": 0.3356380185541747, "grad_norm": 0.800956536064156, "learning_rate": 7.744101183358874e-06, "loss": 0.1943, "step": 11505 }, { "epoch": 0.33566719178481824, "grad_norm": 0.923807856964023, "learning_rate": 7.743706244313682e-06, "loss": 0.1681, "step": 11506 }, { "epoch": 0.3356963650154618, "grad_norm": 0.7330873874583181, "learning_rate": 7.743311280773335e-06, "loss": 0.1356, "step": 11507 }, { "epoch": 0.33572553824610535, "grad_norm": 0.7937806466044719, "learning_rate": 7.742916292741363e-06, "loss": 0.1694, "step": 11508 }, { "epoch": 0.33575471147674896, "grad_norm": 1.0402736819907141, "learning_rate": 7.742521280221286e-06, "loss": 0.1381, "step": 11509 }, { "epoch": 0.3357838847073925, "grad_norm": 0.6845691795976966, "learning_rate": 7.742126243216635e-06, "loss": 0.1355, "step": 11510 }, { "epoch": 0.33581305793803606, "grad_norm": 0.9618473390404858, "learning_rate": 7.741731181730933e-06, "loss": 0.1573, "step": 11511 }, { "epoch": 0.3358422311686796, "grad_norm": 0.9770323611181168, "learning_rate": 7.741336095767713e-06, "loss": 0.1521, "step": 11512 }, { "epoch": 0.33587140439932317, "grad_norm": 0.7055360652263694, "learning_rate": 7.740940985330497e-06, "loss": 0.1747, "step": 11513 }, { "epoch": 0.3359005776299667, "grad_norm": 0.9706015849531999, "learning_rate": 7.740545850422813e-06, "loss": 0.1533, "step": 11514 }, { "epoch": 0.3359297508606103, "grad_norm": 0.8807825946758363, "learning_rate": 7.740150691048192e-06, "loss": 0.1536, "step": 11515 }, { "epoch": 0.3359589240912539, "grad_norm": 0.8525706200236469, "learning_rate": 7.73975550721016e-06, "loss": 0.1347, "step": 11516 }, { "epoch": 0.33598809732189744, "grad_norm": 0.7138250781620418, "learning_rate": 7.739360298912243e-06, "loss": 0.1617, "step": 11517 }, { "epoch": 0.336017270552541, "grad_norm": 0.8366224157733974, "learning_rate": 7.738965066157973e-06, "loss": 0.1487, "step": 11518 }, { "epoch": 0.33604644378318455, "grad_norm": 0.9277490964824728, "learning_rate": 7.738569808950875e-06, "loss": 0.1802, "step": 11519 }, { "epoch": 0.3360756170138281, "grad_norm": 0.8420714910620926, "learning_rate": 7.738174527294481e-06, "loss": 0.1388, "step": 11520 }, { "epoch": 0.33610479024447165, "grad_norm": 0.8219743641842782, "learning_rate": 7.737779221192317e-06, "loss": 0.1344, "step": 11521 }, { "epoch": 0.33613396347511526, "grad_norm": 0.8191873161261526, "learning_rate": 7.737383890647915e-06, "loss": 0.1721, "step": 11522 }, { "epoch": 0.3361631367057588, "grad_norm": 0.7910467427290071, "learning_rate": 7.736988535664803e-06, "loss": 0.1642, "step": 11523 }, { "epoch": 0.33619230993640237, "grad_norm": 0.9137471482546433, "learning_rate": 7.73659315624651e-06, "loss": 0.1749, "step": 11524 }, { "epoch": 0.3362214831670459, "grad_norm": 0.8395428256052949, "learning_rate": 7.736197752396566e-06, "loss": 0.1739, "step": 11525 }, { "epoch": 0.3362506563976895, "grad_norm": 0.7931969987369323, "learning_rate": 7.735802324118503e-06, "loss": 0.1773, "step": 11526 }, { "epoch": 0.336279829628333, "grad_norm": 0.760285127003303, "learning_rate": 7.73540687141585e-06, "loss": 0.162, "step": 11527 }, { "epoch": 0.3363090028589766, "grad_norm": 0.7667508518337527, "learning_rate": 7.735011394292136e-06, "loss": 0.1632, "step": 11528 }, { "epoch": 0.3363381760896202, "grad_norm": 0.659323906184719, "learning_rate": 7.734615892750895e-06, "loss": 0.1462, "step": 11529 }, { "epoch": 0.33636734932026374, "grad_norm": 0.7444712343622968, "learning_rate": 7.734220366795655e-06, "loss": 0.1387, "step": 11530 }, { "epoch": 0.3363965225509073, "grad_norm": 0.9170292521706552, "learning_rate": 7.733824816429948e-06, "loss": 0.1288, "step": 11531 }, { "epoch": 0.33642569578155085, "grad_norm": 0.8803357947564434, "learning_rate": 7.733429241657306e-06, "loss": 0.1354, "step": 11532 }, { "epoch": 0.3364548690121944, "grad_norm": 0.8195762284716492, "learning_rate": 7.73303364248126e-06, "loss": 0.1425, "step": 11533 }, { "epoch": 0.33648404224283796, "grad_norm": 0.778225354573916, "learning_rate": 7.732638018905343e-06, "loss": 0.1501, "step": 11534 }, { "epoch": 0.3365132154734815, "grad_norm": 0.8929304391087508, "learning_rate": 7.732242370933085e-06, "loss": 0.1262, "step": 11535 }, { "epoch": 0.3365423887041251, "grad_norm": 0.7725661324199767, "learning_rate": 7.731846698568021e-06, "loss": 0.1244, "step": 11536 }, { "epoch": 0.33657156193476867, "grad_norm": 0.8611246450752171, "learning_rate": 7.73145100181368e-06, "loss": 0.1502, "step": 11537 }, { "epoch": 0.3366007351654122, "grad_norm": 0.9232382153814258, "learning_rate": 7.731055280673598e-06, "loss": 0.1326, "step": 11538 }, { "epoch": 0.3366299083960558, "grad_norm": 0.80485965733028, "learning_rate": 7.730659535151306e-06, "loss": 0.1585, "step": 11539 }, { "epoch": 0.33665908162669933, "grad_norm": 0.7852042836798314, "learning_rate": 7.730263765250337e-06, "loss": 0.1658, "step": 11540 }, { "epoch": 0.3366882548573429, "grad_norm": 0.8940431546891692, "learning_rate": 7.729867970974223e-06, "loss": 0.1592, "step": 11541 }, { "epoch": 0.33671742808798644, "grad_norm": 0.8670945497825935, "learning_rate": 7.729472152326503e-06, "loss": 0.117, "step": 11542 }, { "epoch": 0.33674660131863005, "grad_norm": 0.8225740785398498, "learning_rate": 7.729076309310704e-06, "loss": 0.1563, "step": 11543 }, { "epoch": 0.3367757745492736, "grad_norm": 1.127576641006544, "learning_rate": 7.728680441930366e-06, "loss": 0.1558, "step": 11544 }, { "epoch": 0.33680494777991715, "grad_norm": 0.8866329896476555, "learning_rate": 7.72828455018902e-06, "loss": 0.1561, "step": 11545 }, { "epoch": 0.3368341210105607, "grad_norm": 0.8556623051275783, "learning_rate": 7.727888634090199e-06, "loss": 0.1528, "step": 11546 }, { "epoch": 0.33686329424120426, "grad_norm": 0.9138167291437244, "learning_rate": 7.72749269363744e-06, "loss": 0.142, "step": 11547 }, { "epoch": 0.3368924674718478, "grad_norm": 0.9106381062019776, "learning_rate": 7.727096728834278e-06, "loss": 0.1659, "step": 11548 }, { "epoch": 0.3369216407024914, "grad_norm": 0.9148809918401275, "learning_rate": 7.726700739684247e-06, "loss": 0.159, "step": 11549 }, { "epoch": 0.336950813933135, "grad_norm": 0.9319620451235113, "learning_rate": 7.726304726190884e-06, "loss": 0.1596, "step": 11550 }, { "epoch": 0.3369799871637785, "grad_norm": 0.9805622581121656, "learning_rate": 7.725908688357722e-06, "loss": 0.1625, "step": 11551 }, { "epoch": 0.3370091603944221, "grad_norm": 0.7912166294319537, "learning_rate": 7.725512626188299e-06, "loss": 0.1566, "step": 11552 }, { "epoch": 0.33703833362506563, "grad_norm": 0.9273319767245617, "learning_rate": 7.725116539686148e-06, "loss": 0.1409, "step": 11553 }, { "epoch": 0.3370675068557092, "grad_norm": 0.7738242382317029, "learning_rate": 7.72472042885481e-06, "loss": 0.1334, "step": 11554 }, { "epoch": 0.33709668008635274, "grad_norm": 0.9688612676545774, "learning_rate": 7.724324293697816e-06, "loss": 0.1564, "step": 11555 }, { "epoch": 0.33712585331699635, "grad_norm": 0.9992497320618804, "learning_rate": 7.723928134218705e-06, "loss": 0.1633, "step": 11556 }, { "epoch": 0.3371550265476399, "grad_norm": 0.8453637314931073, "learning_rate": 7.723531950421014e-06, "loss": 0.158, "step": 11557 }, { "epoch": 0.33718419977828346, "grad_norm": 0.9307297852232389, "learning_rate": 7.72313574230828e-06, "loss": 0.1397, "step": 11558 }, { "epoch": 0.337213373008927, "grad_norm": 0.8159144809842509, "learning_rate": 7.722739509884042e-06, "loss": 0.1594, "step": 11559 }, { "epoch": 0.33724254623957056, "grad_norm": 0.8272914173842891, "learning_rate": 7.722343253151834e-06, "loss": 0.1831, "step": 11560 }, { "epoch": 0.3372717194702141, "grad_norm": 0.8692708278798568, "learning_rate": 7.721946972115196e-06, "loss": 0.1327, "step": 11561 }, { "epoch": 0.33730089270085767, "grad_norm": 1.0081766452662235, "learning_rate": 7.721550666777664e-06, "loss": 0.1526, "step": 11562 }, { "epoch": 0.3373300659315013, "grad_norm": 0.8039508529224917, "learning_rate": 7.721154337142778e-06, "loss": 0.1307, "step": 11563 }, { "epoch": 0.33735923916214483, "grad_norm": 0.8050979085130708, "learning_rate": 7.720757983214076e-06, "loss": 0.1278, "step": 11564 }, { "epoch": 0.3373884123927884, "grad_norm": 0.6830516599371543, "learning_rate": 7.720361604995097e-06, "loss": 0.1462, "step": 11565 }, { "epoch": 0.33741758562343194, "grad_norm": 0.8294060227749831, "learning_rate": 7.719965202489377e-06, "loss": 0.1356, "step": 11566 }, { "epoch": 0.3374467588540755, "grad_norm": 0.8635004014556787, "learning_rate": 7.71956877570046e-06, "loss": 0.1421, "step": 11567 }, { "epoch": 0.33747593208471904, "grad_norm": 0.7720892884340611, "learning_rate": 7.719172324631878e-06, "loss": 0.131, "step": 11568 }, { "epoch": 0.3375051053153626, "grad_norm": 0.7483209597107527, "learning_rate": 7.718775849287178e-06, "loss": 0.1339, "step": 11569 }, { "epoch": 0.3375342785460062, "grad_norm": 1.015752418732121, "learning_rate": 7.718379349669893e-06, "loss": 0.1766, "step": 11570 }, { "epoch": 0.33756345177664976, "grad_norm": 1.0326672410961844, "learning_rate": 7.71798282578357e-06, "loss": 0.1597, "step": 11571 }, { "epoch": 0.3375926250072933, "grad_norm": 0.9156662847768858, "learning_rate": 7.717586277631744e-06, "loss": 0.1536, "step": 11572 }, { "epoch": 0.33762179823793687, "grad_norm": 0.9008508855862385, "learning_rate": 7.717189705217954e-06, "loss": 0.1333, "step": 11573 }, { "epoch": 0.3376509714685804, "grad_norm": 1.0395423797248333, "learning_rate": 7.716793108545745e-06, "loss": 0.1627, "step": 11574 }, { "epoch": 0.33768014469922397, "grad_norm": 0.9719764033060374, "learning_rate": 7.716396487618655e-06, "loss": 0.1613, "step": 11575 }, { "epoch": 0.3377093179298676, "grad_norm": 0.9309991623526762, "learning_rate": 7.715999842440225e-06, "loss": 0.1527, "step": 11576 }, { "epoch": 0.33773849116051113, "grad_norm": 0.9842918455596753, "learning_rate": 7.715603173013999e-06, "loss": 0.198, "step": 11577 }, { "epoch": 0.3377676643911547, "grad_norm": 1.4516828031751667, "learning_rate": 7.715206479343516e-06, "loss": 0.1587, "step": 11578 }, { "epoch": 0.33779683762179824, "grad_norm": 0.9777438508037208, "learning_rate": 7.714809761432317e-06, "loss": 0.1418, "step": 11579 }, { "epoch": 0.3378260108524418, "grad_norm": 0.9632788395810549, "learning_rate": 7.714413019283942e-06, "loss": 0.1748, "step": 11580 }, { "epoch": 0.33785518408308535, "grad_norm": 0.8574549089734549, "learning_rate": 7.714016252901939e-06, "loss": 0.1432, "step": 11581 }, { "epoch": 0.3378843573137289, "grad_norm": 0.9000887676744005, "learning_rate": 7.713619462289846e-06, "loss": 0.1449, "step": 11582 }, { "epoch": 0.3379135305443725, "grad_norm": 0.9491616826860819, "learning_rate": 7.713222647451203e-06, "loss": 0.1383, "step": 11583 }, { "epoch": 0.33794270377501606, "grad_norm": 0.8886926381518871, "learning_rate": 7.71282580838956e-06, "loss": 0.1482, "step": 11584 }, { "epoch": 0.3379718770056596, "grad_norm": 0.7507167077278053, "learning_rate": 7.712428945108454e-06, "loss": 0.1478, "step": 11585 }, { "epoch": 0.33800105023630317, "grad_norm": 0.9189021330503667, "learning_rate": 7.712032057611431e-06, "loss": 0.1477, "step": 11586 }, { "epoch": 0.3380302234669467, "grad_norm": 1.0128260734012735, "learning_rate": 7.711635145902032e-06, "loss": 0.1583, "step": 11587 }, { "epoch": 0.3380593966975903, "grad_norm": 0.8575179334345707, "learning_rate": 7.711238209983802e-06, "loss": 0.1577, "step": 11588 }, { "epoch": 0.33808856992823383, "grad_norm": 0.9998616785880221, "learning_rate": 7.710841249860286e-06, "loss": 0.1535, "step": 11589 }, { "epoch": 0.33811774315887744, "grad_norm": 0.9767503548015507, "learning_rate": 7.710444265535024e-06, "loss": 0.1438, "step": 11590 }, { "epoch": 0.338146916389521, "grad_norm": 0.7505459740062331, "learning_rate": 7.710047257011564e-06, "loss": 0.1472, "step": 11591 }, { "epoch": 0.33817608962016454, "grad_norm": 0.989067891001346, "learning_rate": 7.709650224293449e-06, "loss": 0.1196, "step": 11592 }, { "epoch": 0.3382052628508081, "grad_norm": 0.8147623034459972, "learning_rate": 7.709253167384223e-06, "loss": 0.1322, "step": 11593 }, { "epoch": 0.33823443608145165, "grad_norm": 0.7033094865173153, "learning_rate": 7.708856086287432e-06, "loss": 0.145, "step": 11594 }, { "epoch": 0.3382636093120952, "grad_norm": 1.0309814820693728, "learning_rate": 7.708458981006621e-06, "loss": 0.1682, "step": 11595 }, { "epoch": 0.33829278254273876, "grad_norm": 0.775960565931343, "learning_rate": 7.708061851545334e-06, "loss": 0.1753, "step": 11596 }, { "epoch": 0.33832195577338237, "grad_norm": 0.8237789546111267, "learning_rate": 7.707664697907117e-06, "loss": 0.1498, "step": 11597 }, { "epoch": 0.3383511290040259, "grad_norm": 0.9349919105975405, "learning_rate": 7.707267520095515e-06, "loss": 0.1742, "step": 11598 }, { "epoch": 0.33838030223466947, "grad_norm": 1.0083016500343505, "learning_rate": 7.70687031811408e-06, "loss": 0.1268, "step": 11599 }, { "epoch": 0.338409475465313, "grad_norm": 0.697713882631908, "learning_rate": 7.706473091966347e-06, "loss": 0.118, "step": 11600 }, { "epoch": 0.3384386486959566, "grad_norm": 0.7643882171336065, "learning_rate": 7.706075841655871e-06, "loss": 0.1489, "step": 11601 }, { "epoch": 0.33846782192660013, "grad_norm": 1.0678200592313596, "learning_rate": 7.705678567186195e-06, "loss": 0.1532, "step": 11602 }, { "epoch": 0.33849699515724374, "grad_norm": 0.8535030518138836, "learning_rate": 7.705281268560866e-06, "loss": 0.1302, "step": 11603 }, { "epoch": 0.3385261683878873, "grad_norm": 0.7600060163055182, "learning_rate": 7.704883945783435e-06, "loss": 0.1661, "step": 11604 }, { "epoch": 0.33855534161853085, "grad_norm": 1.0692788141133338, "learning_rate": 7.704486598857444e-06, "loss": 0.147, "step": 11605 }, { "epoch": 0.3385845148491744, "grad_norm": 0.9700778543064547, "learning_rate": 7.70408922778644e-06, "loss": 0.1467, "step": 11606 }, { "epoch": 0.33861368807981795, "grad_norm": 0.9525649354280223, "learning_rate": 7.703691832573975e-06, "loss": 0.135, "step": 11607 }, { "epoch": 0.3386428613104615, "grad_norm": 0.8816075139805843, "learning_rate": 7.703294413223595e-06, "loss": 0.1381, "step": 11608 }, { "epoch": 0.33867203454110506, "grad_norm": 0.7983909998112839, "learning_rate": 7.702896969738847e-06, "loss": 0.1508, "step": 11609 }, { "epoch": 0.33870120777174867, "grad_norm": 0.9203609233880439, "learning_rate": 7.702499502123281e-06, "loss": 0.1558, "step": 11610 }, { "epoch": 0.3387303810023922, "grad_norm": 0.6744051911156428, "learning_rate": 7.702102010380444e-06, "loss": 0.1439, "step": 11611 }, { "epoch": 0.3387595542330358, "grad_norm": 0.7199704773594848, "learning_rate": 7.701704494513885e-06, "loss": 0.1645, "step": 11612 }, { "epoch": 0.33878872746367933, "grad_norm": 0.8217566485644069, "learning_rate": 7.701306954527153e-06, "loss": 0.1357, "step": 11613 }, { "epoch": 0.3388179006943229, "grad_norm": 0.8422794302581642, "learning_rate": 7.700909390423798e-06, "loss": 0.1558, "step": 11614 }, { "epoch": 0.33884707392496644, "grad_norm": 0.7310318262358183, "learning_rate": 7.70051180220737e-06, "loss": 0.1409, "step": 11615 }, { "epoch": 0.33887624715561, "grad_norm": 3.798914361404054, "learning_rate": 7.700114189881413e-06, "loss": 0.1392, "step": 11616 }, { "epoch": 0.3389054203862536, "grad_norm": 0.8806185465121645, "learning_rate": 7.699716553449485e-06, "loss": 0.1639, "step": 11617 }, { "epoch": 0.33893459361689715, "grad_norm": 0.8395571390425997, "learning_rate": 7.699318892915131e-06, "loss": 0.1384, "step": 11618 }, { "epoch": 0.3389637668475407, "grad_norm": 0.8303897802809496, "learning_rate": 7.698921208281903e-06, "loss": 0.159, "step": 11619 }, { "epoch": 0.33899294007818426, "grad_norm": 0.8165403889974459, "learning_rate": 7.69852349955335e-06, "loss": 0.146, "step": 11620 }, { "epoch": 0.3390221133088278, "grad_norm": 1.092760362693043, "learning_rate": 7.698125766733023e-06, "loss": 0.1588, "step": 11621 }, { "epoch": 0.33905128653947136, "grad_norm": 0.6902754686469906, "learning_rate": 7.697728009824475e-06, "loss": 0.1317, "step": 11622 }, { "epoch": 0.3390804597701149, "grad_norm": 0.7312169230775266, "learning_rate": 7.697330228831254e-06, "loss": 0.1541, "step": 11623 }, { "epoch": 0.3391096330007585, "grad_norm": 0.8231684244030799, "learning_rate": 7.696932423756912e-06, "loss": 0.1491, "step": 11624 }, { "epoch": 0.3391388062314021, "grad_norm": 0.9415065153653359, "learning_rate": 7.696534594605e-06, "loss": 0.1329, "step": 11625 }, { "epoch": 0.33916797946204563, "grad_norm": 0.792732553868953, "learning_rate": 7.696136741379073e-06, "loss": 0.1679, "step": 11626 }, { "epoch": 0.3391971526926892, "grad_norm": 0.7594829618379453, "learning_rate": 7.69573886408268e-06, "loss": 0.1394, "step": 11627 }, { "epoch": 0.33922632592333274, "grad_norm": 0.9546364425628039, "learning_rate": 7.695340962719376e-06, "loss": 0.1308, "step": 11628 }, { "epoch": 0.3392554991539763, "grad_norm": 0.7239429493125663, "learning_rate": 7.69494303729271e-06, "loss": 0.1607, "step": 11629 }, { "epoch": 0.33928467238461985, "grad_norm": 0.7648747255782211, "learning_rate": 7.694545087806236e-06, "loss": 0.1538, "step": 11630 }, { "epoch": 0.33931384561526345, "grad_norm": 1.0398673908202867, "learning_rate": 7.694147114263505e-06, "loss": 0.1606, "step": 11631 }, { "epoch": 0.339343018845907, "grad_norm": 0.8509038630853565, "learning_rate": 7.693749116668073e-06, "loss": 0.134, "step": 11632 }, { "epoch": 0.33937219207655056, "grad_norm": 0.6808845366630925, "learning_rate": 7.69335109502349e-06, "loss": 0.1486, "step": 11633 }, { "epoch": 0.3394013653071941, "grad_norm": 0.9596622071022206, "learning_rate": 7.692953049333315e-06, "loss": 0.157, "step": 11634 }, { "epoch": 0.33943053853783767, "grad_norm": 0.9780170561775249, "learning_rate": 7.692554979601097e-06, "loss": 0.1492, "step": 11635 }, { "epoch": 0.3394597117684812, "grad_norm": 0.7947887071631937, "learning_rate": 7.69215688583039e-06, "loss": 0.1423, "step": 11636 }, { "epoch": 0.33948888499912483, "grad_norm": 0.7950864769771145, "learning_rate": 7.69175876802475e-06, "loss": 0.1439, "step": 11637 }, { "epoch": 0.3395180582297684, "grad_norm": 0.852636176725188, "learning_rate": 7.691360626187729e-06, "loss": 0.1345, "step": 11638 }, { "epoch": 0.33954723146041194, "grad_norm": 0.8749306756916018, "learning_rate": 7.690962460322883e-06, "loss": 0.1546, "step": 11639 }, { "epoch": 0.3395764046910555, "grad_norm": 0.7573621451009913, "learning_rate": 7.690564270433766e-06, "loss": 0.1427, "step": 11640 }, { "epoch": 0.33960557792169904, "grad_norm": 0.6883258193192641, "learning_rate": 7.690166056523935e-06, "loss": 0.1364, "step": 11641 }, { "epoch": 0.3396347511523426, "grad_norm": 0.8212558475232992, "learning_rate": 7.689767818596943e-06, "loss": 0.1739, "step": 11642 }, { "epoch": 0.33966392438298615, "grad_norm": 1.0178685133479788, "learning_rate": 7.689369556656346e-06, "loss": 0.1603, "step": 11643 }, { "epoch": 0.33969309761362976, "grad_norm": 0.7929640467630086, "learning_rate": 7.6889712707057e-06, "loss": 0.1419, "step": 11644 }, { "epoch": 0.3397222708442733, "grad_norm": 0.8285062651424151, "learning_rate": 7.68857296074856e-06, "loss": 0.1648, "step": 11645 }, { "epoch": 0.33975144407491686, "grad_norm": 0.7637143786134057, "learning_rate": 7.688174626788483e-06, "loss": 0.1465, "step": 11646 }, { "epoch": 0.3397806173055604, "grad_norm": 0.8038705124572267, "learning_rate": 7.687776268829024e-06, "loss": 0.1341, "step": 11647 }, { "epoch": 0.33980979053620397, "grad_norm": 0.6997760841537921, "learning_rate": 7.687377886873739e-06, "loss": 0.1316, "step": 11648 }, { "epoch": 0.3398389637668475, "grad_norm": 0.8494265532655626, "learning_rate": 7.686979480926189e-06, "loss": 0.1584, "step": 11649 }, { "epoch": 0.3398681369974911, "grad_norm": 0.8115325270981985, "learning_rate": 7.686581050989925e-06, "loss": 0.153, "step": 11650 }, { "epoch": 0.3398973102281347, "grad_norm": 0.6196469535696476, "learning_rate": 7.686182597068505e-06, "loss": 0.1444, "step": 11651 }, { "epoch": 0.33992648345877824, "grad_norm": 0.8742196977812604, "learning_rate": 7.685784119165492e-06, "loss": 0.1488, "step": 11652 }, { "epoch": 0.3399556566894218, "grad_norm": 0.9336099521443803, "learning_rate": 7.685385617284437e-06, "loss": 0.149, "step": 11653 }, { "epoch": 0.33998482992006535, "grad_norm": 0.8556779058735722, "learning_rate": 7.684987091428902e-06, "loss": 0.1323, "step": 11654 }, { "epoch": 0.3400140031507089, "grad_norm": 0.9249586535401686, "learning_rate": 7.684588541602443e-06, "loss": 0.1431, "step": 11655 }, { "epoch": 0.34004317638135245, "grad_norm": 1.0598896664425277, "learning_rate": 7.684189967808616e-06, "loss": 0.1416, "step": 11656 }, { "epoch": 0.340072349611996, "grad_norm": 0.813634062026962, "learning_rate": 7.683791370050984e-06, "loss": 0.1554, "step": 11657 }, { "epoch": 0.3401015228426396, "grad_norm": 1.0084842884915317, "learning_rate": 7.683392748333102e-06, "loss": 0.1618, "step": 11658 }, { "epoch": 0.34013069607328317, "grad_norm": 1.2608013321156923, "learning_rate": 7.682994102658532e-06, "loss": 0.1681, "step": 11659 }, { "epoch": 0.3401598693039267, "grad_norm": 0.7856307772707307, "learning_rate": 7.68259543303083e-06, "loss": 0.1473, "step": 11660 }, { "epoch": 0.3401890425345703, "grad_norm": 1.024086214458485, "learning_rate": 7.682196739453556e-06, "loss": 0.1544, "step": 11661 }, { "epoch": 0.3402182157652138, "grad_norm": 1.2069008199184548, "learning_rate": 7.68179802193027e-06, "loss": 0.1893, "step": 11662 }, { "epoch": 0.3402473889958574, "grad_norm": 0.872775111768538, "learning_rate": 7.681399280464531e-06, "loss": 0.1641, "step": 11663 }, { "epoch": 0.340276562226501, "grad_norm": 0.8029842648616166, "learning_rate": 7.681000515059901e-06, "loss": 0.144, "step": 11664 }, { "epoch": 0.34030573545714454, "grad_norm": 1.0516458477893995, "learning_rate": 7.680601725719937e-06, "loss": 0.1396, "step": 11665 }, { "epoch": 0.3403349086877881, "grad_norm": 0.9434842839220801, "learning_rate": 7.680202912448201e-06, "loss": 0.1601, "step": 11666 }, { "epoch": 0.34036408191843165, "grad_norm": 0.737427446621569, "learning_rate": 7.679804075248254e-06, "loss": 0.1498, "step": 11667 }, { "epoch": 0.3403932551490752, "grad_norm": 0.7840859302324004, "learning_rate": 7.679405214123654e-06, "loss": 0.1346, "step": 11668 }, { "epoch": 0.34042242837971876, "grad_norm": 0.8400931942200677, "learning_rate": 7.679006329077965e-06, "loss": 0.1316, "step": 11669 }, { "epoch": 0.3404516016103623, "grad_norm": 0.758598806223022, "learning_rate": 7.678607420114747e-06, "loss": 0.1637, "step": 11670 }, { "epoch": 0.3404807748410059, "grad_norm": 0.8306524681387498, "learning_rate": 7.678208487237562e-06, "loss": 0.1394, "step": 11671 }, { "epoch": 0.34050994807164947, "grad_norm": 0.7991370783091845, "learning_rate": 7.677809530449971e-06, "loss": 0.1478, "step": 11672 }, { "epoch": 0.340539121302293, "grad_norm": 0.6720491765664105, "learning_rate": 7.677410549755534e-06, "loss": 0.1399, "step": 11673 }, { "epoch": 0.3405682945329366, "grad_norm": 0.765124001772518, "learning_rate": 7.677011545157818e-06, "loss": 0.1362, "step": 11674 }, { "epoch": 0.34059746776358013, "grad_norm": 1.0082223645553332, "learning_rate": 7.676612516660379e-06, "loss": 0.1415, "step": 11675 }, { "epoch": 0.3406266409942237, "grad_norm": 0.6765023902159439, "learning_rate": 7.676213464266783e-06, "loss": 0.1751, "step": 11676 }, { "epoch": 0.34065581422486724, "grad_norm": 0.8235423491168042, "learning_rate": 7.675814387980592e-06, "loss": 0.1436, "step": 11677 }, { "epoch": 0.34068498745551085, "grad_norm": 0.8448033539821688, "learning_rate": 7.67541528780537e-06, "loss": 0.1483, "step": 11678 }, { "epoch": 0.3407141606861544, "grad_norm": 0.8426377749357006, "learning_rate": 7.67501616374468e-06, "loss": 0.1615, "step": 11679 }, { "epoch": 0.34074333391679795, "grad_norm": 0.8540467357121467, "learning_rate": 7.67461701580208e-06, "loss": 0.16, "step": 11680 }, { "epoch": 0.3407725071474415, "grad_norm": 0.8699308629755351, "learning_rate": 7.674217843981142e-06, "loss": 0.1469, "step": 11681 }, { "epoch": 0.34080168037808506, "grad_norm": 1.067218892242633, "learning_rate": 7.673818648285423e-06, "loss": 0.1409, "step": 11682 }, { "epoch": 0.3408308536087286, "grad_norm": 0.9097203156946464, "learning_rate": 7.67341942871849e-06, "loss": 0.1502, "step": 11683 }, { "epoch": 0.34086002683937217, "grad_norm": 0.7491748931649446, "learning_rate": 7.673020185283908e-06, "loss": 0.1156, "step": 11684 }, { "epoch": 0.3408892000700158, "grad_norm": 0.7259473015696555, "learning_rate": 7.672620917985238e-06, "loss": 0.1329, "step": 11685 }, { "epoch": 0.3409183733006593, "grad_norm": 0.8042559195369055, "learning_rate": 7.672221626826046e-06, "loss": 0.1607, "step": 11686 }, { "epoch": 0.3409475465313029, "grad_norm": 0.8026800332869615, "learning_rate": 7.671822311809899e-06, "loss": 0.1669, "step": 11687 }, { "epoch": 0.34097671976194643, "grad_norm": 0.7856470094621225, "learning_rate": 7.671422972940359e-06, "loss": 0.1606, "step": 11688 }, { "epoch": 0.34100589299259, "grad_norm": 0.6992651121871641, "learning_rate": 7.671023610220993e-06, "loss": 0.1456, "step": 11689 }, { "epoch": 0.34103506622323354, "grad_norm": 0.8107099706788065, "learning_rate": 7.670624223655367e-06, "loss": 0.1348, "step": 11690 }, { "epoch": 0.34106423945387715, "grad_norm": 0.598836061017337, "learning_rate": 7.670224813247043e-06, "loss": 0.1322, "step": 11691 }, { "epoch": 0.3410934126845207, "grad_norm": 0.8186424373152729, "learning_rate": 7.66982537899959e-06, "loss": 0.1429, "step": 11692 }, { "epoch": 0.34112258591516426, "grad_norm": 1.0788267547690011, "learning_rate": 7.669425920916575e-06, "loss": 0.1651, "step": 11693 }, { "epoch": 0.3411517591458078, "grad_norm": 0.7094197262715586, "learning_rate": 7.669026439001562e-06, "loss": 0.1215, "step": 11694 }, { "epoch": 0.34118093237645136, "grad_norm": 0.8876359519809253, "learning_rate": 7.668626933258117e-06, "loss": 0.1395, "step": 11695 }, { "epoch": 0.3412101056070949, "grad_norm": 1.0110935606247058, "learning_rate": 7.668227403689807e-06, "loss": 0.1361, "step": 11696 }, { "epoch": 0.34123927883773847, "grad_norm": 0.6144065129193836, "learning_rate": 7.667827850300203e-06, "loss": 0.1535, "step": 11697 }, { "epoch": 0.3412684520683821, "grad_norm": 0.8677026963363644, "learning_rate": 7.667428273092867e-06, "loss": 0.16, "step": 11698 }, { "epoch": 0.34129762529902563, "grad_norm": 0.8629451257224124, "learning_rate": 7.667028672071368e-06, "loss": 0.1571, "step": 11699 }, { "epoch": 0.3413267985296692, "grad_norm": 0.8449410464990044, "learning_rate": 7.666629047239273e-06, "loss": 0.1829, "step": 11700 }, { "epoch": 0.34135597176031274, "grad_norm": 0.8818627338487651, "learning_rate": 7.666229398600151e-06, "loss": 0.1771, "step": 11701 }, { "epoch": 0.3413851449909563, "grad_norm": 1.0868901220828913, "learning_rate": 7.66582972615757e-06, "loss": 0.1712, "step": 11702 }, { "epoch": 0.34141431822159984, "grad_norm": 0.836277865875183, "learning_rate": 7.665430029915098e-06, "loss": 0.1461, "step": 11703 }, { "epoch": 0.3414434914522434, "grad_norm": 0.9585987858847443, "learning_rate": 7.665030309876303e-06, "loss": 0.1406, "step": 11704 }, { "epoch": 0.341472664682887, "grad_norm": 0.9428673267000058, "learning_rate": 7.664630566044751e-06, "loss": 0.1582, "step": 11705 }, { "epoch": 0.34150183791353056, "grad_norm": 0.8198290317941089, "learning_rate": 7.664230798424016e-06, "loss": 0.1325, "step": 11706 }, { "epoch": 0.3415310111441741, "grad_norm": 0.9226597031973783, "learning_rate": 7.663831007017664e-06, "loss": 0.1335, "step": 11707 }, { "epoch": 0.34156018437481767, "grad_norm": 0.8968307048251073, "learning_rate": 7.663431191829263e-06, "loss": 0.1528, "step": 11708 }, { "epoch": 0.3415893576054612, "grad_norm": 0.9982291680294357, "learning_rate": 7.663031352862387e-06, "loss": 0.165, "step": 11709 }, { "epoch": 0.3416185308361048, "grad_norm": 0.8121613049726315, "learning_rate": 7.6626314901206e-06, "loss": 0.1399, "step": 11710 }, { "epoch": 0.3416477040667483, "grad_norm": 0.8579351196202621, "learning_rate": 7.662231603607475e-06, "loss": 0.1857, "step": 11711 }, { "epoch": 0.34167687729739193, "grad_norm": 0.9538851908123045, "learning_rate": 7.661831693326584e-06, "loss": 0.1689, "step": 11712 }, { "epoch": 0.3417060505280355, "grad_norm": 0.7149473699449731, "learning_rate": 7.661431759281492e-06, "loss": 0.1455, "step": 11713 }, { "epoch": 0.34173522375867904, "grad_norm": 1.1132623450476096, "learning_rate": 7.661031801475776e-06, "loss": 0.1673, "step": 11714 }, { "epoch": 0.3417643969893226, "grad_norm": 0.721488298031133, "learning_rate": 7.660631819913001e-06, "loss": 0.1537, "step": 11715 }, { "epoch": 0.34179357021996615, "grad_norm": 0.6243591437984605, "learning_rate": 7.66023181459674e-06, "loss": 0.1426, "step": 11716 }, { "epoch": 0.3418227434506097, "grad_norm": 0.7133498745963897, "learning_rate": 7.659831785530567e-06, "loss": 0.1305, "step": 11717 }, { "epoch": 0.3418519166812533, "grad_norm": 0.8109652739888527, "learning_rate": 7.659431732718048e-06, "loss": 0.1555, "step": 11718 }, { "epoch": 0.34188108991189686, "grad_norm": 0.8429149586885538, "learning_rate": 7.659031656162759e-06, "loss": 0.1695, "step": 11719 }, { "epoch": 0.3419102631425404, "grad_norm": 0.9783021579542683, "learning_rate": 7.65863155586827e-06, "loss": 0.175, "step": 11720 }, { "epoch": 0.34193943637318397, "grad_norm": 0.7455761986059449, "learning_rate": 7.658231431838153e-06, "loss": 0.1139, "step": 11721 }, { "epoch": 0.3419686096038275, "grad_norm": 0.8614572722605297, "learning_rate": 7.657831284075978e-06, "loss": 0.1547, "step": 11722 }, { "epoch": 0.3419977828344711, "grad_norm": 0.8069408183015282, "learning_rate": 7.657431112585323e-06, "loss": 0.1469, "step": 11723 }, { "epoch": 0.34202695606511463, "grad_norm": 1.0893315245729782, "learning_rate": 7.657030917369757e-06, "loss": 0.1525, "step": 11724 }, { "epoch": 0.34205612929575824, "grad_norm": 0.9923218410567376, "learning_rate": 7.656630698432852e-06, "loss": 0.1449, "step": 11725 }, { "epoch": 0.3420853025264018, "grad_norm": 0.8452843397484929, "learning_rate": 7.656230455778182e-06, "loss": 0.1511, "step": 11726 }, { "epoch": 0.34211447575704534, "grad_norm": 0.7588875919560713, "learning_rate": 7.655830189409322e-06, "loss": 0.163, "step": 11727 }, { "epoch": 0.3421436489876889, "grad_norm": 0.8346468307359247, "learning_rate": 7.655429899329843e-06, "loss": 0.1403, "step": 11728 }, { "epoch": 0.34217282221833245, "grad_norm": 0.9571316650537509, "learning_rate": 7.65502958554332e-06, "loss": 0.1581, "step": 11729 }, { "epoch": 0.342201995448976, "grad_norm": 0.9824069479427872, "learning_rate": 7.654629248053326e-06, "loss": 0.1318, "step": 11730 }, { "epoch": 0.34223116867961956, "grad_norm": 0.9142646902186613, "learning_rate": 7.654228886863437e-06, "loss": 0.1619, "step": 11731 }, { "epoch": 0.34226034191026317, "grad_norm": 1.007233342140085, "learning_rate": 7.653828501977228e-06, "loss": 0.1552, "step": 11732 }, { "epoch": 0.3422895151409067, "grad_norm": 0.7545373944008383, "learning_rate": 7.653428093398268e-06, "loss": 0.1476, "step": 11733 }, { "epoch": 0.3423186883715503, "grad_norm": 1.0079163395605315, "learning_rate": 7.653027661130137e-06, "loss": 0.1419, "step": 11734 }, { "epoch": 0.3423478616021938, "grad_norm": 1.177233676016444, "learning_rate": 7.652627205176409e-06, "loss": 0.1355, "step": 11735 }, { "epoch": 0.3423770348328374, "grad_norm": 0.8320629231908665, "learning_rate": 7.652226725540657e-06, "loss": 0.1316, "step": 11736 }, { "epoch": 0.34240620806348093, "grad_norm": 1.0215268112603635, "learning_rate": 7.651826222226459e-06, "loss": 0.1291, "step": 11737 }, { "epoch": 0.3424353812941245, "grad_norm": 0.7397867723421131, "learning_rate": 7.651425695237388e-06, "loss": 0.1575, "step": 11738 }, { "epoch": 0.3424645545247681, "grad_norm": 0.8591103044847006, "learning_rate": 7.651025144577025e-06, "loss": 0.1633, "step": 11739 }, { "epoch": 0.34249372775541165, "grad_norm": 1.1377176191831462, "learning_rate": 7.650624570248938e-06, "loss": 0.1551, "step": 11740 }, { "epoch": 0.3425229009860552, "grad_norm": 0.5858570252888864, "learning_rate": 7.650223972256709e-06, "loss": 0.1306, "step": 11741 }, { "epoch": 0.34255207421669875, "grad_norm": 1.1229020819776705, "learning_rate": 7.649823350603915e-06, "loss": 0.139, "step": 11742 }, { "epoch": 0.3425812474473423, "grad_norm": 0.9165859861890355, "learning_rate": 7.649422705294127e-06, "loss": 0.1496, "step": 11743 }, { "epoch": 0.34261042067798586, "grad_norm": 0.7792939136773086, "learning_rate": 7.64902203633093e-06, "loss": 0.1327, "step": 11744 }, { "epoch": 0.3426395939086294, "grad_norm": 0.8641812268876443, "learning_rate": 7.648621343717895e-06, "loss": 0.1557, "step": 11745 }, { "epoch": 0.342668767139273, "grad_norm": 0.9967717067789429, "learning_rate": 7.648220627458597e-06, "loss": 0.1321, "step": 11746 }, { "epoch": 0.3426979403699166, "grad_norm": 0.7088341018010219, "learning_rate": 7.647819887556621e-06, "loss": 0.1454, "step": 11747 }, { "epoch": 0.34272711360056013, "grad_norm": 0.9249612003418474, "learning_rate": 7.647419124015543e-06, "loss": 0.1451, "step": 11748 }, { "epoch": 0.3427562868312037, "grad_norm": 0.8937051285563443, "learning_rate": 7.647018336838936e-06, "loss": 0.1641, "step": 11749 }, { "epoch": 0.34278546006184724, "grad_norm": 0.9790850300978163, "learning_rate": 7.646617526030381e-06, "loss": 0.1453, "step": 11750 }, { "epoch": 0.3428146332924908, "grad_norm": 0.7835615237928236, "learning_rate": 7.64621669159346e-06, "loss": 0.1433, "step": 11751 }, { "epoch": 0.3428438065231344, "grad_norm": 0.9354865998205882, "learning_rate": 7.645815833531745e-06, "loss": 0.1608, "step": 11752 }, { "epoch": 0.34287297975377795, "grad_norm": 0.97072884507175, "learning_rate": 7.645414951848817e-06, "loss": 0.1621, "step": 11753 }, { "epoch": 0.3429021529844215, "grad_norm": 0.810817602561764, "learning_rate": 7.64501404654826e-06, "loss": 0.1611, "step": 11754 }, { "epoch": 0.34293132621506506, "grad_norm": 0.8599322383836334, "learning_rate": 7.644613117633644e-06, "loss": 0.1463, "step": 11755 }, { "epoch": 0.3429604994457086, "grad_norm": 0.9766877972562575, "learning_rate": 7.644212165108556e-06, "loss": 0.1782, "step": 11756 }, { "epoch": 0.34298967267635216, "grad_norm": 1.0184043242109055, "learning_rate": 7.643811188976574e-06, "loss": 0.1624, "step": 11757 }, { "epoch": 0.3430188459069957, "grad_norm": 0.9744970603175473, "learning_rate": 7.643410189241275e-06, "loss": 0.1539, "step": 11758 }, { "epoch": 0.3430480191376393, "grad_norm": 0.8742113603224083, "learning_rate": 7.643009165906242e-06, "loss": 0.1264, "step": 11759 }, { "epoch": 0.3430771923682829, "grad_norm": 1.0238609436396693, "learning_rate": 7.642608118975055e-06, "loss": 0.1735, "step": 11760 }, { "epoch": 0.34310636559892643, "grad_norm": 0.9282281248682216, "learning_rate": 7.64220704845129e-06, "loss": 0.1312, "step": 11761 }, { "epoch": 0.34313553882957, "grad_norm": 0.6502956815705189, "learning_rate": 7.641805954338534e-06, "loss": 0.1294, "step": 11762 }, { "epoch": 0.34316471206021354, "grad_norm": 0.9077142918053643, "learning_rate": 7.641404836640365e-06, "loss": 0.1338, "step": 11763 }, { "epoch": 0.3431938852908571, "grad_norm": 0.8776814631969013, "learning_rate": 7.641003695360363e-06, "loss": 0.1656, "step": 11764 }, { "epoch": 0.34322305852150065, "grad_norm": 0.6837570423818243, "learning_rate": 7.640602530502112e-06, "loss": 0.1539, "step": 11765 }, { "epoch": 0.34325223175214425, "grad_norm": 1.0231023425151644, "learning_rate": 7.64020134206919e-06, "loss": 0.1592, "step": 11766 }, { "epoch": 0.3432814049827878, "grad_norm": 0.7795675692279443, "learning_rate": 7.639800130065183e-06, "loss": 0.1513, "step": 11767 }, { "epoch": 0.34331057821343136, "grad_norm": 0.9129465443806678, "learning_rate": 7.639398894493668e-06, "loss": 0.1579, "step": 11768 }, { "epoch": 0.3433397514440749, "grad_norm": 0.740825249618372, "learning_rate": 7.638997635358232e-06, "loss": 0.1726, "step": 11769 }, { "epoch": 0.34336892467471847, "grad_norm": 0.7092008977067692, "learning_rate": 7.638596352662453e-06, "loss": 0.1361, "step": 11770 }, { "epoch": 0.343398097905362, "grad_norm": 0.8115283013936152, "learning_rate": 7.638195046409918e-06, "loss": 0.1497, "step": 11771 }, { "epoch": 0.3434272711360056, "grad_norm": 0.7232949261510621, "learning_rate": 7.637793716604208e-06, "loss": 0.1359, "step": 11772 }, { "epoch": 0.3434564443666492, "grad_norm": 0.7038991828709446, "learning_rate": 7.637392363248901e-06, "loss": 0.1502, "step": 11773 }, { "epoch": 0.34348561759729274, "grad_norm": 0.8078874550307183, "learning_rate": 7.636990986347588e-06, "loss": 0.1607, "step": 11774 }, { "epoch": 0.3435147908279363, "grad_norm": 0.7599301363984657, "learning_rate": 7.636589585903849e-06, "loss": 0.1629, "step": 11775 }, { "epoch": 0.34354396405857984, "grad_norm": 0.7400366455527537, "learning_rate": 7.636188161921265e-06, "loss": 0.1298, "step": 11776 }, { "epoch": 0.3435731372892234, "grad_norm": 0.8050150329011148, "learning_rate": 7.635786714403426e-06, "loss": 0.1799, "step": 11777 }, { "epoch": 0.34360231051986695, "grad_norm": 0.8460019406267215, "learning_rate": 7.63538524335391e-06, "loss": 0.1495, "step": 11778 }, { "epoch": 0.34363148375051056, "grad_norm": 0.7025849164842259, "learning_rate": 7.634983748776303e-06, "loss": 0.1403, "step": 11779 }, { "epoch": 0.3436606569811541, "grad_norm": 0.7814189906227804, "learning_rate": 7.634582230674192e-06, "loss": 0.159, "step": 11780 }, { "epoch": 0.34368983021179766, "grad_norm": 0.9038548466785211, "learning_rate": 7.63418068905116e-06, "loss": 0.1195, "step": 11781 }, { "epoch": 0.3437190034424412, "grad_norm": 0.7281775028636094, "learning_rate": 7.63377912391079e-06, "loss": 0.1377, "step": 11782 }, { "epoch": 0.34374817667308477, "grad_norm": 0.7515869071367169, "learning_rate": 7.63337753525667e-06, "loss": 0.1449, "step": 11783 }, { "epoch": 0.3437773499037283, "grad_norm": 1.0078354578427962, "learning_rate": 7.632975923092384e-06, "loss": 0.1326, "step": 11784 }, { "epoch": 0.3438065231343719, "grad_norm": 0.7075062353104524, "learning_rate": 7.632574287421516e-06, "loss": 0.1323, "step": 11785 }, { "epoch": 0.3438356963650155, "grad_norm": 1.0379271170357378, "learning_rate": 7.632172628247654e-06, "loss": 0.1375, "step": 11786 }, { "epoch": 0.34386486959565904, "grad_norm": 0.7948980531470224, "learning_rate": 7.631770945574384e-06, "loss": 0.1518, "step": 11787 }, { "epoch": 0.3438940428263026, "grad_norm": 0.9554102668718361, "learning_rate": 7.63136923940529e-06, "loss": 0.1317, "step": 11788 }, { "epoch": 0.34392321605694615, "grad_norm": 0.7869855229794803, "learning_rate": 7.63096750974396e-06, "loss": 0.1472, "step": 11789 }, { "epoch": 0.3439523892875897, "grad_norm": 0.8554123739345731, "learning_rate": 7.630565756593981e-06, "loss": 0.156, "step": 11790 }, { "epoch": 0.34398156251823325, "grad_norm": 0.8192941417051375, "learning_rate": 7.630163979958938e-06, "loss": 0.1298, "step": 11791 }, { "epoch": 0.3440107357488768, "grad_norm": 0.7793168006203326, "learning_rate": 7.629762179842419e-06, "loss": 0.1516, "step": 11792 }, { "epoch": 0.3440399089795204, "grad_norm": 0.9064907156623201, "learning_rate": 7.629360356248012e-06, "loss": 0.1571, "step": 11793 }, { "epoch": 0.34406908221016397, "grad_norm": 0.8137239435787438, "learning_rate": 7.628958509179303e-06, "loss": 0.139, "step": 11794 }, { "epoch": 0.3440982554408075, "grad_norm": 1.104659530404001, "learning_rate": 7.628556638639879e-06, "loss": 0.1451, "step": 11795 }, { "epoch": 0.3441274286714511, "grad_norm": 0.8586459575034965, "learning_rate": 7.628154744633329e-06, "loss": 0.1328, "step": 11796 }, { "epoch": 0.3441566019020946, "grad_norm": 0.7810139291249382, "learning_rate": 7.627752827163242e-06, "loss": 0.1368, "step": 11797 }, { "epoch": 0.3441857751327382, "grad_norm": 0.9871821266778147, "learning_rate": 7.627350886233203e-06, "loss": 0.1574, "step": 11798 }, { "epoch": 0.34421494836338173, "grad_norm": 0.7171893193030499, "learning_rate": 7.626948921846805e-06, "loss": 0.1251, "step": 11799 }, { "epoch": 0.34424412159402534, "grad_norm": 0.8632420151489926, "learning_rate": 7.6265469340076326e-06, "loss": 0.1502, "step": 11800 }, { "epoch": 0.3442732948246689, "grad_norm": 0.8329398766342871, "learning_rate": 7.6261449227192765e-06, "loss": 0.1432, "step": 11801 }, { "epoch": 0.34430246805531245, "grad_norm": 0.7117900534539481, "learning_rate": 7.625742887985325e-06, "loss": 0.1604, "step": 11802 }, { "epoch": 0.344331641285956, "grad_norm": 0.7999454698812005, "learning_rate": 7.6253408298093665e-06, "loss": 0.1484, "step": 11803 }, { "epoch": 0.34436081451659956, "grad_norm": 0.9287764574396962, "learning_rate": 7.6249387481949954e-06, "loss": 0.1777, "step": 11804 }, { "epoch": 0.3443899877472431, "grad_norm": 0.6733447502490658, "learning_rate": 7.624536643145796e-06, "loss": 0.1488, "step": 11805 }, { "epoch": 0.3444191609778867, "grad_norm": 0.7819878196454474, "learning_rate": 7.624134514665359e-06, "loss": 0.1527, "step": 11806 }, { "epoch": 0.34444833420853027, "grad_norm": 0.7754314146898684, "learning_rate": 7.623732362757277e-06, "loss": 0.1405, "step": 11807 }, { "epoch": 0.3444775074391738, "grad_norm": 0.9180180567783497, "learning_rate": 7.6233301874251375e-06, "loss": 0.1505, "step": 11808 }, { "epoch": 0.3445066806698174, "grad_norm": 0.7774195654853466, "learning_rate": 7.622927988672533e-06, "loss": 0.1502, "step": 11809 }, { "epoch": 0.34453585390046093, "grad_norm": 0.829149529881248, "learning_rate": 7.622525766503054e-06, "loss": 0.1508, "step": 11810 }, { "epoch": 0.3445650271311045, "grad_norm": 0.7174062064908221, "learning_rate": 7.62212352092029e-06, "loss": 0.1468, "step": 11811 }, { "epoch": 0.34459420036174804, "grad_norm": 0.6797694666968168, "learning_rate": 7.6217212519278335e-06, "loss": 0.1492, "step": 11812 }, { "epoch": 0.34462337359239165, "grad_norm": 0.7676823786348645, "learning_rate": 7.621318959529276e-06, "loss": 0.1412, "step": 11813 }, { "epoch": 0.3446525468230352, "grad_norm": 0.8530918494192405, "learning_rate": 7.620916643728209e-06, "loss": 0.1587, "step": 11814 }, { "epoch": 0.34468172005367875, "grad_norm": 0.8594938308878028, "learning_rate": 7.620514304528223e-06, "loss": 0.164, "step": 11815 }, { "epoch": 0.3447108932843223, "grad_norm": 0.7989164744621123, "learning_rate": 7.62011194193291e-06, "loss": 0.1455, "step": 11816 }, { "epoch": 0.34474006651496586, "grad_norm": 1.052688992387018, "learning_rate": 7.619709555945865e-06, "loss": 0.185, "step": 11817 }, { "epoch": 0.3447692397456094, "grad_norm": 0.6429489131274733, "learning_rate": 7.619307146570677e-06, "loss": 0.122, "step": 11818 }, { "epoch": 0.34479841297625297, "grad_norm": 0.803905314838356, "learning_rate": 7.618904713810941e-06, "loss": 0.1481, "step": 11819 }, { "epoch": 0.3448275862068966, "grad_norm": 1.0731413585649892, "learning_rate": 7.618502257670249e-06, "loss": 0.1467, "step": 11820 }, { "epoch": 0.34485675943754013, "grad_norm": 0.639650152004615, "learning_rate": 7.618099778152193e-06, "loss": 0.1305, "step": 11821 }, { "epoch": 0.3448859326681837, "grad_norm": 0.7677202402257982, "learning_rate": 7.617697275260367e-06, "loss": 0.1682, "step": 11822 }, { "epoch": 0.34491510589882723, "grad_norm": 1.022652750176267, "learning_rate": 7.6172947489983655e-06, "loss": 0.1397, "step": 11823 }, { "epoch": 0.3449442791294708, "grad_norm": 0.7521072658515839, "learning_rate": 7.616892199369781e-06, "loss": 0.1339, "step": 11824 }, { "epoch": 0.34497345236011434, "grad_norm": 0.7864432038689395, "learning_rate": 7.616489626378207e-06, "loss": 0.1631, "step": 11825 }, { "epoch": 0.3450026255907579, "grad_norm": 0.7365787358449442, "learning_rate": 7.616087030027239e-06, "loss": 0.1544, "step": 11826 }, { "epoch": 0.3450317988214015, "grad_norm": 0.9077261418591541, "learning_rate": 7.6156844103204704e-06, "loss": 0.1677, "step": 11827 }, { "epoch": 0.34506097205204506, "grad_norm": 0.7962660675824412, "learning_rate": 7.615281767261495e-06, "loss": 0.1502, "step": 11828 }, { "epoch": 0.3450901452826886, "grad_norm": 0.8082546928000954, "learning_rate": 7.6148791008539106e-06, "loss": 0.1372, "step": 11829 }, { "epoch": 0.34511931851333216, "grad_norm": 0.8922996356975215, "learning_rate": 7.614476411101308e-06, "loss": 0.1753, "step": 11830 }, { "epoch": 0.3451484917439757, "grad_norm": 0.8510290452679441, "learning_rate": 7.614073698007285e-06, "loss": 0.1533, "step": 11831 }, { "epoch": 0.34517766497461927, "grad_norm": 0.8544577967758579, "learning_rate": 7.613670961575435e-06, "loss": 0.1509, "step": 11832 }, { "epoch": 0.3452068382052629, "grad_norm": 1.0018614761194795, "learning_rate": 7.613268201809354e-06, "loss": 0.1526, "step": 11833 }, { "epoch": 0.34523601143590643, "grad_norm": 1.0104117854874037, "learning_rate": 7.61286541871264e-06, "loss": 0.1476, "step": 11834 }, { "epoch": 0.34526518466655, "grad_norm": 0.6568999429253323, "learning_rate": 7.612462612288886e-06, "loss": 0.1153, "step": 11835 }, { "epoch": 0.34529435789719354, "grad_norm": 0.9294501296868387, "learning_rate": 7.61205978254169e-06, "loss": 0.1575, "step": 11836 }, { "epoch": 0.3453235311278371, "grad_norm": 1.1657132330027167, "learning_rate": 7.611656929474649e-06, "loss": 0.1731, "step": 11837 }, { "epoch": 0.34535270435848064, "grad_norm": 0.9563649950542031, "learning_rate": 7.611254053091357e-06, "loss": 0.1704, "step": 11838 }, { "epoch": 0.3453818775891242, "grad_norm": 1.2000026011577534, "learning_rate": 7.610851153395413e-06, "loss": 0.1684, "step": 11839 }, { "epoch": 0.3454110508197678, "grad_norm": 0.7921110737620093, "learning_rate": 7.6104482303904126e-06, "loss": 0.1356, "step": 11840 }, { "epoch": 0.34544022405041136, "grad_norm": 0.9640731752079257, "learning_rate": 7.610045284079954e-06, "loss": 0.1611, "step": 11841 }, { "epoch": 0.3454693972810549, "grad_norm": 0.9162487112195041, "learning_rate": 7.609642314467633e-06, "loss": 0.1499, "step": 11842 }, { "epoch": 0.34549857051169847, "grad_norm": 0.8670030361238367, "learning_rate": 7.609239321557049e-06, "loss": 0.1494, "step": 11843 }, { "epoch": 0.345527743742342, "grad_norm": 0.6570273748431265, "learning_rate": 7.608836305351799e-06, "loss": 0.1318, "step": 11844 }, { "epoch": 0.3455569169729856, "grad_norm": 0.8564481053229913, "learning_rate": 7.608433265855482e-06, "loss": 0.1615, "step": 11845 }, { "epoch": 0.3455860902036291, "grad_norm": 0.9063894688631816, "learning_rate": 7.608030203071695e-06, "loss": 0.1593, "step": 11846 }, { "epoch": 0.34561526343427273, "grad_norm": 0.7773547283597553, "learning_rate": 7.607627117004038e-06, "loss": 0.1485, "step": 11847 }, { "epoch": 0.3456444366649163, "grad_norm": 0.7893258556823853, "learning_rate": 7.607224007656107e-06, "loss": 0.1692, "step": 11848 }, { "epoch": 0.34567360989555984, "grad_norm": 0.8479660892855598, "learning_rate": 7.606820875031504e-06, "loss": 0.1544, "step": 11849 }, { "epoch": 0.3457027831262034, "grad_norm": 0.803579369190686, "learning_rate": 7.606417719133825e-06, "loss": 0.1518, "step": 11850 }, { "epoch": 0.34573195635684695, "grad_norm": 0.8354596794096185, "learning_rate": 7.6060145399666704e-06, "loss": 0.1504, "step": 11851 }, { "epoch": 0.3457611295874905, "grad_norm": 0.8353572247515243, "learning_rate": 7.605611337533643e-06, "loss": 0.1693, "step": 11852 }, { "epoch": 0.34579030281813405, "grad_norm": 0.7725720728359323, "learning_rate": 7.6052081118383355e-06, "loss": 0.1195, "step": 11853 }, { "epoch": 0.34581947604877766, "grad_norm": 0.8027182717158562, "learning_rate": 7.604804862884356e-06, "loss": 0.1787, "step": 11854 }, { "epoch": 0.3458486492794212, "grad_norm": 0.7192842937875643, "learning_rate": 7.604401590675299e-06, "loss": 0.1328, "step": 11855 }, { "epoch": 0.34587782251006477, "grad_norm": 0.6950695808910768, "learning_rate": 7.603998295214765e-06, "loss": 0.1352, "step": 11856 }, { "epoch": 0.3459069957407083, "grad_norm": 0.7661365595616834, "learning_rate": 7.603594976506356e-06, "loss": 0.1266, "step": 11857 }, { "epoch": 0.3459361689713519, "grad_norm": 0.6651939436178301, "learning_rate": 7.6031916345536735e-06, "loss": 0.1726, "step": 11858 }, { "epoch": 0.34596534220199543, "grad_norm": 0.8730659487962338, "learning_rate": 7.602788269360318e-06, "loss": 0.1596, "step": 11859 }, { "epoch": 0.34599451543263904, "grad_norm": 0.8727372307097581, "learning_rate": 7.602384880929889e-06, "loss": 0.1703, "step": 11860 }, { "epoch": 0.3460236886632826, "grad_norm": 0.8503142625307699, "learning_rate": 7.6019814692659885e-06, "loss": 0.1466, "step": 11861 }, { "epoch": 0.34605286189392614, "grad_norm": 0.9260690869444146, "learning_rate": 7.601578034372221e-06, "loss": 0.1417, "step": 11862 }, { "epoch": 0.3460820351245697, "grad_norm": 0.9450116606750869, "learning_rate": 7.601174576252184e-06, "loss": 0.1424, "step": 11863 }, { "epoch": 0.34611120835521325, "grad_norm": 0.9138215423529065, "learning_rate": 7.600771094909483e-06, "loss": 0.1172, "step": 11864 }, { "epoch": 0.3461403815858568, "grad_norm": 0.7713805072391263, "learning_rate": 7.600367590347716e-06, "loss": 0.1481, "step": 11865 }, { "epoch": 0.34616955481650036, "grad_norm": 0.879120531850407, "learning_rate": 7.59996406257049e-06, "loss": 0.1858, "step": 11866 }, { "epoch": 0.34619872804714397, "grad_norm": 0.7535421844174716, "learning_rate": 7.599560511581406e-06, "loss": 0.1262, "step": 11867 }, { "epoch": 0.3462279012777875, "grad_norm": 0.8378352398830269, "learning_rate": 7.5991569373840625e-06, "loss": 0.1309, "step": 11868 }, { "epoch": 0.3462570745084311, "grad_norm": 0.8339734934011489, "learning_rate": 7.59875333998207e-06, "loss": 0.1248, "step": 11869 }, { "epoch": 0.3462862477390746, "grad_norm": 0.7902759412143556, "learning_rate": 7.598349719379028e-06, "loss": 0.1437, "step": 11870 }, { "epoch": 0.3463154209697182, "grad_norm": 0.8554018337484433, "learning_rate": 7.597946075578538e-06, "loss": 0.1287, "step": 11871 }, { "epoch": 0.34634459420036173, "grad_norm": 0.9772872600690051, "learning_rate": 7.5975424085842064e-06, "loss": 0.1411, "step": 11872 }, { "epoch": 0.3463737674310053, "grad_norm": 0.8488484589746388, "learning_rate": 7.597138718399637e-06, "loss": 0.1531, "step": 11873 }, { "epoch": 0.3464029406616489, "grad_norm": 0.8447135368301548, "learning_rate": 7.596735005028433e-06, "loss": 0.1408, "step": 11874 }, { "epoch": 0.34643211389229245, "grad_norm": 0.9235467300621414, "learning_rate": 7.596331268474198e-06, "loss": 0.1532, "step": 11875 }, { "epoch": 0.346461287122936, "grad_norm": 0.7567157832898448, "learning_rate": 7.595927508740537e-06, "loss": 0.1313, "step": 11876 }, { "epoch": 0.34649046035357955, "grad_norm": 1.0210908962802436, "learning_rate": 7.595523725831055e-06, "loss": 0.1479, "step": 11877 }, { "epoch": 0.3465196335842231, "grad_norm": 0.9050302294750344, "learning_rate": 7.595119919749358e-06, "loss": 0.1532, "step": 11878 }, { "epoch": 0.34654880681486666, "grad_norm": 0.8667936149681241, "learning_rate": 7.594716090499049e-06, "loss": 0.147, "step": 11879 }, { "epoch": 0.3465779800455102, "grad_norm": 1.254160825988959, "learning_rate": 7.5943122380837334e-06, "loss": 0.1464, "step": 11880 }, { "epoch": 0.3466071532761538, "grad_norm": 1.0749824320900403, "learning_rate": 7.5939083625070186e-06, "loss": 0.161, "step": 11881 }, { "epoch": 0.3466363265067974, "grad_norm": 0.8301352679499988, "learning_rate": 7.593504463772509e-06, "loss": 0.1449, "step": 11882 }, { "epoch": 0.34666549973744093, "grad_norm": 0.7502731131233293, "learning_rate": 7.59310054188381e-06, "loss": 0.1406, "step": 11883 }, { "epoch": 0.3466946729680845, "grad_norm": 1.1800001635301305, "learning_rate": 7.592696596844528e-06, "loss": 0.1627, "step": 11884 }, { "epoch": 0.34672384619872804, "grad_norm": 0.9820867231760797, "learning_rate": 7.592292628658272e-06, "loss": 0.1413, "step": 11885 }, { "epoch": 0.3467530194293716, "grad_norm": 0.6702341534760974, "learning_rate": 7.591888637328645e-06, "loss": 0.126, "step": 11886 }, { "epoch": 0.34678219266001514, "grad_norm": 1.0147294314014843, "learning_rate": 7.591484622859254e-06, "loss": 0.1646, "step": 11887 }, { "epoch": 0.34681136589065875, "grad_norm": 1.0580514292145637, "learning_rate": 7.591080585253709e-06, "loss": 0.1637, "step": 11888 }, { "epoch": 0.3468405391213023, "grad_norm": 0.7188400047709667, "learning_rate": 7.590676524515612e-06, "loss": 0.1382, "step": 11889 }, { "epoch": 0.34686971235194586, "grad_norm": 0.8577118374671773, "learning_rate": 7.5902724406485765e-06, "loss": 0.1584, "step": 11890 }, { "epoch": 0.3468988855825894, "grad_norm": 0.8155399706740338, "learning_rate": 7.589868333656205e-06, "loss": 0.1305, "step": 11891 }, { "epoch": 0.34692805881323296, "grad_norm": 0.9305157557534992, "learning_rate": 7.5894642035421085e-06, "loss": 0.1433, "step": 11892 }, { "epoch": 0.3469572320438765, "grad_norm": 0.6943071711003896, "learning_rate": 7.589060050309893e-06, "loss": 0.1369, "step": 11893 }, { "epoch": 0.3469864052745201, "grad_norm": 0.7759315503155931, "learning_rate": 7.588655873963169e-06, "loss": 0.1411, "step": 11894 }, { "epoch": 0.3470155785051637, "grad_norm": 0.7894836418023452, "learning_rate": 7.58825167450554e-06, "loss": 0.1446, "step": 11895 }, { "epoch": 0.34704475173580723, "grad_norm": 0.8018921402564896, "learning_rate": 7.58784745194062e-06, "loss": 0.1408, "step": 11896 }, { "epoch": 0.3470739249664508, "grad_norm": 0.7823097961516359, "learning_rate": 7.587443206272016e-06, "loss": 0.1768, "step": 11897 }, { "epoch": 0.34710309819709434, "grad_norm": 0.5954033579328916, "learning_rate": 7.587038937503336e-06, "loss": 0.1427, "step": 11898 }, { "epoch": 0.3471322714277379, "grad_norm": 0.8447046836073026, "learning_rate": 7.586634645638192e-06, "loss": 0.1302, "step": 11899 }, { "epoch": 0.34716144465838145, "grad_norm": 1.320328461967567, "learning_rate": 7.586230330680189e-06, "loss": 0.1341, "step": 11900 }, { "epoch": 0.34719061788902505, "grad_norm": 0.8787874159944483, "learning_rate": 7.58582599263294e-06, "loss": 0.1734, "step": 11901 }, { "epoch": 0.3472197911196686, "grad_norm": 0.8941425430177624, "learning_rate": 7.585421631500053e-06, "loss": 0.1544, "step": 11902 }, { "epoch": 0.34724896435031216, "grad_norm": 0.901562631007388, "learning_rate": 7.585017247285139e-06, "loss": 0.1424, "step": 11903 }, { "epoch": 0.3472781375809557, "grad_norm": 0.86713749606017, "learning_rate": 7.5846128399918085e-06, "loss": 0.1381, "step": 11904 }, { "epoch": 0.34730731081159927, "grad_norm": 0.7800106219324874, "learning_rate": 7.5842084096236725e-06, "loss": 0.1874, "step": 11905 }, { "epoch": 0.3473364840422428, "grad_norm": 0.8805537185990067, "learning_rate": 7.5838039561843394e-06, "loss": 0.1495, "step": 11906 }, { "epoch": 0.3473656572728864, "grad_norm": 1.0926782261149017, "learning_rate": 7.58339947967742e-06, "loss": 0.1582, "step": 11907 }, { "epoch": 0.34739483050353, "grad_norm": 0.7949222814397244, "learning_rate": 7.58299498010653e-06, "loss": 0.1602, "step": 11908 }, { "epoch": 0.34742400373417354, "grad_norm": 1.0383029204101395, "learning_rate": 7.582590457475277e-06, "loss": 0.1401, "step": 11909 }, { "epoch": 0.3474531769648171, "grad_norm": 0.9646818690641682, "learning_rate": 7.58218591178727e-06, "loss": 0.157, "step": 11910 }, { "epoch": 0.34748235019546064, "grad_norm": 0.6976252489915159, "learning_rate": 7.581781343046125e-06, "loss": 0.1291, "step": 11911 }, { "epoch": 0.3475115234261042, "grad_norm": 0.7660069983827438, "learning_rate": 7.581376751255453e-06, "loss": 0.1608, "step": 11912 }, { "epoch": 0.34754069665674775, "grad_norm": 1.0084938181599934, "learning_rate": 7.580972136418865e-06, "loss": 0.1423, "step": 11913 }, { "epoch": 0.3475698698873913, "grad_norm": 0.8818337085252713, "learning_rate": 7.580567498539975e-06, "loss": 0.1482, "step": 11914 }, { "epoch": 0.3475990431180349, "grad_norm": 0.8588129928066195, "learning_rate": 7.580162837622394e-06, "loss": 0.1395, "step": 11915 }, { "epoch": 0.34762821634867846, "grad_norm": 0.7396995712715371, "learning_rate": 7.579758153669736e-06, "loss": 0.1566, "step": 11916 }, { "epoch": 0.347657389579322, "grad_norm": 1.061775140027179, "learning_rate": 7.579353446685611e-06, "loss": 0.1488, "step": 11917 }, { "epoch": 0.34768656280996557, "grad_norm": 1.0504367619558557, "learning_rate": 7.578948716673636e-06, "loss": 0.1277, "step": 11918 }, { "epoch": 0.3477157360406091, "grad_norm": 0.6785031052925815, "learning_rate": 7.578543963637422e-06, "loss": 0.1393, "step": 11919 }, { "epoch": 0.3477449092712527, "grad_norm": 0.9635945082363817, "learning_rate": 7.578139187580582e-06, "loss": 0.1549, "step": 11920 }, { "epoch": 0.3477740825018963, "grad_norm": 0.8271091528319552, "learning_rate": 7.57773438850673e-06, "loss": 0.1622, "step": 11921 }, { "epoch": 0.34780325573253984, "grad_norm": 0.8516236273741031, "learning_rate": 7.577329566419482e-06, "loss": 0.1502, "step": 11922 }, { "epoch": 0.3478324289631834, "grad_norm": 1.3892241652855426, "learning_rate": 7.5769247213224515e-06, "loss": 0.1534, "step": 11923 }, { "epoch": 0.34786160219382695, "grad_norm": 0.8022308581565714, "learning_rate": 7.576519853219253e-06, "loss": 0.1528, "step": 11924 }, { "epoch": 0.3478907754244705, "grad_norm": 0.9506101986804958, "learning_rate": 7.576114962113499e-06, "loss": 0.1344, "step": 11925 }, { "epoch": 0.34791994865511405, "grad_norm": 0.6805201034378913, "learning_rate": 7.575710048008804e-06, "loss": 0.1411, "step": 11926 }, { "epoch": 0.3479491218857576, "grad_norm": 0.9126539739122491, "learning_rate": 7.575305110908789e-06, "loss": 0.1488, "step": 11927 }, { "epoch": 0.3479782951164012, "grad_norm": 0.7806815575921464, "learning_rate": 7.57490015081706e-06, "loss": 0.1382, "step": 11928 }, { "epoch": 0.34800746834704477, "grad_norm": 1.022223679229018, "learning_rate": 7.5744951677372405e-06, "loss": 0.1357, "step": 11929 }, { "epoch": 0.3480366415776883, "grad_norm": 1.0693245096665438, "learning_rate": 7.574090161672941e-06, "loss": 0.1598, "step": 11930 }, { "epoch": 0.3480658148083319, "grad_norm": 1.0079428372937314, "learning_rate": 7.573685132627779e-06, "loss": 0.1458, "step": 11931 }, { "epoch": 0.34809498803897543, "grad_norm": 0.8614096510807508, "learning_rate": 7.573280080605372e-06, "loss": 0.1711, "step": 11932 }, { "epoch": 0.348124161269619, "grad_norm": 0.8788365902587741, "learning_rate": 7.5728750056093324e-06, "loss": 0.1649, "step": 11933 }, { "epoch": 0.34815333450026253, "grad_norm": 1.0518207279438658, "learning_rate": 7.572469907643281e-06, "loss": 0.154, "step": 11934 }, { "epoch": 0.34818250773090614, "grad_norm": 0.8490076901076699, "learning_rate": 7.572064786710831e-06, "loss": 0.1525, "step": 11935 }, { "epoch": 0.3482116809615497, "grad_norm": 1.1160961260912805, "learning_rate": 7.571659642815601e-06, "loss": 0.1479, "step": 11936 }, { "epoch": 0.34824085419219325, "grad_norm": 0.9834525667146082, "learning_rate": 7.571254475961207e-06, "loss": 0.1856, "step": 11937 }, { "epoch": 0.3482700274228368, "grad_norm": 0.9704653302409625, "learning_rate": 7.570849286151268e-06, "loss": 0.1222, "step": 11938 }, { "epoch": 0.34829920065348036, "grad_norm": 0.7723015389239746, "learning_rate": 7.570444073389401e-06, "loss": 0.1481, "step": 11939 }, { "epoch": 0.3483283738841239, "grad_norm": 1.0652574036750242, "learning_rate": 7.570038837679221e-06, "loss": 0.1449, "step": 11940 }, { "epoch": 0.34835754711476746, "grad_norm": 0.9229097044709855, "learning_rate": 7.569633579024349e-06, "loss": 0.1347, "step": 11941 }, { "epoch": 0.34838672034541107, "grad_norm": 1.0344299975079618, "learning_rate": 7.569228297428401e-06, "loss": 0.1501, "step": 11942 }, { "epoch": 0.3484158935760546, "grad_norm": 1.0146508843935946, "learning_rate": 7.568822992894996e-06, "loss": 0.1346, "step": 11943 }, { "epoch": 0.3484450668066982, "grad_norm": 0.7724812330039791, "learning_rate": 7.5684176654277544e-06, "loss": 0.1345, "step": 11944 }, { "epoch": 0.34847424003734173, "grad_norm": 0.7955342858813292, "learning_rate": 7.568012315030291e-06, "loss": 0.1524, "step": 11945 }, { "epoch": 0.3485034132679853, "grad_norm": 0.8702249736454624, "learning_rate": 7.567606941706227e-06, "loss": 0.1633, "step": 11946 }, { "epoch": 0.34853258649862884, "grad_norm": 0.7052427597011488, "learning_rate": 7.567201545459182e-06, "loss": 0.1411, "step": 11947 }, { "epoch": 0.34856175972927245, "grad_norm": 0.7754105467385218, "learning_rate": 7.566796126292775e-06, "loss": 0.136, "step": 11948 }, { "epoch": 0.348590932959916, "grad_norm": 0.9373317031533087, "learning_rate": 7.566390684210623e-06, "loss": 0.1383, "step": 11949 }, { "epoch": 0.34862010619055955, "grad_norm": 0.750597646362002, "learning_rate": 7.565985219216348e-06, "loss": 0.1305, "step": 11950 }, { "epoch": 0.3486492794212031, "grad_norm": 0.7445725181939357, "learning_rate": 7.56557973131357e-06, "loss": 0.1549, "step": 11951 }, { "epoch": 0.34867845265184666, "grad_norm": 1.1197940421993258, "learning_rate": 7.565174220505908e-06, "loss": 0.1535, "step": 11952 }, { "epoch": 0.3487076258824902, "grad_norm": 0.8255686489900675, "learning_rate": 7.5647686867969836e-06, "loss": 0.1635, "step": 11953 }, { "epoch": 0.34873679911313377, "grad_norm": 0.8335188400060184, "learning_rate": 7.564363130190417e-06, "loss": 0.153, "step": 11954 }, { "epoch": 0.3487659723437774, "grad_norm": 0.9209087781811074, "learning_rate": 7.563957550689829e-06, "loss": 0.1337, "step": 11955 }, { "epoch": 0.34879514557442093, "grad_norm": 0.8560534645406798, "learning_rate": 7.56355194829884e-06, "loss": 0.165, "step": 11956 }, { "epoch": 0.3488243188050645, "grad_norm": 0.8093598768193273, "learning_rate": 7.563146323021069e-06, "loss": 0.1406, "step": 11957 }, { "epoch": 0.34885349203570803, "grad_norm": 0.6959558351912577, "learning_rate": 7.56274067486014e-06, "loss": 0.1449, "step": 11958 }, { "epoch": 0.3488826652663516, "grad_norm": 0.7469802476050259, "learning_rate": 7.562335003819676e-06, "loss": 0.1488, "step": 11959 }, { "epoch": 0.34891183849699514, "grad_norm": 0.7686951957740379, "learning_rate": 7.561929309903295e-06, "loss": 0.1309, "step": 11960 }, { "epoch": 0.3489410117276387, "grad_norm": 0.6687390943009407, "learning_rate": 7.561523593114621e-06, "loss": 0.1376, "step": 11961 }, { "epoch": 0.3489701849582823, "grad_norm": 0.7062388428726224, "learning_rate": 7.561117853457277e-06, "loss": 0.136, "step": 11962 }, { "epoch": 0.34899935818892586, "grad_norm": 0.8304008654549321, "learning_rate": 7.560712090934883e-06, "loss": 0.1302, "step": 11963 }, { "epoch": 0.3490285314195694, "grad_norm": 1.1233598241761529, "learning_rate": 7.560306305551064e-06, "loss": 0.1174, "step": 11964 }, { "epoch": 0.34905770465021296, "grad_norm": 0.6913879268733832, "learning_rate": 7.5599004973094404e-06, "loss": 0.1574, "step": 11965 }, { "epoch": 0.3490868778808565, "grad_norm": 1.0013549561427575, "learning_rate": 7.559494666213636e-06, "loss": 0.153, "step": 11966 }, { "epoch": 0.34911605111150007, "grad_norm": 0.732861297331376, "learning_rate": 7.559088812267274e-06, "loss": 0.1576, "step": 11967 }, { "epoch": 0.3491452243421436, "grad_norm": 0.8105003688198452, "learning_rate": 7.55868293547398e-06, "loss": 0.1535, "step": 11968 }, { "epoch": 0.34917439757278723, "grad_norm": 0.8230910520647171, "learning_rate": 7.558277035837373e-06, "loss": 0.141, "step": 11969 }, { "epoch": 0.3492035708034308, "grad_norm": 0.7551049755951319, "learning_rate": 7.5578711133610815e-06, "loss": 0.1311, "step": 11970 }, { "epoch": 0.34923274403407434, "grad_norm": 0.8307429867540659, "learning_rate": 7.557465168048726e-06, "loss": 0.1257, "step": 11971 }, { "epoch": 0.3492619172647179, "grad_norm": 0.8900528984135553, "learning_rate": 7.557059199903933e-06, "loss": 0.1416, "step": 11972 }, { "epoch": 0.34929109049536144, "grad_norm": 0.7253443237601457, "learning_rate": 7.556653208930325e-06, "loss": 0.1301, "step": 11973 }, { "epoch": 0.349320263726005, "grad_norm": 0.9300315031950055, "learning_rate": 7.556247195131527e-06, "loss": 0.1566, "step": 11974 }, { "epoch": 0.3493494369566486, "grad_norm": 0.7510839853240795, "learning_rate": 7.555841158511166e-06, "loss": 0.1435, "step": 11975 }, { "epoch": 0.34937861018729216, "grad_norm": 0.7661954174756337, "learning_rate": 7.555435099072864e-06, "loss": 0.1615, "step": 11976 }, { "epoch": 0.3494077834179357, "grad_norm": 0.9991595133539672, "learning_rate": 7.555029016820248e-06, "loss": 0.1311, "step": 11977 }, { "epoch": 0.34943695664857927, "grad_norm": 0.8549913344929823, "learning_rate": 7.554622911756943e-06, "loss": 0.127, "step": 11978 }, { "epoch": 0.3494661298792228, "grad_norm": 1.4073594749639478, "learning_rate": 7.554216783886573e-06, "loss": 0.154, "step": 11979 }, { "epoch": 0.3494953031098664, "grad_norm": 0.8859617198424669, "learning_rate": 7.553810633212766e-06, "loss": 0.1617, "step": 11980 }, { "epoch": 0.3495244763405099, "grad_norm": 0.8206997272813257, "learning_rate": 7.553404459739149e-06, "loss": 0.1383, "step": 11981 }, { "epoch": 0.34955364957115354, "grad_norm": 0.9161949749226932, "learning_rate": 7.552998263469344e-06, "loss": 0.1281, "step": 11982 }, { "epoch": 0.3495828228017971, "grad_norm": 1.0145605532643704, "learning_rate": 7.552592044406981e-06, "loss": 0.1389, "step": 11983 }, { "epoch": 0.34961199603244064, "grad_norm": 1.0319637108006643, "learning_rate": 7.552185802555687e-06, "loss": 0.1581, "step": 11984 }, { "epoch": 0.3496411692630842, "grad_norm": 0.8741925693978755, "learning_rate": 7.551779537919086e-06, "loss": 0.1383, "step": 11985 }, { "epoch": 0.34967034249372775, "grad_norm": 1.0485253410979751, "learning_rate": 7.551373250500806e-06, "loss": 0.1572, "step": 11986 }, { "epoch": 0.3496995157243713, "grad_norm": 0.8319919877014683, "learning_rate": 7.550966940304476e-06, "loss": 0.1624, "step": 11987 }, { "epoch": 0.34972868895501485, "grad_norm": 1.2454719213792826, "learning_rate": 7.550560607333721e-06, "loss": 0.1885, "step": 11988 }, { "epoch": 0.34975786218565846, "grad_norm": 1.0859185053467137, "learning_rate": 7.55015425159217e-06, "loss": 0.1508, "step": 11989 }, { "epoch": 0.349787035416302, "grad_norm": 0.8276987795363928, "learning_rate": 7.549747873083451e-06, "loss": 0.1772, "step": 11990 }, { "epoch": 0.34981620864694557, "grad_norm": 0.7641150272930679, "learning_rate": 7.549341471811192e-06, "loss": 0.1423, "step": 11991 }, { "epoch": 0.3498453818775891, "grad_norm": 0.9790484771841722, "learning_rate": 7.54893504777902e-06, "loss": 0.1496, "step": 11992 }, { "epoch": 0.3498745551082327, "grad_norm": 0.6392963206767323, "learning_rate": 7.548528600990565e-06, "loss": 0.1474, "step": 11993 }, { "epoch": 0.34990372833887623, "grad_norm": 0.7092662469286853, "learning_rate": 7.548122131449455e-06, "loss": 0.1316, "step": 11994 }, { "epoch": 0.3499329015695198, "grad_norm": 0.7921631611319702, "learning_rate": 7.547715639159319e-06, "loss": 0.1359, "step": 11995 }, { "epoch": 0.3499620748001634, "grad_norm": 0.7064379363786392, "learning_rate": 7.547309124123785e-06, "loss": 0.1493, "step": 11996 }, { "epoch": 0.34999124803080695, "grad_norm": 0.7506060493415203, "learning_rate": 7.546902586346483e-06, "loss": 0.1337, "step": 11997 }, { "epoch": 0.3500204212614505, "grad_norm": 0.6260097911179058, "learning_rate": 7.5464960258310435e-06, "loss": 0.1353, "step": 11998 }, { "epoch": 0.35004959449209405, "grad_norm": 0.8763805662986133, "learning_rate": 7.546089442581097e-06, "loss": 0.1387, "step": 11999 }, { "epoch": 0.3500787677227376, "grad_norm": 0.8494966847252503, "learning_rate": 7.545682836600269e-06, "loss": 0.1679, "step": 12000 }, { "epoch": 0.35010794095338116, "grad_norm": 0.7976569160323592, "learning_rate": 7.5452762078921935e-06, "loss": 0.1616, "step": 12001 }, { "epoch": 0.3501371141840247, "grad_norm": 0.9691712783063527, "learning_rate": 7.544869556460501e-06, "loss": 0.1499, "step": 12002 }, { "epoch": 0.3501662874146683, "grad_norm": 0.7424149450252084, "learning_rate": 7.544462882308818e-06, "loss": 0.1234, "step": 12003 }, { "epoch": 0.3501954606453119, "grad_norm": 0.7619736975676252, "learning_rate": 7.54405618544078e-06, "loss": 0.1315, "step": 12004 }, { "epoch": 0.3502246338759554, "grad_norm": 0.809507942534479, "learning_rate": 7.543649465860015e-06, "loss": 0.1285, "step": 12005 }, { "epoch": 0.350253807106599, "grad_norm": 0.939740138966658, "learning_rate": 7.543242723570154e-06, "loss": 0.1695, "step": 12006 }, { "epoch": 0.35028298033724253, "grad_norm": 0.9345311448229102, "learning_rate": 7.54283595857483e-06, "loss": 0.1522, "step": 12007 }, { "epoch": 0.3503121535678861, "grad_norm": 0.8543695874272043, "learning_rate": 7.542429170877672e-06, "loss": 0.1683, "step": 12008 }, { "epoch": 0.3503413267985297, "grad_norm": 1.0671623136526542, "learning_rate": 7.542022360482315e-06, "loss": 0.1223, "step": 12009 }, { "epoch": 0.35037050002917325, "grad_norm": 0.7888800643287854, "learning_rate": 7.54161552739239e-06, "loss": 0.1378, "step": 12010 }, { "epoch": 0.3503996732598168, "grad_norm": 0.8173120407487832, "learning_rate": 7.541208671611526e-06, "loss": 0.1497, "step": 12011 }, { "epoch": 0.35042884649046036, "grad_norm": 1.160749866646878, "learning_rate": 7.5408017931433585e-06, "loss": 0.1572, "step": 12012 }, { "epoch": 0.3504580197211039, "grad_norm": 0.9610673319523814, "learning_rate": 7.540394891991519e-06, "loss": 0.1611, "step": 12013 }, { "epoch": 0.35048719295174746, "grad_norm": 0.8450580634548185, "learning_rate": 7.539987968159641e-06, "loss": 0.1463, "step": 12014 }, { "epoch": 0.350516366182391, "grad_norm": 0.9142269293585659, "learning_rate": 7.539581021651357e-06, "loss": 0.1259, "step": 12015 }, { "epoch": 0.3505455394130346, "grad_norm": 0.923217182014225, "learning_rate": 7.539174052470299e-06, "loss": 0.1502, "step": 12016 }, { "epoch": 0.3505747126436782, "grad_norm": 0.9057778560682554, "learning_rate": 7.5387670606201e-06, "loss": 0.1492, "step": 12017 }, { "epoch": 0.35060388587432173, "grad_norm": 1.0498472866033246, "learning_rate": 7.538360046104396e-06, "loss": 0.1529, "step": 12018 }, { "epoch": 0.3506330591049653, "grad_norm": 0.8125394527005271, "learning_rate": 7.537953008926821e-06, "loss": 0.1377, "step": 12019 }, { "epoch": 0.35066223233560884, "grad_norm": 0.8165615823996295, "learning_rate": 7.537545949091005e-06, "loss": 0.1473, "step": 12020 }, { "epoch": 0.3506914055662524, "grad_norm": 0.7409799015249711, "learning_rate": 7.5371388666005866e-06, "loss": 0.1346, "step": 12021 }, { "epoch": 0.35072057879689594, "grad_norm": 0.8383167096198173, "learning_rate": 7.536731761459197e-06, "loss": 0.1342, "step": 12022 }, { "epoch": 0.35074975202753955, "grad_norm": 0.7865520515596042, "learning_rate": 7.536324633670471e-06, "loss": 0.1586, "step": 12023 }, { "epoch": 0.3507789252581831, "grad_norm": 0.757824135026902, "learning_rate": 7.535917483238047e-06, "loss": 0.1237, "step": 12024 }, { "epoch": 0.35080809848882666, "grad_norm": 0.8755553636142435, "learning_rate": 7.535510310165555e-06, "loss": 0.144, "step": 12025 }, { "epoch": 0.3508372717194702, "grad_norm": 0.755078719919549, "learning_rate": 7.535103114456631e-06, "loss": 0.1465, "step": 12026 }, { "epoch": 0.35086644495011376, "grad_norm": 0.9402033884650173, "learning_rate": 7.534695896114913e-06, "loss": 0.1559, "step": 12027 }, { "epoch": 0.3508956181807573, "grad_norm": 0.8540738477485491, "learning_rate": 7.5342886551440355e-06, "loss": 0.1448, "step": 12028 }, { "epoch": 0.35092479141140087, "grad_norm": 0.6425799279128513, "learning_rate": 7.533881391547633e-06, "loss": 0.154, "step": 12029 }, { "epoch": 0.3509539646420445, "grad_norm": 0.8609842832208886, "learning_rate": 7.533474105329343e-06, "loss": 0.1432, "step": 12030 }, { "epoch": 0.35098313787268803, "grad_norm": 0.718611385625933, "learning_rate": 7.5330667964928006e-06, "loss": 0.1636, "step": 12031 }, { "epoch": 0.3510123111033316, "grad_norm": 0.7689415807473821, "learning_rate": 7.5326594650416415e-06, "loss": 0.145, "step": 12032 }, { "epoch": 0.35104148433397514, "grad_norm": 0.6523245428096088, "learning_rate": 7.532252110979505e-06, "loss": 0.1612, "step": 12033 }, { "epoch": 0.3510706575646187, "grad_norm": 0.8062477362030178, "learning_rate": 7.531844734310025e-06, "loss": 0.1475, "step": 12034 }, { "epoch": 0.35109983079526225, "grad_norm": 0.7632641767439492, "learning_rate": 7.53143733503684e-06, "loss": 0.1475, "step": 12035 }, { "epoch": 0.35112900402590586, "grad_norm": 0.8321838797573419, "learning_rate": 7.5310299131635874e-06, "loss": 0.1442, "step": 12036 }, { "epoch": 0.3511581772565494, "grad_norm": 0.753205481611474, "learning_rate": 7.530622468693905e-06, "loss": 0.1425, "step": 12037 }, { "epoch": 0.35118735048719296, "grad_norm": 0.8974771231836078, "learning_rate": 7.530215001631426e-06, "loss": 0.1732, "step": 12038 }, { "epoch": 0.3512165237178365, "grad_norm": 0.6152625349741981, "learning_rate": 7.5298075119797945e-06, "loss": 0.1299, "step": 12039 }, { "epoch": 0.35124569694848007, "grad_norm": 0.7619754294519129, "learning_rate": 7.529399999742644e-06, "loss": 0.1226, "step": 12040 }, { "epoch": 0.3512748701791236, "grad_norm": 0.7399284271059957, "learning_rate": 7.528992464923615e-06, "loss": 0.1615, "step": 12041 }, { "epoch": 0.3513040434097672, "grad_norm": 0.963118361864318, "learning_rate": 7.528584907526343e-06, "loss": 0.1632, "step": 12042 }, { "epoch": 0.3513332166404108, "grad_norm": 0.8250067029946331, "learning_rate": 7.52817732755447e-06, "loss": 0.1532, "step": 12043 }, { "epoch": 0.35136238987105434, "grad_norm": 0.8625709749218062, "learning_rate": 7.5277697250116335e-06, "loss": 0.168, "step": 12044 }, { "epoch": 0.3513915631016979, "grad_norm": 0.8102563031029681, "learning_rate": 7.527362099901472e-06, "loss": 0.1555, "step": 12045 }, { "epoch": 0.35142073633234144, "grad_norm": 0.8905492280962982, "learning_rate": 7.526954452227626e-06, "loss": 0.1348, "step": 12046 }, { "epoch": 0.351449909562985, "grad_norm": 0.8863970891763994, "learning_rate": 7.526546781993731e-06, "loss": 0.1553, "step": 12047 }, { "epoch": 0.35147908279362855, "grad_norm": 0.7320287163447757, "learning_rate": 7.5261390892034315e-06, "loss": 0.1264, "step": 12048 }, { "epoch": 0.3515082560242721, "grad_norm": 0.8328094008963891, "learning_rate": 7.525731373860365e-06, "loss": 0.1497, "step": 12049 }, { "epoch": 0.3515374292549157, "grad_norm": 0.8290748429832105, "learning_rate": 7.525323635968171e-06, "loss": 0.1501, "step": 12050 }, { "epoch": 0.35156660248555927, "grad_norm": 0.7785605742897119, "learning_rate": 7.524915875530493e-06, "loss": 0.1314, "step": 12051 }, { "epoch": 0.3515957757162028, "grad_norm": 0.7153148581576247, "learning_rate": 7.524508092550968e-06, "loss": 0.1189, "step": 12052 }, { "epoch": 0.35162494894684637, "grad_norm": 1.00055863901744, "learning_rate": 7.524100287033235e-06, "loss": 0.1462, "step": 12053 }, { "epoch": 0.3516541221774899, "grad_norm": 0.7853007709733817, "learning_rate": 7.52369245898094e-06, "loss": 0.1563, "step": 12054 }, { "epoch": 0.3516832954081335, "grad_norm": 0.7423218991480369, "learning_rate": 7.523284608397718e-06, "loss": 0.1374, "step": 12055 }, { "epoch": 0.35171246863877703, "grad_norm": 0.9201515724969784, "learning_rate": 7.522876735287217e-06, "loss": 0.1408, "step": 12056 }, { "epoch": 0.35174164186942064, "grad_norm": 0.7792873540258134, "learning_rate": 7.522468839653072e-06, "loss": 0.1389, "step": 12057 }, { "epoch": 0.3517708151000642, "grad_norm": 0.7504657217148921, "learning_rate": 7.522060921498928e-06, "loss": 0.1244, "step": 12058 }, { "epoch": 0.35179998833070775, "grad_norm": 0.7462839313186306, "learning_rate": 7.521652980828427e-06, "loss": 0.1385, "step": 12059 }, { "epoch": 0.3518291615613513, "grad_norm": 0.7949877922378598, "learning_rate": 7.521245017645209e-06, "loss": 0.1484, "step": 12060 }, { "epoch": 0.35185833479199485, "grad_norm": 0.7792433715977214, "learning_rate": 7.520837031952919e-06, "loss": 0.1352, "step": 12061 }, { "epoch": 0.3518875080226384, "grad_norm": 0.7798477650475799, "learning_rate": 7.520429023755196e-06, "loss": 0.1487, "step": 12062 }, { "epoch": 0.351916681253282, "grad_norm": 0.7851479605761774, "learning_rate": 7.520020993055686e-06, "loss": 0.1606, "step": 12063 }, { "epoch": 0.35194585448392557, "grad_norm": 0.7797851748101124, "learning_rate": 7.5196129398580296e-06, "loss": 0.1318, "step": 12064 }, { "epoch": 0.3519750277145691, "grad_norm": 0.9523367546966441, "learning_rate": 7.51920486416587e-06, "loss": 0.1762, "step": 12065 }, { "epoch": 0.3520042009452127, "grad_norm": 0.7943619545766352, "learning_rate": 7.518796765982851e-06, "loss": 0.1298, "step": 12066 }, { "epoch": 0.35203337417585623, "grad_norm": 0.8896828727948359, "learning_rate": 7.518388645312615e-06, "loss": 0.1368, "step": 12067 }, { "epoch": 0.3520625474064998, "grad_norm": 0.7809993755085402, "learning_rate": 7.517980502158806e-06, "loss": 0.1214, "step": 12068 }, { "epoch": 0.35209172063714334, "grad_norm": 0.9034489189629171, "learning_rate": 7.51757233652507e-06, "loss": 0.1715, "step": 12069 }, { "epoch": 0.35212089386778694, "grad_norm": 0.7698354845054213, "learning_rate": 7.5171641484150484e-06, "loss": 0.1403, "step": 12070 }, { "epoch": 0.3521500670984305, "grad_norm": 0.9194387834832348, "learning_rate": 7.516755937832386e-06, "loss": 0.1399, "step": 12071 }, { "epoch": 0.35217924032907405, "grad_norm": 2.4641941778338987, "learning_rate": 7.516347704780726e-06, "loss": 0.1295, "step": 12072 }, { "epoch": 0.3522084135597176, "grad_norm": 0.6255735670416824, "learning_rate": 7.5159394492637175e-06, "loss": 0.1486, "step": 12073 }, { "epoch": 0.35223758679036116, "grad_norm": 0.815201660035071, "learning_rate": 7.5155311712849995e-06, "loss": 0.1256, "step": 12074 }, { "epoch": 0.3522667600210047, "grad_norm": 1.3017787395294462, "learning_rate": 7.515122870848222e-06, "loss": 0.1495, "step": 12075 }, { "epoch": 0.35229593325164826, "grad_norm": 0.8425767756467087, "learning_rate": 7.5147145479570275e-06, "loss": 0.1836, "step": 12076 }, { "epoch": 0.35232510648229187, "grad_norm": 0.8544688733768933, "learning_rate": 7.514306202615059e-06, "loss": 0.1378, "step": 12077 }, { "epoch": 0.3523542797129354, "grad_norm": 0.8529122529537075, "learning_rate": 7.513897834825967e-06, "loss": 0.1688, "step": 12078 }, { "epoch": 0.352383452943579, "grad_norm": 0.8849542775247677, "learning_rate": 7.513489444593396e-06, "loss": 0.1547, "step": 12079 }, { "epoch": 0.35241262617422253, "grad_norm": 0.7622060557294766, "learning_rate": 7.51308103192099e-06, "loss": 0.1611, "step": 12080 }, { "epoch": 0.3524417994048661, "grad_norm": 0.6766312717303754, "learning_rate": 7.512672596812397e-06, "loss": 0.1543, "step": 12081 }, { "epoch": 0.35247097263550964, "grad_norm": 0.7355151435826048, "learning_rate": 7.512264139271264e-06, "loss": 0.1495, "step": 12082 }, { "epoch": 0.3525001458661532, "grad_norm": 0.8755780711695437, "learning_rate": 7.511855659301232e-06, "loss": 0.1688, "step": 12083 }, { "epoch": 0.3525293190967968, "grad_norm": 0.9116905314087498, "learning_rate": 7.511447156905958e-06, "loss": 0.1417, "step": 12084 }, { "epoch": 0.35255849232744035, "grad_norm": 0.859793818173384, "learning_rate": 7.511038632089081e-06, "loss": 0.1485, "step": 12085 }, { "epoch": 0.3525876655580839, "grad_norm": 0.8647161525227471, "learning_rate": 7.510630084854249e-06, "loss": 0.1417, "step": 12086 }, { "epoch": 0.35261683878872746, "grad_norm": 0.8573088874550703, "learning_rate": 7.510221515205113e-06, "loss": 0.1461, "step": 12087 }, { "epoch": 0.352646012019371, "grad_norm": 0.8025920440243713, "learning_rate": 7.509812923145318e-06, "loss": 0.1323, "step": 12088 }, { "epoch": 0.35267518525001457, "grad_norm": 0.9133724786435535, "learning_rate": 7.509404308678512e-06, "loss": 0.1675, "step": 12089 }, { "epoch": 0.3527043584806582, "grad_norm": 1.091990138478178, "learning_rate": 7.5089956718083435e-06, "loss": 0.1413, "step": 12090 }, { "epoch": 0.35273353171130173, "grad_norm": 0.8776644025683882, "learning_rate": 7.508587012538462e-06, "loss": 0.1661, "step": 12091 }, { "epoch": 0.3527627049419453, "grad_norm": 0.931144277232217, "learning_rate": 7.508178330872512e-06, "loss": 0.1553, "step": 12092 }, { "epoch": 0.35279187817258884, "grad_norm": 0.9601895583663163, "learning_rate": 7.507769626814145e-06, "loss": 0.1362, "step": 12093 }, { "epoch": 0.3528210514032324, "grad_norm": 0.8446935053892277, "learning_rate": 7.507360900367011e-06, "loss": 0.1468, "step": 12094 }, { "epoch": 0.35285022463387594, "grad_norm": 0.8883699535563186, "learning_rate": 7.5069521515347565e-06, "loss": 0.1702, "step": 12095 }, { "epoch": 0.3528793978645195, "grad_norm": 0.9515818571757315, "learning_rate": 7.506543380321032e-06, "loss": 0.1325, "step": 12096 }, { "epoch": 0.3529085710951631, "grad_norm": 0.7833744001885308, "learning_rate": 7.5061345867294875e-06, "loss": 0.1505, "step": 12097 }, { "epoch": 0.35293774432580666, "grad_norm": 0.8904929112833724, "learning_rate": 7.505725770763769e-06, "loss": 0.1545, "step": 12098 }, { "epoch": 0.3529669175564502, "grad_norm": 0.7462988140846417, "learning_rate": 7.505316932427531e-06, "loss": 0.1457, "step": 12099 }, { "epoch": 0.35299609078709376, "grad_norm": 0.8340269984700273, "learning_rate": 7.504908071724422e-06, "loss": 0.1401, "step": 12100 }, { "epoch": 0.3530252640177373, "grad_norm": 0.9812345267265725, "learning_rate": 7.5044991886580895e-06, "loss": 0.1387, "step": 12101 }, { "epoch": 0.35305443724838087, "grad_norm": 0.7395035014847263, "learning_rate": 7.504090283232188e-06, "loss": 0.1306, "step": 12102 }, { "epoch": 0.3530836104790244, "grad_norm": 0.9028502130789045, "learning_rate": 7.503681355450365e-06, "loss": 0.1692, "step": 12103 }, { "epoch": 0.35311278370966803, "grad_norm": 0.7143190591113868, "learning_rate": 7.503272405316273e-06, "loss": 0.1315, "step": 12104 }, { "epoch": 0.3531419569403116, "grad_norm": 0.6872319562248489, "learning_rate": 7.502863432833563e-06, "loss": 0.1384, "step": 12105 }, { "epoch": 0.35317113017095514, "grad_norm": 0.9539852257019577, "learning_rate": 7.502454438005886e-06, "loss": 0.1786, "step": 12106 }, { "epoch": 0.3532003034015987, "grad_norm": 0.9964917694248208, "learning_rate": 7.502045420836892e-06, "loss": 0.1495, "step": 12107 }, { "epoch": 0.35322947663224225, "grad_norm": 0.990353130855816, "learning_rate": 7.501636381330234e-06, "loss": 0.157, "step": 12108 }, { "epoch": 0.3532586498628858, "grad_norm": 1.0891538696380902, "learning_rate": 7.5012273194895655e-06, "loss": 0.156, "step": 12109 }, { "epoch": 0.35328782309352935, "grad_norm": 1.0180733110929292, "learning_rate": 7.500818235318533e-06, "loss": 0.1466, "step": 12110 }, { "epoch": 0.35331699632417296, "grad_norm": 1.0325203116977864, "learning_rate": 7.5004091288207956e-06, "loss": 0.1498, "step": 12111 }, { "epoch": 0.3533461695548165, "grad_norm": 0.6384166936673309, "learning_rate": 7.500000000000001e-06, "loss": 0.1304, "step": 12112 }, { "epoch": 0.35337534278546007, "grad_norm": 0.8528566954497445, "learning_rate": 7.499590848859802e-06, "loss": 0.1439, "step": 12113 }, { "epoch": 0.3534045160161036, "grad_norm": 0.8353246956365478, "learning_rate": 7.499181675403855e-06, "loss": 0.1413, "step": 12114 }, { "epoch": 0.3534336892467472, "grad_norm": 0.782769096074687, "learning_rate": 7.49877247963581e-06, "loss": 0.1422, "step": 12115 }, { "epoch": 0.3534628624773907, "grad_norm": 0.748058712896468, "learning_rate": 7.49836326155932e-06, "loss": 0.1436, "step": 12116 }, { "epoch": 0.3534920357080343, "grad_norm": 0.8964088295486463, "learning_rate": 7.4979540211780396e-06, "loss": 0.1508, "step": 12117 }, { "epoch": 0.3535212089386779, "grad_norm": 0.7038898495325068, "learning_rate": 7.497544758495622e-06, "loss": 0.1356, "step": 12118 }, { "epoch": 0.35355038216932144, "grad_norm": 0.6770117758968943, "learning_rate": 7.497135473515719e-06, "loss": 0.1367, "step": 12119 }, { "epoch": 0.353579555399965, "grad_norm": 0.9081026508989675, "learning_rate": 7.49672616624199e-06, "loss": 0.1538, "step": 12120 }, { "epoch": 0.35360872863060855, "grad_norm": 0.9105090322175228, "learning_rate": 7.496316836678085e-06, "loss": 0.1468, "step": 12121 }, { "epoch": 0.3536379018612521, "grad_norm": 1.0005440326874793, "learning_rate": 7.495907484827658e-06, "loss": 0.1595, "step": 12122 }, { "epoch": 0.35366707509189566, "grad_norm": 0.943506731773164, "learning_rate": 7.495498110694364e-06, "loss": 0.1313, "step": 12123 }, { "epoch": 0.35369624832253926, "grad_norm": 0.9540944309194603, "learning_rate": 7.495088714281862e-06, "loss": 0.1451, "step": 12124 }, { "epoch": 0.3537254215531828, "grad_norm": 1.1399722373912111, "learning_rate": 7.494679295593801e-06, "loss": 0.1469, "step": 12125 }, { "epoch": 0.35375459478382637, "grad_norm": 0.8379482755652226, "learning_rate": 7.49426985463384e-06, "loss": 0.1518, "step": 12126 }, { "epoch": 0.3537837680144699, "grad_norm": 0.889161350385342, "learning_rate": 7.493860391405632e-06, "loss": 0.1539, "step": 12127 }, { "epoch": 0.3538129412451135, "grad_norm": 1.0597870595233896, "learning_rate": 7.4934509059128334e-06, "loss": 0.1491, "step": 12128 }, { "epoch": 0.35384211447575703, "grad_norm": 0.7716787255958263, "learning_rate": 7.493041398159102e-06, "loss": 0.1492, "step": 12129 }, { "epoch": 0.3538712877064006, "grad_norm": 0.7270262602920922, "learning_rate": 7.49263186814809e-06, "loss": 0.1298, "step": 12130 }, { "epoch": 0.3539004609370442, "grad_norm": 1.020961829125233, "learning_rate": 7.492222315883458e-06, "loss": 0.149, "step": 12131 }, { "epoch": 0.35392963416768775, "grad_norm": 0.8551619094051521, "learning_rate": 7.491812741368859e-06, "loss": 0.1781, "step": 12132 }, { "epoch": 0.3539588073983313, "grad_norm": 0.7148504836663766, "learning_rate": 7.491403144607951e-06, "loss": 0.1584, "step": 12133 }, { "epoch": 0.35398798062897485, "grad_norm": 0.7310860409671713, "learning_rate": 7.490993525604389e-06, "loss": 0.1528, "step": 12134 }, { "epoch": 0.3540171538596184, "grad_norm": 0.8109727413891358, "learning_rate": 7.490583884361834e-06, "loss": 0.142, "step": 12135 }, { "epoch": 0.35404632709026196, "grad_norm": 0.719566074690827, "learning_rate": 7.49017422088394e-06, "loss": 0.1671, "step": 12136 }, { "epoch": 0.3540755003209055, "grad_norm": 0.7792648338872127, "learning_rate": 7.489764535174363e-06, "loss": 0.15, "step": 12137 }, { "epoch": 0.3541046735515491, "grad_norm": 1.1983618739593251, "learning_rate": 7.489354827236765e-06, "loss": 0.1703, "step": 12138 }, { "epoch": 0.3541338467821927, "grad_norm": 0.8303239451422616, "learning_rate": 7.4889450970748e-06, "loss": 0.133, "step": 12139 }, { "epoch": 0.3541630200128362, "grad_norm": 0.9016886227944668, "learning_rate": 7.488535344692127e-06, "loss": 0.1457, "step": 12140 }, { "epoch": 0.3541921932434798, "grad_norm": 0.8093533800280821, "learning_rate": 7.488125570092406e-06, "loss": 0.1544, "step": 12141 }, { "epoch": 0.35422136647412333, "grad_norm": 0.8239954533166405, "learning_rate": 7.487715773279293e-06, "loss": 0.1475, "step": 12142 }, { "epoch": 0.3542505397047669, "grad_norm": 1.099868567715278, "learning_rate": 7.4873059542564465e-06, "loss": 0.1468, "step": 12143 }, { "epoch": 0.35427971293541044, "grad_norm": 1.0263479613663975, "learning_rate": 7.486896113027528e-06, "loss": 0.1479, "step": 12144 }, { "epoch": 0.35430888616605405, "grad_norm": 0.7351293633729539, "learning_rate": 7.486486249596194e-06, "loss": 0.1572, "step": 12145 }, { "epoch": 0.3543380593966976, "grad_norm": 0.7289195572148732, "learning_rate": 7.486076363966104e-06, "loss": 0.1359, "step": 12146 }, { "epoch": 0.35436723262734116, "grad_norm": 0.9182815224095634, "learning_rate": 7.485666456140918e-06, "loss": 0.1309, "step": 12147 }, { "epoch": 0.3543964058579847, "grad_norm": 0.8835430736644039, "learning_rate": 7.485256526124295e-06, "loss": 0.1657, "step": 12148 }, { "epoch": 0.35442557908862826, "grad_norm": 0.8613889013926775, "learning_rate": 7.484846573919895e-06, "loss": 0.1398, "step": 12149 }, { "epoch": 0.3544547523192718, "grad_norm": 0.9474985870617246, "learning_rate": 7.484436599531377e-06, "loss": 0.1483, "step": 12150 }, { "epoch": 0.3544839255499154, "grad_norm": 0.8719680967907784, "learning_rate": 7.484026602962405e-06, "loss": 0.156, "step": 12151 }, { "epoch": 0.354513098780559, "grad_norm": 0.9192150275876452, "learning_rate": 7.483616584216633e-06, "loss": 0.1311, "step": 12152 }, { "epoch": 0.35454227201120253, "grad_norm": 0.8408136692254884, "learning_rate": 7.483206543297727e-06, "loss": 0.1274, "step": 12153 }, { "epoch": 0.3545714452418461, "grad_norm": 1.3566829517458583, "learning_rate": 7.482796480209346e-06, "loss": 0.1455, "step": 12154 }, { "epoch": 0.35460061847248964, "grad_norm": 0.9014132017831294, "learning_rate": 7.48238639495515e-06, "loss": 0.1445, "step": 12155 }, { "epoch": 0.3546297917031332, "grad_norm": 0.8463275021589733, "learning_rate": 7.481976287538802e-06, "loss": 0.1471, "step": 12156 }, { "epoch": 0.35465896493377674, "grad_norm": 0.9980583664623567, "learning_rate": 7.481566157963961e-06, "loss": 0.1442, "step": 12157 }, { "epoch": 0.35468813816442035, "grad_norm": 0.8458981667551823, "learning_rate": 7.481156006234289e-06, "loss": 0.1308, "step": 12158 }, { "epoch": 0.3547173113950639, "grad_norm": 0.8842798179502824, "learning_rate": 7.480745832353451e-06, "loss": 0.1355, "step": 12159 }, { "epoch": 0.35474648462570746, "grad_norm": 0.9249057662349253, "learning_rate": 7.480335636325104e-06, "loss": 0.1516, "step": 12160 }, { "epoch": 0.354775657856351, "grad_norm": 0.9525575932168225, "learning_rate": 7.479925418152914e-06, "loss": 0.1372, "step": 12161 }, { "epoch": 0.35480483108699457, "grad_norm": 0.9058629692852751, "learning_rate": 7.479515177840542e-06, "loss": 0.1445, "step": 12162 }, { "epoch": 0.3548340043176381, "grad_norm": 0.93263703609085, "learning_rate": 7.479104915391649e-06, "loss": 0.1602, "step": 12163 }, { "epoch": 0.35486317754828167, "grad_norm": 0.8515894085441673, "learning_rate": 7.478694630809899e-06, "loss": 0.1168, "step": 12164 }, { "epoch": 0.3548923507789253, "grad_norm": 0.8805126690114949, "learning_rate": 7.478284324098957e-06, "loss": 0.1408, "step": 12165 }, { "epoch": 0.35492152400956883, "grad_norm": 0.7426349012566554, "learning_rate": 7.4778739952624835e-06, "loss": 0.1591, "step": 12166 }, { "epoch": 0.3549506972402124, "grad_norm": 0.6397230883071294, "learning_rate": 7.477463644304141e-06, "loss": 0.1356, "step": 12167 }, { "epoch": 0.35497987047085594, "grad_norm": 0.8982364452423646, "learning_rate": 7.477053271227596e-06, "loss": 0.1417, "step": 12168 }, { "epoch": 0.3550090437014995, "grad_norm": 0.8447914658551411, "learning_rate": 7.47664287603651e-06, "loss": 0.1737, "step": 12169 }, { "epoch": 0.35503821693214305, "grad_norm": 0.8370376273579871, "learning_rate": 7.476232458734547e-06, "loss": 0.1503, "step": 12170 }, { "epoch": 0.3550673901627866, "grad_norm": 0.8019143284953446, "learning_rate": 7.475822019325374e-06, "loss": 0.129, "step": 12171 }, { "epoch": 0.3550965633934302, "grad_norm": 0.9026727682417437, "learning_rate": 7.475411557812652e-06, "loss": 0.1345, "step": 12172 }, { "epoch": 0.35512573662407376, "grad_norm": 0.8508773884433103, "learning_rate": 7.4750010742000445e-06, "loss": 0.1414, "step": 12173 }, { "epoch": 0.3551549098547173, "grad_norm": 0.9824204270516677, "learning_rate": 7.474590568491222e-06, "loss": 0.1418, "step": 12174 }, { "epoch": 0.35518408308536087, "grad_norm": 0.8934801781268447, "learning_rate": 7.474180040689842e-06, "loss": 0.1409, "step": 12175 }, { "epoch": 0.3552132563160044, "grad_norm": 0.9221120609784726, "learning_rate": 7.473769490799575e-06, "loss": 0.1524, "step": 12176 }, { "epoch": 0.355242429546648, "grad_norm": 0.9789817786148268, "learning_rate": 7.473358918824085e-06, "loss": 0.1619, "step": 12177 }, { "epoch": 0.3552716027772916, "grad_norm": 0.7818843977047737, "learning_rate": 7.472948324767035e-06, "loss": 0.1468, "step": 12178 }, { "epoch": 0.35530077600793514, "grad_norm": 0.8606870497211695, "learning_rate": 7.472537708632095e-06, "loss": 0.1323, "step": 12179 }, { "epoch": 0.3553299492385787, "grad_norm": 0.8906836695819145, "learning_rate": 7.472127070422928e-06, "loss": 0.1628, "step": 12180 }, { "epoch": 0.35535912246922224, "grad_norm": 0.8088359794507107, "learning_rate": 7.4717164101432e-06, "loss": 0.122, "step": 12181 }, { "epoch": 0.3553882956998658, "grad_norm": 0.8566020698157975, "learning_rate": 7.471305727796579e-06, "loss": 0.1158, "step": 12182 }, { "epoch": 0.35541746893050935, "grad_norm": 0.8750096258289879, "learning_rate": 7.470895023386728e-06, "loss": 0.1742, "step": 12183 }, { "epoch": 0.3554466421611529, "grad_norm": 0.8400584190518494, "learning_rate": 7.470484296917319e-06, "loss": 0.1523, "step": 12184 }, { "epoch": 0.3554758153917965, "grad_norm": 1.110400780793465, "learning_rate": 7.470073548392014e-06, "loss": 0.146, "step": 12185 }, { "epoch": 0.35550498862244007, "grad_norm": 0.8514545642807553, "learning_rate": 7.469662777814484e-06, "loss": 0.1428, "step": 12186 }, { "epoch": 0.3555341618530836, "grad_norm": 0.8435930628979348, "learning_rate": 7.469251985188392e-06, "loss": 0.1775, "step": 12187 }, { "epoch": 0.35556333508372717, "grad_norm": 0.727108203502303, "learning_rate": 7.468841170517408e-06, "loss": 0.1388, "step": 12188 }, { "epoch": 0.3555925083143707, "grad_norm": 1.5890076218904514, "learning_rate": 7.468430333805201e-06, "loss": 0.1307, "step": 12189 }, { "epoch": 0.3556216815450143, "grad_norm": 0.8050188464842346, "learning_rate": 7.468019475055436e-06, "loss": 0.1615, "step": 12190 }, { "epoch": 0.35565085477565783, "grad_norm": 0.6650859130140865, "learning_rate": 7.467608594271782e-06, "loss": 0.1486, "step": 12191 }, { "epoch": 0.35568002800630144, "grad_norm": 0.8896317278270026, "learning_rate": 7.467197691457908e-06, "loss": 0.1446, "step": 12192 }, { "epoch": 0.355709201236945, "grad_norm": 0.7923808324329165, "learning_rate": 7.466786766617482e-06, "loss": 0.1321, "step": 12193 }, { "epoch": 0.35573837446758855, "grad_norm": 0.8962565546218314, "learning_rate": 7.466375819754173e-06, "loss": 0.1333, "step": 12194 }, { "epoch": 0.3557675476982321, "grad_norm": 0.8146336102797297, "learning_rate": 7.46596485087165e-06, "loss": 0.1294, "step": 12195 }, { "epoch": 0.35579672092887565, "grad_norm": 0.8661790351439472, "learning_rate": 7.465553859973581e-06, "loss": 0.1432, "step": 12196 }, { "epoch": 0.3558258941595192, "grad_norm": 0.8719026272812894, "learning_rate": 7.465142847063634e-06, "loss": 0.154, "step": 12197 }, { "epoch": 0.35585506739016276, "grad_norm": 0.6680003649006871, "learning_rate": 7.464731812145483e-06, "loss": 0.1184, "step": 12198 }, { "epoch": 0.35588424062080637, "grad_norm": 0.7488816345308577, "learning_rate": 7.464320755222793e-06, "loss": 0.1653, "step": 12199 }, { "epoch": 0.3559134138514499, "grad_norm": 0.8290221349289797, "learning_rate": 7.4639096762992345e-06, "loss": 0.1359, "step": 12200 }, { "epoch": 0.3559425870820935, "grad_norm": 1.0037579553709008, "learning_rate": 7.463498575378482e-06, "loss": 0.1589, "step": 12201 }, { "epoch": 0.35597176031273703, "grad_norm": 0.7962303807839208, "learning_rate": 7.463087452464199e-06, "loss": 0.1403, "step": 12202 }, { "epoch": 0.3560009335433806, "grad_norm": 0.8988662046657548, "learning_rate": 7.462676307560059e-06, "loss": 0.1664, "step": 12203 }, { "epoch": 0.35603010677402414, "grad_norm": 0.8946106540425235, "learning_rate": 7.462265140669735e-06, "loss": 0.1488, "step": 12204 }, { "epoch": 0.35605928000466774, "grad_norm": 0.866532976364385, "learning_rate": 7.461853951796895e-06, "loss": 0.1566, "step": 12205 }, { "epoch": 0.3560884532353113, "grad_norm": 0.8344186923159563, "learning_rate": 7.4614427409452116e-06, "loss": 0.1466, "step": 12206 }, { "epoch": 0.35611762646595485, "grad_norm": 1.290096770545583, "learning_rate": 7.461031508118354e-06, "loss": 0.1577, "step": 12207 }, { "epoch": 0.3561467996965984, "grad_norm": 0.8999158766283113, "learning_rate": 7.4606202533199945e-06, "loss": 0.1697, "step": 12208 }, { "epoch": 0.35617597292724196, "grad_norm": 0.9195358674976805, "learning_rate": 7.460208976553804e-06, "loss": 0.1698, "step": 12209 }, { "epoch": 0.3562051461578855, "grad_norm": 1.03287815865787, "learning_rate": 7.459797677823456e-06, "loss": 0.1179, "step": 12210 }, { "epoch": 0.35623431938852906, "grad_norm": 0.8481792372747801, "learning_rate": 7.4593863571326204e-06, "loss": 0.1521, "step": 12211 }, { "epoch": 0.3562634926191727, "grad_norm": 0.905091407798776, "learning_rate": 7.458975014484972e-06, "loss": 0.1374, "step": 12212 }, { "epoch": 0.3562926658498162, "grad_norm": 0.9903840552254256, "learning_rate": 7.458563649884182e-06, "loss": 0.1383, "step": 12213 }, { "epoch": 0.3563218390804598, "grad_norm": 0.8566119220499994, "learning_rate": 7.458152263333921e-06, "loss": 0.188, "step": 12214 }, { "epoch": 0.35635101231110333, "grad_norm": 0.806007458321855, "learning_rate": 7.457740854837865e-06, "loss": 0.1359, "step": 12215 }, { "epoch": 0.3563801855417469, "grad_norm": 0.846336649082963, "learning_rate": 7.457329424399685e-06, "loss": 0.1636, "step": 12216 }, { "epoch": 0.35640935877239044, "grad_norm": 0.9295740844894287, "learning_rate": 7.456917972023052e-06, "loss": 0.1261, "step": 12217 }, { "epoch": 0.356438532003034, "grad_norm": 0.6394264915549989, "learning_rate": 7.456506497711644e-06, "loss": 0.1388, "step": 12218 }, { "epoch": 0.3564677052336776, "grad_norm": 0.8709909836795148, "learning_rate": 7.456095001469135e-06, "loss": 0.1593, "step": 12219 }, { "epoch": 0.35649687846432115, "grad_norm": 0.9421989226538512, "learning_rate": 7.455683483299192e-06, "loss": 0.1728, "step": 12220 }, { "epoch": 0.3565260516949647, "grad_norm": 0.8111037998968257, "learning_rate": 7.455271943205495e-06, "loss": 0.1569, "step": 12221 }, { "epoch": 0.35655522492560826, "grad_norm": 0.8724484550037249, "learning_rate": 7.4548603811917155e-06, "loss": 0.1377, "step": 12222 }, { "epoch": 0.3565843981562518, "grad_norm": 0.7079865680622673, "learning_rate": 7.454448797261529e-06, "loss": 0.1614, "step": 12223 }, { "epoch": 0.35661357138689537, "grad_norm": 0.7457006911986671, "learning_rate": 7.45403719141861e-06, "loss": 0.1599, "step": 12224 }, { "epoch": 0.3566427446175389, "grad_norm": 0.7775430973464645, "learning_rate": 7.453625563666631e-06, "loss": 0.1179, "step": 12225 }, { "epoch": 0.35667191784818253, "grad_norm": 0.70813607212652, "learning_rate": 7.4532139140092694e-06, "loss": 0.136, "step": 12226 }, { "epoch": 0.3567010910788261, "grad_norm": 0.757494936279595, "learning_rate": 7.452802242450201e-06, "loss": 0.1359, "step": 12227 }, { "epoch": 0.35673026430946964, "grad_norm": 0.7858158925237995, "learning_rate": 7.452390548993098e-06, "loss": 0.1293, "step": 12228 }, { "epoch": 0.3567594375401132, "grad_norm": 0.7511279403357667, "learning_rate": 7.451978833641639e-06, "loss": 0.1476, "step": 12229 }, { "epoch": 0.35678861077075674, "grad_norm": 0.7953282518094266, "learning_rate": 7.451567096399497e-06, "loss": 0.1462, "step": 12230 }, { "epoch": 0.3568177840014003, "grad_norm": 0.7967242197118525, "learning_rate": 7.45115533727035e-06, "loss": 0.1433, "step": 12231 }, { "epoch": 0.3568469572320439, "grad_norm": 0.9466414202769508, "learning_rate": 7.450743556257874e-06, "loss": 0.1494, "step": 12232 }, { "epoch": 0.35687613046268746, "grad_norm": 0.7434133201655584, "learning_rate": 7.450331753365743e-06, "loss": 0.1479, "step": 12233 }, { "epoch": 0.356905303693331, "grad_norm": 1.0130427033764855, "learning_rate": 7.449919928597637e-06, "loss": 0.1489, "step": 12234 }, { "epoch": 0.35693447692397456, "grad_norm": 1.0454899133306563, "learning_rate": 7.449508081957228e-06, "loss": 0.1357, "step": 12235 }, { "epoch": 0.3569636501546181, "grad_norm": 1.060813850156249, "learning_rate": 7.449096213448198e-06, "loss": 0.1651, "step": 12236 }, { "epoch": 0.35699282338526167, "grad_norm": 0.9166046971015583, "learning_rate": 7.44868432307422e-06, "loss": 0.1484, "step": 12237 }, { "epoch": 0.3570219966159052, "grad_norm": 1.0708409711345588, "learning_rate": 7.448272410838975e-06, "loss": 0.1642, "step": 12238 }, { "epoch": 0.35705116984654883, "grad_norm": 0.9934534541385831, "learning_rate": 7.447860476746136e-06, "loss": 0.1529, "step": 12239 }, { "epoch": 0.3570803430771924, "grad_norm": 0.8925425957204665, "learning_rate": 7.447448520799384e-06, "loss": 0.1325, "step": 12240 }, { "epoch": 0.35710951630783594, "grad_norm": 0.8122120542844333, "learning_rate": 7.447036543002396e-06, "loss": 0.1586, "step": 12241 }, { "epoch": 0.3571386895384795, "grad_norm": 0.7963038408521792, "learning_rate": 7.4466245433588495e-06, "loss": 0.1258, "step": 12242 }, { "epoch": 0.35716786276912305, "grad_norm": 0.8484007574660379, "learning_rate": 7.4462125218724236e-06, "loss": 0.1231, "step": 12243 }, { "epoch": 0.3571970359997666, "grad_norm": 0.71054399049409, "learning_rate": 7.445800478546796e-06, "loss": 0.1538, "step": 12244 }, { "epoch": 0.35722620923041015, "grad_norm": 0.8263265182463267, "learning_rate": 7.445388413385646e-06, "loss": 0.1727, "step": 12245 }, { "epoch": 0.35725538246105376, "grad_norm": 0.7054479997653809, "learning_rate": 7.444976326392652e-06, "loss": 0.1351, "step": 12246 }, { "epoch": 0.3572845556916973, "grad_norm": 0.8755809091412908, "learning_rate": 7.444564217571491e-06, "loss": 0.1744, "step": 12247 }, { "epoch": 0.35731372892234087, "grad_norm": 1.0175376639742526, "learning_rate": 7.444152086925847e-06, "loss": 0.1346, "step": 12248 }, { "epoch": 0.3573429021529844, "grad_norm": 0.7427987981336198, "learning_rate": 7.443739934459397e-06, "loss": 0.1399, "step": 12249 }, { "epoch": 0.357372075383628, "grad_norm": 0.933542900671776, "learning_rate": 7.443327760175817e-06, "loss": 0.1365, "step": 12250 }, { "epoch": 0.3574012486142715, "grad_norm": 0.8583530360675189, "learning_rate": 7.442915564078793e-06, "loss": 0.1341, "step": 12251 }, { "epoch": 0.3574304218449151, "grad_norm": 0.7579398069238198, "learning_rate": 7.442503346172001e-06, "loss": 0.1376, "step": 12252 }, { "epoch": 0.3574595950755587, "grad_norm": 0.9293926173443993, "learning_rate": 7.4420911064591215e-06, "loss": 0.1316, "step": 12253 }, { "epoch": 0.35748876830620224, "grad_norm": 0.6904763213668176, "learning_rate": 7.441678844943836e-06, "loss": 0.1417, "step": 12254 }, { "epoch": 0.3575179415368458, "grad_norm": 0.8402918439439921, "learning_rate": 7.441266561629825e-06, "loss": 0.1549, "step": 12255 }, { "epoch": 0.35754711476748935, "grad_norm": 0.7303999063960172, "learning_rate": 7.440854256520769e-06, "loss": 0.1352, "step": 12256 }, { "epoch": 0.3575762879981329, "grad_norm": 0.9822577952961637, "learning_rate": 7.440441929620348e-06, "loss": 0.1388, "step": 12257 }, { "epoch": 0.35760546122877646, "grad_norm": 0.9098563102938438, "learning_rate": 7.4400295809322445e-06, "loss": 0.1602, "step": 12258 }, { "epoch": 0.35763463445942, "grad_norm": 0.9816882972557667, "learning_rate": 7.439617210460139e-06, "loss": 0.1449, "step": 12259 }, { "epoch": 0.3576638076900636, "grad_norm": 0.9220034516470308, "learning_rate": 7.439204818207715e-06, "loss": 0.1276, "step": 12260 }, { "epoch": 0.35769298092070717, "grad_norm": 0.79823479876437, "learning_rate": 7.438792404178652e-06, "loss": 0.1563, "step": 12261 }, { "epoch": 0.3577221541513507, "grad_norm": 0.841057332417277, "learning_rate": 7.4383799683766315e-06, "loss": 0.1494, "step": 12262 }, { "epoch": 0.3577513273819943, "grad_norm": 0.8624899017320787, "learning_rate": 7.437967510805336e-06, "loss": 0.1473, "step": 12263 }, { "epoch": 0.35778050061263783, "grad_norm": 0.8338898053412516, "learning_rate": 7.4375550314684505e-06, "loss": 0.1142, "step": 12264 }, { "epoch": 0.3578096738432814, "grad_norm": 0.7141194529104241, "learning_rate": 7.437142530369654e-06, "loss": 0.1393, "step": 12265 }, { "epoch": 0.357838847073925, "grad_norm": 0.9102631886380496, "learning_rate": 7.436730007512633e-06, "loss": 0.1293, "step": 12266 }, { "epoch": 0.35786802030456855, "grad_norm": 0.8405538358882885, "learning_rate": 7.436317462901068e-06, "loss": 0.1278, "step": 12267 }, { "epoch": 0.3578971935352121, "grad_norm": 0.9766458337122229, "learning_rate": 7.43590489653864e-06, "loss": 0.1511, "step": 12268 }, { "epoch": 0.35792636676585565, "grad_norm": 0.8098832418681778, "learning_rate": 7.4354923084290364e-06, "loss": 0.1767, "step": 12269 }, { "epoch": 0.3579555399964992, "grad_norm": 0.8814530567633131, "learning_rate": 7.435079698575939e-06, "loss": 0.1393, "step": 12270 }, { "epoch": 0.35798471322714276, "grad_norm": 0.864062979438122, "learning_rate": 7.43466706698303e-06, "loss": 0.1381, "step": 12271 }, { "epoch": 0.3580138864577863, "grad_norm": 0.801081478137863, "learning_rate": 7.434254413653995e-06, "loss": 0.1365, "step": 12272 }, { "epoch": 0.3580430596884299, "grad_norm": 0.6406910898646578, "learning_rate": 7.433841738592518e-06, "loss": 0.144, "step": 12273 }, { "epoch": 0.3580722329190735, "grad_norm": 0.7635709007643025, "learning_rate": 7.433429041802282e-06, "loss": 0.1604, "step": 12274 }, { "epoch": 0.358101406149717, "grad_norm": 0.8966671507682924, "learning_rate": 7.433016323286975e-06, "loss": 0.1479, "step": 12275 }, { "epoch": 0.3581305793803606, "grad_norm": 0.7248138408881539, "learning_rate": 7.432603583050277e-06, "loss": 0.1483, "step": 12276 }, { "epoch": 0.35815975261100413, "grad_norm": 0.8051256495884842, "learning_rate": 7.432190821095875e-06, "loss": 0.1753, "step": 12277 }, { "epoch": 0.3581889258416477, "grad_norm": 0.7255186023301744, "learning_rate": 7.431778037427455e-06, "loss": 0.1109, "step": 12278 }, { "epoch": 0.35821809907229124, "grad_norm": 0.7527019200543248, "learning_rate": 7.431365232048701e-06, "loss": 0.1254, "step": 12279 }, { "epoch": 0.35824727230293485, "grad_norm": 0.7222836276234812, "learning_rate": 7.430952404963298e-06, "loss": 0.1373, "step": 12280 }, { "epoch": 0.3582764455335784, "grad_norm": 0.8216852059432174, "learning_rate": 7.430539556174933e-06, "loss": 0.1524, "step": 12281 }, { "epoch": 0.35830561876422196, "grad_norm": 0.9893838644930824, "learning_rate": 7.43012668568729e-06, "loss": 0.156, "step": 12282 }, { "epoch": 0.3583347919948655, "grad_norm": 0.969389833464752, "learning_rate": 7.429713793504056e-06, "loss": 0.1526, "step": 12283 }, { "epoch": 0.35836396522550906, "grad_norm": 0.8041972180659, "learning_rate": 7.429300879628918e-06, "loss": 0.147, "step": 12284 }, { "epoch": 0.3583931384561526, "grad_norm": 1.1458077909766509, "learning_rate": 7.428887944065562e-06, "loss": 0.1642, "step": 12285 }, { "epoch": 0.35842231168679617, "grad_norm": 1.0715062884145345, "learning_rate": 7.428474986817673e-06, "loss": 0.1826, "step": 12286 }, { "epoch": 0.3584514849174398, "grad_norm": 0.7704009576124625, "learning_rate": 7.42806200788894e-06, "loss": 0.156, "step": 12287 }, { "epoch": 0.35848065814808333, "grad_norm": 1.4323846304424777, "learning_rate": 7.427649007283049e-06, "loss": 0.1522, "step": 12288 }, { "epoch": 0.3585098313787269, "grad_norm": 0.8563152652701729, "learning_rate": 7.4272359850036865e-06, "loss": 0.1407, "step": 12289 }, { "epoch": 0.35853900460937044, "grad_norm": 0.7655571933224661, "learning_rate": 7.426822941054541e-06, "loss": 0.1297, "step": 12290 }, { "epoch": 0.358568177840014, "grad_norm": 1.1216758641583837, "learning_rate": 7.4264098754393e-06, "loss": 0.1415, "step": 12291 }, { "epoch": 0.35859735107065754, "grad_norm": 0.9519263359126535, "learning_rate": 7.42599678816165e-06, "loss": 0.1496, "step": 12292 }, { "epoch": 0.35862652430130115, "grad_norm": 0.8922317014353787, "learning_rate": 7.42558367922528e-06, "loss": 0.1518, "step": 12293 }, { "epoch": 0.3586556975319447, "grad_norm": 0.8629977456094764, "learning_rate": 7.42517054863388e-06, "loss": 0.1518, "step": 12294 }, { "epoch": 0.35868487076258826, "grad_norm": 0.6720919543875122, "learning_rate": 7.424757396391133e-06, "loss": 0.1398, "step": 12295 }, { "epoch": 0.3587140439932318, "grad_norm": 0.8753834844981437, "learning_rate": 7.424344222500734e-06, "loss": 0.1722, "step": 12296 }, { "epoch": 0.35874321722387537, "grad_norm": 0.8353090656853341, "learning_rate": 7.423931026966365e-06, "loss": 0.1486, "step": 12297 }, { "epoch": 0.3587723904545189, "grad_norm": 0.769762163570692, "learning_rate": 7.4235178097917216e-06, "loss": 0.172, "step": 12298 }, { "epoch": 0.3588015636851625, "grad_norm": 0.7878399871303298, "learning_rate": 7.4231045709804885e-06, "loss": 0.1523, "step": 12299 }, { "epoch": 0.3588307369158061, "grad_norm": 0.8206114113125107, "learning_rate": 7.422691310536355e-06, "loss": 0.1317, "step": 12300 }, { "epoch": 0.35885991014644963, "grad_norm": 0.7403662408992112, "learning_rate": 7.422278028463013e-06, "loss": 0.1348, "step": 12301 }, { "epoch": 0.3588890833770932, "grad_norm": 0.8872450680061393, "learning_rate": 7.421864724764152e-06, "loss": 0.1471, "step": 12302 }, { "epoch": 0.35891825660773674, "grad_norm": 0.7791741098502287, "learning_rate": 7.421451399443459e-06, "loss": 0.1527, "step": 12303 }, { "epoch": 0.3589474298383803, "grad_norm": 0.685358201498907, "learning_rate": 7.421038052504627e-06, "loss": 0.1365, "step": 12304 }, { "epoch": 0.35897660306902385, "grad_norm": 0.8881283338308029, "learning_rate": 7.4206246839513455e-06, "loss": 0.1728, "step": 12305 }, { "epoch": 0.3590057762996674, "grad_norm": 1.0459925805075425, "learning_rate": 7.420211293787305e-06, "loss": 0.152, "step": 12306 }, { "epoch": 0.359034949530311, "grad_norm": 0.8036929620852763, "learning_rate": 7.419797882016193e-06, "loss": 0.1317, "step": 12307 }, { "epoch": 0.35906412276095456, "grad_norm": 0.8080802933071737, "learning_rate": 7.419384448641706e-06, "loss": 0.1336, "step": 12308 }, { "epoch": 0.3590932959915981, "grad_norm": 0.8066517583138296, "learning_rate": 7.418970993667531e-06, "loss": 0.1296, "step": 12309 }, { "epoch": 0.35912246922224167, "grad_norm": 1.0865065477623663, "learning_rate": 7.41855751709736e-06, "loss": 0.1548, "step": 12310 }, { "epoch": 0.3591516424528852, "grad_norm": 0.8762441744657048, "learning_rate": 7.418144018934888e-06, "loss": 0.1738, "step": 12311 }, { "epoch": 0.3591808156835288, "grad_norm": 1.0911604048878483, "learning_rate": 7.417730499183801e-06, "loss": 0.1347, "step": 12312 }, { "epoch": 0.35920998891417233, "grad_norm": 0.8934094349034861, "learning_rate": 7.417316957847793e-06, "loss": 0.1437, "step": 12313 }, { "epoch": 0.35923916214481594, "grad_norm": 0.9355503288008398, "learning_rate": 7.416903394930556e-06, "loss": 0.1341, "step": 12314 }, { "epoch": 0.3592683353754595, "grad_norm": 0.9136214836862947, "learning_rate": 7.416489810435783e-06, "loss": 0.1835, "step": 12315 }, { "epoch": 0.35929750860610304, "grad_norm": 0.9682915154151832, "learning_rate": 7.4160762043671664e-06, "loss": 0.1408, "step": 12316 }, { "epoch": 0.3593266818367466, "grad_norm": 0.9504391712661863, "learning_rate": 7.415662576728397e-06, "loss": 0.1561, "step": 12317 }, { "epoch": 0.35935585506739015, "grad_norm": 0.8876264488166852, "learning_rate": 7.41524892752317e-06, "loss": 0.122, "step": 12318 }, { "epoch": 0.3593850282980337, "grad_norm": 0.9609503698594272, "learning_rate": 7.414835256755176e-06, "loss": 0.1666, "step": 12319 }, { "epoch": 0.3594142015286773, "grad_norm": 0.7358350385194634, "learning_rate": 7.41442156442811e-06, "loss": 0.1255, "step": 12320 }, { "epoch": 0.35944337475932087, "grad_norm": 0.8460819344876249, "learning_rate": 7.414007850545666e-06, "loss": 0.1373, "step": 12321 }, { "epoch": 0.3594725479899644, "grad_norm": 0.8859639325453393, "learning_rate": 7.4135941151115335e-06, "loss": 0.1689, "step": 12322 }, { "epoch": 0.359501721220608, "grad_norm": 0.9864525481893218, "learning_rate": 7.41318035812941e-06, "loss": 0.1613, "step": 12323 }, { "epoch": 0.3595308944512515, "grad_norm": 0.8165972592542372, "learning_rate": 7.4127665796029905e-06, "loss": 0.1358, "step": 12324 }, { "epoch": 0.3595600676818951, "grad_norm": 0.8562266100046408, "learning_rate": 7.412352779535963e-06, "loss": 0.1519, "step": 12325 }, { "epoch": 0.35958924091253863, "grad_norm": 0.912280309723881, "learning_rate": 7.411938957932029e-06, "loss": 0.1581, "step": 12326 }, { "epoch": 0.35961841414318224, "grad_norm": 0.7717685849872788, "learning_rate": 7.411525114794877e-06, "loss": 0.1407, "step": 12327 }, { "epoch": 0.3596475873738258, "grad_norm": 0.8565656156353816, "learning_rate": 7.411111250128207e-06, "loss": 0.1538, "step": 12328 }, { "epoch": 0.35967676060446935, "grad_norm": 0.8023797838850455, "learning_rate": 7.4106973639357104e-06, "loss": 0.1503, "step": 12329 }, { "epoch": 0.3597059338351129, "grad_norm": 0.8821948443676356, "learning_rate": 7.4102834562210825e-06, "loss": 0.1441, "step": 12330 }, { "epoch": 0.35973510706575645, "grad_norm": 0.7279747327238699, "learning_rate": 7.4098695269880205e-06, "loss": 0.1563, "step": 12331 }, { "epoch": 0.3597642802964, "grad_norm": 0.9236086564391722, "learning_rate": 7.4094555762402174e-06, "loss": 0.1355, "step": 12332 }, { "epoch": 0.35979345352704356, "grad_norm": 0.8103902030532772, "learning_rate": 7.409041603981371e-06, "loss": 0.1609, "step": 12333 }, { "epoch": 0.35982262675768717, "grad_norm": 0.8249193407165809, "learning_rate": 7.408627610215176e-06, "loss": 0.1522, "step": 12334 }, { "epoch": 0.3598517999883307, "grad_norm": 0.6871405848971597, "learning_rate": 7.408213594945328e-06, "loss": 0.1183, "step": 12335 }, { "epoch": 0.3598809732189743, "grad_norm": 0.7755076899987711, "learning_rate": 7.4077995581755255e-06, "loss": 0.1368, "step": 12336 }, { "epoch": 0.35991014644961783, "grad_norm": 0.9100559507869117, "learning_rate": 7.407385499909462e-06, "loss": 0.1535, "step": 12337 }, { "epoch": 0.3599393196802614, "grad_norm": 0.7937661174648724, "learning_rate": 7.406971420150837e-06, "loss": 0.1419, "step": 12338 }, { "epoch": 0.35996849291090494, "grad_norm": 0.8959987231263276, "learning_rate": 7.406557318903344e-06, "loss": 0.1518, "step": 12339 }, { "epoch": 0.3599976661415485, "grad_norm": 0.8906269978011393, "learning_rate": 7.406143196170681e-06, "loss": 0.1796, "step": 12340 }, { "epoch": 0.3600268393721921, "grad_norm": 0.8072700045665477, "learning_rate": 7.405729051956548e-06, "loss": 0.1536, "step": 12341 }, { "epoch": 0.36005601260283565, "grad_norm": 1.0264394048547325, "learning_rate": 7.405314886264639e-06, "loss": 0.1429, "step": 12342 }, { "epoch": 0.3600851858334792, "grad_norm": 0.807448160087179, "learning_rate": 7.404900699098654e-06, "loss": 0.15, "step": 12343 }, { "epoch": 0.36011435906412276, "grad_norm": 0.6762839773443348, "learning_rate": 7.404486490462289e-06, "loss": 0.1373, "step": 12344 }, { "epoch": 0.3601435322947663, "grad_norm": 1.0202249359976419, "learning_rate": 7.404072260359243e-06, "loss": 0.1419, "step": 12345 }, { "epoch": 0.36017270552540986, "grad_norm": 0.8772031024908503, "learning_rate": 7.403658008793213e-06, "loss": 0.1341, "step": 12346 }, { "epoch": 0.3602018787560535, "grad_norm": 0.8549827882854905, "learning_rate": 7.4032437357678985e-06, "loss": 0.1659, "step": 12347 }, { "epoch": 0.360231051986697, "grad_norm": 1.0450883888744094, "learning_rate": 7.4028294412869985e-06, "loss": 0.143, "step": 12348 }, { "epoch": 0.3602602252173406, "grad_norm": 0.7713575426833317, "learning_rate": 7.40241512535421e-06, "loss": 0.1542, "step": 12349 }, { "epoch": 0.36028939844798413, "grad_norm": 0.8330528390504065, "learning_rate": 7.402000787973232e-06, "loss": 0.1447, "step": 12350 }, { "epoch": 0.3603185716786277, "grad_norm": 1.094625320039597, "learning_rate": 7.401586429147767e-06, "loss": 0.1709, "step": 12351 }, { "epoch": 0.36034774490927124, "grad_norm": 0.7813934111383724, "learning_rate": 7.401172048881509e-06, "loss": 0.1559, "step": 12352 }, { "epoch": 0.3603769181399148, "grad_norm": 0.7804873840379437, "learning_rate": 7.400757647178162e-06, "loss": 0.128, "step": 12353 }, { "epoch": 0.3604060913705584, "grad_norm": 0.8758749781555031, "learning_rate": 7.400343224041422e-06, "loss": 0.1431, "step": 12354 }, { "epoch": 0.36043526460120195, "grad_norm": 0.8377924261046195, "learning_rate": 7.399928779474991e-06, "loss": 0.1481, "step": 12355 }, { "epoch": 0.3604644378318455, "grad_norm": 0.7853079153083155, "learning_rate": 7.39951431348257e-06, "loss": 0.1439, "step": 12356 }, { "epoch": 0.36049361106248906, "grad_norm": 0.9347527446114469, "learning_rate": 7.399099826067857e-06, "loss": 0.1273, "step": 12357 }, { "epoch": 0.3605227842931326, "grad_norm": 0.9243839040542723, "learning_rate": 7.398685317234554e-06, "loss": 0.1287, "step": 12358 }, { "epoch": 0.36055195752377617, "grad_norm": 0.836372029203595, "learning_rate": 7.398270786986361e-06, "loss": 0.1491, "step": 12359 }, { "epoch": 0.3605811307544197, "grad_norm": 1.074532348994187, "learning_rate": 7.397856235326979e-06, "loss": 0.1451, "step": 12360 }, { "epoch": 0.36061030398506333, "grad_norm": 0.8130342485484173, "learning_rate": 7.397441662260109e-06, "loss": 0.1262, "step": 12361 }, { "epoch": 0.3606394772157069, "grad_norm": 0.9157609290069918, "learning_rate": 7.3970270677894505e-06, "loss": 0.144, "step": 12362 }, { "epoch": 0.36066865044635044, "grad_norm": 0.9774480662919182, "learning_rate": 7.396612451918709e-06, "loss": 0.1699, "step": 12363 }, { "epoch": 0.360697823676994, "grad_norm": 0.7751576073896935, "learning_rate": 7.396197814651582e-06, "loss": 0.171, "step": 12364 }, { "epoch": 0.36072699690763754, "grad_norm": 1.026047430528191, "learning_rate": 7.3957831559917735e-06, "loss": 0.1491, "step": 12365 }, { "epoch": 0.3607561701382811, "grad_norm": 1.1578198490582807, "learning_rate": 7.395368475942985e-06, "loss": 0.1352, "step": 12366 }, { "epoch": 0.36078534336892465, "grad_norm": 0.7358419225212605, "learning_rate": 7.394953774508918e-06, "loss": 0.1442, "step": 12367 }, { "epoch": 0.36081451659956826, "grad_norm": 0.7855099920570201, "learning_rate": 7.3945390516932765e-06, "loss": 0.1521, "step": 12368 }, { "epoch": 0.3608436898302118, "grad_norm": 0.882718394909379, "learning_rate": 7.394124307499762e-06, "loss": 0.1627, "step": 12369 }, { "epoch": 0.36087286306085536, "grad_norm": 1.1041984778019813, "learning_rate": 7.393709541932076e-06, "loss": 0.1425, "step": 12370 }, { "epoch": 0.3609020362914989, "grad_norm": 0.8117247656766673, "learning_rate": 7.393294754993924e-06, "loss": 0.1487, "step": 12371 }, { "epoch": 0.36093120952214247, "grad_norm": 0.9860067015206712, "learning_rate": 7.392879946689007e-06, "loss": 0.1738, "step": 12372 }, { "epoch": 0.360960382752786, "grad_norm": 0.9697711162001428, "learning_rate": 7.39246511702103e-06, "loss": 0.1522, "step": 12373 }, { "epoch": 0.3609895559834296, "grad_norm": 0.790772645726191, "learning_rate": 7.3920502659936936e-06, "loss": 0.1507, "step": 12374 }, { "epoch": 0.3610187292140732, "grad_norm": 0.708130741606113, "learning_rate": 7.3916353936107045e-06, "loss": 0.1264, "step": 12375 }, { "epoch": 0.36104790244471674, "grad_norm": 1.089621074722289, "learning_rate": 7.3912204998757656e-06, "loss": 0.1417, "step": 12376 }, { "epoch": 0.3610770756753603, "grad_norm": 0.9019908227128198, "learning_rate": 7.390805584792581e-06, "loss": 0.1423, "step": 12377 }, { "epoch": 0.36110624890600385, "grad_norm": 0.8293264150623696, "learning_rate": 7.390390648364855e-06, "loss": 0.147, "step": 12378 }, { "epoch": 0.3611354221366474, "grad_norm": 0.9178389304117276, "learning_rate": 7.389975690596292e-06, "loss": 0.1489, "step": 12379 }, { "epoch": 0.36116459536729095, "grad_norm": 1.0360618918651492, "learning_rate": 7.389560711490595e-06, "loss": 0.1414, "step": 12380 }, { "epoch": 0.36119376859793456, "grad_norm": 0.6703535165588835, "learning_rate": 7.389145711051473e-06, "loss": 0.1349, "step": 12381 }, { "epoch": 0.3612229418285781, "grad_norm": 0.6804167249294869, "learning_rate": 7.388730689282626e-06, "loss": 0.1432, "step": 12382 }, { "epoch": 0.36125211505922167, "grad_norm": 0.7354037221891911, "learning_rate": 7.388315646187763e-06, "loss": 0.1632, "step": 12383 }, { "epoch": 0.3612812882898652, "grad_norm": 0.805908468689585, "learning_rate": 7.3879005817705886e-06, "loss": 0.1374, "step": 12384 }, { "epoch": 0.3613104615205088, "grad_norm": 1.2220610790779731, "learning_rate": 7.387485496034805e-06, "loss": 0.1663, "step": 12385 }, { "epoch": 0.3613396347511523, "grad_norm": 0.6820647865653887, "learning_rate": 7.387070388984123e-06, "loss": 0.1361, "step": 12386 }, { "epoch": 0.3613688079817959, "grad_norm": 0.9643924075132558, "learning_rate": 7.386655260622247e-06, "loss": 0.1438, "step": 12387 }, { "epoch": 0.3613979812124395, "grad_norm": 0.8253438292834724, "learning_rate": 7.386240110952881e-06, "loss": 0.1533, "step": 12388 }, { "epoch": 0.36142715444308304, "grad_norm": 0.931771747767439, "learning_rate": 7.385824939979735e-06, "loss": 0.1348, "step": 12389 }, { "epoch": 0.3614563276737266, "grad_norm": 0.915816847488074, "learning_rate": 7.385409747706511e-06, "loss": 0.164, "step": 12390 }, { "epoch": 0.36148550090437015, "grad_norm": 0.8121290138072851, "learning_rate": 7.38499453413692e-06, "loss": 0.1439, "step": 12391 }, { "epoch": 0.3615146741350137, "grad_norm": 0.7327947356452752, "learning_rate": 7.3845792992746665e-06, "loss": 0.1581, "step": 12392 }, { "epoch": 0.36154384736565726, "grad_norm": 0.8533791669557264, "learning_rate": 7.384164043123458e-06, "loss": 0.1267, "step": 12393 }, { "epoch": 0.3615730205963008, "grad_norm": 0.8947048415332206, "learning_rate": 7.383748765687002e-06, "loss": 0.1297, "step": 12394 }, { "epoch": 0.3616021938269444, "grad_norm": 0.9297131469072029, "learning_rate": 7.383333466969007e-06, "loss": 0.1426, "step": 12395 }, { "epoch": 0.36163136705758797, "grad_norm": 0.9030085673039439, "learning_rate": 7.38291814697318e-06, "loss": 0.1448, "step": 12396 }, { "epoch": 0.3616605402882315, "grad_norm": 0.7871505040381325, "learning_rate": 7.382502805703227e-06, "loss": 0.1374, "step": 12397 }, { "epoch": 0.3616897135188751, "grad_norm": 0.7660459682425128, "learning_rate": 7.382087443162859e-06, "loss": 0.1723, "step": 12398 }, { "epoch": 0.36171888674951863, "grad_norm": 0.7032781138080652, "learning_rate": 7.381672059355782e-06, "loss": 0.1542, "step": 12399 }, { "epoch": 0.3617480599801622, "grad_norm": 1.1822475436946003, "learning_rate": 7.3812566542857055e-06, "loss": 0.1524, "step": 12400 }, { "epoch": 0.36177723321080574, "grad_norm": 0.8368114519356725, "learning_rate": 7.3808412279563394e-06, "loss": 0.1587, "step": 12401 }, { "epoch": 0.36180640644144935, "grad_norm": 0.8324356038995239, "learning_rate": 7.38042578037139e-06, "loss": 0.157, "step": 12402 }, { "epoch": 0.3618355796720929, "grad_norm": 0.6767675483795794, "learning_rate": 7.380010311534568e-06, "loss": 0.1458, "step": 12403 }, { "epoch": 0.36186475290273645, "grad_norm": 0.6801981630620667, "learning_rate": 7.3795948214495816e-06, "loss": 0.1446, "step": 12404 }, { "epoch": 0.36189392613338, "grad_norm": 0.7067335317127671, "learning_rate": 7.379179310120139e-06, "loss": 0.1338, "step": 12405 }, { "epoch": 0.36192309936402356, "grad_norm": 0.9515411579017032, "learning_rate": 7.378763777549955e-06, "loss": 0.1409, "step": 12406 }, { "epoch": 0.3619522725946671, "grad_norm": 0.8145976168747101, "learning_rate": 7.378348223742734e-06, "loss": 0.1328, "step": 12407 }, { "epoch": 0.3619814458253107, "grad_norm": 0.7151310623421635, "learning_rate": 7.377932648702189e-06, "loss": 0.1318, "step": 12408 }, { "epoch": 0.3620106190559543, "grad_norm": 0.8839560193938235, "learning_rate": 7.377517052432027e-06, "loss": 0.1712, "step": 12409 }, { "epoch": 0.36203979228659783, "grad_norm": 0.8707843326698027, "learning_rate": 7.377101434935961e-06, "loss": 0.1531, "step": 12410 }, { "epoch": 0.3620689655172414, "grad_norm": 0.8353989689392917, "learning_rate": 7.376685796217702e-06, "loss": 0.1403, "step": 12411 }, { "epoch": 0.36209813874788493, "grad_norm": 0.7556286038576497, "learning_rate": 7.376270136280958e-06, "loss": 0.1662, "step": 12412 }, { "epoch": 0.3621273119785285, "grad_norm": 1.0941445251976925, "learning_rate": 7.375854455129443e-06, "loss": 0.1254, "step": 12413 }, { "epoch": 0.36215648520917204, "grad_norm": 0.8740104712910368, "learning_rate": 7.375438752766864e-06, "loss": 0.1476, "step": 12414 }, { "epoch": 0.36218565843981565, "grad_norm": 0.7790841822211408, "learning_rate": 7.375023029196937e-06, "loss": 0.1488, "step": 12415 }, { "epoch": 0.3622148316704592, "grad_norm": 0.8175314120822311, "learning_rate": 7.374607284423373e-06, "loss": 0.149, "step": 12416 }, { "epoch": 0.36224400490110276, "grad_norm": 1.1122828174219652, "learning_rate": 7.374191518449878e-06, "loss": 0.1689, "step": 12417 }, { "epoch": 0.3622731781317463, "grad_norm": 0.9378285633316117, "learning_rate": 7.373775731280172e-06, "loss": 0.1331, "step": 12418 }, { "epoch": 0.36230235136238986, "grad_norm": 1.301318761980158, "learning_rate": 7.37335992291796e-06, "loss": 0.1541, "step": 12419 }, { "epoch": 0.3623315245930334, "grad_norm": 0.8942643632562767, "learning_rate": 7.3729440933669575e-06, "loss": 0.1633, "step": 12420 }, { "epoch": 0.36236069782367697, "grad_norm": 0.8358110494033888, "learning_rate": 7.372528242630878e-06, "loss": 0.1183, "step": 12421 }, { "epoch": 0.3623898710543206, "grad_norm": 1.0317164305132445, "learning_rate": 7.372112370713431e-06, "loss": 0.1443, "step": 12422 }, { "epoch": 0.36241904428496413, "grad_norm": 0.8983204452817084, "learning_rate": 7.371696477618333e-06, "loss": 0.1569, "step": 12423 }, { "epoch": 0.3624482175156077, "grad_norm": 0.8926004064186805, "learning_rate": 7.3712805633492935e-06, "loss": 0.1423, "step": 12424 }, { "epoch": 0.36247739074625124, "grad_norm": 0.8670794270314263, "learning_rate": 7.370864627910027e-06, "loss": 0.1456, "step": 12425 }, { "epoch": 0.3625065639768948, "grad_norm": 0.8985125181143718, "learning_rate": 7.370448671304248e-06, "loss": 0.1407, "step": 12426 }, { "epoch": 0.36253573720753834, "grad_norm": 0.7063648041598328, "learning_rate": 7.370032693535669e-06, "loss": 0.1499, "step": 12427 }, { "epoch": 0.3625649104381819, "grad_norm": 0.8830294779583203, "learning_rate": 7.369616694608004e-06, "loss": 0.148, "step": 12428 }, { "epoch": 0.3625940836688255, "grad_norm": 0.8577546795229164, "learning_rate": 7.369200674524966e-06, "loss": 0.1704, "step": 12429 }, { "epoch": 0.36262325689946906, "grad_norm": 0.7825779464405499, "learning_rate": 7.36878463329027e-06, "loss": 0.1719, "step": 12430 }, { "epoch": 0.3626524301301126, "grad_norm": 0.738730076741277, "learning_rate": 7.368368570907633e-06, "loss": 0.136, "step": 12431 }, { "epoch": 0.36268160336075617, "grad_norm": 0.8400014819160159, "learning_rate": 7.367952487380763e-06, "loss": 0.1406, "step": 12432 }, { "epoch": 0.3627107765913997, "grad_norm": 0.7990516261637564, "learning_rate": 7.367536382713381e-06, "loss": 0.1586, "step": 12433 }, { "epoch": 0.3627399498220433, "grad_norm": 0.8288024477863183, "learning_rate": 7.367120256909198e-06, "loss": 0.1413, "step": 12434 }, { "epoch": 0.3627691230526869, "grad_norm": 1.0154243653961876, "learning_rate": 7.366704109971929e-06, "loss": 0.1455, "step": 12435 }, { "epoch": 0.36279829628333043, "grad_norm": 0.8138546523351705, "learning_rate": 7.366287941905295e-06, "loss": 0.1483, "step": 12436 }, { "epoch": 0.362827469513974, "grad_norm": 0.7551388897055035, "learning_rate": 7.365871752713003e-06, "loss": 0.1437, "step": 12437 }, { "epoch": 0.36285664274461754, "grad_norm": 0.8987613971071026, "learning_rate": 7.365455542398775e-06, "loss": 0.1355, "step": 12438 }, { "epoch": 0.3628858159752611, "grad_norm": 0.8256795075390594, "learning_rate": 7.365039310966324e-06, "loss": 0.1478, "step": 12439 }, { "epoch": 0.36291498920590465, "grad_norm": 0.8169888938021671, "learning_rate": 7.364623058419367e-06, "loss": 0.1734, "step": 12440 }, { "epoch": 0.3629441624365482, "grad_norm": 0.925222291897439, "learning_rate": 7.364206784761618e-06, "loss": 0.1717, "step": 12441 }, { "epoch": 0.3629733356671918, "grad_norm": 0.9039461568838147, "learning_rate": 7.363790489996797e-06, "loss": 0.1408, "step": 12442 }, { "epoch": 0.36300250889783536, "grad_norm": 0.8004050674599028, "learning_rate": 7.363374174128619e-06, "loss": 0.1447, "step": 12443 }, { "epoch": 0.3630316821284789, "grad_norm": 0.8243223765501508, "learning_rate": 7.362957837160799e-06, "loss": 0.1367, "step": 12444 }, { "epoch": 0.36306085535912247, "grad_norm": 0.7991490427144883, "learning_rate": 7.362541479097056e-06, "loss": 0.1543, "step": 12445 }, { "epoch": 0.363090028589766, "grad_norm": 1.0016952136525967, "learning_rate": 7.3621250999411085e-06, "loss": 0.1516, "step": 12446 }, { "epoch": 0.3631192018204096, "grad_norm": 1.0991795309553871, "learning_rate": 7.36170869969667e-06, "loss": 0.1734, "step": 12447 }, { "epoch": 0.36314837505105313, "grad_norm": 0.8366821099669521, "learning_rate": 7.361292278367461e-06, "loss": 0.1499, "step": 12448 }, { "epoch": 0.36317754828169674, "grad_norm": 0.7070275659050894, "learning_rate": 7.360875835957198e-06, "loss": 0.1349, "step": 12449 }, { "epoch": 0.3632067215123403, "grad_norm": 0.7723239819916525, "learning_rate": 7.360459372469598e-06, "loss": 0.1388, "step": 12450 }, { "epoch": 0.36323589474298384, "grad_norm": 0.6756298929571691, "learning_rate": 7.360042887908382e-06, "loss": 0.155, "step": 12451 }, { "epoch": 0.3632650679736274, "grad_norm": 0.8827125519419131, "learning_rate": 7.359626382277265e-06, "loss": 0.1327, "step": 12452 }, { "epoch": 0.36329424120427095, "grad_norm": 0.8041012971349712, "learning_rate": 7.359209855579968e-06, "loss": 0.1076, "step": 12453 }, { "epoch": 0.3633234144349145, "grad_norm": 0.697372531692577, "learning_rate": 7.358793307820209e-06, "loss": 0.1375, "step": 12454 }, { "epoch": 0.36335258766555806, "grad_norm": 1.3237811105848196, "learning_rate": 7.358376739001704e-06, "loss": 0.1726, "step": 12455 }, { "epoch": 0.36338176089620167, "grad_norm": 0.9758560351141198, "learning_rate": 7.357960149128177e-06, "loss": 0.1337, "step": 12456 }, { "epoch": 0.3634109341268452, "grad_norm": 0.7721145331937025, "learning_rate": 7.357543538203344e-06, "loss": 0.1452, "step": 12457 }, { "epoch": 0.3634401073574888, "grad_norm": 0.8972062164794726, "learning_rate": 7.357126906230926e-06, "loss": 0.1342, "step": 12458 }, { "epoch": 0.3634692805881323, "grad_norm": 0.9051163989848191, "learning_rate": 7.35671025321464e-06, "loss": 0.1753, "step": 12459 }, { "epoch": 0.3634984538187759, "grad_norm": 1.1198352697714742, "learning_rate": 7.356293579158207e-06, "loss": 0.1451, "step": 12460 }, { "epoch": 0.36352762704941943, "grad_norm": 1.0176748330465637, "learning_rate": 7.355876884065349e-06, "loss": 0.1333, "step": 12461 }, { "epoch": 0.36355680028006304, "grad_norm": 0.9384466077659679, "learning_rate": 7.355460167939783e-06, "loss": 0.1503, "step": 12462 }, { "epoch": 0.3635859735107066, "grad_norm": 0.8246058816548979, "learning_rate": 7.3550434307852335e-06, "loss": 0.1331, "step": 12463 }, { "epoch": 0.36361514674135015, "grad_norm": 1.0233503954893863, "learning_rate": 7.354626672605416e-06, "loss": 0.1419, "step": 12464 }, { "epoch": 0.3636443199719937, "grad_norm": 0.8133834003284872, "learning_rate": 7.354209893404054e-06, "loss": 0.1296, "step": 12465 }, { "epoch": 0.36367349320263725, "grad_norm": 1.2913316892089426, "learning_rate": 7.353793093184869e-06, "loss": 0.1569, "step": 12466 }, { "epoch": 0.3637026664332808, "grad_norm": 0.8334656644246954, "learning_rate": 7.353376271951581e-06, "loss": 0.1505, "step": 12467 }, { "epoch": 0.36373183966392436, "grad_norm": 0.8517273287822625, "learning_rate": 7.352959429707911e-06, "loss": 0.1605, "step": 12468 }, { "epoch": 0.36376101289456797, "grad_norm": 0.8995970232561108, "learning_rate": 7.3525425664575815e-06, "loss": 0.1591, "step": 12469 }, { "epoch": 0.3637901861252115, "grad_norm": 0.7371374702805029, "learning_rate": 7.352125682204313e-06, "loss": 0.1557, "step": 12470 }, { "epoch": 0.3638193593558551, "grad_norm": 0.9365223917183233, "learning_rate": 7.351708776951828e-06, "loss": 0.1453, "step": 12471 }, { "epoch": 0.36384853258649863, "grad_norm": 0.8667120873011884, "learning_rate": 7.351291850703848e-06, "loss": 0.1408, "step": 12472 }, { "epoch": 0.3638777058171422, "grad_norm": 0.7788669940383607, "learning_rate": 7.350874903464097e-06, "loss": 0.1465, "step": 12473 }, { "epoch": 0.36390687904778574, "grad_norm": 0.9206504844211706, "learning_rate": 7.350457935236295e-06, "loss": 0.1449, "step": 12474 }, { "epoch": 0.3639360522784293, "grad_norm": 1.1593282146380695, "learning_rate": 7.350040946024165e-06, "loss": 0.156, "step": 12475 }, { "epoch": 0.3639652255090729, "grad_norm": 0.9126505802412705, "learning_rate": 7.349623935831432e-06, "loss": 0.136, "step": 12476 }, { "epoch": 0.36399439873971645, "grad_norm": 0.7666088844096738, "learning_rate": 7.349206904661816e-06, "loss": 0.1421, "step": 12477 }, { "epoch": 0.36402357197036, "grad_norm": 0.7484162238205191, "learning_rate": 7.348789852519043e-06, "loss": 0.1389, "step": 12478 }, { "epoch": 0.36405274520100356, "grad_norm": 0.8097442914824786, "learning_rate": 7.348372779406834e-06, "loss": 0.1389, "step": 12479 }, { "epoch": 0.3640819184316471, "grad_norm": 0.7987503546657555, "learning_rate": 7.347955685328912e-06, "loss": 0.1251, "step": 12480 }, { "epoch": 0.36411109166229066, "grad_norm": 0.9288490047922632, "learning_rate": 7.347538570289005e-06, "loss": 0.1302, "step": 12481 }, { "epoch": 0.3641402648929342, "grad_norm": 0.6750726528592491, "learning_rate": 7.347121434290834e-06, "loss": 0.1229, "step": 12482 }, { "epoch": 0.3641694381235778, "grad_norm": 0.7746199922297062, "learning_rate": 7.346704277338122e-06, "loss": 0.1453, "step": 12483 }, { "epoch": 0.3641986113542214, "grad_norm": 0.873953568339805, "learning_rate": 7.346287099434593e-06, "loss": 0.1409, "step": 12484 }, { "epoch": 0.36422778458486493, "grad_norm": 0.7120896229151646, "learning_rate": 7.345869900583975e-06, "loss": 0.1519, "step": 12485 }, { "epoch": 0.3642569578155085, "grad_norm": 0.8220019306038706, "learning_rate": 7.345452680789989e-06, "loss": 0.1612, "step": 12486 }, { "epoch": 0.36428613104615204, "grad_norm": 0.8239607206109042, "learning_rate": 7.345035440056363e-06, "loss": 0.1742, "step": 12487 }, { "epoch": 0.3643153042767956, "grad_norm": 0.9195671088239025, "learning_rate": 7.34461817838682e-06, "loss": 0.1548, "step": 12488 }, { "epoch": 0.36434447750743915, "grad_norm": 0.820900638310747, "learning_rate": 7.344200895785083e-06, "loss": 0.1483, "step": 12489 }, { "epoch": 0.36437365073808275, "grad_norm": 0.9859117442782211, "learning_rate": 7.343783592254883e-06, "loss": 0.1403, "step": 12490 }, { "epoch": 0.3644028239687263, "grad_norm": 0.9502163373819177, "learning_rate": 7.3433662677999426e-06, "loss": 0.1631, "step": 12491 }, { "epoch": 0.36443199719936986, "grad_norm": 0.86215746008853, "learning_rate": 7.342948922423985e-06, "loss": 0.1289, "step": 12492 }, { "epoch": 0.3644611704300134, "grad_norm": 1.028385262362884, "learning_rate": 7.342531556130742e-06, "loss": 0.1184, "step": 12493 }, { "epoch": 0.36449034366065697, "grad_norm": 0.8530250849243711, "learning_rate": 7.342114168923935e-06, "loss": 0.1511, "step": 12494 }, { "epoch": 0.3645195168913005, "grad_norm": 0.9723990594033373, "learning_rate": 7.341696760807291e-06, "loss": 0.1756, "step": 12495 }, { "epoch": 0.36454869012194413, "grad_norm": 0.9925847724373023, "learning_rate": 7.341279331784539e-06, "loss": 0.1366, "step": 12496 }, { "epoch": 0.3645778633525877, "grad_norm": 1.0303219629002769, "learning_rate": 7.340861881859403e-06, "loss": 0.1519, "step": 12497 }, { "epoch": 0.36460703658323124, "grad_norm": 0.9082810844900719, "learning_rate": 7.34044441103561e-06, "loss": 0.1422, "step": 12498 }, { "epoch": 0.3646362098138748, "grad_norm": 0.927028182869787, "learning_rate": 7.340026919316889e-06, "loss": 0.1354, "step": 12499 }, { "epoch": 0.36466538304451834, "grad_norm": 0.7169585676932424, "learning_rate": 7.339609406706966e-06, "loss": 0.1121, "step": 12500 }, { "epoch": 0.3646945562751619, "grad_norm": 0.7099762171401988, "learning_rate": 7.339191873209569e-06, "loss": 0.1406, "step": 12501 }, { "epoch": 0.36472372950580545, "grad_norm": 0.8390462407599057, "learning_rate": 7.3387743188284255e-06, "loss": 0.165, "step": 12502 }, { "epoch": 0.36475290273644906, "grad_norm": 0.7523429675407453, "learning_rate": 7.338356743567264e-06, "loss": 0.1242, "step": 12503 }, { "epoch": 0.3647820759670926, "grad_norm": 0.9066002377405343, "learning_rate": 7.3379391474298085e-06, "loss": 0.1673, "step": 12504 }, { "epoch": 0.36481124919773616, "grad_norm": 1.3086197754302376, "learning_rate": 7.337521530419793e-06, "loss": 0.1544, "step": 12505 }, { "epoch": 0.3648404224283797, "grad_norm": 1.0820332839361442, "learning_rate": 7.337103892540945e-06, "loss": 0.1546, "step": 12506 }, { "epoch": 0.36486959565902327, "grad_norm": 0.7992437926444342, "learning_rate": 7.336686233796988e-06, "loss": 0.1554, "step": 12507 }, { "epoch": 0.3648987688896668, "grad_norm": 0.9363009022825539, "learning_rate": 7.336268554191657e-06, "loss": 0.118, "step": 12508 }, { "epoch": 0.3649279421203104, "grad_norm": 1.084394193415658, "learning_rate": 7.335850853728675e-06, "loss": 0.1483, "step": 12509 }, { "epoch": 0.364957115350954, "grad_norm": 0.9791888893527011, "learning_rate": 7.335433132411775e-06, "loss": 0.1494, "step": 12510 }, { "epoch": 0.36498628858159754, "grad_norm": 0.7291919732518494, "learning_rate": 7.335015390244688e-06, "loss": 0.1357, "step": 12511 }, { "epoch": 0.3650154618122411, "grad_norm": 0.8973636101942085, "learning_rate": 7.334597627231138e-06, "loss": 0.1765, "step": 12512 }, { "epoch": 0.36504463504288465, "grad_norm": 0.8235104568007249, "learning_rate": 7.334179843374859e-06, "loss": 0.1397, "step": 12513 }, { "epoch": 0.3650738082735282, "grad_norm": 0.8596961124766973, "learning_rate": 7.333762038679579e-06, "loss": 0.1459, "step": 12514 }, { "epoch": 0.36510298150417175, "grad_norm": 0.8554809964876028, "learning_rate": 7.3333442131490294e-06, "loss": 0.1492, "step": 12515 }, { "epoch": 0.3651321547348153, "grad_norm": 0.8191381647651743, "learning_rate": 7.332926366786939e-06, "loss": 0.1575, "step": 12516 }, { "epoch": 0.3651613279654589, "grad_norm": 0.9718655156205231, "learning_rate": 7.33250849959704e-06, "loss": 0.1343, "step": 12517 }, { "epoch": 0.36519050119610247, "grad_norm": 0.8546476213944995, "learning_rate": 7.3320906115830615e-06, "loss": 0.1414, "step": 12518 }, { "epoch": 0.365219674426746, "grad_norm": 1.244414597345418, "learning_rate": 7.331672702748733e-06, "loss": 0.1709, "step": 12519 }, { "epoch": 0.3652488476573896, "grad_norm": 0.6982264029886699, "learning_rate": 7.331254773097789e-06, "loss": 0.1346, "step": 12520 }, { "epoch": 0.36527802088803313, "grad_norm": 0.9942275033912196, "learning_rate": 7.33083682263396e-06, "loss": 0.1182, "step": 12521 }, { "epoch": 0.3653071941186767, "grad_norm": 0.8936472742380306, "learning_rate": 7.330418851360974e-06, "loss": 0.1383, "step": 12522 }, { "epoch": 0.3653363673493203, "grad_norm": 0.7166159321473673, "learning_rate": 7.330000859282567e-06, "loss": 0.1462, "step": 12523 }, { "epoch": 0.36536554057996384, "grad_norm": 0.9633409343417314, "learning_rate": 7.329582846402467e-06, "loss": 0.1607, "step": 12524 }, { "epoch": 0.3653947138106074, "grad_norm": 1.1610880501000653, "learning_rate": 7.329164812724405e-06, "loss": 0.1804, "step": 12525 }, { "epoch": 0.36542388704125095, "grad_norm": 0.5569456887478516, "learning_rate": 7.32874675825212e-06, "loss": 0.1408, "step": 12526 }, { "epoch": 0.3654530602718945, "grad_norm": 1.125308199847849, "learning_rate": 7.328328682989338e-06, "loss": 0.1547, "step": 12527 }, { "epoch": 0.36548223350253806, "grad_norm": 1.0911837240978715, "learning_rate": 7.327910586939794e-06, "loss": 0.1535, "step": 12528 }, { "epoch": 0.3655114067331816, "grad_norm": 0.6655995335581377, "learning_rate": 7.327492470107218e-06, "loss": 0.1287, "step": 12529 }, { "epoch": 0.3655405799638252, "grad_norm": 0.8633033130423697, "learning_rate": 7.327074332495348e-06, "loss": 0.138, "step": 12530 }, { "epoch": 0.36556975319446877, "grad_norm": 1.029517089838608, "learning_rate": 7.326656174107911e-06, "loss": 0.1588, "step": 12531 }, { "epoch": 0.3655989264251123, "grad_norm": 0.8222599758116758, "learning_rate": 7.326237994948644e-06, "loss": 0.1316, "step": 12532 }, { "epoch": 0.3656280996557559, "grad_norm": 0.815122704375594, "learning_rate": 7.325819795021281e-06, "loss": 0.1559, "step": 12533 }, { "epoch": 0.36565727288639943, "grad_norm": 0.9597640113146232, "learning_rate": 7.325401574329551e-06, "loss": 0.1494, "step": 12534 }, { "epoch": 0.365686446117043, "grad_norm": 0.8239063456490182, "learning_rate": 7.3249833328771935e-06, "loss": 0.151, "step": 12535 }, { "epoch": 0.36571561934768654, "grad_norm": 0.9384049183735227, "learning_rate": 7.3245650706679395e-06, "loss": 0.1573, "step": 12536 }, { "epoch": 0.36574479257833015, "grad_norm": 0.836803788745504, "learning_rate": 7.324146787705522e-06, "loss": 0.1367, "step": 12537 }, { "epoch": 0.3657739658089737, "grad_norm": 0.9357429364670283, "learning_rate": 7.323728483993678e-06, "loss": 0.1714, "step": 12538 }, { "epoch": 0.36580313903961725, "grad_norm": 0.7367031054890589, "learning_rate": 7.323310159536141e-06, "loss": 0.1551, "step": 12539 }, { "epoch": 0.3658323122702608, "grad_norm": 0.8075364455756667, "learning_rate": 7.322891814336645e-06, "loss": 0.1333, "step": 12540 }, { "epoch": 0.36586148550090436, "grad_norm": 0.833073740749177, "learning_rate": 7.3224734483989254e-06, "loss": 0.1683, "step": 12541 }, { "epoch": 0.3658906587315479, "grad_norm": 0.871719146026141, "learning_rate": 7.322055061726717e-06, "loss": 0.1647, "step": 12542 }, { "epoch": 0.36591983196219147, "grad_norm": 0.9603750499039191, "learning_rate": 7.321636654323756e-06, "loss": 0.1398, "step": 12543 }, { "epoch": 0.3659490051928351, "grad_norm": 0.8743735121129615, "learning_rate": 7.321218226193777e-06, "loss": 0.1707, "step": 12544 }, { "epoch": 0.36597817842347863, "grad_norm": 0.9361800895713882, "learning_rate": 7.320799777340516e-06, "loss": 0.1939, "step": 12545 }, { "epoch": 0.3660073516541222, "grad_norm": 1.4397172012087283, "learning_rate": 7.320381307767708e-06, "loss": 0.1726, "step": 12546 }, { "epoch": 0.36603652488476573, "grad_norm": 0.7590116649866849, "learning_rate": 7.319962817479089e-06, "loss": 0.134, "step": 12547 }, { "epoch": 0.3660656981154093, "grad_norm": 0.9971170486586559, "learning_rate": 7.319544306478398e-06, "loss": 0.139, "step": 12548 }, { "epoch": 0.36609487134605284, "grad_norm": 1.0121016816886785, "learning_rate": 7.3191257747693664e-06, "loss": 0.1427, "step": 12549 }, { "epoch": 0.36612404457669645, "grad_norm": 1.0037128635150039, "learning_rate": 7.318707222355735e-06, "loss": 0.1373, "step": 12550 }, { "epoch": 0.36615321780734, "grad_norm": 1.1971441068620639, "learning_rate": 7.318288649241241e-06, "loss": 0.1292, "step": 12551 }, { "epoch": 0.36618239103798356, "grad_norm": 0.8520980034299164, "learning_rate": 7.317870055429615e-06, "loss": 0.1645, "step": 12552 }, { "epoch": 0.3662115642686271, "grad_norm": 0.8578629480944141, "learning_rate": 7.317451440924602e-06, "loss": 0.1567, "step": 12553 }, { "epoch": 0.36624073749927066, "grad_norm": 0.8119415694644823, "learning_rate": 7.317032805729935e-06, "loss": 0.1311, "step": 12554 }, { "epoch": 0.3662699107299142, "grad_norm": 1.067426569165156, "learning_rate": 7.31661414984935e-06, "loss": 0.1482, "step": 12555 }, { "epoch": 0.36629908396055777, "grad_norm": 0.7879180641463571, "learning_rate": 7.3161954732865906e-06, "loss": 0.1483, "step": 12556 }, { "epoch": 0.3663282571912014, "grad_norm": 0.8850431311253087, "learning_rate": 7.315776776045388e-06, "loss": 0.1277, "step": 12557 }, { "epoch": 0.36635743042184493, "grad_norm": 0.7589116034713146, "learning_rate": 7.315358058129485e-06, "loss": 0.1464, "step": 12558 }, { "epoch": 0.3663866036524885, "grad_norm": 0.8775888047374377, "learning_rate": 7.314939319542617e-06, "loss": 0.1578, "step": 12559 }, { "epoch": 0.36641577688313204, "grad_norm": 0.9138593029972256, "learning_rate": 7.314520560288522e-06, "loss": 0.1452, "step": 12560 }, { "epoch": 0.3664449501137756, "grad_norm": 0.9309638777077405, "learning_rate": 7.314101780370942e-06, "loss": 0.1611, "step": 12561 }, { "epoch": 0.36647412334441914, "grad_norm": 0.8200125293243199, "learning_rate": 7.313682979793614e-06, "loss": 0.1687, "step": 12562 }, { "epoch": 0.3665032965750627, "grad_norm": 0.7819398768318264, "learning_rate": 7.313264158560276e-06, "loss": 0.1602, "step": 12563 }, { "epoch": 0.3665324698057063, "grad_norm": 0.960687783706917, "learning_rate": 7.312845316674667e-06, "loss": 0.1535, "step": 12564 }, { "epoch": 0.36656164303634986, "grad_norm": 0.806436860397053, "learning_rate": 7.312426454140528e-06, "loss": 0.1455, "step": 12565 }, { "epoch": 0.3665908162669934, "grad_norm": 0.8037049708673916, "learning_rate": 7.312007570961598e-06, "loss": 0.1467, "step": 12566 }, { "epoch": 0.36661998949763697, "grad_norm": 0.8475448014865734, "learning_rate": 7.311588667141615e-06, "loss": 0.1775, "step": 12567 }, { "epoch": 0.3666491627282805, "grad_norm": 0.8330063194912772, "learning_rate": 7.311169742684321e-06, "loss": 0.1507, "step": 12568 }, { "epoch": 0.3666783359589241, "grad_norm": 0.9139978268192165, "learning_rate": 7.3107507975934555e-06, "loss": 0.1654, "step": 12569 }, { "epoch": 0.3667075091895676, "grad_norm": 0.7029375873164558, "learning_rate": 7.3103318318727566e-06, "loss": 0.123, "step": 12570 }, { "epoch": 0.36673668242021124, "grad_norm": 0.7456527961582567, "learning_rate": 7.30991284552597e-06, "loss": 0.1627, "step": 12571 }, { "epoch": 0.3667658556508548, "grad_norm": 0.8667322478230863, "learning_rate": 7.309493838556832e-06, "loss": 0.1414, "step": 12572 }, { "epoch": 0.36679502888149834, "grad_norm": 0.9750652424929281, "learning_rate": 7.309074810969083e-06, "loss": 0.1484, "step": 12573 }, { "epoch": 0.3668242021121419, "grad_norm": 0.751521085709621, "learning_rate": 7.308655762766466e-06, "loss": 0.1632, "step": 12574 }, { "epoch": 0.36685337534278545, "grad_norm": 0.8418233374470161, "learning_rate": 7.30823669395272e-06, "loss": 0.114, "step": 12575 }, { "epoch": 0.366882548573429, "grad_norm": 0.8811982961030276, "learning_rate": 7.30781760453159e-06, "loss": 0.1416, "step": 12576 }, { "epoch": 0.3669117218040726, "grad_norm": 1.04891068844708, "learning_rate": 7.307398494506814e-06, "loss": 0.1682, "step": 12577 }, { "epoch": 0.36694089503471616, "grad_norm": 1.0826757853124749, "learning_rate": 7.306979363882136e-06, "loss": 0.1396, "step": 12578 }, { "epoch": 0.3669700682653597, "grad_norm": 0.7940050144481171, "learning_rate": 7.306560212661295e-06, "loss": 0.1379, "step": 12579 }, { "epoch": 0.36699924149600327, "grad_norm": 0.6952751578376332, "learning_rate": 7.306141040848037e-06, "loss": 0.1508, "step": 12580 }, { "epoch": 0.3670284147266468, "grad_norm": 0.9643883866133751, "learning_rate": 7.305721848446103e-06, "loss": 0.1702, "step": 12581 }, { "epoch": 0.3670575879572904, "grad_norm": 0.7867314299628712, "learning_rate": 7.305302635459233e-06, "loss": 0.1437, "step": 12582 }, { "epoch": 0.36708676118793393, "grad_norm": 1.0084647801481508, "learning_rate": 7.304883401891173e-06, "loss": 0.1812, "step": 12583 }, { "epoch": 0.36711593441857754, "grad_norm": 0.8342827322264076, "learning_rate": 7.304464147745662e-06, "loss": 0.1561, "step": 12584 }, { "epoch": 0.3671451076492211, "grad_norm": 0.8581378104788102, "learning_rate": 7.3040448730264455e-06, "loss": 0.1453, "step": 12585 }, { "epoch": 0.36717428087986465, "grad_norm": 0.9456648386883989, "learning_rate": 7.303625577737269e-06, "loss": 0.1307, "step": 12586 }, { "epoch": 0.3672034541105082, "grad_norm": 0.84759138025627, "learning_rate": 7.303206261881871e-06, "loss": 0.1276, "step": 12587 }, { "epoch": 0.36723262734115175, "grad_norm": 0.6764654554263654, "learning_rate": 7.302786925463998e-06, "loss": 0.1237, "step": 12588 }, { "epoch": 0.3672618005717953, "grad_norm": 0.8086057539763226, "learning_rate": 7.302367568487393e-06, "loss": 0.1582, "step": 12589 }, { "epoch": 0.36729097380243886, "grad_norm": 1.0471987049130558, "learning_rate": 7.3019481909558e-06, "loss": 0.1749, "step": 12590 }, { "epoch": 0.36732014703308247, "grad_norm": 0.7825322907667858, "learning_rate": 7.301528792872963e-06, "loss": 0.1754, "step": 12591 }, { "epoch": 0.367349320263726, "grad_norm": 0.808594072427891, "learning_rate": 7.301109374242626e-06, "loss": 0.1592, "step": 12592 }, { "epoch": 0.3673784934943696, "grad_norm": 0.7378181528008292, "learning_rate": 7.300689935068534e-06, "loss": 0.1381, "step": 12593 }, { "epoch": 0.3674076667250131, "grad_norm": 1.1631232788413999, "learning_rate": 7.3002704753544316e-06, "loss": 0.1713, "step": 12594 }, { "epoch": 0.3674368399556567, "grad_norm": 0.7272131576528931, "learning_rate": 7.299850995104063e-06, "loss": 0.1346, "step": 12595 }, { "epoch": 0.36746601318630023, "grad_norm": 0.7224715044228271, "learning_rate": 7.2994314943211755e-06, "loss": 0.1414, "step": 12596 }, { "epoch": 0.3674951864169438, "grad_norm": 0.84799081098376, "learning_rate": 7.299011973009511e-06, "loss": 0.145, "step": 12597 }, { "epoch": 0.3675243596475874, "grad_norm": 1.0170357671998496, "learning_rate": 7.298592431172818e-06, "loss": 0.1574, "step": 12598 }, { "epoch": 0.36755353287823095, "grad_norm": 0.8404918700923496, "learning_rate": 7.2981728688148365e-06, "loss": 0.1352, "step": 12599 }, { "epoch": 0.3675827061088745, "grad_norm": 0.8144727261788373, "learning_rate": 7.297753285939319e-06, "loss": 0.1271, "step": 12600 }, { "epoch": 0.36761187933951806, "grad_norm": 1.0073862448209447, "learning_rate": 7.297333682550009e-06, "loss": 0.1326, "step": 12601 }, { "epoch": 0.3676410525701616, "grad_norm": 1.1080962498161437, "learning_rate": 7.296914058650653e-06, "loss": 0.1415, "step": 12602 }, { "epoch": 0.36767022580080516, "grad_norm": 0.7716627843074678, "learning_rate": 7.296494414244996e-06, "loss": 0.1668, "step": 12603 }, { "epoch": 0.36769939903144877, "grad_norm": 0.9608901366792665, "learning_rate": 7.296074749336785e-06, "loss": 0.1642, "step": 12604 }, { "epoch": 0.3677285722620923, "grad_norm": 0.7199984408702625, "learning_rate": 7.295655063929765e-06, "loss": 0.1473, "step": 12605 }, { "epoch": 0.3677577454927359, "grad_norm": 0.8288821214606242, "learning_rate": 7.295235358027686e-06, "loss": 0.1569, "step": 12606 }, { "epoch": 0.36778691872337943, "grad_norm": 0.8112232057577572, "learning_rate": 7.294815631634294e-06, "loss": 0.1401, "step": 12607 }, { "epoch": 0.367816091954023, "grad_norm": 0.654042417879684, "learning_rate": 7.294395884753336e-06, "loss": 0.1356, "step": 12608 }, { "epoch": 0.36784526518466654, "grad_norm": 0.6115312187425573, "learning_rate": 7.293976117388558e-06, "loss": 0.1169, "step": 12609 }, { "epoch": 0.3678744384153101, "grad_norm": 0.8874601262272724, "learning_rate": 7.29355632954371e-06, "loss": 0.1531, "step": 12610 }, { "epoch": 0.3679036116459537, "grad_norm": 0.7446788851902356, "learning_rate": 7.293136521222538e-06, "loss": 0.1137, "step": 12611 }, { "epoch": 0.36793278487659725, "grad_norm": 0.7548866430434173, "learning_rate": 7.292716692428791e-06, "loss": 0.1368, "step": 12612 }, { "epoch": 0.3679619581072408, "grad_norm": 0.7365736692314704, "learning_rate": 7.292296843166217e-06, "loss": 0.1469, "step": 12613 }, { "epoch": 0.36799113133788436, "grad_norm": 0.8264816518176756, "learning_rate": 7.291876973438562e-06, "loss": 0.1535, "step": 12614 }, { "epoch": 0.3680203045685279, "grad_norm": 0.7813558982894001, "learning_rate": 7.291457083249578e-06, "loss": 0.1689, "step": 12615 }, { "epoch": 0.36804947779917146, "grad_norm": 0.8491973925937982, "learning_rate": 7.291037172603013e-06, "loss": 0.1541, "step": 12616 }, { "epoch": 0.368078651029815, "grad_norm": 0.7364897979429192, "learning_rate": 7.2906172415026136e-06, "loss": 0.1657, "step": 12617 }, { "epoch": 0.3681078242604586, "grad_norm": 0.7998676568471853, "learning_rate": 7.290197289952131e-06, "loss": 0.1297, "step": 12618 }, { "epoch": 0.3681369974911022, "grad_norm": 0.828428368684619, "learning_rate": 7.289777317955313e-06, "loss": 0.1625, "step": 12619 }, { "epoch": 0.36816617072174573, "grad_norm": 0.855784368364791, "learning_rate": 7.289357325515911e-06, "loss": 0.1485, "step": 12620 }, { "epoch": 0.3681953439523893, "grad_norm": 0.8547418600021226, "learning_rate": 7.288937312637673e-06, "loss": 0.1478, "step": 12621 }, { "epoch": 0.36822451718303284, "grad_norm": 0.964073973781457, "learning_rate": 7.288517279324349e-06, "loss": 0.1301, "step": 12622 }, { "epoch": 0.3682536904136764, "grad_norm": 0.8274583125272957, "learning_rate": 7.2880972255796875e-06, "loss": 0.1701, "step": 12623 }, { "epoch": 0.36828286364431995, "grad_norm": 0.982394475670459, "learning_rate": 7.287677151407442e-06, "loss": 0.1423, "step": 12624 }, { "epoch": 0.36831203687496356, "grad_norm": 0.787345466973106, "learning_rate": 7.28725705681136e-06, "loss": 0.1225, "step": 12625 }, { "epoch": 0.3683412101056071, "grad_norm": 0.9727213667441302, "learning_rate": 7.286836941795193e-06, "loss": 0.1431, "step": 12626 }, { "epoch": 0.36837038333625066, "grad_norm": 0.9185978711724994, "learning_rate": 7.286416806362693e-06, "loss": 0.1422, "step": 12627 }, { "epoch": 0.3683995565668942, "grad_norm": 0.8118258793856865, "learning_rate": 7.285996650517608e-06, "loss": 0.1366, "step": 12628 }, { "epoch": 0.36842872979753777, "grad_norm": 1.1994577940098725, "learning_rate": 7.285576474263692e-06, "loss": 0.1542, "step": 12629 }, { "epoch": 0.3684579030281813, "grad_norm": 1.0416878269668777, "learning_rate": 7.285156277604693e-06, "loss": 0.1546, "step": 12630 }, { "epoch": 0.3684870762588249, "grad_norm": 1.0730921371275317, "learning_rate": 7.284736060544366e-06, "loss": 0.1426, "step": 12631 }, { "epoch": 0.3685162494894685, "grad_norm": 0.8199071874414507, "learning_rate": 7.284315823086459e-06, "loss": 0.1414, "step": 12632 }, { "epoch": 0.36854542272011204, "grad_norm": 0.9402127848174042, "learning_rate": 7.283895565234729e-06, "loss": 0.133, "step": 12633 }, { "epoch": 0.3685745959507556, "grad_norm": 1.0920864126210375, "learning_rate": 7.283475286992923e-06, "loss": 0.1707, "step": 12634 }, { "epoch": 0.36860376918139914, "grad_norm": 0.8404151566503486, "learning_rate": 7.283054988364793e-06, "loss": 0.1367, "step": 12635 }, { "epoch": 0.3686329424120427, "grad_norm": 0.7585589900651192, "learning_rate": 7.282634669354094e-06, "loss": 0.134, "step": 12636 }, { "epoch": 0.36866211564268625, "grad_norm": 0.9278302596057294, "learning_rate": 7.282214329964578e-06, "loss": 0.1346, "step": 12637 }, { "epoch": 0.36869128887332986, "grad_norm": 0.7470972843530774, "learning_rate": 7.2817939701999974e-06, "loss": 0.1634, "step": 12638 }, { "epoch": 0.3687204621039734, "grad_norm": 0.8306930217254754, "learning_rate": 7.281373590064105e-06, "loss": 0.1637, "step": 12639 }, { "epoch": 0.36874963533461697, "grad_norm": 0.9285373974943016, "learning_rate": 7.280953189560653e-06, "loss": 0.1651, "step": 12640 }, { "epoch": 0.3687788085652605, "grad_norm": 0.9310102750005027, "learning_rate": 7.280532768693396e-06, "loss": 0.1332, "step": 12641 }, { "epoch": 0.36880798179590407, "grad_norm": 0.786755285060495, "learning_rate": 7.280112327466087e-06, "loss": 0.1651, "step": 12642 }, { "epoch": 0.3688371550265476, "grad_norm": 0.9027386530738184, "learning_rate": 7.27969186588248e-06, "loss": 0.1301, "step": 12643 }, { "epoch": 0.3688663282571912, "grad_norm": 0.8144305451701089, "learning_rate": 7.2792713839463255e-06, "loss": 0.1395, "step": 12644 }, { "epoch": 0.3688955014878348, "grad_norm": 1.0296444669740603, "learning_rate": 7.2788508816613836e-06, "loss": 0.1436, "step": 12645 }, { "epoch": 0.36892467471847834, "grad_norm": 0.9730056098013248, "learning_rate": 7.278430359031403e-06, "loss": 0.1472, "step": 12646 }, { "epoch": 0.3689538479491219, "grad_norm": 0.7994043618478935, "learning_rate": 7.278009816060141e-06, "loss": 0.138, "step": 12647 }, { "epoch": 0.36898302117976545, "grad_norm": 0.9740903092945568, "learning_rate": 7.277589252751351e-06, "loss": 0.1426, "step": 12648 }, { "epoch": 0.369012194410409, "grad_norm": 0.9514339440379055, "learning_rate": 7.277168669108787e-06, "loss": 0.1581, "step": 12649 }, { "epoch": 0.36904136764105255, "grad_norm": 0.8376829055759069, "learning_rate": 7.276748065136206e-06, "loss": 0.1339, "step": 12650 }, { "epoch": 0.3690705408716961, "grad_norm": 0.6932765483000005, "learning_rate": 7.27632744083736e-06, "loss": 0.1326, "step": 12651 }, { "epoch": 0.3690997141023397, "grad_norm": 1.0511895405972749, "learning_rate": 7.2759067962160075e-06, "loss": 0.1264, "step": 12652 }, { "epoch": 0.36912888733298327, "grad_norm": 1.0100299524399472, "learning_rate": 7.275486131275903e-06, "loss": 0.1458, "step": 12653 }, { "epoch": 0.3691580605636268, "grad_norm": 0.9109570724650197, "learning_rate": 7.2750654460208e-06, "loss": 0.1526, "step": 12654 }, { "epoch": 0.3691872337942704, "grad_norm": 0.8927953356704228, "learning_rate": 7.274644740454458e-06, "loss": 0.1385, "step": 12655 }, { "epoch": 0.36921640702491393, "grad_norm": 0.8107226697996354, "learning_rate": 7.274224014580627e-06, "loss": 0.15, "step": 12656 }, { "epoch": 0.3692455802555575, "grad_norm": 0.7177052389467927, "learning_rate": 7.27380326840307e-06, "loss": 0.1292, "step": 12657 }, { "epoch": 0.36927475348620104, "grad_norm": 0.8994355904918047, "learning_rate": 7.27338250192554e-06, "loss": 0.1596, "step": 12658 }, { "epoch": 0.36930392671684464, "grad_norm": 0.7671412830821046, "learning_rate": 7.2729617151517915e-06, "loss": 0.1586, "step": 12659 }, { "epoch": 0.3693330999474882, "grad_norm": 0.7200823778342188, "learning_rate": 7.272540908085586e-06, "loss": 0.1628, "step": 12660 }, { "epoch": 0.36936227317813175, "grad_norm": 0.932197094073567, "learning_rate": 7.272120080730677e-06, "loss": 0.1355, "step": 12661 }, { "epoch": 0.3693914464087753, "grad_norm": 0.7493755613652879, "learning_rate": 7.271699233090821e-06, "loss": 0.1338, "step": 12662 }, { "epoch": 0.36942061963941886, "grad_norm": 0.8691183903945077, "learning_rate": 7.271278365169778e-06, "loss": 0.1663, "step": 12663 }, { "epoch": 0.3694497928700624, "grad_norm": 0.7487734779311649, "learning_rate": 7.270857476971303e-06, "loss": 0.1309, "step": 12664 }, { "epoch": 0.369478966100706, "grad_norm": 0.6663862528649757, "learning_rate": 7.270436568499156e-06, "loss": 0.1271, "step": 12665 }, { "epoch": 0.36950813933134957, "grad_norm": 0.8112441244218074, "learning_rate": 7.270015639757092e-06, "loss": 0.1346, "step": 12666 }, { "epoch": 0.3695373125619931, "grad_norm": 3.906884028660531, "learning_rate": 7.269594690748871e-06, "loss": 0.1395, "step": 12667 }, { "epoch": 0.3695664857926367, "grad_norm": 0.9087683544262717, "learning_rate": 7.26917372147825e-06, "loss": 0.1533, "step": 12668 }, { "epoch": 0.36959565902328023, "grad_norm": 0.7699315262554577, "learning_rate": 7.268752731948987e-06, "loss": 0.1395, "step": 12669 }, { "epoch": 0.3696248322539238, "grad_norm": 0.8308879806137981, "learning_rate": 7.268331722164843e-06, "loss": 0.1527, "step": 12670 }, { "epoch": 0.36965400548456734, "grad_norm": 0.7691284472237025, "learning_rate": 7.267910692129574e-06, "loss": 0.1404, "step": 12671 }, { "epoch": 0.36968317871521095, "grad_norm": 0.7064038777247387, "learning_rate": 7.267489641846938e-06, "loss": 0.1552, "step": 12672 }, { "epoch": 0.3697123519458545, "grad_norm": 0.8410894472372811, "learning_rate": 7.267068571320699e-06, "loss": 0.1534, "step": 12673 }, { "epoch": 0.36974152517649805, "grad_norm": 0.954164419387724, "learning_rate": 7.26664748055461e-06, "loss": 0.1376, "step": 12674 }, { "epoch": 0.3697706984071416, "grad_norm": 1.1417621661994168, "learning_rate": 7.266226369552436e-06, "loss": 0.154, "step": 12675 }, { "epoch": 0.36979987163778516, "grad_norm": 0.6217955345989012, "learning_rate": 7.265805238317933e-06, "loss": 0.1463, "step": 12676 }, { "epoch": 0.3698290448684287, "grad_norm": 0.8034245069093413, "learning_rate": 7.2653840868548595e-06, "loss": 0.1723, "step": 12677 }, { "epoch": 0.36985821809907227, "grad_norm": 0.8025368610914108, "learning_rate": 7.264962915166981e-06, "loss": 0.1395, "step": 12678 }, { "epoch": 0.3698873913297159, "grad_norm": 0.8857000147703661, "learning_rate": 7.264541723258053e-06, "loss": 0.1656, "step": 12679 }, { "epoch": 0.36991656456035943, "grad_norm": 0.7973792407931395, "learning_rate": 7.264120511131837e-06, "loss": 0.1508, "step": 12680 }, { "epoch": 0.369945737791003, "grad_norm": 0.8282811444616076, "learning_rate": 7.263699278792093e-06, "loss": 0.148, "step": 12681 }, { "epoch": 0.36997491102164654, "grad_norm": 0.9754627730566209, "learning_rate": 7.263278026242583e-06, "loss": 0.1383, "step": 12682 }, { "epoch": 0.3700040842522901, "grad_norm": 0.8821581951445242, "learning_rate": 7.2628567534870665e-06, "loss": 0.1785, "step": 12683 }, { "epoch": 0.37003325748293364, "grad_norm": 0.8014611317129382, "learning_rate": 7.2624354605293045e-06, "loss": 0.1419, "step": 12684 }, { "epoch": 0.3700624307135772, "grad_norm": 0.928112475567267, "learning_rate": 7.26201414737306e-06, "loss": 0.174, "step": 12685 }, { "epoch": 0.3700916039442208, "grad_norm": 0.9527295221348488, "learning_rate": 7.261592814022094e-06, "loss": 0.1672, "step": 12686 }, { "epoch": 0.37012077717486436, "grad_norm": 0.8100111062669616, "learning_rate": 7.2611714604801655e-06, "loss": 0.1641, "step": 12687 }, { "epoch": 0.3701499504055079, "grad_norm": 1.088862419658833, "learning_rate": 7.260750086751039e-06, "loss": 0.1624, "step": 12688 }, { "epoch": 0.37017912363615146, "grad_norm": 0.8741832877252597, "learning_rate": 7.260328692838475e-06, "loss": 0.1674, "step": 12689 }, { "epoch": 0.370208296866795, "grad_norm": 0.8113410708118879, "learning_rate": 7.259907278746237e-06, "loss": 0.163, "step": 12690 }, { "epoch": 0.37023747009743857, "grad_norm": 0.6559126878198583, "learning_rate": 7.2594858444780845e-06, "loss": 0.1351, "step": 12691 }, { "epoch": 0.3702666433280822, "grad_norm": 1.057230150043581, "learning_rate": 7.259064390037781e-06, "loss": 0.1363, "step": 12692 }, { "epoch": 0.37029581655872573, "grad_norm": 0.8382173020004834, "learning_rate": 7.258642915429093e-06, "loss": 0.1516, "step": 12693 }, { "epoch": 0.3703249897893693, "grad_norm": 0.8541340696797689, "learning_rate": 7.258221420655778e-06, "loss": 0.1599, "step": 12694 }, { "epoch": 0.37035416302001284, "grad_norm": 0.9839161636709558, "learning_rate": 7.257799905721602e-06, "loss": 0.1588, "step": 12695 }, { "epoch": 0.3703833362506564, "grad_norm": 0.8469433734573926, "learning_rate": 7.257378370630328e-06, "loss": 0.143, "step": 12696 }, { "epoch": 0.37041250948129995, "grad_norm": 0.813809899925821, "learning_rate": 7.256956815385718e-06, "loss": 0.155, "step": 12697 }, { "epoch": 0.3704416827119435, "grad_norm": 0.9687633725390876, "learning_rate": 7.2565352399915354e-06, "loss": 0.1422, "step": 12698 }, { "epoch": 0.3704708559425871, "grad_norm": 0.8074508913582549, "learning_rate": 7.256113644451547e-06, "loss": 0.1287, "step": 12699 }, { "epoch": 0.37050002917323066, "grad_norm": 0.9016226971122039, "learning_rate": 7.2556920287695135e-06, "loss": 0.1462, "step": 12700 }, { "epoch": 0.3705292024038742, "grad_norm": 0.9994119263648709, "learning_rate": 7.2552703929491995e-06, "loss": 0.1451, "step": 12701 }, { "epoch": 0.37055837563451777, "grad_norm": 0.8768566263661491, "learning_rate": 7.254848736994371e-06, "loss": 0.1363, "step": 12702 }, { "epoch": 0.3705875488651613, "grad_norm": 0.7873476512517228, "learning_rate": 7.254427060908791e-06, "loss": 0.1594, "step": 12703 }, { "epoch": 0.3706167220958049, "grad_norm": 1.0958104625118537, "learning_rate": 7.254005364696223e-06, "loss": 0.1426, "step": 12704 }, { "epoch": 0.3706458953264484, "grad_norm": 0.6734979153631149, "learning_rate": 7.253583648360435e-06, "loss": 0.1423, "step": 12705 }, { "epoch": 0.37067506855709204, "grad_norm": 0.8778050796298381, "learning_rate": 7.253161911905188e-06, "loss": 0.1423, "step": 12706 }, { "epoch": 0.3707042417877356, "grad_norm": 1.2332592993197071, "learning_rate": 7.25274015533425e-06, "loss": 0.1225, "step": 12707 }, { "epoch": 0.37073341501837914, "grad_norm": 0.7214949137311216, "learning_rate": 7.252318378651388e-06, "loss": 0.1251, "step": 12708 }, { "epoch": 0.3707625882490227, "grad_norm": 0.8838682258775027, "learning_rate": 7.251896581860364e-06, "loss": 0.1438, "step": 12709 }, { "epoch": 0.37079176147966625, "grad_norm": 0.8865027838061383, "learning_rate": 7.2514747649649445e-06, "loss": 0.1368, "step": 12710 }, { "epoch": 0.3708209347103098, "grad_norm": 0.6501735305599404, "learning_rate": 7.2510529279688955e-06, "loss": 0.1391, "step": 12711 }, { "epoch": 0.37085010794095336, "grad_norm": 0.7626504494552498, "learning_rate": 7.250631070875983e-06, "loss": 0.1523, "step": 12712 }, { "epoch": 0.37087928117159696, "grad_norm": 0.8341725753743278, "learning_rate": 7.250209193689975e-06, "loss": 0.1926, "step": 12713 }, { "epoch": 0.3709084544022405, "grad_norm": 0.8088679688713538, "learning_rate": 7.249787296414635e-06, "loss": 0.136, "step": 12714 }, { "epoch": 0.37093762763288407, "grad_norm": 0.6777045927186891, "learning_rate": 7.249365379053731e-06, "loss": 0.1432, "step": 12715 }, { "epoch": 0.3709668008635276, "grad_norm": 0.736509969660584, "learning_rate": 7.248943441611031e-06, "loss": 0.1321, "step": 12716 }, { "epoch": 0.3709959740941712, "grad_norm": 0.9220189050883129, "learning_rate": 7.248521484090299e-06, "loss": 0.1631, "step": 12717 }, { "epoch": 0.37102514732481473, "grad_norm": 0.6296469905412683, "learning_rate": 7.248099506495307e-06, "loss": 0.1554, "step": 12718 }, { "epoch": 0.37105432055545834, "grad_norm": 0.8702416146894282, "learning_rate": 7.247677508829816e-06, "loss": 0.1568, "step": 12719 }, { "epoch": 0.3710834937861019, "grad_norm": 0.7698777512516692, "learning_rate": 7.2472554910976e-06, "loss": 0.143, "step": 12720 }, { "epoch": 0.37111266701674545, "grad_norm": 0.6924246629798406, "learning_rate": 7.246833453302422e-06, "loss": 0.1263, "step": 12721 }, { "epoch": 0.371141840247389, "grad_norm": 0.7094621667323893, "learning_rate": 7.24641139544805e-06, "loss": 0.1511, "step": 12722 }, { "epoch": 0.37117101347803255, "grad_norm": 0.7122842266152872, "learning_rate": 7.2459893175382546e-06, "loss": 0.1508, "step": 12723 }, { "epoch": 0.3712001867086761, "grad_norm": 0.9425692565536175, "learning_rate": 7.245567219576803e-06, "loss": 0.1574, "step": 12724 }, { "epoch": 0.37122935993931966, "grad_norm": 0.8105275883169979, "learning_rate": 7.2451451015674624e-06, "loss": 0.1379, "step": 12725 }, { "epoch": 0.37125853316996327, "grad_norm": 0.7417886750217544, "learning_rate": 7.244722963514002e-06, "loss": 0.1395, "step": 12726 }, { "epoch": 0.3712877064006068, "grad_norm": 0.9187036576494949, "learning_rate": 7.244300805420192e-06, "loss": 0.1376, "step": 12727 }, { "epoch": 0.3713168796312504, "grad_norm": 0.672931933546487, "learning_rate": 7.2438786272897995e-06, "loss": 0.1265, "step": 12728 }, { "epoch": 0.3713460528618939, "grad_norm": 0.8197851589142929, "learning_rate": 7.243456429126594e-06, "loss": 0.1437, "step": 12729 }, { "epoch": 0.3713752260925375, "grad_norm": 0.694977998895702, "learning_rate": 7.243034210934345e-06, "loss": 0.1314, "step": 12730 }, { "epoch": 0.37140439932318103, "grad_norm": 0.7052585724435444, "learning_rate": 7.242611972716823e-06, "loss": 0.1516, "step": 12731 }, { "epoch": 0.3714335725538246, "grad_norm": 1.0067913676235247, "learning_rate": 7.2421897144777965e-06, "loss": 0.1368, "step": 12732 }, { "epoch": 0.3714627457844682, "grad_norm": 0.9442444614065237, "learning_rate": 7.2417674362210365e-06, "loss": 0.1538, "step": 12733 }, { "epoch": 0.37149191901511175, "grad_norm": 0.957447244826272, "learning_rate": 7.241345137950309e-06, "loss": 0.1667, "step": 12734 }, { "epoch": 0.3715210922457553, "grad_norm": 0.8537917080419062, "learning_rate": 7.24092281966939e-06, "loss": 0.1609, "step": 12735 }, { "epoch": 0.37155026547639886, "grad_norm": 0.7917483530332348, "learning_rate": 7.2405004813820465e-06, "loss": 0.1281, "step": 12736 }, { "epoch": 0.3715794387070424, "grad_norm": 1.5691676737995557, "learning_rate": 7.240078123092047e-06, "loss": 0.1313, "step": 12737 }, { "epoch": 0.37160861193768596, "grad_norm": 0.868321333198384, "learning_rate": 7.2396557448031675e-06, "loss": 0.1512, "step": 12738 }, { "epoch": 0.3716377851683295, "grad_norm": 0.7526311165689189, "learning_rate": 7.239233346519176e-06, "loss": 0.1534, "step": 12739 }, { "epoch": 0.3716669583989731, "grad_norm": 0.8363204816108493, "learning_rate": 7.238810928243842e-06, "loss": 0.1577, "step": 12740 }, { "epoch": 0.3716961316296167, "grad_norm": 0.9236118860998855, "learning_rate": 7.238388489980941e-06, "loss": 0.1489, "step": 12741 }, { "epoch": 0.37172530486026023, "grad_norm": 0.7284906094986009, "learning_rate": 7.2379660317342405e-06, "loss": 0.139, "step": 12742 }, { "epoch": 0.3717544780909038, "grad_norm": 1.139559360147518, "learning_rate": 7.237543553507513e-06, "loss": 0.1337, "step": 12743 }, { "epoch": 0.37178365132154734, "grad_norm": 0.9258717371918922, "learning_rate": 7.237121055304532e-06, "loss": 0.1567, "step": 12744 }, { "epoch": 0.3718128245521909, "grad_norm": 0.79707393282504, "learning_rate": 7.236698537129066e-06, "loss": 0.161, "step": 12745 }, { "epoch": 0.37184199778283444, "grad_norm": 0.9828875833309629, "learning_rate": 7.236275998984892e-06, "loss": 0.1242, "step": 12746 }, { "epoch": 0.37187117101347805, "grad_norm": 0.7557707280313231, "learning_rate": 7.235853440875777e-06, "loss": 0.1496, "step": 12747 }, { "epoch": 0.3719003442441216, "grad_norm": 0.9305273171442837, "learning_rate": 7.235430862805499e-06, "loss": 0.1467, "step": 12748 }, { "epoch": 0.37192951747476516, "grad_norm": 0.7835039130081966, "learning_rate": 7.235008264777827e-06, "loss": 0.1452, "step": 12749 }, { "epoch": 0.3719586907054087, "grad_norm": 0.7762569275972072, "learning_rate": 7.2345856467965345e-06, "loss": 0.1373, "step": 12750 }, { "epoch": 0.37198786393605227, "grad_norm": 0.9320611300067672, "learning_rate": 7.2341630088653955e-06, "loss": 0.1359, "step": 12751 }, { "epoch": 0.3720170371666958, "grad_norm": 0.9019528982672234, "learning_rate": 7.233740350988181e-06, "loss": 0.1687, "step": 12752 }, { "epoch": 0.3720462103973394, "grad_norm": 0.7524268631385961, "learning_rate": 7.233317673168667e-06, "loss": 0.1497, "step": 12753 }, { "epoch": 0.372075383627983, "grad_norm": 0.8015622169646974, "learning_rate": 7.232894975410626e-06, "loss": 0.1358, "step": 12754 }, { "epoch": 0.37210455685862653, "grad_norm": 0.7264333250152629, "learning_rate": 7.232472257717831e-06, "loss": 0.1414, "step": 12755 }, { "epoch": 0.3721337300892701, "grad_norm": 0.8230271531325329, "learning_rate": 7.232049520094057e-06, "loss": 0.1623, "step": 12756 }, { "epoch": 0.37216290331991364, "grad_norm": 0.8013650352287885, "learning_rate": 7.231626762543078e-06, "loss": 0.1227, "step": 12757 }, { "epoch": 0.3721920765505572, "grad_norm": 0.8606246876335979, "learning_rate": 7.231203985068666e-06, "loss": 0.1617, "step": 12758 }, { "epoch": 0.37222124978120075, "grad_norm": 0.7493734677857707, "learning_rate": 7.230781187674601e-06, "loss": 0.1349, "step": 12759 }, { "epoch": 0.37225042301184436, "grad_norm": 0.797890250530901, "learning_rate": 7.230358370364652e-06, "loss": 0.1333, "step": 12760 }, { "epoch": 0.3722795962424879, "grad_norm": 0.741412221042629, "learning_rate": 7.2299355331425955e-06, "loss": 0.1275, "step": 12761 }, { "epoch": 0.37230876947313146, "grad_norm": 0.7848481159593104, "learning_rate": 7.229512676012207e-06, "loss": 0.143, "step": 12762 }, { "epoch": 0.372337942703775, "grad_norm": 0.8226034318008352, "learning_rate": 7.229089798977264e-06, "loss": 0.128, "step": 12763 }, { "epoch": 0.37236711593441857, "grad_norm": 0.7372312788201161, "learning_rate": 7.2286669020415355e-06, "loss": 0.1571, "step": 12764 }, { "epoch": 0.3723962891650621, "grad_norm": 0.7415974436887275, "learning_rate": 7.228243985208804e-06, "loss": 0.1278, "step": 12765 }, { "epoch": 0.3724254623957057, "grad_norm": 0.8904885817454987, "learning_rate": 7.227821048482842e-06, "loss": 0.1382, "step": 12766 }, { "epoch": 0.3724546356263493, "grad_norm": 0.8968460568461896, "learning_rate": 7.227398091867422e-06, "loss": 0.1371, "step": 12767 }, { "epoch": 0.37248380885699284, "grad_norm": 0.8371768229407455, "learning_rate": 7.226975115366328e-06, "loss": 0.171, "step": 12768 }, { "epoch": 0.3725129820876364, "grad_norm": 0.9250473502488088, "learning_rate": 7.22655211898333e-06, "loss": 0.1462, "step": 12769 }, { "epoch": 0.37254215531827994, "grad_norm": 1.0230115628904144, "learning_rate": 7.226129102722206e-06, "loss": 0.1575, "step": 12770 }, { "epoch": 0.3725713285489235, "grad_norm": 0.8323563189907527, "learning_rate": 7.225706066586733e-06, "loss": 0.1377, "step": 12771 }, { "epoch": 0.37260050177956705, "grad_norm": 0.7385433873269618, "learning_rate": 7.225283010580686e-06, "loss": 0.16, "step": 12772 }, { "epoch": 0.3726296750102106, "grad_norm": 0.8241995662093278, "learning_rate": 7.224859934707845e-06, "loss": 0.1419, "step": 12773 }, { "epoch": 0.3726588482408542, "grad_norm": 0.8881407002886734, "learning_rate": 7.224436838971986e-06, "loss": 0.1512, "step": 12774 }, { "epoch": 0.37268802147149777, "grad_norm": 0.9006077207869064, "learning_rate": 7.224013723376886e-06, "loss": 0.1388, "step": 12775 }, { "epoch": 0.3727171947021413, "grad_norm": 0.840964875196712, "learning_rate": 7.223590587926322e-06, "loss": 0.1629, "step": 12776 }, { "epoch": 0.37274636793278487, "grad_norm": 1.0528834947135033, "learning_rate": 7.223167432624071e-06, "loss": 0.1651, "step": 12777 }, { "epoch": 0.3727755411634284, "grad_norm": 0.765849402535143, "learning_rate": 7.2227442574739135e-06, "loss": 0.145, "step": 12778 }, { "epoch": 0.372804714394072, "grad_norm": 0.914936798327709, "learning_rate": 7.222321062479625e-06, "loss": 0.1496, "step": 12779 }, { "epoch": 0.3728338876247156, "grad_norm": 1.0107411168454763, "learning_rate": 7.221897847644985e-06, "loss": 0.1625, "step": 12780 }, { "epoch": 0.37286306085535914, "grad_norm": 0.8774221008451132, "learning_rate": 7.221474612973771e-06, "loss": 0.1465, "step": 12781 }, { "epoch": 0.3728922340860027, "grad_norm": 0.818555050504513, "learning_rate": 7.22105135846976e-06, "loss": 0.1539, "step": 12782 }, { "epoch": 0.37292140731664625, "grad_norm": 0.957235837635529, "learning_rate": 7.220628084136736e-06, "loss": 0.1549, "step": 12783 }, { "epoch": 0.3729505805472898, "grad_norm": 0.7835090422931695, "learning_rate": 7.220204789978473e-06, "loss": 0.1404, "step": 12784 }, { "epoch": 0.37297975377793335, "grad_norm": 0.7670782905317178, "learning_rate": 7.219781475998753e-06, "loss": 0.1768, "step": 12785 }, { "epoch": 0.3730089270085769, "grad_norm": 1.1532221791237023, "learning_rate": 7.219358142201352e-06, "loss": 0.1443, "step": 12786 }, { "epoch": 0.3730381002392205, "grad_norm": 0.8264581414476969, "learning_rate": 7.218934788590053e-06, "loss": 0.1532, "step": 12787 }, { "epoch": 0.37306727346986407, "grad_norm": 0.9916087496784959, "learning_rate": 7.218511415168633e-06, "loss": 0.1493, "step": 12788 }, { "epoch": 0.3730964467005076, "grad_norm": 1.1611645508019375, "learning_rate": 7.218088021940872e-06, "loss": 0.1409, "step": 12789 }, { "epoch": 0.3731256199311512, "grad_norm": 0.9272351621046583, "learning_rate": 7.217664608910552e-06, "loss": 0.1555, "step": 12790 }, { "epoch": 0.37315479316179473, "grad_norm": 0.9172760740376449, "learning_rate": 7.217241176081451e-06, "loss": 0.1396, "step": 12791 }, { "epoch": 0.3731839663924383, "grad_norm": 1.1666594732936182, "learning_rate": 7.21681772345735e-06, "loss": 0.1465, "step": 12792 }, { "epoch": 0.37321313962308184, "grad_norm": 0.8439716641350714, "learning_rate": 7.21639425104203e-06, "loss": 0.1538, "step": 12793 }, { "epoch": 0.37324231285372544, "grad_norm": 1.0805611965721982, "learning_rate": 7.215970758839272e-06, "loss": 0.1595, "step": 12794 }, { "epoch": 0.373271486084369, "grad_norm": 1.3128693885816374, "learning_rate": 7.215547246852856e-06, "loss": 0.1618, "step": 12795 }, { "epoch": 0.37330065931501255, "grad_norm": 1.1045141047839067, "learning_rate": 7.21512371508656e-06, "loss": 0.1512, "step": 12796 }, { "epoch": 0.3733298325456561, "grad_norm": 0.592389833864529, "learning_rate": 7.214700163544171e-06, "loss": 0.1291, "step": 12797 }, { "epoch": 0.37335900577629966, "grad_norm": 1.5000220461646439, "learning_rate": 7.2142765922294675e-06, "loss": 0.1526, "step": 12798 }, { "epoch": 0.3733881790069432, "grad_norm": 1.2455101272119433, "learning_rate": 7.213853001146229e-06, "loss": 0.1382, "step": 12799 }, { "epoch": 0.37341735223758676, "grad_norm": 0.6794075304930273, "learning_rate": 7.213429390298243e-06, "loss": 0.1493, "step": 12800 }, { "epoch": 0.3734465254682304, "grad_norm": 0.8178729732341539, "learning_rate": 7.213005759689286e-06, "loss": 0.1403, "step": 12801 }, { "epoch": 0.3734756986988739, "grad_norm": 1.2423852804172166, "learning_rate": 7.212582109323141e-06, "loss": 0.1624, "step": 12802 }, { "epoch": 0.3735048719295175, "grad_norm": 0.7730886036964798, "learning_rate": 7.212158439203593e-06, "loss": 0.1575, "step": 12803 }, { "epoch": 0.37353404516016103, "grad_norm": 1.0462063519020495, "learning_rate": 7.21173474933442e-06, "loss": 0.1495, "step": 12804 }, { "epoch": 0.3735632183908046, "grad_norm": 0.9880394441696694, "learning_rate": 7.2113110397194094e-06, "loss": 0.1446, "step": 12805 }, { "epoch": 0.37359239162144814, "grad_norm": 0.8399042504911042, "learning_rate": 7.210887310362341e-06, "loss": 0.139, "step": 12806 }, { "epoch": 0.37362156485209175, "grad_norm": 0.7712592400776408, "learning_rate": 7.2104635612669984e-06, "loss": 0.13, "step": 12807 }, { "epoch": 0.3736507380827353, "grad_norm": 0.9868046955871935, "learning_rate": 7.210039792437165e-06, "loss": 0.132, "step": 12808 }, { "epoch": 0.37367991131337885, "grad_norm": 0.6052468986562363, "learning_rate": 7.2096160038766225e-06, "loss": 0.1276, "step": 12809 }, { "epoch": 0.3737090845440224, "grad_norm": 1.004714779689122, "learning_rate": 7.209192195589159e-06, "loss": 0.1359, "step": 12810 }, { "epoch": 0.37373825777466596, "grad_norm": 0.8037176755728264, "learning_rate": 7.208768367578551e-06, "loss": 0.1413, "step": 12811 }, { "epoch": 0.3737674310053095, "grad_norm": 0.8238673228541763, "learning_rate": 7.208344519848589e-06, "loss": 0.1505, "step": 12812 }, { "epoch": 0.37379660423595307, "grad_norm": 0.7690389986364415, "learning_rate": 7.207920652403054e-06, "loss": 0.1531, "step": 12813 }, { "epoch": 0.3738257774665967, "grad_norm": 0.8254209981613991, "learning_rate": 7.207496765245729e-06, "loss": 0.1368, "step": 12814 }, { "epoch": 0.37385495069724023, "grad_norm": 0.6900741912062895, "learning_rate": 7.207072858380402e-06, "loss": 0.1407, "step": 12815 }, { "epoch": 0.3738841239278838, "grad_norm": 1.0600254361303463, "learning_rate": 7.206648931810855e-06, "loss": 0.135, "step": 12816 }, { "epoch": 0.37391329715852734, "grad_norm": 0.7431866458866175, "learning_rate": 7.20622498554087e-06, "loss": 0.1552, "step": 12817 }, { "epoch": 0.3739424703891709, "grad_norm": 0.7518029155696899, "learning_rate": 7.205801019574239e-06, "loss": 0.1536, "step": 12818 }, { "epoch": 0.37397164361981444, "grad_norm": 0.7642362741752822, "learning_rate": 7.205377033914742e-06, "loss": 0.1578, "step": 12819 }, { "epoch": 0.374000816850458, "grad_norm": 1.0284505970533113, "learning_rate": 7.204953028566164e-06, "loss": 0.1736, "step": 12820 }, { "epoch": 0.3740299900811016, "grad_norm": 0.8088192319957287, "learning_rate": 7.204529003532292e-06, "loss": 0.1518, "step": 12821 }, { "epoch": 0.37405916331174516, "grad_norm": 0.8304772257381413, "learning_rate": 7.204104958816913e-06, "loss": 0.1312, "step": 12822 }, { "epoch": 0.3740883365423887, "grad_norm": 0.7755346740607266, "learning_rate": 7.203680894423809e-06, "loss": 0.1232, "step": 12823 }, { "epoch": 0.37411750977303226, "grad_norm": 0.7693245134642306, "learning_rate": 7.203256810356769e-06, "loss": 0.1512, "step": 12824 }, { "epoch": 0.3741466830036758, "grad_norm": 0.9523896806829728, "learning_rate": 7.202832706619579e-06, "loss": 0.1314, "step": 12825 }, { "epoch": 0.37417585623431937, "grad_norm": 0.7951488704850891, "learning_rate": 7.202408583216023e-06, "loss": 0.127, "step": 12826 }, { "epoch": 0.3742050294649629, "grad_norm": 0.8067102693565923, "learning_rate": 7.201984440149889e-06, "loss": 0.1682, "step": 12827 }, { "epoch": 0.37423420269560653, "grad_norm": 1.0685204648025133, "learning_rate": 7.2015602774249645e-06, "loss": 0.1596, "step": 12828 }, { "epoch": 0.3742633759262501, "grad_norm": 0.9378162171489898, "learning_rate": 7.201136095045035e-06, "loss": 0.1532, "step": 12829 }, { "epoch": 0.37429254915689364, "grad_norm": 0.6825283671666629, "learning_rate": 7.200711893013889e-06, "loss": 0.1652, "step": 12830 }, { "epoch": 0.3743217223875372, "grad_norm": 0.7794609561132333, "learning_rate": 7.200287671335311e-06, "loss": 0.1462, "step": 12831 }, { "epoch": 0.37435089561818075, "grad_norm": 0.6949545272455027, "learning_rate": 7.199863430013088e-06, "loss": 0.1355, "step": 12832 }, { "epoch": 0.3743800688488243, "grad_norm": 0.7559217194938304, "learning_rate": 7.1994391690510136e-06, "loss": 0.1292, "step": 12833 }, { "epoch": 0.3744092420794679, "grad_norm": 0.7526174945214693, "learning_rate": 7.19901488845287e-06, "loss": 0.1494, "step": 12834 }, { "epoch": 0.37443841531011146, "grad_norm": 0.7730864831196734, "learning_rate": 7.1985905882224446e-06, "loss": 0.1545, "step": 12835 }, { "epoch": 0.374467588540755, "grad_norm": 0.8467018024327737, "learning_rate": 7.198166268363529e-06, "loss": 0.1504, "step": 12836 }, { "epoch": 0.37449676177139857, "grad_norm": 0.9572708813983742, "learning_rate": 7.19774192887991e-06, "loss": 0.145, "step": 12837 }, { "epoch": 0.3745259350020421, "grad_norm": 0.7930922126743046, "learning_rate": 7.197317569775375e-06, "loss": 0.1393, "step": 12838 }, { "epoch": 0.3745551082326857, "grad_norm": 0.8380399611026271, "learning_rate": 7.196893191053713e-06, "loss": 0.1508, "step": 12839 }, { "epoch": 0.3745842814633292, "grad_norm": 0.9770374218791077, "learning_rate": 7.196468792718714e-06, "loss": 0.1626, "step": 12840 }, { "epoch": 0.37461345469397284, "grad_norm": 1.0658859121375772, "learning_rate": 7.196044374774165e-06, "loss": 0.1444, "step": 12841 }, { "epoch": 0.3746426279246164, "grad_norm": 0.8047978745261797, "learning_rate": 7.1956199372238555e-06, "loss": 0.1416, "step": 12842 }, { "epoch": 0.37467180115525994, "grad_norm": 1.0186389228583306, "learning_rate": 7.1951954800715775e-06, "loss": 0.1496, "step": 12843 }, { "epoch": 0.3747009743859035, "grad_norm": 0.9815262561047814, "learning_rate": 7.194771003321116e-06, "loss": 0.1628, "step": 12844 }, { "epoch": 0.37473014761654705, "grad_norm": 0.8453425149772271, "learning_rate": 7.194346506976264e-06, "loss": 0.178, "step": 12845 }, { "epoch": 0.3747593208471906, "grad_norm": 0.7025099970289065, "learning_rate": 7.193921991040811e-06, "loss": 0.1168, "step": 12846 }, { "epoch": 0.37478849407783416, "grad_norm": 0.8768540199574046, "learning_rate": 7.193497455518545e-06, "loss": 0.1491, "step": 12847 }, { "epoch": 0.37481766730847776, "grad_norm": 0.9173644426660783, "learning_rate": 7.193072900413258e-06, "loss": 0.1634, "step": 12848 }, { "epoch": 0.3748468405391213, "grad_norm": 0.76045222399714, "learning_rate": 7.192648325728739e-06, "loss": 0.1586, "step": 12849 }, { "epoch": 0.37487601376976487, "grad_norm": 0.7832145092698721, "learning_rate": 7.1922237314687795e-06, "loss": 0.14, "step": 12850 }, { "epoch": 0.3749051870004084, "grad_norm": 0.8572299816846876, "learning_rate": 7.191799117637169e-06, "loss": 0.1504, "step": 12851 }, { "epoch": 0.374934360231052, "grad_norm": 0.8418590093246815, "learning_rate": 7.191374484237701e-06, "loss": 0.1604, "step": 12852 }, { "epoch": 0.37496353346169553, "grad_norm": 0.83495815477393, "learning_rate": 7.1909498312741635e-06, "loss": 0.1268, "step": 12853 }, { "epoch": 0.3749927066923391, "grad_norm": 0.6949037949282069, "learning_rate": 7.190525158750349e-06, "loss": 0.1617, "step": 12854 }, { "epoch": 0.3750218799229827, "grad_norm": 0.7707164045187853, "learning_rate": 7.19010046667005e-06, "loss": 0.1202, "step": 12855 }, { "epoch": 0.37505105315362625, "grad_norm": 0.934571627521969, "learning_rate": 7.189675755037055e-06, "loss": 0.1565, "step": 12856 }, { "epoch": 0.3750802263842698, "grad_norm": 0.7682593052696219, "learning_rate": 7.189251023855158e-06, "loss": 0.114, "step": 12857 }, { "epoch": 0.37510939961491335, "grad_norm": 0.7097243328524351, "learning_rate": 7.188826273128152e-06, "loss": 0.1389, "step": 12858 }, { "epoch": 0.3751385728455569, "grad_norm": 1.101588816045312, "learning_rate": 7.188401502859825e-06, "loss": 0.17, "step": 12859 }, { "epoch": 0.37516774607620046, "grad_norm": 0.809860658631005, "learning_rate": 7.187976713053975e-06, "loss": 0.1359, "step": 12860 }, { "epoch": 0.375196919306844, "grad_norm": 0.776896204454263, "learning_rate": 7.187551903714389e-06, "loss": 0.1436, "step": 12861 }, { "epoch": 0.3752260925374876, "grad_norm": 0.6367088342375902, "learning_rate": 7.187127074844862e-06, "loss": 0.1186, "step": 12862 }, { "epoch": 0.3752552657681312, "grad_norm": 0.7681994472771605, "learning_rate": 7.186702226449187e-06, "loss": 0.1259, "step": 12863 }, { "epoch": 0.3752844389987747, "grad_norm": 0.9294077857237404, "learning_rate": 7.186277358531158e-06, "loss": 0.1253, "step": 12864 }, { "epoch": 0.3753136122294183, "grad_norm": 0.805502573751167, "learning_rate": 7.185852471094563e-06, "loss": 0.141, "step": 12865 }, { "epoch": 0.37534278546006183, "grad_norm": 0.6881424002631252, "learning_rate": 7.185427564143201e-06, "loss": 0.1303, "step": 12866 }, { "epoch": 0.3753719586907054, "grad_norm": 1.030590331604276, "learning_rate": 7.1850026376808645e-06, "loss": 0.1442, "step": 12867 }, { "epoch": 0.375401131921349, "grad_norm": 0.7952529500074451, "learning_rate": 7.1845776917113445e-06, "loss": 0.1242, "step": 12868 }, { "epoch": 0.37543030515199255, "grad_norm": 0.8270142445276427, "learning_rate": 7.184152726238437e-06, "loss": 0.1419, "step": 12869 }, { "epoch": 0.3754594783826361, "grad_norm": 0.8080599199572762, "learning_rate": 7.183727741265935e-06, "loss": 0.1572, "step": 12870 }, { "epoch": 0.37548865161327966, "grad_norm": 0.7388164255739978, "learning_rate": 7.183302736797632e-06, "loss": 0.1318, "step": 12871 }, { "epoch": 0.3755178248439232, "grad_norm": 0.8551495141645852, "learning_rate": 7.182877712837326e-06, "loss": 0.1535, "step": 12872 }, { "epoch": 0.37554699807456676, "grad_norm": 0.7926117580264666, "learning_rate": 7.182452669388809e-06, "loss": 0.1422, "step": 12873 }, { "epoch": 0.3755761713052103, "grad_norm": 0.9528508626438138, "learning_rate": 7.182027606455873e-06, "loss": 0.1427, "step": 12874 }, { "epoch": 0.3756053445358539, "grad_norm": 0.7720977339248862, "learning_rate": 7.181602524042317e-06, "loss": 0.1211, "step": 12875 }, { "epoch": 0.3756345177664975, "grad_norm": 0.7329168311726614, "learning_rate": 7.1811774221519336e-06, "loss": 0.136, "step": 12876 }, { "epoch": 0.37566369099714103, "grad_norm": 0.7780701546134563, "learning_rate": 7.180752300788518e-06, "loss": 0.1687, "step": 12877 }, { "epoch": 0.3756928642277846, "grad_norm": 0.885036411558389, "learning_rate": 7.180327159955869e-06, "loss": 0.1305, "step": 12878 }, { "epoch": 0.37572203745842814, "grad_norm": 0.8002865762299235, "learning_rate": 7.179901999657778e-06, "loss": 0.1281, "step": 12879 }, { "epoch": 0.3757512106890717, "grad_norm": 0.6139885216483737, "learning_rate": 7.179476819898042e-06, "loss": 0.1277, "step": 12880 }, { "epoch": 0.37578038391971524, "grad_norm": 0.8295300050875486, "learning_rate": 7.179051620680457e-06, "loss": 0.1667, "step": 12881 }, { "epoch": 0.37580955715035885, "grad_norm": 0.7792411625735932, "learning_rate": 7.178626402008821e-06, "loss": 0.1501, "step": 12882 }, { "epoch": 0.3758387303810024, "grad_norm": 0.6584314271683108, "learning_rate": 7.178201163886928e-06, "loss": 0.1292, "step": 12883 }, { "epoch": 0.37586790361164596, "grad_norm": 0.8692277701651672, "learning_rate": 7.177775906318574e-06, "loss": 0.1196, "step": 12884 }, { "epoch": 0.3758970768422895, "grad_norm": 0.8391969923653462, "learning_rate": 7.177350629307558e-06, "loss": 0.1338, "step": 12885 }, { "epoch": 0.37592625007293307, "grad_norm": 0.9623909496126544, "learning_rate": 7.176925332857674e-06, "loss": 0.1491, "step": 12886 }, { "epoch": 0.3759554233035766, "grad_norm": 0.9077478299947973, "learning_rate": 7.176500016972721e-06, "loss": 0.1353, "step": 12887 }, { "epoch": 0.3759845965342202, "grad_norm": 0.7651834974419126, "learning_rate": 7.176074681656495e-06, "loss": 0.1477, "step": 12888 }, { "epoch": 0.3760137697648638, "grad_norm": 1.081742506430794, "learning_rate": 7.175649326912794e-06, "loss": 0.1251, "step": 12889 }, { "epoch": 0.37604294299550733, "grad_norm": 0.7679017013848687, "learning_rate": 7.175223952745416e-06, "loss": 0.1296, "step": 12890 }, { "epoch": 0.3760721162261509, "grad_norm": 0.9669289364213036, "learning_rate": 7.174798559158157e-06, "loss": 0.1408, "step": 12891 }, { "epoch": 0.37610128945679444, "grad_norm": 0.9629585317633905, "learning_rate": 7.174373146154814e-06, "loss": 0.1561, "step": 12892 }, { "epoch": 0.376130462687438, "grad_norm": 0.850242793603612, "learning_rate": 7.17394771373919e-06, "loss": 0.154, "step": 12893 }, { "epoch": 0.37615963591808155, "grad_norm": 0.9562489098328032, "learning_rate": 7.173522261915078e-06, "loss": 0.1596, "step": 12894 }, { "epoch": 0.37618880914872516, "grad_norm": 0.8542218411673976, "learning_rate": 7.173096790686278e-06, "loss": 0.1416, "step": 12895 }, { "epoch": 0.3762179823793687, "grad_norm": 0.8199126257197301, "learning_rate": 7.172671300056588e-06, "loss": 0.1225, "step": 12896 }, { "epoch": 0.37624715561001226, "grad_norm": 0.8759460699810758, "learning_rate": 7.172245790029808e-06, "loss": 0.1641, "step": 12897 }, { "epoch": 0.3762763288406558, "grad_norm": 1.0116621949880957, "learning_rate": 7.1718202606097366e-06, "loss": 0.1689, "step": 12898 }, { "epoch": 0.37630550207129937, "grad_norm": 0.6673690742963837, "learning_rate": 7.171394711800172e-06, "loss": 0.1214, "step": 12899 }, { "epoch": 0.3763346753019429, "grad_norm": 1.007074597015964, "learning_rate": 7.1709691436049145e-06, "loss": 0.1502, "step": 12900 }, { "epoch": 0.3763638485325865, "grad_norm": 0.8534217065999815, "learning_rate": 7.170543556027762e-06, "loss": 0.1431, "step": 12901 }, { "epoch": 0.3763930217632301, "grad_norm": 0.8005454422205138, "learning_rate": 7.170117949072514e-06, "loss": 0.1458, "step": 12902 }, { "epoch": 0.37642219499387364, "grad_norm": 0.7868223042884781, "learning_rate": 7.1696923227429724e-06, "loss": 0.1325, "step": 12903 }, { "epoch": 0.3764513682245172, "grad_norm": 0.8432534370096046, "learning_rate": 7.169266677042934e-06, "loss": 0.1387, "step": 12904 }, { "epoch": 0.37648054145516074, "grad_norm": 0.707858434326484, "learning_rate": 7.168841011976202e-06, "loss": 0.1401, "step": 12905 }, { "epoch": 0.3765097146858043, "grad_norm": 0.7993405197909869, "learning_rate": 7.168415327546575e-06, "loss": 0.1545, "step": 12906 }, { "epoch": 0.37653888791644785, "grad_norm": 0.7133277890137376, "learning_rate": 7.167989623757853e-06, "loss": 0.1249, "step": 12907 }, { "epoch": 0.3765680611470914, "grad_norm": 0.6799619895613896, "learning_rate": 7.1675639006138385e-06, "loss": 0.1432, "step": 12908 }, { "epoch": 0.376597234377735, "grad_norm": 0.8732558172520097, "learning_rate": 7.16713815811833e-06, "loss": 0.129, "step": 12909 }, { "epoch": 0.37662640760837857, "grad_norm": 0.8703644826462984, "learning_rate": 7.166712396275128e-06, "loss": 0.1416, "step": 12910 }, { "epoch": 0.3766555808390221, "grad_norm": 0.8734050869939769, "learning_rate": 7.166286615088037e-06, "loss": 0.1283, "step": 12911 }, { "epoch": 0.3766847540696657, "grad_norm": 0.8412952623074301, "learning_rate": 7.165860814560855e-06, "loss": 0.1562, "step": 12912 }, { "epoch": 0.3767139273003092, "grad_norm": 0.9821024713743136, "learning_rate": 7.165434994697386e-06, "loss": 0.1526, "step": 12913 }, { "epoch": 0.3767431005309528, "grad_norm": 0.7847728972948989, "learning_rate": 7.16500915550143e-06, "loss": 0.1633, "step": 12914 }, { "epoch": 0.37677227376159633, "grad_norm": 0.872635241897278, "learning_rate": 7.1645832969767894e-06, "loss": 0.1648, "step": 12915 }, { "epoch": 0.37680144699223994, "grad_norm": 0.847901694637845, "learning_rate": 7.164157419127263e-06, "loss": 0.1575, "step": 12916 }, { "epoch": 0.3768306202228835, "grad_norm": 0.7788239673278788, "learning_rate": 7.1637315219566585e-06, "loss": 0.1414, "step": 12917 }, { "epoch": 0.37685979345352705, "grad_norm": 0.9305316814725858, "learning_rate": 7.1633056054687756e-06, "loss": 0.1474, "step": 12918 }, { "epoch": 0.3768889666841706, "grad_norm": 0.768186906984129, "learning_rate": 7.162879669667415e-06, "loss": 0.1465, "step": 12919 }, { "epoch": 0.37691813991481415, "grad_norm": 0.7541898117802722, "learning_rate": 7.162453714556383e-06, "loss": 0.1497, "step": 12920 }, { "epoch": 0.3769473131454577, "grad_norm": 0.7062473964100255, "learning_rate": 7.162027740139479e-06, "loss": 0.1327, "step": 12921 }, { "epoch": 0.3769764863761013, "grad_norm": 0.8258927353984997, "learning_rate": 7.1616017464205065e-06, "loss": 0.1422, "step": 12922 }, { "epoch": 0.37700565960674487, "grad_norm": 0.6491862551330999, "learning_rate": 7.1611757334032725e-06, "loss": 0.1427, "step": 12923 }, { "epoch": 0.3770348328373884, "grad_norm": 1.10500134042551, "learning_rate": 7.160749701091576e-06, "loss": 0.1605, "step": 12924 }, { "epoch": 0.377064006068032, "grad_norm": 0.8126912899723046, "learning_rate": 7.160323649489221e-06, "loss": 0.1173, "step": 12925 }, { "epoch": 0.37709317929867553, "grad_norm": 0.8426200014264674, "learning_rate": 7.159897578600014e-06, "loss": 0.1391, "step": 12926 }, { "epoch": 0.3771223525293191, "grad_norm": 0.8834136266003784, "learning_rate": 7.1594714884277564e-06, "loss": 0.1502, "step": 12927 }, { "epoch": 0.37715152575996264, "grad_norm": 1.0371223791268027, "learning_rate": 7.1590453789762525e-06, "loss": 0.1424, "step": 12928 }, { "epoch": 0.37718069899060624, "grad_norm": 0.9614534965231251, "learning_rate": 7.158619250249307e-06, "loss": 0.1144, "step": 12929 }, { "epoch": 0.3772098722212498, "grad_norm": 1.0766179123513977, "learning_rate": 7.158193102250724e-06, "loss": 0.1418, "step": 12930 }, { "epoch": 0.37723904545189335, "grad_norm": 0.7851736856001515, "learning_rate": 7.157766934984308e-06, "loss": 0.1536, "step": 12931 }, { "epoch": 0.3772682186825369, "grad_norm": 1.1128148525327382, "learning_rate": 7.157340748453864e-06, "loss": 0.1436, "step": 12932 }, { "epoch": 0.37729739191318046, "grad_norm": 0.97632633608461, "learning_rate": 7.1569145426631985e-06, "loss": 0.1474, "step": 12933 }, { "epoch": 0.377326565143824, "grad_norm": 0.8353249735371481, "learning_rate": 7.156488317616111e-06, "loss": 0.1593, "step": 12934 }, { "epoch": 0.37735573837446756, "grad_norm": 0.6876320909100034, "learning_rate": 7.156062073316414e-06, "loss": 0.126, "step": 12935 }, { "epoch": 0.3773849116051112, "grad_norm": 0.9650734550691265, "learning_rate": 7.155635809767909e-06, "loss": 0.1413, "step": 12936 }, { "epoch": 0.3774140848357547, "grad_norm": 1.3062486775554714, "learning_rate": 7.1552095269744e-06, "loss": 0.1696, "step": 12937 }, { "epoch": 0.3774432580663983, "grad_norm": 0.7289393306992145, "learning_rate": 7.154783224939697e-06, "loss": 0.1399, "step": 12938 }, { "epoch": 0.37747243129704183, "grad_norm": 1.007827707825707, "learning_rate": 7.154356903667604e-06, "loss": 0.1212, "step": 12939 }, { "epoch": 0.3775016045276854, "grad_norm": 0.953456318475476, "learning_rate": 7.153930563161926e-06, "loss": 0.1374, "step": 12940 }, { "epoch": 0.37753077775832894, "grad_norm": 0.7861556916629966, "learning_rate": 7.15350420342647e-06, "loss": 0.1574, "step": 12941 }, { "epoch": 0.3775599509889725, "grad_norm": 1.035179138482058, "learning_rate": 7.1530778244650425e-06, "loss": 0.1313, "step": 12942 }, { "epoch": 0.3775891242196161, "grad_norm": 0.7924070480833347, "learning_rate": 7.1526514262814495e-06, "loss": 0.1394, "step": 12943 }, { "epoch": 0.37761829745025965, "grad_norm": 1.0319999178575172, "learning_rate": 7.1522250088795e-06, "loss": 0.1638, "step": 12944 }, { "epoch": 0.3776474706809032, "grad_norm": 0.8222215758395307, "learning_rate": 7.1517985722630005e-06, "loss": 0.1532, "step": 12945 }, { "epoch": 0.37767664391154676, "grad_norm": 1.104517354869253, "learning_rate": 7.151372116435753e-06, "loss": 0.1387, "step": 12946 }, { "epoch": 0.3777058171421903, "grad_norm": 0.9845709166685269, "learning_rate": 7.150945641401571e-06, "loss": 0.148, "step": 12947 }, { "epoch": 0.37773499037283387, "grad_norm": 0.8761587873510057, "learning_rate": 7.150519147164261e-06, "loss": 0.1431, "step": 12948 }, { "epoch": 0.3777641636034775, "grad_norm": 1.0675377919863407, "learning_rate": 7.150092633727627e-06, "loss": 0.1531, "step": 12949 }, { "epoch": 0.37779333683412103, "grad_norm": 0.8704741403693905, "learning_rate": 7.149666101095482e-06, "loss": 0.1831, "step": 12950 }, { "epoch": 0.3778225100647646, "grad_norm": 0.71824553247317, "learning_rate": 7.149239549271629e-06, "loss": 0.1243, "step": 12951 }, { "epoch": 0.37785168329540814, "grad_norm": 0.9730730253029378, "learning_rate": 7.148812978259878e-06, "loss": 0.2166, "step": 12952 }, { "epoch": 0.3778808565260517, "grad_norm": 0.8428157727675645, "learning_rate": 7.148386388064039e-06, "loss": 0.1237, "step": 12953 }, { "epoch": 0.37791002975669524, "grad_norm": 0.8006041165399888, "learning_rate": 7.14795977868792e-06, "loss": 0.1371, "step": 12954 }, { "epoch": 0.3779392029873388, "grad_norm": 0.9305105700919174, "learning_rate": 7.147533150135327e-06, "loss": 0.1249, "step": 12955 }, { "epoch": 0.3779683762179824, "grad_norm": 1.0914651019831614, "learning_rate": 7.147106502410071e-06, "loss": 0.1459, "step": 12956 }, { "epoch": 0.37799754944862596, "grad_norm": 0.885704244639512, "learning_rate": 7.146679835515962e-06, "loss": 0.1446, "step": 12957 }, { "epoch": 0.3780267226792695, "grad_norm": 0.958352530010404, "learning_rate": 7.146253149456806e-06, "loss": 0.1447, "step": 12958 }, { "epoch": 0.37805589590991306, "grad_norm": 0.9641981998357005, "learning_rate": 7.145826444236415e-06, "loss": 0.1344, "step": 12959 }, { "epoch": 0.3780850691405566, "grad_norm": 0.8479666204307287, "learning_rate": 7.1453997198586e-06, "loss": 0.1658, "step": 12960 }, { "epoch": 0.37811424237120017, "grad_norm": 0.7779690532002743, "learning_rate": 7.144972976327164e-06, "loss": 0.1219, "step": 12961 }, { "epoch": 0.3781434156018437, "grad_norm": 1.2539229754779762, "learning_rate": 7.144546213645924e-06, "loss": 0.1535, "step": 12962 }, { "epoch": 0.37817258883248733, "grad_norm": 0.7621838741709405, "learning_rate": 7.144119431818689e-06, "loss": 0.1582, "step": 12963 }, { "epoch": 0.3782017620631309, "grad_norm": 0.735321625004359, "learning_rate": 7.1436926308492645e-06, "loss": 0.1305, "step": 12964 }, { "epoch": 0.37823093529377444, "grad_norm": 0.959420232020522, "learning_rate": 7.1432658107414665e-06, "loss": 0.1298, "step": 12965 }, { "epoch": 0.378260108524418, "grad_norm": 0.7998303281463885, "learning_rate": 7.142838971499101e-06, "loss": 0.1726, "step": 12966 }, { "epoch": 0.37828928175506155, "grad_norm": 0.9765520966833411, "learning_rate": 7.142412113125981e-06, "loss": 0.1813, "step": 12967 }, { "epoch": 0.3783184549857051, "grad_norm": 0.7140966566107453, "learning_rate": 7.141985235625918e-06, "loss": 0.1365, "step": 12968 }, { "epoch": 0.37834762821634865, "grad_norm": 1.017120231765769, "learning_rate": 7.141558339002721e-06, "loss": 0.1651, "step": 12969 }, { "epoch": 0.37837680144699226, "grad_norm": 0.9148143008092731, "learning_rate": 7.141131423260204e-06, "loss": 0.1353, "step": 12970 }, { "epoch": 0.3784059746776358, "grad_norm": 0.7676960095955689, "learning_rate": 7.140704488402175e-06, "loss": 0.1281, "step": 12971 }, { "epoch": 0.37843514790827937, "grad_norm": 0.7908730580557988, "learning_rate": 7.1402775344324485e-06, "loss": 0.1282, "step": 12972 }, { "epoch": 0.3784643211389229, "grad_norm": 1.0511472798732373, "learning_rate": 7.1398505613548345e-06, "loss": 0.1518, "step": 12973 }, { "epoch": 0.3784934943695665, "grad_norm": 0.771793661445887, "learning_rate": 7.1394235691731454e-06, "loss": 0.1521, "step": 12974 }, { "epoch": 0.37852266760021, "grad_norm": 0.6078147772896247, "learning_rate": 7.1389965578911946e-06, "loss": 0.1351, "step": 12975 }, { "epoch": 0.37855184083085364, "grad_norm": 1.0553111873365606, "learning_rate": 7.138569527512791e-06, "loss": 0.1486, "step": 12976 }, { "epoch": 0.3785810140614972, "grad_norm": 0.8961806256233492, "learning_rate": 7.13814247804175e-06, "loss": 0.1513, "step": 12977 }, { "epoch": 0.37861018729214074, "grad_norm": 0.8875527816798312, "learning_rate": 7.137715409481884e-06, "loss": 0.131, "step": 12978 }, { "epoch": 0.3786393605227843, "grad_norm": 0.7366436156084128, "learning_rate": 7.137288321837005e-06, "loss": 0.1421, "step": 12979 }, { "epoch": 0.37866853375342785, "grad_norm": 0.6727231513794392, "learning_rate": 7.136861215110926e-06, "loss": 0.1209, "step": 12980 }, { "epoch": 0.3786977069840714, "grad_norm": 0.8167695900482286, "learning_rate": 7.1364340893074605e-06, "loss": 0.1485, "step": 12981 }, { "epoch": 0.37872688021471496, "grad_norm": 0.7501467648092552, "learning_rate": 7.13600694443042e-06, "loss": 0.1541, "step": 12982 }, { "epoch": 0.37875605344535856, "grad_norm": 0.6557522570342736, "learning_rate": 7.135579780483621e-06, "loss": 0.1574, "step": 12983 }, { "epoch": 0.3787852266760021, "grad_norm": 0.6872754374578743, "learning_rate": 7.1351525974708756e-06, "loss": 0.1316, "step": 12984 }, { "epoch": 0.37881439990664567, "grad_norm": 0.8949837223811141, "learning_rate": 7.134725395395997e-06, "loss": 0.1539, "step": 12985 }, { "epoch": 0.3788435731372892, "grad_norm": 0.7704934776306336, "learning_rate": 7.1342981742627996e-06, "loss": 0.1363, "step": 12986 }, { "epoch": 0.3788727463679328, "grad_norm": 0.6718863266055916, "learning_rate": 7.133870934075098e-06, "loss": 0.1555, "step": 12987 }, { "epoch": 0.37890191959857633, "grad_norm": 0.8177509554857887, "learning_rate": 7.133443674836705e-06, "loss": 0.1389, "step": 12988 }, { "epoch": 0.3789310928292199, "grad_norm": 0.7196452370218708, "learning_rate": 7.133016396551438e-06, "loss": 0.1379, "step": 12989 }, { "epoch": 0.3789602660598635, "grad_norm": 0.9257648968287675, "learning_rate": 7.132589099223108e-06, "loss": 0.1585, "step": 12990 }, { "epoch": 0.37898943929050705, "grad_norm": 0.9373042126222249, "learning_rate": 7.132161782855533e-06, "loss": 0.144, "step": 12991 }, { "epoch": 0.3790186125211506, "grad_norm": 0.9099163048196881, "learning_rate": 7.131734447452525e-06, "loss": 0.1406, "step": 12992 }, { "epoch": 0.37904778575179415, "grad_norm": 0.9153891170724695, "learning_rate": 7.131307093017902e-06, "loss": 0.1458, "step": 12993 }, { "epoch": 0.3790769589824377, "grad_norm": 0.8824702923566206, "learning_rate": 7.130879719555477e-06, "loss": 0.1382, "step": 12994 }, { "epoch": 0.37910613221308126, "grad_norm": 0.7086844386030828, "learning_rate": 7.130452327069068e-06, "loss": 0.1244, "step": 12995 }, { "epoch": 0.3791353054437248, "grad_norm": 0.9866606689997597, "learning_rate": 7.130024915562488e-06, "loss": 0.1161, "step": 12996 }, { "epoch": 0.3791644786743684, "grad_norm": 0.7773186359509144, "learning_rate": 7.129597485039554e-06, "loss": 0.1569, "step": 12997 }, { "epoch": 0.379193651905012, "grad_norm": 0.7173249266886189, "learning_rate": 7.129170035504084e-06, "loss": 0.134, "step": 12998 }, { "epoch": 0.37922282513565553, "grad_norm": 0.7722682844543123, "learning_rate": 7.1287425669598896e-06, "loss": 0.1384, "step": 12999 }, { "epoch": 0.3792519983662991, "grad_norm": 0.7188234623188318, "learning_rate": 7.128315079410792e-06, "loss": 0.123, "step": 13000 }, { "epoch": 0.37928117159694263, "grad_norm": 0.7432564586110186, "learning_rate": 7.1278875728606035e-06, "loss": 0.1349, "step": 13001 }, { "epoch": 0.3793103448275862, "grad_norm": 0.7902680911141535, "learning_rate": 7.127460047313144e-06, "loss": 0.1161, "step": 13002 }, { "epoch": 0.37933951805822974, "grad_norm": 2.027281163123131, "learning_rate": 7.127032502772229e-06, "loss": 0.1601, "step": 13003 }, { "epoch": 0.37936869128887335, "grad_norm": 0.7195166679385525, "learning_rate": 7.1266049392416745e-06, "loss": 0.1426, "step": 13004 }, { "epoch": 0.3793978645195169, "grad_norm": 0.8912337214678676, "learning_rate": 7.126177356725299e-06, "loss": 0.149, "step": 13005 }, { "epoch": 0.37942703775016046, "grad_norm": 0.8818951914893414, "learning_rate": 7.1257497552269205e-06, "loss": 0.138, "step": 13006 }, { "epoch": 0.379456210980804, "grad_norm": 0.8222407696262598, "learning_rate": 7.1253221347503545e-06, "loss": 0.144, "step": 13007 }, { "epoch": 0.37948538421144756, "grad_norm": 0.6831361651877006, "learning_rate": 7.1248944952994204e-06, "loss": 0.1586, "step": 13008 }, { "epoch": 0.3795145574420911, "grad_norm": 0.9650011422116275, "learning_rate": 7.124466836877936e-06, "loss": 0.1533, "step": 13009 }, { "epoch": 0.3795437306727347, "grad_norm": 0.8199863927822962, "learning_rate": 7.12403915948972e-06, "loss": 0.1564, "step": 13010 }, { "epoch": 0.3795729039033783, "grad_norm": 0.7205981841075796, "learning_rate": 7.123611463138585e-06, "loss": 0.1476, "step": 13011 }, { "epoch": 0.37960207713402183, "grad_norm": 0.8891586204009748, "learning_rate": 7.123183747828357e-06, "loss": 0.149, "step": 13012 }, { "epoch": 0.3796312503646654, "grad_norm": 0.7511475967066265, "learning_rate": 7.122756013562853e-06, "loss": 0.1216, "step": 13013 }, { "epoch": 0.37966042359530894, "grad_norm": 0.7172693632880247, "learning_rate": 7.122328260345887e-06, "loss": 0.1317, "step": 13014 }, { "epoch": 0.3796895968259525, "grad_norm": 0.753777054067735, "learning_rate": 7.1219004881812824e-06, "loss": 0.1508, "step": 13015 }, { "epoch": 0.37971877005659604, "grad_norm": 0.7407290835739581, "learning_rate": 7.1214726970728566e-06, "loss": 0.1519, "step": 13016 }, { "epoch": 0.37974794328723965, "grad_norm": 0.8642151428868987, "learning_rate": 7.121044887024428e-06, "loss": 0.1323, "step": 13017 }, { "epoch": 0.3797771165178832, "grad_norm": 0.8638983260524838, "learning_rate": 7.120617058039818e-06, "loss": 0.1499, "step": 13018 }, { "epoch": 0.37980628974852676, "grad_norm": 0.6458164310364573, "learning_rate": 7.120189210122846e-06, "loss": 0.1285, "step": 13019 }, { "epoch": 0.3798354629791703, "grad_norm": 1.307966356872709, "learning_rate": 7.11976134327733e-06, "loss": 0.1395, "step": 13020 }, { "epoch": 0.37986463620981387, "grad_norm": 0.8898874170684433, "learning_rate": 7.119333457507089e-06, "loss": 0.1521, "step": 13021 }, { "epoch": 0.3798938094404574, "grad_norm": 1.0613668946563664, "learning_rate": 7.118905552815946e-06, "loss": 0.1528, "step": 13022 }, { "epoch": 0.379922982671101, "grad_norm": 0.8766091331398129, "learning_rate": 7.118477629207721e-06, "loss": 0.1492, "step": 13023 }, { "epoch": 0.3799521559017446, "grad_norm": 0.8012986552951176, "learning_rate": 7.1180496866862325e-06, "loss": 0.1617, "step": 13024 }, { "epoch": 0.37998132913238813, "grad_norm": 1.0196559398984344, "learning_rate": 7.1176217252553035e-06, "loss": 0.1431, "step": 13025 }, { "epoch": 0.3800105023630317, "grad_norm": 0.7495552245143045, "learning_rate": 7.117193744918751e-06, "loss": 0.1444, "step": 13026 }, { "epoch": 0.38003967559367524, "grad_norm": 0.7002159722388028, "learning_rate": 7.116765745680399e-06, "loss": 0.1675, "step": 13027 }, { "epoch": 0.3800688488243188, "grad_norm": 0.9144575591113243, "learning_rate": 7.116337727544069e-06, "loss": 0.157, "step": 13028 }, { "epoch": 0.38009802205496235, "grad_norm": 0.8715475008276659, "learning_rate": 7.115909690513578e-06, "loss": 0.1237, "step": 13029 }, { "epoch": 0.3801271952856059, "grad_norm": 0.898523550872658, "learning_rate": 7.1154816345927545e-06, "loss": 0.1552, "step": 13030 }, { "epoch": 0.3801563685162495, "grad_norm": 1.0962867135529077, "learning_rate": 7.1150535597854135e-06, "loss": 0.1262, "step": 13031 }, { "epoch": 0.38018554174689306, "grad_norm": 0.821932644955272, "learning_rate": 7.11462546609538e-06, "loss": 0.1273, "step": 13032 }, { "epoch": 0.3802147149775366, "grad_norm": 0.9093602231829235, "learning_rate": 7.114197353526474e-06, "loss": 0.152, "step": 13033 }, { "epoch": 0.38024388820818017, "grad_norm": 0.7960080562623917, "learning_rate": 7.1137692220825196e-06, "loss": 0.1424, "step": 13034 }, { "epoch": 0.3802730614388237, "grad_norm": 0.8044031144666941, "learning_rate": 7.113341071767338e-06, "loss": 0.1409, "step": 13035 }, { "epoch": 0.3803022346694673, "grad_norm": 0.6914698331714102, "learning_rate": 7.112912902584752e-06, "loss": 0.1299, "step": 13036 }, { "epoch": 0.3803314079001109, "grad_norm": 0.7620713834767586, "learning_rate": 7.112484714538584e-06, "loss": 0.1319, "step": 13037 }, { "epoch": 0.38036058113075444, "grad_norm": 0.9237652496892056, "learning_rate": 7.1120565076326565e-06, "loss": 0.1477, "step": 13038 }, { "epoch": 0.380389754361398, "grad_norm": 0.9166525200244503, "learning_rate": 7.1116282818707924e-06, "loss": 0.1348, "step": 13039 }, { "epoch": 0.38041892759204154, "grad_norm": 0.6828208987571173, "learning_rate": 7.111200037256816e-06, "loss": 0.1348, "step": 13040 }, { "epoch": 0.3804481008226851, "grad_norm": 1.1488067358228407, "learning_rate": 7.110771773794548e-06, "loss": 0.1427, "step": 13041 }, { "epoch": 0.38047727405332865, "grad_norm": 0.882183824494278, "learning_rate": 7.110343491487815e-06, "loss": 0.1495, "step": 13042 }, { "epoch": 0.3805064472839722, "grad_norm": 0.8755335334471303, "learning_rate": 7.109915190340439e-06, "loss": 0.1413, "step": 13043 }, { "epoch": 0.3805356205146158, "grad_norm": 0.7890303215104586, "learning_rate": 7.109486870356243e-06, "loss": 0.1446, "step": 13044 }, { "epoch": 0.38056479374525937, "grad_norm": 0.8572201015853493, "learning_rate": 7.1090585315390525e-06, "loss": 0.1505, "step": 13045 }, { "epoch": 0.3805939669759029, "grad_norm": 0.9668789016106413, "learning_rate": 7.108630173892691e-06, "loss": 0.1593, "step": 13046 }, { "epoch": 0.3806231402065465, "grad_norm": 0.7463194331800527, "learning_rate": 7.108201797420983e-06, "loss": 0.1483, "step": 13047 }, { "epoch": 0.38065231343719, "grad_norm": 0.8313397874101502, "learning_rate": 7.107773402127751e-06, "loss": 0.1519, "step": 13048 }, { "epoch": 0.3806814866678336, "grad_norm": 1.0873651459527833, "learning_rate": 7.107344988016822e-06, "loss": 0.1429, "step": 13049 }, { "epoch": 0.38071065989847713, "grad_norm": 0.764576326893396, "learning_rate": 7.1069165550920205e-06, "loss": 0.1449, "step": 13050 }, { "epoch": 0.38073983312912074, "grad_norm": 1.3986039501018053, "learning_rate": 7.106488103357171e-06, "loss": 0.1521, "step": 13051 }, { "epoch": 0.3807690063597643, "grad_norm": 1.060778432603687, "learning_rate": 7.106059632816098e-06, "loss": 0.1485, "step": 13052 }, { "epoch": 0.38079817959040785, "grad_norm": 0.9756185368049597, "learning_rate": 7.105631143472628e-06, "loss": 0.1331, "step": 13053 }, { "epoch": 0.3808273528210514, "grad_norm": 1.0338252019646097, "learning_rate": 7.105202635330586e-06, "loss": 0.1438, "step": 13054 }, { "epoch": 0.38085652605169495, "grad_norm": 0.8616962415237442, "learning_rate": 7.104774108393797e-06, "loss": 0.1547, "step": 13055 }, { "epoch": 0.3808856992823385, "grad_norm": 0.7960991086389061, "learning_rate": 7.104345562666086e-06, "loss": 0.1371, "step": 13056 }, { "epoch": 0.38091487251298206, "grad_norm": 1.0248147656572244, "learning_rate": 7.1039169981512825e-06, "loss": 0.1285, "step": 13057 }, { "epoch": 0.38094404574362567, "grad_norm": 0.9138140422863517, "learning_rate": 7.103488414853209e-06, "loss": 0.1336, "step": 13058 }, { "epoch": 0.3809732189742692, "grad_norm": 0.8605081926102031, "learning_rate": 7.103059812775693e-06, "loss": 0.1356, "step": 13059 }, { "epoch": 0.3810023922049128, "grad_norm": 0.8019811823044132, "learning_rate": 7.102631191922561e-06, "loss": 0.165, "step": 13060 }, { "epoch": 0.38103156543555633, "grad_norm": 0.9293022660798531, "learning_rate": 7.10220255229764e-06, "loss": 0.1649, "step": 13061 }, { "epoch": 0.3810607386661999, "grad_norm": 1.134806044489274, "learning_rate": 7.101773893904756e-06, "loss": 0.1581, "step": 13062 }, { "epoch": 0.38108991189684344, "grad_norm": 0.855958036612699, "learning_rate": 7.101345216747737e-06, "loss": 0.1561, "step": 13063 }, { "epoch": 0.38111908512748705, "grad_norm": 1.1020334433221302, "learning_rate": 7.100916520830409e-06, "loss": 0.1393, "step": 13064 }, { "epoch": 0.3811482583581306, "grad_norm": 0.903005414426244, "learning_rate": 7.1004878061565995e-06, "loss": 0.157, "step": 13065 }, { "epoch": 0.38117743158877415, "grad_norm": 0.7590175862443029, "learning_rate": 7.100059072730136e-06, "loss": 0.1682, "step": 13066 }, { "epoch": 0.3812066048194177, "grad_norm": 0.7066565815456609, "learning_rate": 7.0996303205548486e-06, "loss": 0.1373, "step": 13067 }, { "epoch": 0.38123577805006126, "grad_norm": 0.7771638731156558, "learning_rate": 7.099201549634561e-06, "loss": 0.1505, "step": 13068 }, { "epoch": 0.3812649512807048, "grad_norm": 0.7399993588177523, "learning_rate": 7.098772759973104e-06, "loss": 0.1556, "step": 13069 }, { "epoch": 0.38129412451134836, "grad_norm": 0.693851392144675, "learning_rate": 7.098343951574305e-06, "loss": 0.1546, "step": 13070 }, { "epoch": 0.381323297741992, "grad_norm": 0.697594934361742, "learning_rate": 7.097915124441991e-06, "loss": 0.1422, "step": 13071 }, { "epoch": 0.3813524709726355, "grad_norm": 0.6922999767936167, "learning_rate": 7.097486278579993e-06, "loss": 0.1237, "step": 13072 }, { "epoch": 0.3813816442032791, "grad_norm": 0.9004585109504485, "learning_rate": 7.097057413992136e-06, "loss": 0.1779, "step": 13073 }, { "epoch": 0.38141081743392263, "grad_norm": 0.820602806872622, "learning_rate": 7.096628530682253e-06, "loss": 0.1257, "step": 13074 }, { "epoch": 0.3814399906645662, "grad_norm": 0.7324287376510917, "learning_rate": 7.096199628654171e-06, "loss": 0.1286, "step": 13075 }, { "epoch": 0.38146916389520974, "grad_norm": 0.7892389193274874, "learning_rate": 7.095770707911718e-06, "loss": 0.128, "step": 13076 }, { "epoch": 0.3814983371258533, "grad_norm": 0.9323040225014304, "learning_rate": 7.0953417684587255e-06, "loss": 0.1447, "step": 13077 }, { "epoch": 0.3815275103564969, "grad_norm": 0.7199735658137739, "learning_rate": 7.094912810299021e-06, "loss": 0.1501, "step": 13078 }, { "epoch": 0.38155668358714045, "grad_norm": 1.115182105137788, "learning_rate": 7.094483833436435e-06, "loss": 0.1658, "step": 13079 }, { "epoch": 0.381585856817784, "grad_norm": 0.8367324438425654, "learning_rate": 7.094054837874798e-06, "loss": 0.1457, "step": 13080 }, { "epoch": 0.38161503004842756, "grad_norm": 0.8292608413906478, "learning_rate": 7.093625823617939e-06, "loss": 0.1598, "step": 13081 }, { "epoch": 0.3816442032790711, "grad_norm": 0.9212773809358213, "learning_rate": 7.0931967906696885e-06, "loss": 0.1188, "step": 13082 }, { "epoch": 0.38167337650971467, "grad_norm": 0.6563577012682009, "learning_rate": 7.092767739033877e-06, "loss": 0.1668, "step": 13083 }, { "epoch": 0.3817025497403582, "grad_norm": 0.9401194032639766, "learning_rate": 7.092338668714333e-06, "loss": 0.1635, "step": 13084 }, { "epoch": 0.38173172297100183, "grad_norm": 0.9556742187372048, "learning_rate": 7.0919095797148915e-06, "loss": 0.1319, "step": 13085 }, { "epoch": 0.3817608962016454, "grad_norm": 0.7710741032344843, "learning_rate": 7.091480472039378e-06, "loss": 0.1431, "step": 13086 }, { "epoch": 0.38179006943228894, "grad_norm": 0.7858500377077914, "learning_rate": 7.091051345691628e-06, "loss": 0.1552, "step": 13087 }, { "epoch": 0.3818192426629325, "grad_norm": 1.3675361224142528, "learning_rate": 7.090622200675471e-06, "loss": 0.1448, "step": 13088 }, { "epoch": 0.38184841589357604, "grad_norm": 0.8874926855421642, "learning_rate": 7.090193036994737e-06, "loss": 0.1455, "step": 13089 }, { "epoch": 0.3818775891242196, "grad_norm": 0.9731620384555049, "learning_rate": 7.089763854653259e-06, "loss": 0.1553, "step": 13090 }, { "epoch": 0.3819067623548632, "grad_norm": 1.0635409840006158, "learning_rate": 7.089334653654868e-06, "loss": 0.1485, "step": 13091 }, { "epoch": 0.38193593558550676, "grad_norm": 0.8482488111094789, "learning_rate": 7.088905434003396e-06, "loss": 0.1582, "step": 13092 }, { "epoch": 0.3819651088161503, "grad_norm": 1.218399775578868, "learning_rate": 7.088476195702675e-06, "loss": 0.1472, "step": 13093 }, { "epoch": 0.38199428204679386, "grad_norm": 0.7200220502156465, "learning_rate": 7.088046938756536e-06, "loss": 0.1344, "step": 13094 }, { "epoch": 0.3820234552774374, "grad_norm": 0.8267643247609752, "learning_rate": 7.0876176631688144e-06, "loss": 0.1283, "step": 13095 }, { "epoch": 0.38205262850808097, "grad_norm": 0.7760840832630073, "learning_rate": 7.0871883689433396e-06, "loss": 0.1716, "step": 13096 }, { "epoch": 0.3820818017387245, "grad_norm": 0.5994343031843402, "learning_rate": 7.086759056083945e-06, "loss": 0.141, "step": 13097 }, { "epoch": 0.38211097496936813, "grad_norm": 0.7332000190947274, "learning_rate": 7.086329724594464e-06, "loss": 0.1295, "step": 13098 }, { "epoch": 0.3821401482000117, "grad_norm": 0.7322465399146401, "learning_rate": 7.0859003744787296e-06, "loss": 0.1495, "step": 13099 }, { "epoch": 0.38216932143065524, "grad_norm": 0.7286991271251654, "learning_rate": 7.085471005740575e-06, "loss": 0.1384, "step": 13100 }, { "epoch": 0.3821984946612988, "grad_norm": 0.6617685520670704, "learning_rate": 7.085041618383831e-06, "loss": 0.1475, "step": 13101 }, { "epoch": 0.38222766789194235, "grad_norm": 0.8567712621782794, "learning_rate": 7.084612212412336e-06, "loss": 0.1403, "step": 13102 }, { "epoch": 0.3822568411225859, "grad_norm": 0.8779255613544709, "learning_rate": 7.08418278782992e-06, "loss": 0.1615, "step": 13103 }, { "epoch": 0.38228601435322945, "grad_norm": 0.740055980900757, "learning_rate": 7.083753344640415e-06, "loss": 0.144, "step": 13104 }, { "epoch": 0.38231518758387306, "grad_norm": 0.8713575859818685, "learning_rate": 7.083323882847661e-06, "loss": 0.1327, "step": 13105 }, { "epoch": 0.3823443608145166, "grad_norm": 0.8144970970507229, "learning_rate": 7.082894402455487e-06, "loss": 0.138, "step": 13106 }, { "epoch": 0.38237353404516017, "grad_norm": 0.7131895739917116, "learning_rate": 7.08246490346773e-06, "loss": 0.1428, "step": 13107 }, { "epoch": 0.3824027072758037, "grad_norm": 0.8303449996677388, "learning_rate": 7.082035385888222e-06, "loss": 0.1426, "step": 13108 }, { "epoch": 0.3824318805064473, "grad_norm": 0.8555770600779227, "learning_rate": 7.081605849720799e-06, "loss": 0.1676, "step": 13109 }, { "epoch": 0.38246105373709083, "grad_norm": 0.7928736545267687, "learning_rate": 7.081176294969298e-06, "loss": 0.1326, "step": 13110 }, { "epoch": 0.3824902269677344, "grad_norm": 0.9716808502627015, "learning_rate": 7.08074672163755e-06, "loss": 0.1356, "step": 13111 }, { "epoch": 0.382519400198378, "grad_norm": 0.800261002745498, "learning_rate": 7.080317129729392e-06, "loss": 0.145, "step": 13112 }, { "epoch": 0.38254857342902154, "grad_norm": 0.8046887385196647, "learning_rate": 7.079887519248661e-06, "loss": 0.1428, "step": 13113 }, { "epoch": 0.3825777466596651, "grad_norm": 0.7150547588897157, "learning_rate": 7.079457890199188e-06, "loss": 0.1237, "step": 13114 }, { "epoch": 0.38260691989030865, "grad_norm": 0.6849095150734505, "learning_rate": 7.0790282425848145e-06, "loss": 0.1412, "step": 13115 }, { "epoch": 0.3826360931209522, "grad_norm": 0.9194422123303154, "learning_rate": 7.07859857640937e-06, "loss": 0.1403, "step": 13116 }, { "epoch": 0.38266526635159576, "grad_norm": 0.8675519789641049, "learning_rate": 7.0781688916766965e-06, "loss": 0.1443, "step": 13117 }, { "epoch": 0.3826944395822393, "grad_norm": 0.7194996006589511, "learning_rate": 7.0777391883906265e-06, "loss": 0.1138, "step": 13118 }, { "epoch": 0.3827236128128829, "grad_norm": 0.9397140559046668, "learning_rate": 7.077309466554996e-06, "loss": 0.1591, "step": 13119 }, { "epoch": 0.38275278604352647, "grad_norm": 0.7076092649402206, "learning_rate": 7.076879726173643e-06, "loss": 0.1613, "step": 13120 }, { "epoch": 0.38278195927417, "grad_norm": 0.7465014711959881, "learning_rate": 7.0764499672504035e-06, "loss": 0.1294, "step": 13121 }, { "epoch": 0.3828111325048136, "grad_norm": 0.8638462707328708, "learning_rate": 7.0760201897891145e-06, "loss": 0.1346, "step": 13122 }, { "epoch": 0.38284030573545713, "grad_norm": 0.8553734920619475, "learning_rate": 7.075590393793612e-06, "loss": 0.1529, "step": 13123 }, { "epoch": 0.3828694789661007, "grad_norm": 0.7383771376980681, "learning_rate": 7.075160579267734e-06, "loss": 0.1461, "step": 13124 }, { "epoch": 0.3828986521967443, "grad_norm": 0.6253729740034147, "learning_rate": 7.074730746215319e-06, "loss": 0.1432, "step": 13125 }, { "epoch": 0.38292782542738785, "grad_norm": 0.8786975519092382, "learning_rate": 7.074300894640202e-06, "loss": 0.1548, "step": 13126 }, { "epoch": 0.3829569986580314, "grad_norm": 0.8880575694395363, "learning_rate": 7.073871024546224e-06, "loss": 0.139, "step": 13127 }, { "epoch": 0.38298617188867495, "grad_norm": 0.775338023325945, "learning_rate": 7.073441135937218e-06, "loss": 0.1441, "step": 13128 }, { "epoch": 0.3830153451193185, "grad_norm": 0.7930982428849666, "learning_rate": 7.073011228817026e-06, "loss": 0.1584, "step": 13129 }, { "epoch": 0.38304451834996206, "grad_norm": 0.911810955626411, "learning_rate": 7.072581303189485e-06, "loss": 0.1488, "step": 13130 }, { "epoch": 0.3830736915806056, "grad_norm": 0.9098857458468019, "learning_rate": 7.072151359058431e-06, "loss": 0.1604, "step": 13131 }, { "epoch": 0.3831028648112492, "grad_norm": 0.7879204619318778, "learning_rate": 7.071721396427706e-06, "loss": 0.1378, "step": 13132 }, { "epoch": 0.3831320380418928, "grad_norm": 0.8966769901371093, "learning_rate": 7.071291415301147e-06, "loss": 0.1387, "step": 13133 }, { "epoch": 0.38316121127253633, "grad_norm": 1.0341074388373561, "learning_rate": 7.070861415682591e-06, "loss": 0.1482, "step": 13134 }, { "epoch": 0.3831903845031799, "grad_norm": 1.2304936071236645, "learning_rate": 7.0704313975758795e-06, "loss": 0.1419, "step": 13135 }, { "epoch": 0.38321955773382343, "grad_norm": 1.0014628454773198, "learning_rate": 7.07000136098485e-06, "loss": 0.1381, "step": 13136 }, { "epoch": 0.383248730964467, "grad_norm": 1.0036603629484904, "learning_rate": 7.069571305913344e-06, "loss": 0.1453, "step": 13137 }, { "epoch": 0.38327790419511054, "grad_norm": 1.1866706294109137, "learning_rate": 7.0691412323651985e-06, "loss": 0.1406, "step": 13138 }, { "epoch": 0.38330707742575415, "grad_norm": 0.8985576384858317, "learning_rate": 7.0687111403442545e-06, "loss": 0.1393, "step": 13139 }, { "epoch": 0.3833362506563977, "grad_norm": 0.9242137471340901, "learning_rate": 7.068281029854352e-06, "loss": 0.1672, "step": 13140 }, { "epoch": 0.38336542388704126, "grad_norm": 0.8816153269856454, "learning_rate": 7.067850900899328e-06, "loss": 0.169, "step": 13141 }, { "epoch": 0.3833945971176848, "grad_norm": 0.7916098566601042, "learning_rate": 7.067420753483026e-06, "loss": 0.1299, "step": 13142 }, { "epoch": 0.38342377034832836, "grad_norm": 0.7133509619734466, "learning_rate": 7.066990587609286e-06, "loss": 0.1358, "step": 13143 }, { "epoch": 0.3834529435789719, "grad_norm": 0.8723191442988169, "learning_rate": 7.066560403281946e-06, "loss": 0.1394, "step": 13144 }, { "epoch": 0.38348211680961547, "grad_norm": 0.7490395641106506, "learning_rate": 7.06613020050485e-06, "loss": 0.137, "step": 13145 }, { "epoch": 0.3835112900402591, "grad_norm": 0.8581450018827587, "learning_rate": 7.065699979281834e-06, "loss": 0.1422, "step": 13146 }, { "epoch": 0.38354046327090263, "grad_norm": 0.778933914965665, "learning_rate": 7.065269739616744e-06, "loss": 0.1629, "step": 13147 }, { "epoch": 0.3835696365015462, "grad_norm": 0.8985223450460263, "learning_rate": 7.064839481513417e-06, "loss": 0.1286, "step": 13148 }, { "epoch": 0.38359880973218974, "grad_norm": 0.758957710394968, "learning_rate": 7.064409204975696e-06, "loss": 0.1746, "step": 13149 }, { "epoch": 0.3836279829628333, "grad_norm": 0.7899024126793673, "learning_rate": 7.0639789100074255e-06, "loss": 0.1471, "step": 13150 }, { "epoch": 0.38365715619347684, "grad_norm": 0.6982124085047007, "learning_rate": 7.06354859661244e-06, "loss": 0.1378, "step": 13151 }, { "epoch": 0.38368632942412045, "grad_norm": 0.7941912606468112, "learning_rate": 7.0631182647945884e-06, "loss": 0.1433, "step": 13152 }, { "epoch": 0.383715502654764, "grad_norm": 0.8363233650555554, "learning_rate": 7.062687914557708e-06, "loss": 0.1489, "step": 13153 }, { "epoch": 0.38374467588540756, "grad_norm": 0.829309201774025, "learning_rate": 7.062257545905642e-06, "loss": 0.1674, "step": 13154 }, { "epoch": 0.3837738491160511, "grad_norm": 0.7630820142471877, "learning_rate": 7.061827158842234e-06, "loss": 0.1302, "step": 13155 }, { "epoch": 0.38380302234669467, "grad_norm": 0.7884424514086901, "learning_rate": 7.061396753371323e-06, "loss": 0.1648, "step": 13156 }, { "epoch": 0.3838321955773382, "grad_norm": 0.8154456882331151, "learning_rate": 7.060966329496757e-06, "loss": 0.1626, "step": 13157 }, { "epoch": 0.3838613688079818, "grad_norm": 1.0104256131077718, "learning_rate": 7.060535887222373e-06, "loss": 0.1508, "step": 13158 }, { "epoch": 0.3838905420386254, "grad_norm": 0.7977958642526866, "learning_rate": 7.060105426552018e-06, "loss": 0.1506, "step": 13159 }, { "epoch": 0.38391971526926894, "grad_norm": 1.062430256860251, "learning_rate": 7.0596749474895344e-06, "loss": 0.1234, "step": 13160 }, { "epoch": 0.3839488884999125, "grad_norm": 0.8411417714500453, "learning_rate": 7.059244450038762e-06, "loss": 0.1573, "step": 13161 }, { "epoch": 0.38397806173055604, "grad_norm": 0.8339153474587397, "learning_rate": 7.058813934203549e-06, "loss": 0.1486, "step": 13162 }, { "epoch": 0.3840072349611996, "grad_norm": 0.9894703493526382, "learning_rate": 7.058383399987736e-06, "loss": 0.1361, "step": 13163 }, { "epoch": 0.38403640819184315, "grad_norm": 1.129688469946021, "learning_rate": 7.057952847395166e-06, "loss": 0.1368, "step": 13164 }, { "epoch": 0.3840655814224867, "grad_norm": 0.7600692887581971, "learning_rate": 7.057522276429686e-06, "loss": 0.1551, "step": 13165 }, { "epoch": 0.3840947546531303, "grad_norm": 1.2455831289902193, "learning_rate": 7.057091687095138e-06, "loss": 0.1595, "step": 13166 }, { "epoch": 0.38412392788377386, "grad_norm": 1.0068548520666945, "learning_rate": 7.056661079395366e-06, "loss": 0.1375, "step": 13167 }, { "epoch": 0.3841531011144174, "grad_norm": 0.7662769792406645, "learning_rate": 7.056230453334214e-06, "loss": 0.1468, "step": 13168 }, { "epoch": 0.38418227434506097, "grad_norm": 0.9628656124681166, "learning_rate": 7.055799808915529e-06, "loss": 0.1484, "step": 13169 }, { "epoch": 0.3842114475757045, "grad_norm": 0.8008510300215405, "learning_rate": 7.0553691461431536e-06, "loss": 0.1185, "step": 13170 }, { "epoch": 0.3842406208063481, "grad_norm": 0.7761874409154677, "learning_rate": 7.054938465020933e-06, "loss": 0.1371, "step": 13171 }, { "epoch": 0.38426979403699163, "grad_norm": 2.5130980677053087, "learning_rate": 7.054507765552712e-06, "loss": 0.1527, "step": 13172 }, { "epoch": 0.38429896726763524, "grad_norm": 1.1640321970693506, "learning_rate": 7.054077047742336e-06, "loss": 0.1329, "step": 13173 }, { "epoch": 0.3843281404982788, "grad_norm": 1.0871499764143346, "learning_rate": 7.053646311593651e-06, "loss": 0.1514, "step": 13174 }, { "epoch": 0.38435731372892235, "grad_norm": 0.7590588990257113, "learning_rate": 7.053215557110503e-06, "loss": 0.1531, "step": 13175 }, { "epoch": 0.3843864869595659, "grad_norm": 0.929740989737801, "learning_rate": 7.052784784296735e-06, "loss": 0.1473, "step": 13176 }, { "epoch": 0.38441566019020945, "grad_norm": 1.277307219791055, "learning_rate": 7.052353993156196e-06, "loss": 0.1456, "step": 13177 }, { "epoch": 0.384444833420853, "grad_norm": 0.9094694212534672, "learning_rate": 7.051923183692728e-06, "loss": 0.1575, "step": 13178 }, { "epoch": 0.3844740066514966, "grad_norm": 0.8590938719441122, "learning_rate": 7.0514923559101814e-06, "loss": 0.1427, "step": 13179 }, { "epoch": 0.38450317988214017, "grad_norm": 0.8323348273163871, "learning_rate": 7.0510615098124005e-06, "loss": 0.1211, "step": 13180 }, { "epoch": 0.3845323531127837, "grad_norm": 0.7958251264863333, "learning_rate": 7.0506306454032326e-06, "loss": 0.1386, "step": 13181 }, { "epoch": 0.3845615263434273, "grad_norm": 0.9837312411046082, "learning_rate": 7.050199762686522e-06, "loss": 0.1261, "step": 13182 }, { "epoch": 0.3845906995740708, "grad_norm": 0.7655586884409743, "learning_rate": 7.04976886166612e-06, "loss": 0.1474, "step": 13183 }, { "epoch": 0.3846198728047144, "grad_norm": 0.7334294555695986, "learning_rate": 7.049337942345868e-06, "loss": 0.1354, "step": 13184 }, { "epoch": 0.38464904603535793, "grad_norm": 0.8762714129354264, "learning_rate": 7.048907004729619e-06, "loss": 0.1414, "step": 13185 }, { "epoch": 0.38467821926600154, "grad_norm": 0.7870794442118512, "learning_rate": 7.048476048821215e-06, "loss": 0.146, "step": 13186 }, { "epoch": 0.3847073924966451, "grad_norm": 1.01210760370242, "learning_rate": 7.048045074624508e-06, "loss": 0.1389, "step": 13187 }, { "epoch": 0.38473656572728865, "grad_norm": 0.7542164408527335, "learning_rate": 7.047614082143342e-06, "loss": 0.1475, "step": 13188 }, { "epoch": 0.3847657389579322, "grad_norm": 0.8052815624525067, "learning_rate": 7.047183071381566e-06, "loss": 0.134, "step": 13189 }, { "epoch": 0.38479491218857576, "grad_norm": 0.962643863131615, "learning_rate": 7.046752042343029e-06, "loss": 0.1905, "step": 13190 }, { "epoch": 0.3848240854192193, "grad_norm": 0.9266481751109104, "learning_rate": 7.046320995031578e-06, "loss": 0.1385, "step": 13191 }, { "epoch": 0.38485325864986286, "grad_norm": 0.6338790428294409, "learning_rate": 7.045889929451063e-06, "loss": 0.1332, "step": 13192 }, { "epoch": 0.38488243188050647, "grad_norm": 0.7303123833273253, "learning_rate": 7.045458845605329e-06, "loss": 0.1191, "step": 13193 }, { "epoch": 0.38491160511115, "grad_norm": 0.8025559681918394, "learning_rate": 7.045027743498227e-06, "loss": 0.1405, "step": 13194 }, { "epoch": 0.3849407783417936, "grad_norm": 0.8078721313870865, "learning_rate": 7.044596623133607e-06, "loss": 0.1535, "step": 13195 }, { "epoch": 0.38496995157243713, "grad_norm": 0.8735492269731023, "learning_rate": 7.044165484515315e-06, "loss": 0.1308, "step": 13196 }, { "epoch": 0.3849991248030807, "grad_norm": 0.8228735997223994, "learning_rate": 7.043734327647202e-06, "loss": 0.1489, "step": 13197 }, { "epoch": 0.38502829803372424, "grad_norm": 0.6732863492182765, "learning_rate": 7.043303152533119e-06, "loss": 0.1392, "step": 13198 }, { "epoch": 0.3850574712643678, "grad_norm": 0.8134206062713474, "learning_rate": 7.042871959176909e-06, "loss": 0.2068, "step": 13199 }, { "epoch": 0.3850866444950114, "grad_norm": 0.838629080481506, "learning_rate": 7.0424407475824285e-06, "loss": 0.1358, "step": 13200 }, { "epoch": 0.38511581772565495, "grad_norm": 0.8938669022470852, "learning_rate": 7.042009517753525e-06, "loss": 0.1642, "step": 13201 }, { "epoch": 0.3851449909562985, "grad_norm": 0.8037364863431298, "learning_rate": 7.041578269694047e-06, "loss": 0.13, "step": 13202 }, { "epoch": 0.38517416418694206, "grad_norm": 1.0073783371034495, "learning_rate": 7.041147003407845e-06, "loss": 0.1806, "step": 13203 }, { "epoch": 0.3852033374175856, "grad_norm": 0.8013140474535293, "learning_rate": 7.04071571889877e-06, "loss": 0.1547, "step": 13204 }, { "epoch": 0.38523251064822917, "grad_norm": 0.8313544827264453, "learning_rate": 7.040284416170673e-06, "loss": 0.1378, "step": 13205 }, { "epoch": 0.3852616838788728, "grad_norm": 1.025391914818843, "learning_rate": 7.039853095227404e-06, "loss": 0.1379, "step": 13206 }, { "epoch": 0.3852908571095163, "grad_norm": 0.7223974582346451, "learning_rate": 7.039421756072814e-06, "loss": 0.1182, "step": 13207 }, { "epoch": 0.3853200303401599, "grad_norm": 0.7760834738644458, "learning_rate": 7.038990398710751e-06, "loss": 0.1189, "step": 13208 }, { "epoch": 0.38534920357080343, "grad_norm": 1.148551727361324, "learning_rate": 7.03855902314507e-06, "loss": 0.1506, "step": 13209 }, { "epoch": 0.385378376801447, "grad_norm": 0.7109625283204699, "learning_rate": 7.0381276293796204e-06, "loss": 0.1336, "step": 13210 }, { "epoch": 0.38540755003209054, "grad_norm": 1.0315526502324017, "learning_rate": 7.0376962174182536e-06, "loss": 0.1532, "step": 13211 }, { "epoch": 0.3854367232627341, "grad_norm": 0.7497842649954283, "learning_rate": 7.037264787264823e-06, "loss": 0.1611, "step": 13212 }, { "epoch": 0.3854658964933777, "grad_norm": 1.0430596271386305, "learning_rate": 7.036833338923177e-06, "loss": 0.1495, "step": 13213 }, { "epoch": 0.38549506972402126, "grad_norm": 0.7477285953945649, "learning_rate": 7.03640187239717e-06, "loss": 0.1399, "step": 13214 }, { "epoch": 0.3855242429546648, "grad_norm": 1.1289120089937266, "learning_rate": 7.035970387690652e-06, "loss": 0.1332, "step": 13215 }, { "epoch": 0.38555341618530836, "grad_norm": 0.93720400927447, "learning_rate": 7.035538884807478e-06, "loss": 0.1217, "step": 13216 }, { "epoch": 0.3855825894159519, "grad_norm": 0.7350282797287049, "learning_rate": 7.035107363751499e-06, "loss": 0.1256, "step": 13217 }, { "epoch": 0.38561176264659547, "grad_norm": 1.0483884820992884, "learning_rate": 7.034675824526566e-06, "loss": 0.1359, "step": 13218 }, { "epoch": 0.385640935877239, "grad_norm": 0.9309292583366686, "learning_rate": 7.034244267136533e-06, "loss": 0.121, "step": 13219 }, { "epoch": 0.38567010910788263, "grad_norm": 1.0145449977165903, "learning_rate": 7.033812691585253e-06, "loss": 0.1367, "step": 13220 }, { "epoch": 0.3856992823385262, "grad_norm": 0.8552576548131363, "learning_rate": 7.033381097876578e-06, "loss": 0.1324, "step": 13221 }, { "epoch": 0.38572845556916974, "grad_norm": 1.1621448997547186, "learning_rate": 7.032949486014364e-06, "loss": 0.1327, "step": 13222 }, { "epoch": 0.3857576287998133, "grad_norm": 0.960819941259101, "learning_rate": 7.032517856002461e-06, "loss": 0.1475, "step": 13223 }, { "epoch": 0.38578680203045684, "grad_norm": 0.9398773742574359, "learning_rate": 7.0320862078447235e-06, "loss": 0.1235, "step": 13224 }, { "epoch": 0.3858159752611004, "grad_norm": 0.8729047289873096, "learning_rate": 7.0316545415450065e-06, "loss": 0.1163, "step": 13225 }, { "epoch": 0.38584514849174395, "grad_norm": 1.0931647627080159, "learning_rate": 7.0312228571071614e-06, "loss": 0.1569, "step": 13226 }, { "epoch": 0.38587432172238756, "grad_norm": 1.0550094070111407, "learning_rate": 7.030791154535045e-06, "loss": 0.1408, "step": 13227 }, { "epoch": 0.3859034949530311, "grad_norm": 0.7257333429410203, "learning_rate": 7.03035943383251e-06, "loss": 0.1593, "step": 13228 }, { "epoch": 0.38593266818367467, "grad_norm": 0.959882441307829, "learning_rate": 7.029927695003408e-06, "loss": 0.1429, "step": 13229 }, { "epoch": 0.3859618414143182, "grad_norm": 0.7871133713404713, "learning_rate": 7.029495938051599e-06, "loss": 0.141, "step": 13230 }, { "epoch": 0.38599101464496177, "grad_norm": 0.7285164992562347, "learning_rate": 7.029064162980934e-06, "loss": 0.1525, "step": 13231 }, { "epoch": 0.3860201878756053, "grad_norm": 0.708274471085333, "learning_rate": 7.028632369795267e-06, "loss": 0.1383, "step": 13232 }, { "epoch": 0.3860493611062489, "grad_norm": 0.9021603207471135, "learning_rate": 7.028200558498457e-06, "loss": 0.1407, "step": 13233 }, { "epoch": 0.3860785343368925, "grad_norm": 0.7355522210034191, "learning_rate": 7.0277687290943555e-06, "loss": 0.1314, "step": 13234 }, { "epoch": 0.38610770756753604, "grad_norm": 0.9074463689364065, "learning_rate": 7.027336881586818e-06, "loss": 0.1462, "step": 13235 }, { "epoch": 0.3861368807981796, "grad_norm": 0.704192428681642, "learning_rate": 7.026905015979702e-06, "loss": 0.1215, "step": 13236 }, { "epoch": 0.38616605402882315, "grad_norm": 0.6116731736115757, "learning_rate": 7.026473132276862e-06, "loss": 0.1287, "step": 13237 }, { "epoch": 0.3861952272594667, "grad_norm": 0.9547230536130067, "learning_rate": 7.026041230482152e-06, "loss": 0.145, "step": 13238 }, { "epoch": 0.38622440049011025, "grad_norm": 0.8319729792584203, "learning_rate": 7.02560931059943e-06, "loss": 0.1458, "step": 13239 }, { "epoch": 0.38625357372075386, "grad_norm": 0.7360962858435293, "learning_rate": 7.025177372632554e-06, "loss": 0.1336, "step": 13240 }, { "epoch": 0.3862827469513974, "grad_norm": 0.7169362741111335, "learning_rate": 7.0247454165853746e-06, "loss": 0.1566, "step": 13241 }, { "epoch": 0.38631192018204097, "grad_norm": 0.842506903764658, "learning_rate": 7.024313442461753e-06, "loss": 0.1573, "step": 13242 }, { "epoch": 0.3863410934126845, "grad_norm": 0.7194110795473797, "learning_rate": 7.023881450265544e-06, "loss": 0.144, "step": 13243 }, { "epoch": 0.3863702666433281, "grad_norm": 0.7806393290389207, "learning_rate": 7.023449440000605e-06, "loss": 0.128, "step": 13244 }, { "epoch": 0.38639943987397163, "grad_norm": 1.0850628310588, "learning_rate": 7.023017411670792e-06, "loss": 0.1547, "step": 13245 }, { "epoch": 0.3864286131046152, "grad_norm": 0.7937628761505633, "learning_rate": 7.022585365279963e-06, "loss": 0.1176, "step": 13246 }, { "epoch": 0.3864577863352588, "grad_norm": 1.016898103575818, "learning_rate": 7.022153300831974e-06, "loss": 0.1545, "step": 13247 }, { "epoch": 0.38648695956590234, "grad_norm": 0.8049622544118625, "learning_rate": 7.021721218330684e-06, "loss": 0.1433, "step": 13248 }, { "epoch": 0.3865161327965459, "grad_norm": 0.9263397793203935, "learning_rate": 7.021289117779948e-06, "loss": 0.1645, "step": 13249 }, { "epoch": 0.38654530602718945, "grad_norm": 1.1031347446761475, "learning_rate": 7.020856999183626e-06, "loss": 0.1427, "step": 13250 }, { "epoch": 0.386574479257833, "grad_norm": 1.4868112119703023, "learning_rate": 7.020424862545576e-06, "loss": 0.1485, "step": 13251 }, { "epoch": 0.38660365248847656, "grad_norm": 1.1050750802395453, "learning_rate": 7.019992707869655e-06, "loss": 0.1403, "step": 13252 }, { "epoch": 0.3866328257191201, "grad_norm": 0.9099726322467829, "learning_rate": 7.019560535159719e-06, "loss": 0.1497, "step": 13253 }, { "epoch": 0.3866619989497637, "grad_norm": 1.1939293331015322, "learning_rate": 7.019128344419631e-06, "loss": 0.1517, "step": 13254 }, { "epoch": 0.38669117218040727, "grad_norm": 1.2814198179595297, "learning_rate": 7.018696135653248e-06, "loss": 0.146, "step": 13255 }, { "epoch": 0.3867203454110508, "grad_norm": 0.7766726570343186, "learning_rate": 7.018263908864424e-06, "loss": 0.1398, "step": 13256 }, { "epoch": 0.3867495186416944, "grad_norm": 0.7753074895094298, "learning_rate": 7.017831664057026e-06, "loss": 0.1347, "step": 13257 }, { "epoch": 0.38677869187233793, "grad_norm": 0.8792880022194638, "learning_rate": 7.0173994012349066e-06, "loss": 0.127, "step": 13258 }, { "epoch": 0.3868078651029815, "grad_norm": 0.9984398289008541, "learning_rate": 7.016967120401925e-06, "loss": 0.1777, "step": 13259 }, { "epoch": 0.38683703833362504, "grad_norm": 0.8389228509562606, "learning_rate": 7.016534821561947e-06, "loss": 0.1692, "step": 13260 }, { "epoch": 0.38686621156426865, "grad_norm": 0.8388674127325592, "learning_rate": 7.016102504718824e-06, "loss": 0.1249, "step": 13261 }, { "epoch": 0.3868953847949122, "grad_norm": 0.7382572353971572, "learning_rate": 7.015670169876419e-06, "loss": 0.1502, "step": 13262 }, { "epoch": 0.38692455802555575, "grad_norm": 0.8735915659877994, "learning_rate": 7.015237817038594e-06, "loss": 0.1485, "step": 13263 }, { "epoch": 0.3869537312561993, "grad_norm": 0.9981665930892641, "learning_rate": 7.014805446209205e-06, "loss": 0.1262, "step": 13264 }, { "epoch": 0.38698290448684286, "grad_norm": 0.6620816291523073, "learning_rate": 7.014373057392115e-06, "loss": 0.1389, "step": 13265 }, { "epoch": 0.3870120777174864, "grad_norm": 0.7700687214631495, "learning_rate": 7.013940650591182e-06, "loss": 0.1435, "step": 13266 }, { "epoch": 0.38704125094813, "grad_norm": 1.1417505579337457, "learning_rate": 7.01350822581027e-06, "loss": 0.1269, "step": 13267 }, { "epoch": 0.3870704241787736, "grad_norm": 0.9066213369944394, "learning_rate": 7.013075783053235e-06, "loss": 0.1709, "step": 13268 }, { "epoch": 0.38709959740941713, "grad_norm": 0.6524092610583785, "learning_rate": 7.012643322323941e-06, "loss": 0.132, "step": 13269 }, { "epoch": 0.3871287706400607, "grad_norm": 0.8563818341021124, "learning_rate": 7.012210843626248e-06, "loss": 0.128, "step": 13270 }, { "epoch": 0.38715794387070424, "grad_norm": 0.9548941536063597, "learning_rate": 7.011778346964015e-06, "loss": 0.137, "step": 13271 }, { "epoch": 0.3871871171013478, "grad_norm": 0.9557896221863059, "learning_rate": 7.011345832341109e-06, "loss": 0.1452, "step": 13272 }, { "epoch": 0.38721629033199134, "grad_norm": 0.9963487168990761, "learning_rate": 7.0109132997613845e-06, "loss": 0.1533, "step": 13273 }, { "epoch": 0.38724546356263495, "grad_norm": 0.998368277925134, "learning_rate": 7.010480749228706e-06, "loss": 0.1653, "step": 13274 }, { "epoch": 0.3872746367932785, "grad_norm": 0.8492515173009113, "learning_rate": 7.010048180746938e-06, "loss": 0.1196, "step": 13275 }, { "epoch": 0.38730381002392206, "grad_norm": 0.967085633972716, "learning_rate": 7.009615594319937e-06, "loss": 0.1659, "step": 13276 }, { "epoch": 0.3873329832545656, "grad_norm": 0.8020883773551066, "learning_rate": 7.0091829899515684e-06, "loss": 0.1476, "step": 13277 }, { "epoch": 0.38736215648520916, "grad_norm": 0.9411406020672687, "learning_rate": 7.008750367645694e-06, "loss": 0.1428, "step": 13278 }, { "epoch": 0.3873913297158527, "grad_norm": 0.864130362302704, "learning_rate": 7.008317727406175e-06, "loss": 0.1338, "step": 13279 }, { "epoch": 0.38742050294649627, "grad_norm": 0.7606777040854541, "learning_rate": 7.007885069236876e-06, "loss": 0.1323, "step": 13280 }, { "epoch": 0.3874496761771399, "grad_norm": 0.8536732065673777, "learning_rate": 7.0074523931416585e-06, "loss": 0.157, "step": 13281 }, { "epoch": 0.38747884940778343, "grad_norm": 1.2402798134330753, "learning_rate": 7.007019699124385e-06, "loss": 0.1519, "step": 13282 }, { "epoch": 0.387508022638427, "grad_norm": 0.9339407090209275, "learning_rate": 7.006586987188917e-06, "loss": 0.1507, "step": 13283 }, { "epoch": 0.38753719586907054, "grad_norm": 1.0502150476663425, "learning_rate": 7.006154257339121e-06, "loss": 0.1617, "step": 13284 }, { "epoch": 0.3875663690997141, "grad_norm": 1.1563302839339988, "learning_rate": 7.00572150957886e-06, "loss": 0.1357, "step": 13285 }, { "epoch": 0.38759554233035765, "grad_norm": 0.8922102358620652, "learning_rate": 7.005288743911994e-06, "loss": 0.1296, "step": 13286 }, { "epoch": 0.3876247155610012, "grad_norm": 0.8754802960953985, "learning_rate": 7.004855960342389e-06, "loss": 0.1306, "step": 13287 }, { "epoch": 0.3876538887916448, "grad_norm": 0.9678799900693552, "learning_rate": 7.00442315887391e-06, "loss": 0.1411, "step": 13288 }, { "epoch": 0.38768306202228836, "grad_norm": 1.3212304721783794, "learning_rate": 7.003990339510417e-06, "loss": 0.1213, "step": 13289 }, { "epoch": 0.3877122352529319, "grad_norm": 0.8836057308626918, "learning_rate": 7.003557502255779e-06, "loss": 0.1385, "step": 13290 }, { "epoch": 0.38774140848357547, "grad_norm": 0.9399206699618007, "learning_rate": 7.003124647113857e-06, "loss": 0.1624, "step": 13291 }, { "epoch": 0.387770581714219, "grad_norm": 0.8276290543556397, "learning_rate": 7.002691774088517e-06, "loss": 0.1562, "step": 13292 }, { "epoch": 0.3877997549448626, "grad_norm": 0.9554146122010095, "learning_rate": 7.002258883183621e-06, "loss": 0.1451, "step": 13293 }, { "epoch": 0.3878289281755062, "grad_norm": 0.8170182403350386, "learning_rate": 7.001825974403038e-06, "loss": 0.1402, "step": 13294 }, { "epoch": 0.38785810140614974, "grad_norm": 0.7558694011352974, "learning_rate": 7.001393047750629e-06, "loss": 0.139, "step": 13295 }, { "epoch": 0.3878872746367933, "grad_norm": 0.698040528931186, "learning_rate": 7.000960103230261e-06, "loss": 0.1372, "step": 13296 }, { "epoch": 0.38791644786743684, "grad_norm": 0.931884498583288, "learning_rate": 7.000527140845801e-06, "loss": 0.1233, "step": 13297 }, { "epoch": 0.3879456210980804, "grad_norm": 0.9023811833610386, "learning_rate": 7.000094160601109e-06, "loss": 0.1408, "step": 13298 }, { "epoch": 0.38797479432872395, "grad_norm": 0.8712635751305197, "learning_rate": 6.999661162500056e-06, "loss": 0.1636, "step": 13299 }, { "epoch": 0.3880039675593675, "grad_norm": 0.9558331606284869, "learning_rate": 6.999228146546504e-06, "loss": 0.1234, "step": 13300 }, { "epoch": 0.3880331407900111, "grad_norm": 0.9861588803013244, "learning_rate": 6.998795112744321e-06, "loss": 0.153, "step": 13301 }, { "epoch": 0.38806231402065466, "grad_norm": 1.109512376081367, "learning_rate": 6.9983620610973725e-06, "loss": 0.1598, "step": 13302 }, { "epoch": 0.3880914872512982, "grad_norm": 0.8147009888384774, "learning_rate": 6.997928991609525e-06, "loss": 0.1363, "step": 13303 }, { "epoch": 0.38812066048194177, "grad_norm": 0.9339738820826606, "learning_rate": 6.997495904284643e-06, "loss": 0.1644, "step": 13304 }, { "epoch": 0.3881498337125853, "grad_norm": 0.7825712474625479, "learning_rate": 6.9970627991265964e-06, "loss": 0.1512, "step": 13305 }, { "epoch": 0.3881790069432289, "grad_norm": 0.9293887480382017, "learning_rate": 6.9966296761392485e-06, "loss": 0.1671, "step": 13306 }, { "epoch": 0.38820818017387243, "grad_norm": 0.8516994438558391, "learning_rate": 6.9961965353264675e-06, "loss": 0.1322, "step": 13307 }, { "epoch": 0.38823735340451604, "grad_norm": 0.9549232915876097, "learning_rate": 6.995763376692121e-06, "loss": 0.1544, "step": 13308 }, { "epoch": 0.3882665266351596, "grad_norm": 1.013295450340856, "learning_rate": 6.995330200240075e-06, "loss": 0.1342, "step": 13309 }, { "epoch": 0.38829569986580315, "grad_norm": 0.6954548142395225, "learning_rate": 6.994897005974197e-06, "loss": 0.1104, "step": 13310 }, { "epoch": 0.3883248730964467, "grad_norm": 0.8169893974116368, "learning_rate": 6.9944637938983555e-06, "loss": 0.1254, "step": 13311 }, { "epoch": 0.38835404632709025, "grad_norm": 1.1914272777175088, "learning_rate": 6.994030564016418e-06, "loss": 0.1555, "step": 13312 }, { "epoch": 0.3883832195577338, "grad_norm": 0.8296728806910973, "learning_rate": 6.993597316332249e-06, "loss": 0.154, "step": 13313 }, { "epoch": 0.38841239278837736, "grad_norm": 0.8159415283837939, "learning_rate": 6.9931640508497215e-06, "loss": 0.1387, "step": 13314 }, { "epoch": 0.38844156601902097, "grad_norm": 1.022868637756655, "learning_rate": 6.9927307675727005e-06, "loss": 0.1463, "step": 13315 }, { "epoch": 0.3884707392496645, "grad_norm": 0.9184870012599347, "learning_rate": 6.9922974665050534e-06, "loss": 0.1829, "step": 13316 }, { "epoch": 0.3884999124803081, "grad_norm": 1.0162453871884387, "learning_rate": 6.991864147650653e-06, "loss": 0.1591, "step": 13317 }, { "epoch": 0.3885290857109516, "grad_norm": 0.6958059252750741, "learning_rate": 6.991430811013363e-06, "loss": 0.1392, "step": 13318 }, { "epoch": 0.3885582589415952, "grad_norm": 1.5786897357763856, "learning_rate": 6.990997456597054e-06, "loss": 0.1395, "step": 13319 }, { "epoch": 0.38858743217223873, "grad_norm": 0.7972810695226562, "learning_rate": 6.990564084405595e-06, "loss": 0.149, "step": 13320 }, { "epoch": 0.38861660540288234, "grad_norm": 0.8777016036161785, "learning_rate": 6.990130694442857e-06, "loss": 0.1416, "step": 13321 }, { "epoch": 0.3886457786335259, "grad_norm": 0.8780807240183021, "learning_rate": 6.989697286712705e-06, "loss": 0.1377, "step": 13322 }, { "epoch": 0.38867495186416945, "grad_norm": 0.9176289988609246, "learning_rate": 6.9892638612190125e-06, "loss": 0.1512, "step": 13323 }, { "epoch": 0.388704125094813, "grad_norm": 0.803467416298351, "learning_rate": 6.988830417965645e-06, "loss": 0.1515, "step": 13324 }, { "epoch": 0.38873329832545656, "grad_norm": 0.811414526116803, "learning_rate": 6.988396956956476e-06, "loss": 0.1527, "step": 13325 }, { "epoch": 0.3887624715561001, "grad_norm": 0.725293480837676, "learning_rate": 6.987963478195373e-06, "loss": 0.1594, "step": 13326 }, { "epoch": 0.38879164478674366, "grad_norm": 0.8542884036141146, "learning_rate": 6.9875299816862075e-06, "loss": 0.141, "step": 13327 }, { "epoch": 0.38882081801738727, "grad_norm": 0.9948677236482435, "learning_rate": 6.987096467432847e-06, "loss": 0.1649, "step": 13328 }, { "epoch": 0.3888499912480308, "grad_norm": 0.8379567960194645, "learning_rate": 6.986662935439165e-06, "loss": 0.1173, "step": 13329 }, { "epoch": 0.3888791644786744, "grad_norm": 0.9104299231378886, "learning_rate": 6.98622938570903e-06, "loss": 0.1329, "step": 13330 }, { "epoch": 0.38890833770931793, "grad_norm": 1.2147220613229637, "learning_rate": 6.985795818246313e-06, "loss": 0.134, "step": 13331 }, { "epoch": 0.3889375109399615, "grad_norm": 1.1428833879959506, "learning_rate": 6.985362233054887e-06, "loss": 0.1697, "step": 13332 }, { "epoch": 0.38896668417060504, "grad_norm": 0.7664877068593712, "learning_rate": 6.984928630138619e-06, "loss": 0.1414, "step": 13333 }, { "epoch": 0.3889958574012486, "grad_norm": 0.7097562592324536, "learning_rate": 6.984495009501381e-06, "loss": 0.1476, "step": 13334 }, { "epoch": 0.3890250306318922, "grad_norm": 0.9832394396281896, "learning_rate": 6.984061371147047e-06, "loss": 0.1442, "step": 13335 }, { "epoch": 0.38905420386253575, "grad_norm": 0.8274496434558746, "learning_rate": 6.983627715079487e-06, "loss": 0.1457, "step": 13336 }, { "epoch": 0.3890833770931793, "grad_norm": 0.7244927244327911, "learning_rate": 6.98319404130257e-06, "loss": 0.1492, "step": 13337 }, { "epoch": 0.38911255032382286, "grad_norm": 1.043038759570755, "learning_rate": 6.982760349820172e-06, "loss": 0.1507, "step": 13338 }, { "epoch": 0.3891417235544664, "grad_norm": 0.9942063133788643, "learning_rate": 6.9823266406361625e-06, "loss": 0.1362, "step": 13339 }, { "epoch": 0.38917089678510997, "grad_norm": 0.8422806446955047, "learning_rate": 6.981892913754414e-06, "loss": 0.1769, "step": 13340 }, { "epoch": 0.3892000700157535, "grad_norm": 0.8708336803825205, "learning_rate": 6.981459169178799e-06, "loss": 0.1208, "step": 13341 }, { "epoch": 0.3892292432463971, "grad_norm": 0.82161203254612, "learning_rate": 6.98102540691319e-06, "loss": 0.156, "step": 13342 }, { "epoch": 0.3892584164770407, "grad_norm": 0.8041841095006205, "learning_rate": 6.980591626961457e-06, "loss": 0.1447, "step": 13343 }, { "epoch": 0.38928758970768423, "grad_norm": 0.9053273291678036, "learning_rate": 6.980157829327476e-06, "loss": 0.1366, "step": 13344 }, { "epoch": 0.3893167629383278, "grad_norm": 0.8936512087957569, "learning_rate": 6.979724014015119e-06, "loss": 0.1458, "step": 13345 }, { "epoch": 0.38934593616897134, "grad_norm": 0.9754935926371973, "learning_rate": 6.979290181028258e-06, "loss": 0.144, "step": 13346 }, { "epoch": 0.3893751093996149, "grad_norm": 0.6794009994474512, "learning_rate": 6.978856330370768e-06, "loss": 0.1295, "step": 13347 }, { "epoch": 0.3894042826302585, "grad_norm": 0.7867241800532104, "learning_rate": 6.97842246204652e-06, "loss": 0.1337, "step": 13348 }, { "epoch": 0.38943345586090206, "grad_norm": 0.714191117427281, "learning_rate": 6.977988576059387e-06, "loss": 0.1414, "step": 13349 }, { "epoch": 0.3894626290915456, "grad_norm": 0.8831457005697209, "learning_rate": 6.977554672413247e-06, "loss": 0.1369, "step": 13350 }, { "epoch": 0.38949180232218916, "grad_norm": 0.662207327080798, "learning_rate": 6.97712075111197e-06, "loss": 0.153, "step": 13351 }, { "epoch": 0.3895209755528327, "grad_norm": 0.9824106029973495, "learning_rate": 6.97668681215943e-06, "loss": 0.1417, "step": 13352 }, { "epoch": 0.38955014878347627, "grad_norm": 0.7797362553881462, "learning_rate": 6.976252855559504e-06, "loss": 0.1465, "step": 13353 }, { "epoch": 0.3895793220141198, "grad_norm": 0.7245514585214006, "learning_rate": 6.975818881316062e-06, "loss": 0.161, "step": 13354 }, { "epoch": 0.38960849524476343, "grad_norm": 0.6144009335532287, "learning_rate": 6.975384889432981e-06, "loss": 0.1372, "step": 13355 }, { "epoch": 0.389637668475407, "grad_norm": 0.8314983607142742, "learning_rate": 6.974950879914136e-06, "loss": 0.14, "step": 13356 }, { "epoch": 0.38966684170605054, "grad_norm": 0.7598820440301378, "learning_rate": 6.9745168527634024e-06, "loss": 0.109, "step": 13357 }, { "epoch": 0.3896960149366941, "grad_norm": 0.667888523275534, "learning_rate": 6.974082807984651e-06, "loss": 0.1733, "step": 13358 }, { "epoch": 0.38972518816733764, "grad_norm": 0.7147882986652229, "learning_rate": 6.973648745581761e-06, "loss": 0.1278, "step": 13359 }, { "epoch": 0.3897543613979812, "grad_norm": 0.9553106230970906, "learning_rate": 6.973214665558606e-06, "loss": 0.1422, "step": 13360 }, { "epoch": 0.38978353462862475, "grad_norm": 0.7756088344563122, "learning_rate": 6.972780567919061e-06, "loss": 0.1396, "step": 13361 }, { "epoch": 0.38981270785926836, "grad_norm": 0.847104542280691, "learning_rate": 6.972346452667003e-06, "loss": 0.1432, "step": 13362 }, { "epoch": 0.3898418810899119, "grad_norm": 0.8712932944498577, "learning_rate": 6.971912319806306e-06, "loss": 0.1497, "step": 13363 }, { "epoch": 0.38987105432055547, "grad_norm": 0.6655785423789258, "learning_rate": 6.971478169340846e-06, "loss": 0.1272, "step": 13364 }, { "epoch": 0.389900227551199, "grad_norm": 0.7828140569257632, "learning_rate": 6.971044001274502e-06, "loss": 0.1402, "step": 13365 }, { "epoch": 0.38992940078184257, "grad_norm": 0.888757757668912, "learning_rate": 6.970609815611146e-06, "loss": 0.1665, "step": 13366 }, { "epoch": 0.3899585740124861, "grad_norm": 1.1091736288901854, "learning_rate": 6.970175612354655e-06, "loss": 0.1469, "step": 13367 }, { "epoch": 0.3899877472431297, "grad_norm": 0.873249582296831, "learning_rate": 6.969741391508907e-06, "loss": 0.1707, "step": 13368 }, { "epoch": 0.3900169204737733, "grad_norm": 0.7237473938954652, "learning_rate": 6.969307153077779e-06, "loss": 0.1505, "step": 13369 }, { "epoch": 0.39004609370441684, "grad_norm": 0.8722380633299138, "learning_rate": 6.968872897065147e-06, "loss": 0.1459, "step": 13370 }, { "epoch": 0.3900752669350604, "grad_norm": 0.8690157356299223, "learning_rate": 6.9684386234748866e-06, "loss": 0.141, "step": 13371 }, { "epoch": 0.39010444016570395, "grad_norm": 0.7648898813933638, "learning_rate": 6.968004332310877e-06, "loss": 0.121, "step": 13372 }, { "epoch": 0.3901336133963475, "grad_norm": 0.6912289339659948, "learning_rate": 6.967570023576993e-06, "loss": 0.1603, "step": 13373 }, { "epoch": 0.39016278662699105, "grad_norm": 0.7626884920461074, "learning_rate": 6.967135697277114e-06, "loss": 0.1539, "step": 13374 }, { "epoch": 0.3901919598576346, "grad_norm": 0.9089730229210611, "learning_rate": 6.96670135341512e-06, "loss": 0.125, "step": 13375 }, { "epoch": 0.3902211330882782, "grad_norm": 0.7886555626671125, "learning_rate": 6.966266991994881e-06, "loss": 0.1466, "step": 13376 }, { "epoch": 0.39025030631892177, "grad_norm": 0.980473515537376, "learning_rate": 6.965832613020284e-06, "loss": 0.1729, "step": 13377 }, { "epoch": 0.3902794795495653, "grad_norm": 0.8537764799859051, "learning_rate": 6.9653982164952e-06, "loss": 0.1566, "step": 13378 }, { "epoch": 0.3903086527802089, "grad_norm": 1.0210186184483199, "learning_rate": 6.96496380242351e-06, "loss": 0.1522, "step": 13379 }, { "epoch": 0.39033782601085243, "grad_norm": 0.8263969947570391, "learning_rate": 6.964529370809095e-06, "loss": 0.1384, "step": 13380 }, { "epoch": 0.390366999241496, "grad_norm": 0.9348121446069635, "learning_rate": 6.964094921655828e-06, "loss": 0.1613, "step": 13381 }, { "epoch": 0.3903961724721396, "grad_norm": 0.9743860353075883, "learning_rate": 6.963660454967591e-06, "loss": 0.1304, "step": 13382 }, { "epoch": 0.39042534570278314, "grad_norm": 0.712965059417837, "learning_rate": 6.963225970748262e-06, "loss": 0.1601, "step": 13383 }, { "epoch": 0.3904545189334267, "grad_norm": 0.8278049793772592, "learning_rate": 6.96279146900172e-06, "loss": 0.1621, "step": 13384 }, { "epoch": 0.39048369216407025, "grad_norm": 0.8431031487562697, "learning_rate": 6.962356949731846e-06, "loss": 0.1639, "step": 13385 }, { "epoch": 0.3905128653947138, "grad_norm": 0.8061050375402079, "learning_rate": 6.961922412942517e-06, "loss": 0.1272, "step": 13386 }, { "epoch": 0.39054203862535736, "grad_norm": 0.6606946199278728, "learning_rate": 6.9614878586376125e-06, "loss": 0.1391, "step": 13387 }, { "epoch": 0.3905712118560009, "grad_norm": 0.9617599754709144, "learning_rate": 6.961053286821012e-06, "loss": 0.1427, "step": 13388 }, { "epoch": 0.3906003850866445, "grad_norm": 0.9996814338324447, "learning_rate": 6.960618697496597e-06, "loss": 0.1594, "step": 13389 }, { "epoch": 0.3906295583172881, "grad_norm": 0.7699719287102674, "learning_rate": 6.960184090668245e-06, "loss": 0.1336, "step": 13390 }, { "epoch": 0.3906587315479316, "grad_norm": 0.8229926159042056, "learning_rate": 6.959749466339839e-06, "loss": 0.1605, "step": 13391 }, { "epoch": 0.3906879047785752, "grad_norm": 0.8933858508674338, "learning_rate": 6.959314824515258e-06, "loss": 0.1386, "step": 13392 }, { "epoch": 0.39071707800921873, "grad_norm": 0.7196626303364317, "learning_rate": 6.95888016519838e-06, "loss": 0.137, "step": 13393 }, { "epoch": 0.3907462512398623, "grad_norm": 0.8334043516529545, "learning_rate": 6.958445488393088e-06, "loss": 0.1375, "step": 13394 }, { "epoch": 0.39077542447050584, "grad_norm": 0.7419972030408932, "learning_rate": 6.958010794103263e-06, "loss": 0.1373, "step": 13395 }, { "epoch": 0.39080459770114945, "grad_norm": 0.8216999238492296, "learning_rate": 6.957576082332784e-06, "loss": 0.1451, "step": 13396 }, { "epoch": 0.390833770931793, "grad_norm": 0.7050367296930518, "learning_rate": 6.9571413530855345e-06, "loss": 0.1328, "step": 13397 }, { "epoch": 0.39086294416243655, "grad_norm": 0.8815943297971746, "learning_rate": 6.956706606365393e-06, "loss": 0.1341, "step": 13398 }, { "epoch": 0.3908921173930801, "grad_norm": 0.9252955919633908, "learning_rate": 6.956271842176242e-06, "loss": 0.1364, "step": 13399 }, { "epoch": 0.39092129062372366, "grad_norm": 0.8930728103288988, "learning_rate": 6.9558370605219634e-06, "loss": 0.1286, "step": 13400 }, { "epoch": 0.3909504638543672, "grad_norm": 0.9163816687984387, "learning_rate": 6.955402261406439e-06, "loss": 0.1381, "step": 13401 }, { "epoch": 0.39097963708501077, "grad_norm": 0.7680791483130143, "learning_rate": 6.954967444833549e-06, "loss": 0.1574, "step": 13402 }, { "epoch": 0.3910088103156544, "grad_norm": 1.0069978034553984, "learning_rate": 6.954532610807176e-06, "loss": 0.1443, "step": 13403 }, { "epoch": 0.39103798354629793, "grad_norm": 0.8988432582345715, "learning_rate": 6.954097759331204e-06, "loss": 0.1377, "step": 13404 }, { "epoch": 0.3910671567769415, "grad_norm": 0.7066239225388304, "learning_rate": 6.953662890409512e-06, "loss": 0.155, "step": 13405 }, { "epoch": 0.39109633000758504, "grad_norm": 1.1597307283036862, "learning_rate": 6.9532280040459855e-06, "loss": 0.1424, "step": 13406 }, { "epoch": 0.3911255032382286, "grad_norm": 0.8405187499121104, "learning_rate": 6.952793100244506e-06, "loss": 0.1663, "step": 13407 }, { "epoch": 0.39115467646887214, "grad_norm": 0.7460433118762413, "learning_rate": 6.952358179008954e-06, "loss": 0.1335, "step": 13408 }, { "epoch": 0.39118384969951575, "grad_norm": 1.059038470736294, "learning_rate": 6.951923240343217e-06, "loss": 0.151, "step": 13409 }, { "epoch": 0.3912130229301593, "grad_norm": 0.7679939106601765, "learning_rate": 6.951488284251173e-06, "loss": 0.1437, "step": 13410 }, { "epoch": 0.39124219616080286, "grad_norm": 0.8507353232572379, "learning_rate": 6.9510533107367066e-06, "loss": 0.144, "step": 13411 }, { "epoch": 0.3912713693914464, "grad_norm": 1.205972558740319, "learning_rate": 6.950618319803704e-06, "loss": 0.1485, "step": 13412 }, { "epoch": 0.39130054262208996, "grad_norm": 1.0747486516695628, "learning_rate": 6.950183311456046e-06, "loss": 0.1662, "step": 13413 }, { "epoch": 0.3913297158527335, "grad_norm": 0.8034709340146459, "learning_rate": 6.9497482856976175e-06, "loss": 0.1401, "step": 13414 }, { "epoch": 0.39135888908337707, "grad_norm": 1.2788093437326604, "learning_rate": 6.949313242532301e-06, "loss": 0.1371, "step": 13415 }, { "epoch": 0.3913880623140207, "grad_norm": 0.8976758945386955, "learning_rate": 6.94887818196398e-06, "loss": 0.1456, "step": 13416 }, { "epoch": 0.39141723554466423, "grad_norm": 0.6312697081513999, "learning_rate": 6.948443103996543e-06, "loss": 0.1163, "step": 13417 }, { "epoch": 0.3914464087753078, "grad_norm": 0.775990863607962, "learning_rate": 6.948008008633868e-06, "loss": 0.1408, "step": 13418 }, { "epoch": 0.39147558200595134, "grad_norm": 0.9341190483712725, "learning_rate": 6.947572895879844e-06, "loss": 0.1793, "step": 13419 }, { "epoch": 0.3915047552365949, "grad_norm": 0.6071239175759338, "learning_rate": 6.947137765738354e-06, "loss": 0.1297, "step": 13420 }, { "epoch": 0.39153392846723845, "grad_norm": 0.6793513193876647, "learning_rate": 6.946702618213284e-06, "loss": 0.1071, "step": 13421 }, { "epoch": 0.391563101697882, "grad_norm": 0.7551884994473822, "learning_rate": 6.946267453308518e-06, "loss": 0.1516, "step": 13422 }, { "epoch": 0.3915922749285256, "grad_norm": 0.780841912088871, "learning_rate": 6.945832271027937e-06, "loss": 0.1603, "step": 13423 }, { "epoch": 0.39162144815916916, "grad_norm": 0.7038816360485793, "learning_rate": 6.945397071375433e-06, "loss": 0.1259, "step": 13424 }, { "epoch": 0.3916506213898127, "grad_norm": 0.7328245116045685, "learning_rate": 6.944961854354888e-06, "loss": 0.1238, "step": 13425 }, { "epoch": 0.39167979462045627, "grad_norm": 1.2517861853783918, "learning_rate": 6.944526619970187e-06, "loss": 0.1537, "step": 13426 }, { "epoch": 0.3917089678510998, "grad_norm": 0.743241132460899, "learning_rate": 6.944091368225218e-06, "loss": 0.155, "step": 13427 }, { "epoch": 0.3917381410817434, "grad_norm": 0.6935536329792923, "learning_rate": 6.9436560991238635e-06, "loss": 0.1292, "step": 13428 }, { "epoch": 0.3917673143123869, "grad_norm": 0.7756549210802175, "learning_rate": 6.943220812670013e-06, "loss": 0.1563, "step": 13429 }, { "epoch": 0.39179648754303054, "grad_norm": 0.7414237234309615, "learning_rate": 6.94278550886755e-06, "loss": 0.1534, "step": 13430 }, { "epoch": 0.3918256607736741, "grad_norm": 0.8426514748873748, "learning_rate": 6.942350187720361e-06, "loss": 0.1469, "step": 13431 }, { "epoch": 0.39185483400431764, "grad_norm": 0.7615308497476291, "learning_rate": 6.941914849232336e-06, "loss": 0.1314, "step": 13432 }, { "epoch": 0.3918840072349612, "grad_norm": 0.7505485574867814, "learning_rate": 6.941479493407356e-06, "loss": 0.1398, "step": 13433 }, { "epoch": 0.39191318046560475, "grad_norm": 0.9822685707588608, "learning_rate": 6.9410441202493115e-06, "loss": 0.1497, "step": 13434 }, { "epoch": 0.3919423536962483, "grad_norm": 0.8013111594305649, "learning_rate": 6.940608729762088e-06, "loss": 0.1599, "step": 13435 }, { "epoch": 0.3919715269268919, "grad_norm": 0.7633362671154748, "learning_rate": 6.940173321949574e-06, "loss": 0.1313, "step": 13436 }, { "epoch": 0.39200070015753546, "grad_norm": 0.7355768933169572, "learning_rate": 6.9397378968156555e-06, "loss": 0.1291, "step": 13437 }, { "epoch": 0.392029873388179, "grad_norm": 0.8647746265161046, "learning_rate": 6.9393024543642195e-06, "loss": 0.1692, "step": 13438 }, { "epoch": 0.39205904661882257, "grad_norm": 0.8840514013776822, "learning_rate": 6.938866994599156e-06, "loss": 0.1305, "step": 13439 }, { "epoch": 0.3920882198494661, "grad_norm": 0.8111839257480463, "learning_rate": 6.938431517524349e-06, "loss": 0.1401, "step": 13440 }, { "epoch": 0.3921173930801097, "grad_norm": 1.1505431095945406, "learning_rate": 6.937996023143687e-06, "loss": 0.1444, "step": 13441 }, { "epoch": 0.39214656631075323, "grad_norm": 1.006634496209286, "learning_rate": 6.937560511461062e-06, "loss": 0.1443, "step": 13442 }, { "epoch": 0.39217573954139684, "grad_norm": 0.7314659622389533, "learning_rate": 6.937124982480358e-06, "loss": 0.1428, "step": 13443 }, { "epoch": 0.3922049127720404, "grad_norm": 0.839361978663376, "learning_rate": 6.936689436205464e-06, "loss": 0.1324, "step": 13444 }, { "epoch": 0.39223408600268395, "grad_norm": 0.7894591607237862, "learning_rate": 6.936253872640269e-06, "loss": 0.1252, "step": 13445 }, { "epoch": 0.3922632592333275, "grad_norm": 0.8514628703439183, "learning_rate": 6.935818291788663e-06, "loss": 0.1134, "step": 13446 }, { "epoch": 0.39229243246397105, "grad_norm": 0.7229729683223703, "learning_rate": 6.935382693654532e-06, "loss": 0.1392, "step": 13447 }, { "epoch": 0.3923216056946146, "grad_norm": 0.866114985431533, "learning_rate": 6.934947078241767e-06, "loss": 0.1354, "step": 13448 }, { "epoch": 0.39235077892525816, "grad_norm": 1.0060248452344418, "learning_rate": 6.934511445554257e-06, "loss": 0.1407, "step": 13449 }, { "epoch": 0.39237995215590177, "grad_norm": 0.920134761062441, "learning_rate": 6.934075795595889e-06, "loss": 0.1301, "step": 13450 }, { "epoch": 0.3924091253865453, "grad_norm": 1.0085656115031563, "learning_rate": 6.933640128370556e-06, "loss": 0.1618, "step": 13451 }, { "epoch": 0.3924382986171889, "grad_norm": 0.9004872747952627, "learning_rate": 6.933204443882144e-06, "loss": 0.1413, "step": 13452 }, { "epoch": 0.3924674718478324, "grad_norm": 0.8948913061908531, "learning_rate": 6.932768742134545e-06, "loss": 0.1173, "step": 13453 }, { "epoch": 0.392496645078476, "grad_norm": 1.1737819694857237, "learning_rate": 6.932333023131647e-06, "loss": 0.1371, "step": 13454 }, { "epoch": 0.39252581830911953, "grad_norm": 0.8046936114660276, "learning_rate": 6.9318972868773425e-06, "loss": 0.1329, "step": 13455 }, { "epoch": 0.3925549915397631, "grad_norm": 0.7641640235500119, "learning_rate": 6.931461533375518e-06, "loss": 0.1575, "step": 13456 }, { "epoch": 0.3925841647704067, "grad_norm": 0.8387902511728224, "learning_rate": 6.931025762630069e-06, "loss": 0.1237, "step": 13457 }, { "epoch": 0.39261333800105025, "grad_norm": 0.8401778315870334, "learning_rate": 6.930589974644881e-06, "loss": 0.1409, "step": 13458 }, { "epoch": 0.3926425112316938, "grad_norm": 1.0466468885461653, "learning_rate": 6.930154169423849e-06, "loss": 0.1375, "step": 13459 }, { "epoch": 0.39267168446233736, "grad_norm": 1.0032165642616175, "learning_rate": 6.929718346970858e-06, "loss": 0.1496, "step": 13460 }, { "epoch": 0.3927008576929809, "grad_norm": 1.0900803003505035, "learning_rate": 6.929282507289804e-06, "loss": 0.1302, "step": 13461 }, { "epoch": 0.39273003092362446, "grad_norm": 0.8295190180096297, "learning_rate": 6.928846650384575e-06, "loss": 0.1875, "step": 13462 }, { "epoch": 0.39275920415426807, "grad_norm": 1.060580655896145, "learning_rate": 6.928410776259065e-06, "loss": 0.1562, "step": 13463 }, { "epoch": 0.3927883773849116, "grad_norm": 0.9378510304402614, "learning_rate": 6.927974884917163e-06, "loss": 0.1248, "step": 13464 }, { "epoch": 0.3928175506155552, "grad_norm": 0.7853299827127223, "learning_rate": 6.927538976362762e-06, "loss": 0.1579, "step": 13465 }, { "epoch": 0.39284672384619873, "grad_norm": 1.0478984293700828, "learning_rate": 6.9271030505997535e-06, "loss": 0.1588, "step": 13466 }, { "epoch": 0.3928758970768423, "grad_norm": 1.0617111371064418, "learning_rate": 6.92666710763203e-06, "loss": 0.1735, "step": 13467 }, { "epoch": 0.39290507030748584, "grad_norm": 0.8479044160090914, "learning_rate": 6.926231147463481e-06, "loss": 0.1472, "step": 13468 }, { "epoch": 0.3929342435381294, "grad_norm": 1.0243936652671943, "learning_rate": 6.925795170098e-06, "loss": 0.1569, "step": 13469 }, { "epoch": 0.392963416768773, "grad_norm": 1.0772117900981126, "learning_rate": 6.92535917553948e-06, "loss": 0.1321, "step": 13470 }, { "epoch": 0.39299258999941655, "grad_norm": 0.8288398664135171, "learning_rate": 6.924923163791811e-06, "loss": 0.1268, "step": 13471 }, { "epoch": 0.3930217632300601, "grad_norm": 1.0995106201194746, "learning_rate": 6.92448713485889e-06, "loss": 0.1864, "step": 13472 }, { "epoch": 0.39305093646070366, "grad_norm": 1.0389480358599599, "learning_rate": 6.924051088744606e-06, "loss": 0.1525, "step": 13473 }, { "epoch": 0.3930801096913472, "grad_norm": 0.8380733324749676, "learning_rate": 6.923615025452854e-06, "loss": 0.1595, "step": 13474 }, { "epoch": 0.39310928292199077, "grad_norm": 0.9623144556101351, "learning_rate": 6.923178944987525e-06, "loss": 0.1691, "step": 13475 }, { "epoch": 0.3931384561526343, "grad_norm": 0.9322633572796867, "learning_rate": 6.922742847352515e-06, "loss": 0.1632, "step": 13476 }, { "epoch": 0.39316762938327793, "grad_norm": 0.8082087471222191, "learning_rate": 6.922306732551716e-06, "loss": 0.1625, "step": 13477 }, { "epoch": 0.3931968026139215, "grad_norm": 0.8813435088048738, "learning_rate": 6.92187060058902e-06, "loss": 0.1624, "step": 13478 }, { "epoch": 0.39322597584456503, "grad_norm": 0.7664965159860045, "learning_rate": 6.921434451468323e-06, "loss": 0.1414, "step": 13479 }, { "epoch": 0.3932551490752086, "grad_norm": 0.7388090242828428, "learning_rate": 6.9209982851935165e-06, "loss": 0.1285, "step": 13480 }, { "epoch": 0.39328432230585214, "grad_norm": 0.7635029803592907, "learning_rate": 6.920562101768498e-06, "loss": 0.124, "step": 13481 }, { "epoch": 0.3933134955364957, "grad_norm": 0.6998395776978557, "learning_rate": 6.920125901197159e-06, "loss": 0.1263, "step": 13482 }, { "epoch": 0.39334266876713925, "grad_norm": 0.6459048921764976, "learning_rate": 6.919689683483392e-06, "loss": 0.1464, "step": 13483 }, { "epoch": 0.39337184199778286, "grad_norm": 0.8050426883242157, "learning_rate": 6.919253448631097e-06, "loss": 0.1255, "step": 13484 }, { "epoch": 0.3934010152284264, "grad_norm": 0.7900914807108079, "learning_rate": 6.918817196644163e-06, "loss": 0.1559, "step": 13485 }, { "epoch": 0.39343018845906996, "grad_norm": 0.7080388530461845, "learning_rate": 6.918380927526488e-06, "loss": 0.1344, "step": 13486 }, { "epoch": 0.3934593616897135, "grad_norm": 0.7611724563438523, "learning_rate": 6.917944641281966e-06, "loss": 0.14, "step": 13487 }, { "epoch": 0.39348853492035707, "grad_norm": 0.8121858794534704, "learning_rate": 6.917508337914493e-06, "loss": 0.1348, "step": 13488 }, { "epoch": 0.3935177081510006, "grad_norm": 0.873557338320025, "learning_rate": 6.9170720174279615e-06, "loss": 0.1295, "step": 13489 }, { "epoch": 0.3935468813816442, "grad_norm": 0.6910521841080438, "learning_rate": 6.91663567982627e-06, "loss": 0.154, "step": 13490 }, { "epoch": 0.3935760546122878, "grad_norm": 0.9495453741411941, "learning_rate": 6.9161993251133135e-06, "loss": 0.1504, "step": 13491 }, { "epoch": 0.39360522784293134, "grad_norm": 0.99695846990306, "learning_rate": 6.915762953292985e-06, "loss": 0.1413, "step": 13492 }, { "epoch": 0.3936344010735749, "grad_norm": 0.9585221962636161, "learning_rate": 6.915326564369183e-06, "loss": 0.1543, "step": 13493 }, { "epoch": 0.39366357430421844, "grad_norm": 0.872098656123676, "learning_rate": 6.914890158345802e-06, "loss": 0.143, "step": 13494 }, { "epoch": 0.393692747534862, "grad_norm": 0.9465268504551351, "learning_rate": 6.91445373522674e-06, "loss": 0.139, "step": 13495 }, { "epoch": 0.39372192076550555, "grad_norm": 0.8381989221421502, "learning_rate": 6.91401729501589e-06, "loss": 0.1464, "step": 13496 }, { "epoch": 0.39375109399614916, "grad_norm": 1.0099884439074915, "learning_rate": 6.913580837717153e-06, "loss": 0.1449, "step": 13497 }, { "epoch": 0.3937802672267927, "grad_norm": 0.8391478691306493, "learning_rate": 6.9131443633344205e-06, "loss": 0.1398, "step": 13498 }, { "epoch": 0.39380944045743627, "grad_norm": 0.8009808303418667, "learning_rate": 6.912707871871595e-06, "loss": 0.145, "step": 13499 }, { "epoch": 0.3938386136880798, "grad_norm": 0.8945467445430153, "learning_rate": 6.9122713633325674e-06, "loss": 0.1391, "step": 13500 }, { "epoch": 0.3938677869187234, "grad_norm": 1.0227978992708744, "learning_rate": 6.911834837721239e-06, "loss": 0.1238, "step": 13501 }, { "epoch": 0.3938969601493669, "grad_norm": 0.8293197513229508, "learning_rate": 6.911398295041506e-06, "loss": 0.147, "step": 13502 }, { "epoch": 0.3939261333800105, "grad_norm": 1.8600840272680774, "learning_rate": 6.910961735297265e-06, "loss": 0.1383, "step": 13503 }, { "epoch": 0.3939553066106541, "grad_norm": 1.1716484242902854, "learning_rate": 6.910525158492413e-06, "loss": 0.134, "step": 13504 }, { "epoch": 0.39398447984129764, "grad_norm": 0.8756867062019763, "learning_rate": 6.910088564630848e-06, "loss": 0.1427, "step": 13505 }, { "epoch": 0.3940136530719412, "grad_norm": 1.069655355328582, "learning_rate": 6.909651953716469e-06, "loss": 0.1647, "step": 13506 }, { "epoch": 0.39404282630258475, "grad_norm": 0.9480922403444754, "learning_rate": 6.9092153257531735e-06, "loss": 0.1522, "step": 13507 }, { "epoch": 0.3940719995332283, "grad_norm": 0.8298992396590289, "learning_rate": 6.90877868074486e-06, "loss": 0.1612, "step": 13508 }, { "epoch": 0.39410117276387185, "grad_norm": 0.8143344578505003, "learning_rate": 6.908342018695424e-06, "loss": 0.1712, "step": 13509 }, { "epoch": 0.3941303459945154, "grad_norm": 0.9898841472787201, "learning_rate": 6.907905339608768e-06, "loss": 0.1536, "step": 13510 }, { "epoch": 0.394159519225159, "grad_norm": 0.9673297942709088, "learning_rate": 6.907468643488788e-06, "loss": 0.1527, "step": 13511 }, { "epoch": 0.39418869245580257, "grad_norm": 0.7036049003008338, "learning_rate": 6.907031930339384e-06, "loss": 0.1442, "step": 13512 }, { "epoch": 0.3942178656864461, "grad_norm": 0.9303468860530448, "learning_rate": 6.906595200164452e-06, "loss": 0.1375, "step": 13513 }, { "epoch": 0.3942470389170897, "grad_norm": 0.8843318750722188, "learning_rate": 6.906158452967895e-06, "loss": 0.1303, "step": 13514 }, { "epoch": 0.39427621214773323, "grad_norm": 0.8193348564637531, "learning_rate": 6.905721688753611e-06, "loss": 0.1471, "step": 13515 }, { "epoch": 0.3943053853783768, "grad_norm": 0.8446213699654381, "learning_rate": 6.905284907525496e-06, "loss": 0.1309, "step": 13516 }, { "epoch": 0.39433455860902034, "grad_norm": 0.8891430358286819, "learning_rate": 6.9048481092874545e-06, "loss": 0.1643, "step": 13517 }, { "epoch": 0.39436373183966394, "grad_norm": 0.7375669001339739, "learning_rate": 6.9044112940433825e-06, "loss": 0.1563, "step": 13518 }, { "epoch": 0.3943929050703075, "grad_norm": 0.7680674624667125, "learning_rate": 6.903974461797182e-06, "loss": 0.1512, "step": 13519 }, { "epoch": 0.39442207830095105, "grad_norm": 0.8104362183563902, "learning_rate": 6.903537612552752e-06, "loss": 0.1507, "step": 13520 }, { "epoch": 0.3944512515315946, "grad_norm": 0.7865974776961054, "learning_rate": 6.903100746313992e-06, "loss": 0.1476, "step": 13521 }, { "epoch": 0.39448042476223816, "grad_norm": 0.9050056481214198, "learning_rate": 6.902663863084803e-06, "loss": 0.1507, "step": 13522 }, { "epoch": 0.3945095979928817, "grad_norm": 0.9189139522724291, "learning_rate": 6.902226962869085e-06, "loss": 0.1403, "step": 13523 }, { "epoch": 0.3945387712235253, "grad_norm": 1.0383020721973253, "learning_rate": 6.90179004567074e-06, "loss": 0.1543, "step": 13524 }, { "epoch": 0.3945679444541689, "grad_norm": 0.8674877597307317, "learning_rate": 6.9013531114936664e-06, "loss": 0.1621, "step": 13525 }, { "epoch": 0.3945971176848124, "grad_norm": 0.9702152424831368, "learning_rate": 6.900916160341766e-06, "loss": 0.1304, "step": 13526 }, { "epoch": 0.394626290915456, "grad_norm": 0.8763961594611642, "learning_rate": 6.90047919221894e-06, "loss": 0.1557, "step": 13527 }, { "epoch": 0.39465546414609953, "grad_norm": 0.8700884550279984, "learning_rate": 6.90004220712909e-06, "loss": 0.1639, "step": 13528 }, { "epoch": 0.3946846373767431, "grad_norm": 0.7770719129623878, "learning_rate": 6.899605205076118e-06, "loss": 0.1496, "step": 13529 }, { "epoch": 0.39471381060738664, "grad_norm": 0.9202876285706476, "learning_rate": 6.899168186063922e-06, "loss": 0.1551, "step": 13530 }, { "epoch": 0.39474298383803025, "grad_norm": 0.760391271866602, "learning_rate": 6.898731150096405e-06, "loss": 0.1592, "step": 13531 }, { "epoch": 0.3947721570686738, "grad_norm": 0.6977262503265874, "learning_rate": 6.898294097177472e-06, "loss": 0.1259, "step": 13532 }, { "epoch": 0.39480133029931735, "grad_norm": 0.8164748693757632, "learning_rate": 6.897857027311021e-06, "loss": 0.1295, "step": 13533 }, { "epoch": 0.3948305035299609, "grad_norm": 0.7817836696599647, "learning_rate": 6.897419940500957e-06, "loss": 0.1544, "step": 13534 }, { "epoch": 0.39485967676060446, "grad_norm": 0.8495968672593023, "learning_rate": 6.8969828367511795e-06, "loss": 0.1492, "step": 13535 }, { "epoch": 0.394888849991248, "grad_norm": 0.8039639482440903, "learning_rate": 6.896545716065591e-06, "loss": 0.1255, "step": 13536 }, { "epoch": 0.39491802322189157, "grad_norm": 0.9095502768520879, "learning_rate": 6.896108578448098e-06, "loss": 0.1574, "step": 13537 }, { "epoch": 0.3949471964525352, "grad_norm": 0.8898926453378159, "learning_rate": 6.8956714239025976e-06, "loss": 0.1813, "step": 13538 }, { "epoch": 0.39497636968317873, "grad_norm": 0.9486733008022501, "learning_rate": 6.895234252432996e-06, "loss": 0.146, "step": 13539 }, { "epoch": 0.3950055429138223, "grad_norm": 1.0384175536627858, "learning_rate": 6.894797064043196e-06, "loss": 0.1532, "step": 13540 }, { "epoch": 0.39503471614446584, "grad_norm": 0.7370307247044519, "learning_rate": 6.894359858737099e-06, "loss": 0.1528, "step": 13541 }, { "epoch": 0.3950638893751094, "grad_norm": 0.8463856413184923, "learning_rate": 6.893922636518612e-06, "loss": 0.1347, "step": 13542 }, { "epoch": 0.39509306260575294, "grad_norm": 0.9238022439981455, "learning_rate": 6.893485397391633e-06, "loss": 0.1662, "step": 13543 }, { "epoch": 0.3951222358363965, "grad_norm": 0.7820990008330788, "learning_rate": 6.89304814136007e-06, "loss": 0.1378, "step": 13544 }, { "epoch": 0.3951514090670401, "grad_norm": 1.0888335436583485, "learning_rate": 6.892610868427824e-06, "loss": 0.1963, "step": 13545 }, { "epoch": 0.39518058229768366, "grad_norm": 0.9156504059078454, "learning_rate": 6.8921735785988e-06, "loss": 0.1188, "step": 13546 }, { "epoch": 0.3952097555283272, "grad_norm": 0.8877364499479483, "learning_rate": 6.891736271876903e-06, "loss": 0.1421, "step": 13547 }, { "epoch": 0.39523892875897076, "grad_norm": 0.758630855278139, "learning_rate": 6.8912989482660365e-06, "loss": 0.1265, "step": 13548 }, { "epoch": 0.3952681019896143, "grad_norm": 0.8116769721432958, "learning_rate": 6.890861607770103e-06, "loss": 0.1463, "step": 13549 }, { "epoch": 0.39529727522025787, "grad_norm": 0.7628585711814966, "learning_rate": 6.890424250393009e-06, "loss": 0.1325, "step": 13550 }, { "epoch": 0.3953264484509015, "grad_norm": 0.7730110063315181, "learning_rate": 6.889986876138659e-06, "loss": 0.1433, "step": 13551 }, { "epoch": 0.39535562168154503, "grad_norm": 0.7051345359852466, "learning_rate": 6.889549485010957e-06, "loss": 0.1457, "step": 13552 }, { "epoch": 0.3953847949121886, "grad_norm": 1.1425198602171238, "learning_rate": 6.889112077013808e-06, "loss": 0.1305, "step": 13553 }, { "epoch": 0.39541396814283214, "grad_norm": 0.6616249465596958, "learning_rate": 6.888674652151117e-06, "loss": 0.1214, "step": 13554 }, { "epoch": 0.3954431413734757, "grad_norm": 0.7753742877667316, "learning_rate": 6.88823721042679e-06, "loss": 0.1596, "step": 13555 }, { "epoch": 0.39547231460411925, "grad_norm": 0.9304339527547232, "learning_rate": 6.887799751844732e-06, "loss": 0.1646, "step": 13556 }, { "epoch": 0.3955014878347628, "grad_norm": 0.7840134291357677, "learning_rate": 6.8873622764088495e-06, "loss": 0.1346, "step": 13557 }, { "epoch": 0.3955306610654064, "grad_norm": 0.7153077068367624, "learning_rate": 6.886924784123046e-06, "loss": 0.1479, "step": 13558 }, { "epoch": 0.39555983429604996, "grad_norm": 0.7315921902262976, "learning_rate": 6.8864872749912296e-06, "loss": 0.128, "step": 13559 }, { "epoch": 0.3955890075266935, "grad_norm": 0.8785516750944411, "learning_rate": 6.886049749017304e-06, "loss": 0.1436, "step": 13560 }, { "epoch": 0.39561818075733707, "grad_norm": 0.8587352973909724, "learning_rate": 6.885612206205175e-06, "loss": 0.1454, "step": 13561 }, { "epoch": 0.3956473539879806, "grad_norm": 0.7551958300928429, "learning_rate": 6.885174646558754e-06, "loss": 0.1262, "step": 13562 }, { "epoch": 0.3956765272186242, "grad_norm": 0.6814931549444815, "learning_rate": 6.8847370700819415e-06, "loss": 0.1461, "step": 13563 }, { "epoch": 0.3957057004492677, "grad_norm": 0.7909599958417884, "learning_rate": 6.8842994767786466e-06, "loss": 0.1431, "step": 13564 }, { "epoch": 0.39573487367991134, "grad_norm": 0.7321566528171322, "learning_rate": 6.883861866652776e-06, "loss": 0.1372, "step": 13565 }, { "epoch": 0.3957640469105549, "grad_norm": 0.8094397867003983, "learning_rate": 6.883424239708236e-06, "loss": 0.1453, "step": 13566 }, { "epoch": 0.39579322014119844, "grad_norm": 0.8227919385291982, "learning_rate": 6.882986595948935e-06, "loss": 0.14, "step": 13567 }, { "epoch": 0.395822393371842, "grad_norm": 0.7330943694807599, "learning_rate": 6.882548935378778e-06, "loss": 0.1081, "step": 13568 }, { "epoch": 0.39585156660248555, "grad_norm": 0.9788886462500496, "learning_rate": 6.8821112580016734e-06, "loss": 0.1501, "step": 13569 }, { "epoch": 0.3958807398331291, "grad_norm": 0.85249551709532, "learning_rate": 6.881673563821529e-06, "loss": 0.1696, "step": 13570 }, { "epoch": 0.39590991306377266, "grad_norm": 0.8054367868561896, "learning_rate": 6.881235852842253e-06, "loss": 0.1185, "step": 13571 }, { "epoch": 0.39593908629441626, "grad_norm": 0.9278896903449173, "learning_rate": 6.880798125067752e-06, "loss": 0.1567, "step": 13572 }, { "epoch": 0.3959682595250598, "grad_norm": 0.8227234897426949, "learning_rate": 6.880360380501934e-06, "loss": 0.1617, "step": 13573 }, { "epoch": 0.39599743275570337, "grad_norm": 1.1067236153092375, "learning_rate": 6.879922619148709e-06, "loss": 0.1452, "step": 13574 }, { "epoch": 0.3960266059863469, "grad_norm": 0.8642894651343764, "learning_rate": 6.879484841011981e-06, "loss": 0.1243, "step": 13575 }, { "epoch": 0.3960557792169905, "grad_norm": 0.9586605647173144, "learning_rate": 6.8790470460956625e-06, "loss": 0.1472, "step": 13576 }, { "epoch": 0.39608495244763403, "grad_norm": 0.9086036286416684, "learning_rate": 6.878609234403661e-06, "loss": 0.1367, "step": 13577 }, { "epoch": 0.39611412567827764, "grad_norm": 0.8747271647171905, "learning_rate": 6.878171405939883e-06, "loss": 0.1354, "step": 13578 }, { "epoch": 0.3961432989089212, "grad_norm": 0.8553925512113649, "learning_rate": 6.8777335607082415e-06, "loss": 0.1206, "step": 13579 }, { "epoch": 0.39617247213956475, "grad_norm": 0.7722446172228927, "learning_rate": 6.8772956987126415e-06, "loss": 0.1282, "step": 13580 }, { "epoch": 0.3962016453702083, "grad_norm": 0.9474777884206562, "learning_rate": 6.876857819956993e-06, "loss": 0.1769, "step": 13581 }, { "epoch": 0.39623081860085185, "grad_norm": 0.7971638941395826, "learning_rate": 6.876419924445208e-06, "loss": 0.1277, "step": 13582 }, { "epoch": 0.3962599918314954, "grad_norm": 0.7513084934398179, "learning_rate": 6.875982012181192e-06, "loss": 0.151, "step": 13583 }, { "epoch": 0.39628916506213896, "grad_norm": 0.8124671974017118, "learning_rate": 6.875544083168857e-06, "loss": 0.1452, "step": 13584 }, { "epoch": 0.39631833829278257, "grad_norm": 0.9957779640245273, "learning_rate": 6.875106137412112e-06, "loss": 0.1395, "step": 13585 }, { "epoch": 0.3963475115234261, "grad_norm": 0.8307337501671527, "learning_rate": 6.874668174914867e-06, "loss": 0.1231, "step": 13586 }, { "epoch": 0.3963766847540697, "grad_norm": 0.907476529519294, "learning_rate": 6.874230195681032e-06, "loss": 0.1488, "step": 13587 }, { "epoch": 0.39640585798471323, "grad_norm": 1.0410896304727788, "learning_rate": 6.8737921997145175e-06, "loss": 0.1439, "step": 13588 }, { "epoch": 0.3964350312153568, "grad_norm": 0.9864157463871159, "learning_rate": 6.8733541870192345e-06, "loss": 0.1469, "step": 13589 }, { "epoch": 0.39646420444600033, "grad_norm": 1.1234569276469955, "learning_rate": 6.87291615759909e-06, "loss": 0.1379, "step": 13590 }, { "epoch": 0.3964933776766439, "grad_norm": 1.0268759686645481, "learning_rate": 6.872478111457999e-06, "loss": 0.1488, "step": 13591 }, { "epoch": 0.3965225509072875, "grad_norm": 0.8466032986845579, "learning_rate": 6.8720400485998705e-06, "loss": 0.1311, "step": 13592 }, { "epoch": 0.39655172413793105, "grad_norm": 0.8212839396277573, "learning_rate": 6.871601969028614e-06, "loss": 0.1406, "step": 13593 }, { "epoch": 0.3965808973685746, "grad_norm": 0.80401134328649, "learning_rate": 6.871163872748144e-06, "loss": 0.1567, "step": 13594 }, { "epoch": 0.39661007059921816, "grad_norm": 0.9520256305586496, "learning_rate": 6.870725759762369e-06, "loss": 0.1368, "step": 13595 }, { "epoch": 0.3966392438298617, "grad_norm": 0.7466076877669924, "learning_rate": 6.870287630075198e-06, "loss": 0.1294, "step": 13596 }, { "epoch": 0.39666841706050526, "grad_norm": 0.8538214083824109, "learning_rate": 6.8698494836905494e-06, "loss": 0.1671, "step": 13597 }, { "epoch": 0.3966975902911488, "grad_norm": 0.9462874808801192, "learning_rate": 6.8694113206123305e-06, "loss": 0.1356, "step": 13598 }, { "epoch": 0.3967267635217924, "grad_norm": 0.7981356657539314, "learning_rate": 6.868973140844453e-06, "loss": 0.1427, "step": 13599 }, { "epoch": 0.396755936752436, "grad_norm": 0.8499232209187003, "learning_rate": 6.868534944390828e-06, "loss": 0.1233, "step": 13600 }, { "epoch": 0.39678510998307953, "grad_norm": 0.6653544955477789, "learning_rate": 6.868096731255371e-06, "loss": 0.1829, "step": 13601 }, { "epoch": 0.3968142832137231, "grad_norm": 1.0097988328038208, "learning_rate": 6.867658501441991e-06, "loss": 0.1641, "step": 13602 }, { "epoch": 0.39684345644436664, "grad_norm": 0.8557864152496123, "learning_rate": 6.867220254954602e-06, "loss": 0.1121, "step": 13603 }, { "epoch": 0.3968726296750102, "grad_norm": 0.9184535543711677, "learning_rate": 6.866781991797118e-06, "loss": 0.1562, "step": 13604 }, { "epoch": 0.39690180290565374, "grad_norm": 0.7206300689972719, "learning_rate": 6.866343711973446e-06, "loss": 0.1425, "step": 13605 }, { "epoch": 0.39693097613629735, "grad_norm": 0.9796407534578669, "learning_rate": 6.865905415487506e-06, "loss": 0.1546, "step": 13606 }, { "epoch": 0.3969601493669409, "grad_norm": 1.1280364906262048, "learning_rate": 6.8654671023432085e-06, "loss": 0.1535, "step": 13607 }, { "epoch": 0.39698932259758446, "grad_norm": 0.8940280196917338, "learning_rate": 6.865028772544464e-06, "loss": 0.1372, "step": 13608 }, { "epoch": 0.397018495828228, "grad_norm": 1.0597706161073654, "learning_rate": 6.8645904260951905e-06, "loss": 0.1337, "step": 13609 }, { "epoch": 0.39704766905887157, "grad_norm": 0.7998501927757913, "learning_rate": 6.864152062999297e-06, "loss": 0.1434, "step": 13610 }, { "epoch": 0.3970768422895151, "grad_norm": 0.9850858201656993, "learning_rate": 6.863713683260696e-06, "loss": 0.1447, "step": 13611 }, { "epoch": 0.39710601552015873, "grad_norm": 0.7793562916951422, "learning_rate": 6.863275286883308e-06, "loss": 0.1464, "step": 13612 }, { "epoch": 0.3971351887508023, "grad_norm": 0.7311343531268348, "learning_rate": 6.862836873871043e-06, "loss": 0.1352, "step": 13613 }, { "epoch": 0.39716436198144583, "grad_norm": 0.8864138963850705, "learning_rate": 6.862398444227813e-06, "loss": 0.1684, "step": 13614 }, { "epoch": 0.3971935352120894, "grad_norm": 0.8371271507064795, "learning_rate": 6.861959997957537e-06, "loss": 0.1689, "step": 13615 }, { "epoch": 0.39722270844273294, "grad_norm": 0.759781032712083, "learning_rate": 6.861521535064124e-06, "loss": 0.154, "step": 13616 }, { "epoch": 0.3972518816733765, "grad_norm": 0.814907302855488, "learning_rate": 6.861083055551492e-06, "loss": 0.1781, "step": 13617 }, { "epoch": 0.39728105490402005, "grad_norm": 0.9859344627740455, "learning_rate": 6.860644559423555e-06, "loss": 0.1365, "step": 13618 }, { "epoch": 0.39731022813466366, "grad_norm": 0.5475142826293896, "learning_rate": 6.860206046684229e-06, "loss": 0.1287, "step": 13619 }, { "epoch": 0.3973394013653072, "grad_norm": 1.0550977484094017, "learning_rate": 6.859767517337425e-06, "loss": 0.1729, "step": 13620 }, { "epoch": 0.39736857459595076, "grad_norm": 0.7835031097060281, "learning_rate": 6.859328971387062e-06, "loss": 0.145, "step": 13621 }, { "epoch": 0.3973977478265943, "grad_norm": 0.7424203829276208, "learning_rate": 6.858890408837054e-06, "loss": 0.135, "step": 13622 }, { "epoch": 0.39742692105723787, "grad_norm": 0.7568285103001061, "learning_rate": 6.858451829691314e-06, "loss": 0.1366, "step": 13623 }, { "epoch": 0.3974560942878814, "grad_norm": 0.9075307612597083, "learning_rate": 6.858013233953762e-06, "loss": 0.1382, "step": 13624 }, { "epoch": 0.397485267518525, "grad_norm": 0.8407214729416896, "learning_rate": 6.85757462162831e-06, "loss": 0.1301, "step": 13625 }, { "epoch": 0.3975144407491686, "grad_norm": 0.7890127917039195, "learning_rate": 6.857135992718875e-06, "loss": 0.1441, "step": 13626 }, { "epoch": 0.39754361397981214, "grad_norm": 0.8125349100335384, "learning_rate": 6.856697347229375e-06, "loss": 0.1199, "step": 13627 }, { "epoch": 0.3975727872104557, "grad_norm": 0.9964706228097383, "learning_rate": 6.856258685163724e-06, "loss": 0.1468, "step": 13628 }, { "epoch": 0.39760196044109924, "grad_norm": 0.6975779232087724, "learning_rate": 6.855820006525838e-06, "loss": 0.1579, "step": 13629 }, { "epoch": 0.3976311336717428, "grad_norm": 0.9108399539603893, "learning_rate": 6.855381311319633e-06, "loss": 0.168, "step": 13630 }, { "epoch": 0.39766030690238635, "grad_norm": 1.0386356101843057, "learning_rate": 6.854942599549028e-06, "loss": 0.1458, "step": 13631 }, { "epoch": 0.3976894801330299, "grad_norm": 0.6874329075659427, "learning_rate": 6.854503871217937e-06, "loss": 0.1123, "step": 13632 }, { "epoch": 0.3977186533636735, "grad_norm": 0.8603785550408117, "learning_rate": 6.854065126330279e-06, "loss": 0.149, "step": 13633 }, { "epoch": 0.39774782659431707, "grad_norm": 0.985375576240278, "learning_rate": 6.853626364889972e-06, "loss": 0.1348, "step": 13634 }, { "epoch": 0.3977769998249606, "grad_norm": 0.885232077058918, "learning_rate": 6.853187586900927e-06, "loss": 0.1339, "step": 13635 }, { "epoch": 0.3978061730556042, "grad_norm": 0.8530249718279882, "learning_rate": 6.852748792367069e-06, "loss": 0.1265, "step": 13636 }, { "epoch": 0.3978353462862477, "grad_norm": 0.8670925090412506, "learning_rate": 6.852309981292311e-06, "loss": 0.1629, "step": 13637 }, { "epoch": 0.3978645195168913, "grad_norm": 1.0442530482132857, "learning_rate": 6.851871153680572e-06, "loss": 0.1393, "step": 13638 }, { "epoch": 0.3978936927475349, "grad_norm": 1.0169475190648418, "learning_rate": 6.851432309535769e-06, "loss": 0.1215, "step": 13639 }, { "epoch": 0.39792286597817844, "grad_norm": 0.9061511494073102, "learning_rate": 6.8509934488618205e-06, "loss": 0.1738, "step": 13640 }, { "epoch": 0.397952039208822, "grad_norm": 0.964644869692467, "learning_rate": 6.850554571662643e-06, "loss": 0.1317, "step": 13641 }, { "epoch": 0.39798121243946555, "grad_norm": 1.1840934691961553, "learning_rate": 6.850115677942159e-06, "loss": 0.1435, "step": 13642 }, { "epoch": 0.3980103856701091, "grad_norm": 0.6787396057643946, "learning_rate": 6.8496767677042816e-06, "loss": 0.1275, "step": 13643 }, { "epoch": 0.39803955890075265, "grad_norm": 0.8043946569604296, "learning_rate": 6.849237840952933e-06, "loss": 0.1441, "step": 13644 }, { "epoch": 0.3980687321313962, "grad_norm": 1.013178519533018, "learning_rate": 6.8487988976920286e-06, "loss": 0.1477, "step": 13645 }, { "epoch": 0.3980979053620398, "grad_norm": 0.686504512547161, "learning_rate": 6.84835993792549e-06, "loss": 0.1303, "step": 13646 }, { "epoch": 0.39812707859268337, "grad_norm": 1.4665453513914628, "learning_rate": 6.847920961657235e-06, "loss": 0.1974, "step": 13647 }, { "epoch": 0.3981562518233269, "grad_norm": 0.8053405976829205, "learning_rate": 6.847481968891183e-06, "loss": 0.1443, "step": 13648 }, { "epoch": 0.3981854250539705, "grad_norm": 0.7239652844664418, "learning_rate": 6.847042959631253e-06, "loss": 0.1338, "step": 13649 }, { "epoch": 0.39821459828461403, "grad_norm": 0.8310026643884398, "learning_rate": 6.846603933881364e-06, "loss": 0.1483, "step": 13650 }, { "epoch": 0.3982437715152576, "grad_norm": 0.7626654642274614, "learning_rate": 6.846164891645436e-06, "loss": 0.1319, "step": 13651 }, { "epoch": 0.39827294474590114, "grad_norm": 0.804123077163358, "learning_rate": 6.84572583292739e-06, "loss": 0.1735, "step": 13652 }, { "epoch": 0.39830211797654475, "grad_norm": 0.9864754575496317, "learning_rate": 6.845286757731142e-06, "loss": 0.1313, "step": 13653 }, { "epoch": 0.3983312912071883, "grad_norm": 0.82570891583435, "learning_rate": 6.844847666060617e-06, "loss": 0.1441, "step": 13654 }, { "epoch": 0.39836046443783185, "grad_norm": 0.9266173515462345, "learning_rate": 6.844408557919731e-06, "loss": 0.1235, "step": 13655 }, { "epoch": 0.3983896376684754, "grad_norm": 0.7049451437364153, "learning_rate": 6.843969433312404e-06, "loss": 0.1462, "step": 13656 }, { "epoch": 0.39841881089911896, "grad_norm": 0.761751592673055, "learning_rate": 6.8435302922425606e-06, "loss": 0.1295, "step": 13657 }, { "epoch": 0.3984479841297625, "grad_norm": 0.7271112240939605, "learning_rate": 6.843091134714117e-06, "loss": 0.1522, "step": 13658 }, { "epoch": 0.39847715736040606, "grad_norm": 0.9028992364270692, "learning_rate": 6.842651960730997e-06, "loss": 0.131, "step": 13659 }, { "epoch": 0.3985063305910497, "grad_norm": 0.685376002217138, "learning_rate": 6.842212770297121e-06, "loss": 0.1241, "step": 13660 }, { "epoch": 0.3985355038216932, "grad_norm": 0.7221454328676604, "learning_rate": 6.8417735634164075e-06, "loss": 0.1232, "step": 13661 }, { "epoch": 0.3985646770523368, "grad_norm": 1.018529010873739, "learning_rate": 6.841334340092779e-06, "loss": 0.1513, "step": 13662 }, { "epoch": 0.39859385028298033, "grad_norm": 0.9545248989083854, "learning_rate": 6.840895100330159e-06, "loss": 0.1615, "step": 13663 }, { "epoch": 0.3986230235136239, "grad_norm": 0.7540961625778743, "learning_rate": 6.840455844132465e-06, "loss": 0.1313, "step": 13664 }, { "epoch": 0.39865219674426744, "grad_norm": 0.769118084821783, "learning_rate": 6.840016571503622e-06, "loss": 0.1515, "step": 13665 }, { "epoch": 0.39868136997491105, "grad_norm": 0.8988004881390081, "learning_rate": 6.8395772824475494e-06, "loss": 0.1445, "step": 13666 }, { "epoch": 0.3987105432055546, "grad_norm": 0.9666765933995999, "learning_rate": 6.839137976968171e-06, "loss": 0.1395, "step": 13667 }, { "epoch": 0.39873971643619815, "grad_norm": 0.8060915256768564, "learning_rate": 6.838698655069406e-06, "loss": 0.1276, "step": 13668 }, { "epoch": 0.3987688896668417, "grad_norm": 0.7311542730813468, "learning_rate": 6.83825931675518e-06, "loss": 0.1384, "step": 13669 }, { "epoch": 0.39879806289748526, "grad_norm": 0.836450015113188, "learning_rate": 6.8378199620294126e-06, "loss": 0.1395, "step": 13670 }, { "epoch": 0.3988272361281288, "grad_norm": 0.9349376752476497, "learning_rate": 6.837380590896028e-06, "loss": 0.1684, "step": 13671 }, { "epoch": 0.39885640935877237, "grad_norm": 0.7928181811245086, "learning_rate": 6.836941203358947e-06, "loss": 0.1472, "step": 13672 }, { "epoch": 0.398885582589416, "grad_norm": 0.7648200196571773, "learning_rate": 6.836501799422095e-06, "loss": 0.1435, "step": 13673 }, { "epoch": 0.39891475582005953, "grad_norm": 0.9884217383835263, "learning_rate": 6.836062379089393e-06, "loss": 0.192, "step": 13674 }, { "epoch": 0.3989439290507031, "grad_norm": 0.8091423813958553, "learning_rate": 6.8356229423647636e-06, "loss": 0.1224, "step": 13675 }, { "epoch": 0.39897310228134664, "grad_norm": 0.9697528805961072, "learning_rate": 6.83518348925213e-06, "loss": 0.1444, "step": 13676 }, { "epoch": 0.3990022755119902, "grad_norm": 0.8652562414182313, "learning_rate": 6.834744019755419e-06, "loss": 0.1361, "step": 13677 }, { "epoch": 0.39903144874263374, "grad_norm": 0.899832730824932, "learning_rate": 6.8343045338785495e-06, "loss": 0.1439, "step": 13678 }, { "epoch": 0.3990606219732773, "grad_norm": 1.0012039375355022, "learning_rate": 6.833865031625448e-06, "loss": 0.1555, "step": 13679 }, { "epoch": 0.3990897952039209, "grad_norm": 1.0975808984182382, "learning_rate": 6.833425513000036e-06, "loss": 0.1648, "step": 13680 }, { "epoch": 0.39911896843456446, "grad_norm": 0.6872895853973296, "learning_rate": 6.8329859780062395e-06, "loss": 0.1328, "step": 13681 }, { "epoch": 0.399148141665208, "grad_norm": 0.7616114784834984, "learning_rate": 6.832546426647983e-06, "loss": 0.1425, "step": 13682 }, { "epoch": 0.39917731489585156, "grad_norm": 0.8331362403542113, "learning_rate": 6.832106858929186e-06, "loss": 0.1538, "step": 13683 }, { "epoch": 0.3992064881264951, "grad_norm": 0.6674530606614226, "learning_rate": 6.831667274853779e-06, "loss": 0.1409, "step": 13684 }, { "epoch": 0.39923566135713867, "grad_norm": 0.7260908785878939, "learning_rate": 6.831227674425684e-06, "loss": 0.1325, "step": 13685 }, { "epoch": 0.3992648345877822, "grad_norm": 0.974648249879995, "learning_rate": 6.830788057648824e-06, "loss": 0.1601, "step": 13686 }, { "epoch": 0.39929400781842583, "grad_norm": 0.8437984069947647, "learning_rate": 6.830348424527126e-06, "loss": 0.1522, "step": 13687 }, { "epoch": 0.3993231810490694, "grad_norm": 0.7086230213971858, "learning_rate": 6.829908775064514e-06, "loss": 0.1467, "step": 13688 }, { "epoch": 0.39935235427971294, "grad_norm": 0.9228994496897659, "learning_rate": 6.829469109264915e-06, "loss": 0.1451, "step": 13689 }, { "epoch": 0.3993815275103565, "grad_norm": 0.9013430047366084, "learning_rate": 6.82902942713225e-06, "loss": 0.1356, "step": 13690 }, { "epoch": 0.39941070074100005, "grad_norm": 0.7251464735188569, "learning_rate": 6.828589728670447e-06, "loss": 0.1452, "step": 13691 }, { "epoch": 0.3994398739716436, "grad_norm": 0.7455558487477596, "learning_rate": 6.828150013883433e-06, "loss": 0.1348, "step": 13692 }, { "epoch": 0.3994690472022872, "grad_norm": 0.9193250319521461, "learning_rate": 6.8277102827751305e-06, "loss": 0.1729, "step": 13693 }, { "epoch": 0.39949822043293076, "grad_norm": 0.9482756342979466, "learning_rate": 6.827270535349469e-06, "loss": 0.1405, "step": 13694 }, { "epoch": 0.3995273936635743, "grad_norm": 0.9751141984532288, "learning_rate": 6.826830771610371e-06, "loss": 0.143, "step": 13695 }, { "epoch": 0.39955656689421787, "grad_norm": 1.044181444455976, "learning_rate": 6.8263909915617646e-06, "loss": 0.141, "step": 13696 }, { "epoch": 0.3995857401248614, "grad_norm": 0.8687203231252904, "learning_rate": 6.825951195207575e-06, "loss": 0.1363, "step": 13697 }, { "epoch": 0.399614913355505, "grad_norm": 0.8530636291008991, "learning_rate": 6.825511382551729e-06, "loss": 0.1484, "step": 13698 }, { "epoch": 0.39964408658614853, "grad_norm": 0.9079172967032355, "learning_rate": 6.825071553598152e-06, "loss": 0.1423, "step": 13699 }, { "epoch": 0.39967325981679214, "grad_norm": 0.8637818657784768, "learning_rate": 6.824631708350774e-06, "loss": 0.1404, "step": 13700 }, { "epoch": 0.3997024330474357, "grad_norm": 0.9649551259914709, "learning_rate": 6.824191846813517e-06, "loss": 0.1404, "step": 13701 }, { "epoch": 0.39973160627807924, "grad_norm": 0.9593491688199949, "learning_rate": 6.8237519689903145e-06, "loss": 0.1235, "step": 13702 }, { "epoch": 0.3997607795087228, "grad_norm": 1.0040722232054902, "learning_rate": 6.823312074885087e-06, "loss": 0.1352, "step": 13703 }, { "epoch": 0.39978995273936635, "grad_norm": 0.8886624316938437, "learning_rate": 6.822872164501765e-06, "loss": 0.1529, "step": 13704 }, { "epoch": 0.3998191259700099, "grad_norm": 0.7400950797262666, "learning_rate": 6.822432237844275e-06, "loss": 0.1644, "step": 13705 }, { "epoch": 0.39984829920065346, "grad_norm": 0.8593681653423954, "learning_rate": 6.821992294916546e-06, "loss": 0.1345, "step": 13706 }, { "epoch": 0.39987747243129707, "grad_norm": 1.0256629022542472, "learning_rate": 6.821552335722504e-06, "loss": 0.1315, "step": 13707 }, { "epoch": 0.3999066456619406, "grad_norm": 0.7381785662894601, "learning_rate": 6.821112360266079e-06, "loss": 0.1226, "step": 13708 }, { "epoch": 0.39993581889258417, "grad_norm": 1.0433674533863682, "learning_rate": 6.820672368551198e-06, "loss": 0.1328, "step": 13709 }, { "epoch": 0.3999649921232277, "grad_norm": 0.7546346570667153, "learning_rate": 6.8202323605817854e-06, "loss": 0.1311, "step": 13710 }, { "epoch": 0.3999941653538713, "grad_norm": 1.12112052236558, "learning_rate": 6.819792336361775e-06, "loss": 0.1355, "step": 13711 }, { "epoch": 0.40002333858451483, "grad_norm": 0.697245421308078, "learning_rate": 6.819352295895093e-06, "loss": 0.1349, "step": 13712 }, { "epoch": 0.4000525118151584, "grad_norm": 0.8054019674828651, "learning_rate": 6.818912239185666e-06, "loss": 0.1358, "step": 13713 }, { "epoch": 0.400081685045802, "grad_norm": 0.6075957489490795, "learning_rate": 6.8184721662374285e-06, "loss": 0.1464, "step": 13714 }, { "epoch": 0.40011085827644555, "grad_norm": 0.7445938503867925, "learning_rate": 6.818032077054304e-06, "loss": 0.1593, "step": 13715 }, { "epoch": 0.4001400315070891, "grad_norm": 0.628204010319937, "learning_rate": 6.817591971640221e-06, "loss": 0.1461, "step": 13716 }, { "epoch": 0.40016920473773265, "grad_norm": 0.733320092149337, "learning_rate": 6.817151849999114e-06, "loss": 0.1495, "step": 13717 }, { "epoch": 0.4001983779683762, "grad_norm": 0.8620401004457822, "learning_rate": 6.8167117121349065e-06, "loss": 0.1189, "step": 13718 }, { "epoch": 0.40022755119901976, "grad_norm": 0.699680097500003, "learning_rate": 6.8162715580515324e-06, "loss": 0.1396, "step": 13719 }, { "epoch": 0.40025672442966337, "grad_norm": 0.650314855260613, "learning_rate": 6.815831387752918e-06, "loss": 0.1318, "step": 13720 }, { "epoch": 0.4002858976603069, "grad_norm": 0.7255972396355026, "learning_rate": 6.815391201242996e-06, "loss": 0.128, "step": 13721 }, { "epoch": 0.4003150708909505, "grad_norm": 0.9121216969140034, "learning_rate": 6.8149509985256935e-06, "loss": 0.1406, "step": 13722 }, { "epoch": 0.40034424412159403, "grad_norm": 0.7030736458426388, "learning_rate": 6.814510779604942e-06, "loss": 0.1463, "step": 13723 }, { "epoch": 0.4003734173522376, "grad_norm": 0.8533675344816903, "learning_rate": 6.814070544484672e-06, "loss": 0.1815, "step": 13724 }, { "epoch": 0.40040259058288113, "grad_norm": 0.8935322043974434, "learning_rate": 6.813630293168811e-06, "loss": 0.1594, "step": 13725 }, { "epoch": 0.4004317638135247, "grad_norm": 0.6425807864399719, "learning_rate": 6.813190025661294e-06, "loss": 0.1313, "step": 13726 }, { "epoch": 0.4004609370441683, "grad_norm": 1.0053122257531732, "learning_rate": 6.8127497419660495e-06, "loss": 0.1411, "step": 13727 }, { "epoch": 0.40049011027481185, "grad_norm": 0.7250950628887969, "learning_rate": 6.8123094420870065e-06, "loss": 0.1133, "step": 13728 }, { "epoch": 0.4005192835054554, "grad_norm": 1.0213365217185995, "learning_rate": 6.811869126028099e-06, "loss": 0.1864, "step": 13729 }, { "epoch": 0.40054845673609896, "grad_norm": 0.6960357927011381, "learning_rate": 6.811428793793255e-06, "loss": 0.1475, "step": 13730 }, { "epoch": 0.4005776299667425, "grad_norm": 0.7879005124766262, "learning_rate": 6.810988445386406e-06, "loss": 0.1352, "step": 13731 }, { "epoch": 0.40060680319738606, "grad_norm": 0.8364188457649364, "learning_rate": 6.810548080811487e-06, "loss": 0.1369, "step": 13732 }, { "epoch": 0.4006359764280296, "grad_norm": 0.8757253023910055, "learning_rate": 6.810107700072427e-06, "loss": 0.1345, "step": 13733 }, { "epoch": 0.4006651496586732, "grad_norm": 0.6263167723830406, "learning_rate": 6.809667303173156e-06, "loss": 0.1249, "step": 13734 }, { "epoch": 0.4006943228893168, "grad_norm": 0.7551049383541194, "learning_rate": 6.809226890117609e-06, "loss": 0.1325, "step": 13735 }, { "epoch": 0.40072349611996033, "grad_norm": 0.7788834650234634, "learning_rate": 6.8087864609097154e-06, "loss": 0.1575, "step": 13736 }, { "epoch": 0.4007526693506039, "grad_norm": 0.8587180472217778, "learning_rate": 6.8083460155534075e-06, "loss": 0.1224, "step": 13737 }, { "epoch": 0.40078184258124744, "grad_norm": 1.0791596637441496, "learning_rate": 6.807905554052619e-06, "loss": 0.1848, "step": 13738 }, { "epoch": 0.400811015811891, "grad_norm": 0.8389357021892137, "learning_rate": 6.8074650764112815e-06, "loss": 0.1581, "step": 13739 }, { "epoch": 0.40084018904253454, "grad_norm": 0.7995316925510891, "learning_rate": 6.807024582633325e-06, "loss": 0.1397, "step": 13740 }, { "epoch": 0.40086936227317815, "grad_norm": 0.7666141779090255, "learning_rate": 6.806584072722686e-06, "loss": 0.1596, "step": 13741 }, { "epoch": 0.4008985355038217, "grad_norm": 0.7958196756378516, "learning_rate": 6.806143546683297e-06, "loss": 0.1502, "step": 13742 }, { "epoch": 0.40092770873446526, "grad_norm": 0.8472601015465253, "learning_rate": 6.8057030045190866e-06, "loss": 0.1631, "step": 13743 }, { "epoch": 0.4009568819651088, "grad_norm": 0.7462214077864776, "learning_rate": 6.805262446233993e-06, "loss": 0.1466, "step": 13744 }, { "epoch": 0.40098605519575237, "grad_norm": 0.7412816926561623, "learning_rate": 6.804821871831947e-06, "loss": 0.1179, "step": 13745 }, { "epoch": 0.4010152284263959, "grad_norm": 0.7237460861176316, "learning_rate": 6.804381281316881e-06, "loss": 0.1392, "step": 13746 }, { "epoch": 0.4010444016570395, "grad_norm": 0.7882012158566257, "learning_rate": 6.803940674692732e-06, "loss": 0.1333, "step": 13747 }, { "epoch": 0.4010735748876831, "grad_norm": 0.7731565601826743, "learning_rate": 6.80350005196343e-06, "loss": 0.1558, "step": 13748 }, { "epoch": 0.40110274811832664, "grad_norm": 0.7430322833263876, "learning_rate": 6.803059413132909e-06, "loss": 0.1547, "step": 13749 }, { "epoch": 0.4011319213489702, "grad_norm": 0.8498513542633379, "learning_rate": 6.802618758205105e-06, "loss": 0.1714, "step": 13750 }, { "epoch": 0.40116109457961374, "grad_norm": 1.0071163344423861, "learning_rate": 6.802178087183951e-06, "loss": 0.1469, "step": 13751 }, { "epoch": 0.4011902678102573, "grad_norm": 0.9074375671638603, "learning_rate": 6.801737400073381e-06, "loss": 0.1431, "step": 13752 }, { "epoch": 0.40121944104090085, "grad_norm": 0.8852197129256496, "learning_rate": 6.80129669687733e-06, "loss": 0.1714, "step": 13753 }, { "epoch": 0.40124861427154446, "grad_norm": 0.6976559907887856, "learning_rate": 6.800855977599732e-06, "loss": 0.1385, "step": 13754 }, { "epoch": 0.401277787502188, "grad_norm": 0.879096578156981, "learning_rate": 6.80041524224452e-06, "loss": 0.1416, "step": 13755 }, { "epoch": 0.40130696073283156, "grad_norm": 0.7184119635969515, "learning_rate": 6.799974490815633e-06, "loss": 0.1414, "step": 13756 }, { "epoch": 0.4013361339634751, "grad_norm": 0.8268474876082909, "learning_rate": 6.799533723317003e-06, "loss": 0.1153, "step": 13757 }, { "epoch": 0.40136530719411867, "grad_norm": 0.9037195838981106, "learning_rate": 6.799092939752564e-06, "loss": 0.1534, "step": 13758 }, { "epoch": 0.4013944804247622, "grad_norm": 0.7823091877787984, "learning_rate": 6.798652140126255e-06, "loss": 0.1586, "step": 13759 }, { "epoch": 0.4014236536554058, "grad_norm": 0.9153302275706843, "learning_rate": 6.798211324442008e-06, "loss": 0.1513, "step": 13760 }, { "epoch": 0.4014528268860494, "grad_norm": 0.7831550550923494, "learning_rate": 6.7977704927037595e-06, "loss": 0.1422, "step": 13761 }, { "epoch": 0.40148200011669294, "grad_norm": 0.7939853484381711, "learning_rate": 6.797329644915445e-06, "loss": 0.1663, "step": 13762 }, { "epoch": 0.4015111733473365, "grad_norm": 0.5919398204852676, "learning_rate": 6.796888781081e-06, "loss": 0.1304, "step": 13763 }, { "epoch": 0.40154034657798005, "grad_norm": 0.8825133697731564, "learning_rate": 6.796447901204362e-06, "loss": 0.1343, "step": 13764 }, { "epoch": 0.4015695198086236, "grad_norm": 0.8454693897606342, "learning_rate": 6.796007005289465e-06, "loss": 0.1412, "step": 13765 }, { "epoch": 0.40159869303926715, "grad_norm": 0.9068838026048358, "learning_rate": 6.795566093340247e-06, "loss": 0.1245, "step": 13766 }, { "epoch": 0.4016278662699107, "grad_norm": 0.8999832981873501, "learning_rate": 6.795125165360643e-06, "loss": 0.1392, "step": 13767 }, { "epoch": 0.4016570395005543, "grad_norm": 0.8406459172297751, "learning_rate": 6.79468422135459e-06, "loss": 0.1506, "step": 13768 }, { "epoch": 0.40168621273119787, "grad_norm": 0.8693911267360379, "learning_rate": 6.794243261326025e-06, "loss": 0.1271, "step": 13769 }, { "epoch": 0.4017153859618414, "grad_norm": 0.8783531792446729, "learning_rate": 6.7938022852788845e-06, "loss": 0.1581, "step": 13770 }, { "epoch": 0.401744559192485, "grad_norm": 0.9904763397049472, "learning_rate": 6.793361293217105e-06, "loss": 0.1473, "step": 13771 }, { "epoch": 0.4017737324231285, "grad_norm": 0.8116511921077783, "learning_rate": 6.792920285144624e-06, "loss": 0.1223, "step": 13772 }, { "epoch": 0.4018029056537721, "grad_norm": 1.127403999058117, "learning_rate": 6.792479261065379e-06, "loss": 0.1488, "step": 13773 }, { "epoch": 0.40183207888441563, "grad_norm": 0.7887529480367309, "learning_rate": 6.792038220983308e-06, "loss": 0.1434, "step": 13774 }, { "epoch": 0.40186125211505924, "grad_norm": 0.8417388114250335, "learning_rate": 6.791597164902346e-06, "loss": 0.1776, "step": 13775 }, { "epoch": 0.4018904253457028, "grad_norm": 0.9964791878290662, "learning_rate": 6.791156092826434e-06, "loss": 0.1535, "step": 13776 }, { "epoch": 0.40191959857634635, "grad_norm": 0.8273039584371675, "learning_rate": 6.790715004759506e-06, "loss": 0.1346, "step": 13777 }, { "epoch": 0.4019487718069899, "grad_norm": 0.658956263251214, "learning_rate": 6.790273900705502e-06, "loss": 0.1111, "step": 13778 }, { "epoch": 0.40197794503763346, "grad_norm": 0.9148364028349939, "learning_rate": 6.789832780668362e-06, "loss": 0.1333, "step": 13779 }, { "epoch": 0.402007118268277, "grad_norm": 0.976282860395019, "learning_rate": 6.78939164465202e-06, "loss": 0.1378, "step": 13780 }, { "epoch": 0.4020362914989206, "grad_norm": 0.8420970097414102, "learning_rate": 6.788950492660417e-06, "loss": 0.1425, "step": 13781 }, { "epoch": 0.40206546472956417, "grad_norm": 0.8257070823340206, "learning_rate": 6.788509324697492e-06, "loss": 0.1568, "step": 13782 }, { "epoch": 0.4020946379602077, "grad_norm": 0.9999952522008241, "learning_rate": 6.7880681407671835e-06, "loss": 0.1807, "step": 13783 }, { "epoch": 0.4021238111908513, "grad_norm": 1.4350027163849897, "learning_rate": 6.787626940873427e-06, "loss": 0.1382, "step": 13784 }, { "epoch": 0.40215298442149483, "grad_norm": 0.944498623984756, "learning_rate": 6.787185725020166e-06, "loss": 0.1454, "step": 13785 }, { "epoch": 0.4021821576521384, "grad_norm": 0.8353992898859052, "learning_rate": 6.7867444932113365e-06, "loss": 0.1152, "step": 13786 }, { "epoch": 0.40221133088278194, "grad_norm": 0.9867069702543034, "learning_rate": 6.7863032454508786e-06, "loss": 0.1368, "step": 13787 }, { "epoch": 0.40224050411342555, "grad_norm": 1.0051752584566038, "learning_rate": 6.785861981742732e-06, "loss": 0.1765, "step": 13788 }, { "epoch": 0.4022696773440691, "grad_norm": 0.981644628910491, "learning_rate": 6.785420702090837e-06, "loss": 0.1785, "step": 13789 }, { "epoch": 0.40229885057471265, "grad_norm": 0.8079146604087067, "learning_rate": 6.7849794064991306e-06, "loss": 0.1426, "step": 13790 }, { "epoch": 0.4023280238053562, "grad_norm": 0.9380987146825817, "learning_rate": 6.784538094971555e-06, "loss": 0.181, "step": 13791 }, { "epoch": 0.40235719703599976, "grad_norm": 0.8754134722520672, "learning_rate": 6.784096767512048e-06, "loss": 0.1339, "step": 13792 }, { "epoch": 0.4023863702666433, "grad_norm": 0.851925529761588, "learning_rate": 6.783655424124551e-06, "loss": 0.1466, "step": 13793 }, { "epoch": 0.40241554349728687, "grad_norm": 0.7035318098262453, "learning_rate": 6.783214064813007e-06, "loss": 0.1496, "step": 13794 }, { "epoch": 0.4024447167279305, "grad_norm": 0.9123551210521693, "learning_rate": 6.782772689581352e-06, "loss": 0.1548, "step": 13795 }, { "epoch": 0.402473889958574, "grad_norm": 0.807820870976712, "learning_rate": 6.782331298433527e-06, "loss": 0.1428, "step": 13796 }, { "epoch": 0.4025030631892176, "grad_norm": 0.937659808531094, "learning_rate": 6.781889891373475e-06, "loss": 0.164, "step": 13797 }, { "epoch": 0.40253223641986113, "grad_norm": 0.8513003427067009, "learning_rate": 6.781448468405134e-06, "loss": 0.1585, "step": 13798 }, { "epoch": 0.4025614096505047, "grad_norm": 0.6954541883374559, "learning_rate": 6.781007029532447e-06, "loss": 0.1357, "step": 13799 }, { "epoch": 0.40259058288114824, "grad_norm": 1.1038917546829061, "learning_rate": 6.780565574759355e-06, "loss": 0.1552, "step": 13800 }, { "epoch": 0.4026197561117918, "grad_norm": 0.9587137507817821, "learning_rate": 6.780124104089797e-06, "loss": 0.1531, "step": 13801 }, { "epoch": 0.4026489293424354, "grad_norm": 0.9066835913109763, "learning_rate": 6.779682617527716e-06, "loss": 0.1333, "step": 13802 }, { "epoch": 0.40267810257307896, "grad_norm": 0.8230750795006987, "learning_rate": 6.779241115077055e-06, "loss": 0.1264, "step": 13803 }, { "epoch": 0.4027072758037225, "grad_norm": 0.7413107909208608, "learning_rate": 6.778799596741754e-06, "loss": 0.1504, "step": 13804 }, { "epoch": 0.40273644903436606, "grad_norm": 0.9141975771969796, "learning_rate": 6.778358062525754e-06, "loss": 0.1257, "step": 13805 }, { "epoch": 0.4027656222650096, "grad_norm": 0.7411382445326381, "learning_rate": 6.7779165124329996e-06, "loss": 0.1517, "step": 13806 }, { "epoch": 0.40279479549565317, "grad_norm": 0.8542320160237078, "learning_rate": 6.777474946467429e-06, "loss": 0.1217, "step": 13807 }, { "epoch": 0.4028239687262968, "grad_norm": 0.8449350094060898, "learning_rate": 6.777033364632985e-06, "loss": 0.1413, "step": 13808 }, { "epoch": 0.40285314195694033, "grad_norm": 0.6907324546454325, "learning_rate": 6.776591766933615e-06, "loss": 0.125, "step": 13809 }, { "epoch": 0.4028823151875839, "grad_norm": 0.8094075390524268, "learning_rate": 6.776150153373256e-06, "loss": 0.1598, "step": 13810 }, { "epoch": 0.40291148841822744, "grad_norm": 0.7738383526866942, "learning_rate": 6.775708523955853e-06, "loss": 0.1332, "step": 13811 }, { "epoch": 0.402940661648871, "grad_norm": 1.0297724380694226, "learning_rate": 6.775266878685347e-06, "loss": 0.1307, "step": 13812 }, { "epoch": 0.40296983487951454, "grad_norm": 0.6461842201212494, "learning_rate": 6.774825217565683e-06, "loss": 0.1213, "step": 13813 }, { "epoch": 0.4029990081101581, "grad_norm": 0.8215650821296505, "learning_rate": 6.774383540600802e-06, "loss": 0.1348, "step": 13814 }, { "epoch": 0.4030281813408017, "grad_norm": 0.7159503319524297, "learning_rate": 6.773941847794649e-06, "loss": 0.1274, "step": 13815 }, { "epoch": 0.40305735457144526, "grad_norm": 1.0293789422131108, "learning_rate": 6.773500139151168e-06, "loss": 0.1448, "step": 13816 }, { "epoch": 0.4030865278020888, "grad_norm": 0.784740719277153, "learning_rate": 6.7730584146743e-06, "loss": 0.1541, "step": 13817 }, { "epoch": 0.40311570103273237, "grad_norm": 0.7981040480949031, "learning_rate": 6.772616674367989e-06, "loss": 0.1421, "step": 13818 }, { "epoch": 0.4031448742633759, "grad_norm": 1.3657133072490035, "learning_rate": 6.772174918236181e-06, "loss": 0.1348, "step": 13819 }, { "epoch": 0.40317404749401947, "grad_norm": 1.1758712817903028, "learning_rate": 6.771733146282816e-06, "loss": 0.1657, "step": 13820 }, { "epoch": 0.403203220724663, "grad_norm": 0.7212966744394569, "learning_rate": 6.7712913585118434e-06, "loss": 0.1284, "step": 13821 }, { "epoch": 0.40323239395530663, "grad_norm": 0.8898275224673643, "learning_rate": 6.770849554927203e-06, "loss": 0.1612, "step": 13822 }, { "epoch": 0.4032615671859502, "grad_norm": 1.1733907420083407, "learning_rate": 6.77040773553284e-06, "loss": 0.1539, "step": 13823 }, { "epoch": 0.40329074041659374, "grad_norm": 0.8545844898165131, "learning_rate": 6.7699659003327e-06, "loss": 0.136, "step": 13824 }, { "epoch": 0.4033199136472373, "grad_norm": 0.6994677993077235, "learning_rate": 6.769524049330727e-06, "loss": 0.1172, "step": 13825 }, { "epoch": 0.40334908687788085, "grad_norm": 1.154375089571428, "learning_rate": 6.769082182530866e-06, "loss": 0.1431, "step": 13826 }, { "epoch": 0.4033782601085244, "grad_norm": 0.9552074751750567, "learning_rate": 6.76864029993706e-06, "loss": 0.1682, "step": 13827 }, { "epoch": 0.40340743333916795, "grad_norm": 0.9023359896616971, "learning_rate": 6.768198401553258e-06, "loss": 0.1644, "step": 13828 }, { "epoch": 0.40343660656981156, "grad_norm": 0.9605314386736236, "learning_rate": 6.767756487383401e-06, "loss": 0.135, "step": 13829 }, { "epoch": 0.4034657798004551, "grad_norm": 1.0007858002830456, "learning_rate": 6.767314557431437e-06, "loss": 0.1452, "step": 13830 }, { "epoch": 0.40349495303109867, "grad_norm": 0.7484454973488388, "learning_rate": 6.76687261170131e-06, "loss": 0.148, "step": 13831 }, { "epoch": 0.4035241262617422, "grad_norm": 0.7623243774816232, "learning_rate": 6.766430650196966e-06, "loss": 0.1243, "step": 13832 }, { "epoch": 0.4035532994923858, "grad_norm": 0.8400999008001412, "learning_rate": 6.76598867292235e-06, "loss": 0.1515, "step": 13833 }, { "epoch": 0.40358247272302933, "grad_norm": 0.7423313540985967, "learning_rate": 6.765546679881412e-06, "loss": 0.1226, "step": 13834 }, { "epoch": 0.40361164595367294, "grad_norm": 0.8349194568129509, "learning_rate": 6.765104671078091e-06, "loss": 0.1341, "step": 13835 }, { "epoch": 0.4036408191843165, "grad_norm": 0.9111840040080945, "learning_rate": 6.764662646516339e-06, "loss": 0.1444, "step": 13836 }, { "epoch": 0.40366999241496004, "grad_norm": 0.8620917387428572, "learning_rate": 6.7642206062001e-06, "loss": 0.1619, "step": 13837 }, { "epoch": 0.4036991656456036, "grad_norm": 1.0970345348337347, "learning_rate": 6.763778550133319e-06, "loss": 0.1235, "step": 13838 }, { "epoch": 0.40372833887624715, "grad_norm": 0.7091912525413521, "learning_rate": 6.763336478319946e-06, "loss": 0.1236, "step": 13839 }, { "epoch": 0.4037575121068907, "grad_norm": 0.9205896424246612, "learning_rate": 6.762894390763926e-06, "loss": 0.1356, "step": 13840 }, { "epoch": 0.40378668533753426, "grad_norm": 0.6969497000615098, "learning_rate": 6.762452287469203e-06, "loss": 0.1302, "step": 13841 }, { "epoch": 0.40381585856817787, "grad_norm": 0.9055544492457576, "learning_rate": 6.762010168439729e-06, "loss": 0.1581, "step": 13842 }, { "epoch": 0.4038450317988214, "grad_norm": 0.9098400181396813, "learning_rate": 6.7615680336794485e-06, "loss": 0.1268, "step": 13843 }, { "epoch": 0.40387420502946497, "grad_norm": 0.8242693953386512, "learning_rate": 6.761125883192309e-06, "loss": 0.1566, "step": 13844 }, { "epoch": 0.4039033782601085, "grad_norm": 0.729184034319107, "learning_rate": 6.7606837169822585e-06, "loss": 0.1531, "step": 13845 }, { "epoch": 0.4039325514907521, "grad_norm": 0.681249633724076, "learning_rate": 6.7602415350532425e-06, "loss": 0.132, "step": 13846 }, { "epoch": 0.40396172472139563, "grad_norm": 0.8098940345284736, "learning_rate": 6.759799337409212e-06, "loss": 0.1305, "step": 13847 }, { "epoch": 0.4039908979520392, "grad_norm": 0.9028666825782868, "learning_rate": 6.759357124054113e-06, "loss": 0.1411, "step": 13848 }, { "epoch": 0.4040200711826828, "grad_norm": 0.7851334979480659, "learning_rate": 6.758914894991892e-06, "loss": 0.1659, "step": 13849 }, { "epoch": 0.40404924441332635, "grad_norm": 0.8001877139594606, "learning_rate": 6.7584726502264994e-06, "loss": 0.1306, "step": 13850 }, { "epoch": 0.4040784176439699, "grad_norm": 0.8583107281229556, "learning_rate": 6.7580303897618845e-06, "loss": 0.1512, "step": 13851 }, { "epoch": 0.40410759087461345, "grad_norm": 0.9444092669818012, "learning_rate": 6.757588113601993e-06, "loss": 0.147, "step": 13852 }, { "epoch": 0.404136764105257, "grad_norm": 0.959684922988974, "learning_rate": 6.757145821750772e-06, "loss": 0.1303, "step": 13853 }, { "epoch": 0.40416593733590056, "grad_norm": 1.23689071723602, "learning_rate": 6.7567035142121765e-06, "loss": 0.156, "step": 13854 }, { "epoch": 0.4041951105665441, "grad_norm": 0.960208884381623, "learning_rate": 6.7562611909901485e-06, "loss": 0.1291, "step": 13855 }, { "epoch": 0.4042242837971877, "grad_norm": 0.8049659595421716, "learning_rate": 6.755818852088641e-06, "loss": 0.1299, "step": 13856 }, { "epoch": 0.4042534570278313, "grad_norm": 0.9656629770882997, "learning_rate": 6.755376497511602e-06, "loss": 0.1591, "step": 13857 }, { "epoch": 0.40428263025847483, "grad_norm": 0.7832230564762516, "learning_rate": 6.75493412726298e-06, "loss": 0.1533, "step": 13858 }, { "epoch": 0.4043118034891184, "grad_norm": 0.993521752311356, "learning_rate": 6.754491741346726e-06, "loss": 0.1564, "step": 13859 }, { "epoch": 0.40434097671976194, "grad_norm": 0.7183243951121271, "learning_rate": 6.754049339766787e-06, "loss": 0.121, "step": 13860 }, { "epoch": 0.4043701499504055, "grad_norm": 0.7915070967006594, "learning_rate": 6.753606922527116e-06, "loss": 0.1203, "step": 13861 }, { "epoch": 0.40439932318104904, "grad_norm": 0.9709987998774869, "learning_rate": 6.75316448963166e-06, "loss": 0.1401, "step": 13862 }, { "epoch": 0.40442849641169265, "grad_norm": 0.9789264146470273, "learning_rate": 6.75272204108437e-06, "loss": 0.1619, "step": 13863 }, { "epoch": 0.4044576696423362, "grad_norm": 0.8771147484309805, "learning_rate": 6.752279576889197e-06, "loss": 0.152, "step": 13864 }, { "epoch": 0.40448684287297976, "grad_norm": 1.167611679520056, "learning_rate": 6.751837097050089e-06, "loss": 0.1554, "step": 13865 }, { "epoch": 0.4045160161036233, "grad_norm": 0.9789670282267943, "learning_rate": 6.751394601570999e-06, "loss": 0.1502, "step": 13866 }, { "epoch": 0.40454518933426686, "grad_norm": 0.8399982462391157, "learning_rate": 6.750952090455875e-06, "loss": 0.1338, "step": 13867 }, { "epoch": 0.4045743625649104, "grad_norm": 0.8032501860187403, "learning_rate": 6.750509563708667e-06, "loss": 0.126, "step": 13868 }, { "epoch": 0.404603535795554, "grad_norm": 0.9235085166172942, "learning_rate": 6.750067021333331e-06, "loss": 0.1584, "step": 13869 }, { "epoch": 0.4046327090261976, "grad_norm": 1.0484610246377482, "learning_rate": 6.749624463333812e-06, "loss": 0.1671, "step": 13870 }, { "epoch": 0.40466188225684113, "grad_norm": 1.0220461955362339, "learning_rate": 6.749181889714065e-06, "loss": 0.1477, "step": 13871 }, { "epoch": 0.4046910554874847, "grad_norm": 0.8179953457084675, "learning_rate": 6.748739300478038e-06, "loss": 0.1577, "step": 13872 }, { "epoch": 0.40472022871812824, "grad_norm": 0.9453756368427266, "learning_rate": 6.748296695629686e-06, "loss": 0.1306, "step": 13873 }, { "epoch": 0.4047494019487718, "grad_norm": 0.9213055063477555, "learning_rate": 6.747854075172957e-06, "loss": 0.1742, "step": 13874 }, { "epoch": 0.40477857517941535, "grad_norm": 0.7635865055614118, "learning_rate": 6.747411439111804e-06, "loss": 0.1376, "step": 13875 }, { "epoch": 0.40480774841005895, "grad_norm": 0.6466773592673615, "learning_rate": 6.746968787450179e-06, "loss": 0.1379, "step": 13876 }, { "epoch": 0.4048369216407025, "grad_norm": 1.035500235797688, "learning_rate": 6.746526120192034e-06, "loss": 0.1361, "step": 13877 }, { "epoch": 0.40486609487134606, "grad_norm": 0.8626095453778869, "learning_rate": 6.74608343734132e-06, "loss": 0.1423, "step": 13878 }, { "epoch": 0.4048952681019896, "grad_norm": 0.678155769586051, "learning_rate": 6.7456407389019914e-06, "loss": 0.1245, "step": 13879 }, { "epoch": 0.40492444133263317, "grad_norm": 0.9401401481724677, "learning_rate": 6.745198024877997e-06, "loss": 0.1586, "step": 13880 }, { "epoch": 0.4049536145632767, "grad_norm": 1.68582785052616, "learning_rate": 6.744755295273293e-06, "loss": 0.121, "step": 13881 }, { "epoch": 0.4049827877939203, "grad_norm": 0.7929425097320372, "learning_rate": 6.74431255009183e-06, "loss": 0.1316, "step": 13882 }, { "epoch": 0.4050119610245639, "grad_norm": 0.8223080449054122, "learning_rate": 6.743869789337561e-06, "loss": 0.1426, "step": 13883 }, { "epoch": 0.40504113425520744, "grad_norm": 0.9757420853281957, "learning_rate": 6.743427013014439e-06, "loss": 0.158, "step": 13884 }, { "epoch": 0.405070307485851, "grad_norm": 0.7557406490237888, "learning_rate": 6.742984221126415e-06, "loss": 0.1408, "step": 13885 }, { "epoch": 0.40509948071649454, "grad_norm": 1.1250477990480532, "learning_rate": 6.7425414136774455e-06, "loss": 0.1428, "step": 13886 }, { "epoch": 0.4051286539471381, "grad_norm": 0.7119943340348683, "learning_rate": 6.742098590671482e-06, "loss": 0.1488, "step": 13887 }, { "epoch": 0.40515782717778165, "grad_norm": 0.8331837913748907, "learning_rate": 6.741655752112477e-06, "loss": 0.1548, "step": 13888 }, { "epoch": 0.4051870004084252, "grad_norm": 1.1710702037678071, "learning_rate": 6.741212898004387e-06, "loss": 0.1301, "step": 13889 }, { "epoch": 0.4052161736390688, "grad_norm": 0.8428828786739003, "learning_rate": 6.740770028351162e-06, "loss": 0.1437, "step": 13890 }, { "epoch": 0.40524534686971236, "grad_norm": 0.735871318414556, "learning_rate": 6.74032714315676e-06, "loss": 0.1369, "step": 13891 }, { "epoch": 0.4052745201003559, "grad_norm": 1.1021626705588285, "learning_rate": 6.739884242425131e-06, "loss": 0.1389, "step": 13892 }, { "epoch": 0.40530369333099947, "grad_norm": 0.841698844264047, "learning_rate": 6.739441326160232e-06, "loss": 0.1433, "step": 13893 }, { "epoch": 0.405332866561643, "grad_norm": 0.8515204871458688, "learning_rate": 6.7389983943660166e-06, "loss": 0.1331, "step": 13894 }, { "epoch": 0.4053620397922866, "grad_norm": 0.8197276598858572, "learning_rate": 6.738555447046435e-06, "loss": 0.1428, "step": 13895 }, { "epoch": 0.4053912130229302, "grad_norm": 0.6604443901026178, "learning_rate": 6.73811248420545e-06, "loss": 0.1458, "step": 13896 }, { "epoch": 0.40542038625357374, "grad_norm": 0.6456051881981346, "learning_rate": 6.73766950584701e-06, "loss": 0.1314, "step": 13897 }, { "epoch": 0.4054495594842173, "grad_norm": 0.9471223019182881, "learning_rate": 6.73722651197507e-06, "loss": 0.1425, "step": 13898 }, { "epoch": 0.40547873271486085, "grad_norm": 0.9073627970640993, "learning_rate": 6.736783502593588e-06, "loss": 0.1476, "step": 13899 }, { "epoch": 0.4055079059455044, "grad_norm": 0.7660624869700193, "learning_rate": 6.7363404777065165e-06, "loss": 0.1361, "step": 13900 }, { "epoch": 0.40553707917614795, "grad_norm": 0.6532465243325225, "learning_rate": 6.735897437317814e-06, "loss": 0.1342, "step": 13901 }, { "epoch": 0.4055662524067915, "grad_norm": 0.9244306440291867, "learning_rate": 6.73545438143143e-06, "loss": 0.1538, "step": 13902 }, { "epoch": 0.4055954256374351, "grad_norm": 1.262680194200995, "learning_rate": 6.735011310051326e-06, "loss": 0.1483, "step": 13903 }, { "epoch": 0.40562459886807867, "grad_norm": 0.7438170902124872, "learning_rate": 6.734568223181454e-06, "loss": 0.1292, "step": 13904 }, { "epoch": 0.4056537720987222, "grad_norm": 0.8378501698766265, "learning_rate": 6.734125120825772e-06, "loss": 0.1339, "step": 13905 }, { "epoch": 0.4056829453293658, "grad_norm": 0.7786892693350929, "learning_rate": 6.733682002988234e-06, "loss": 0.1544, "step": 13906 }, { "epoch": 0.4057121185600093, "grad_norm": 0.7707653887747307, "learning_rate": 6.733238869672798e-06, "loss": 0.1526, "step": 13907 }, { "epoch": 0.4057412917906529, "grad_norm": 0.824372124571805, "learning_rate": 6.732795720883418e-06, "loss": 0.1449, "step": 13908 }, { "epoch": 0.40577046502129643, "grad_norm": 0.8756119621790452, "learning_rate": 6.732352556624054e-06, "loss": 0.1457, "step": 13909 }, { "epoch": 0.40579963825194004, "grad_norm": 0.8489565094040106, "learning_rate": 6.731909376898655e-06, "loss": 0.1546, "step": 13910 }, { "epoch": 0.4058288114825836, "grad_norm": 0.8823960475572548, "learning_rate": 6.731466181711187e-06, "loss": 0.1331, "step": 13911 }, { "epoch": 0.40585798471322715, "grad_norm": 0.9194805386582351, "learning_rate": 6.7310229710656e-06, "loss": 0.1301, "step": 13912 }, { "epoch": 0.4058871579438707, "grad_norm": 0.8924955956329476, "learning_rate": 6.730579744965853e-06, "loss": 0.1783, "step": 13913 }, { "epoch": 0.40591633117451426, "grad_norm": 0.7918824527035137, "learning_rate": 6.730136503415905e-06, "loss": 0.1293, "step": 13914 }, { "epoch": 0.4059455044051578, "grad_norm": 1.1705718073496392, "learning_rate": 6.72969324641971e-06, "loss": 0.1413, "step": 13915 }, { "epoch": 0.40597467763580136, "grad_norm": 1.3340861208821262, "learning_rate": 6.7292499739812265e-06, "loss": 0.1422, "step": 13916 }, { "epoch": 0.40600385086644497, "grad_norm": 0.7533503981511502, "learning_rate": 6.7288066861044135e-06, "loss": 0.153, "step": 13917 }, { "epoch": 0.4060330240970885, "grad_norm": 0.7670577018488539, "learning_rate": 6.728363382793226e-06, "loss": 0.1093, "step": 13918 }, { "epoch": 0.4060621973277321, "grad_norm": 1.0263661022388535, "learning_rate": 6.727920064051623e-06, "loss": 0.1281, "step": 13919 }, { "epoch": 0.40609137055837563, "grad_norm": 1.0922248593278725, "learning_rate": 6.727476729883562e-06, "loss": 0.1392, "step": 13920 }, { "epoch": 0.4061205437890192, "grad_norm": 0.8025404990280155, "learning_rate": 6.727033380293e-06, "loss": 0.1514, "step": 13921 }, { "epoch": 0.40614971701966274, "grad_norm": 0.9046361301964011, "learning_rate": 6.726590015283898e-06, "loss": 0.1371, "step": 13922 }, { "epoch": 0.40617889025030635, "grad_norm": 0.9155983421745632, "learning_rate": 6.726146634860211e-06, "loss": 0.1564, "step": 13923 }, { "epoch": 0.4062080634809499, "grad_norm": 0.9383638023490466, "learning_rate": 6.725703239025902e-06, "loss": 0.1161, "step": 13924 }, { "epoch": 0.40623723671159345, "grad_norm": 0.7331267287852953, "learning_rate": 6.7252598277849224e-06, "loss": 0.1147, "step": 13925 }, { "epoch": 0.406266409942237, "grad_norm": 0.7813416010787895, "learning_rate": 6.724816401141238e-06, "loss": 0.14, "step": 13926 }, { "epoch": 0.40629558317288056, "grad_norm": 0.7237744179297166, "learning_rate": 6.724372959098804e-06, "loss": 0.1195, "step": 13927 }, { "epoch": 0.4063247564035241, "grad_norm": 0.7361917382200347, "learning_rate": 6.723929501661577e-06, "loss": 0.1245, "step": 13928 }, { "epoch": 0.40635392963416767, "grad_norm": 0.8156374258026021, "learning_rate": 6.7234860288335226e-06, "loss": 0.1464, "step": 13929 }, { "epoch": 0.4063831028648113, "grad_norm": 0.892837999734048, "learning_rate": 6.723042540618594e-06, "loss": 0.127, "step": 13930 }, { "epoch": 0.4064122760954548, "grad_norm": 0.7440565252222753, "learning_rate": 6.722599037020754e-06, "loss": 0.1363, "step": 13931 }, { "epoch": 0.4064414493260984, "grad_norm": 0.983731475863947, "learning_rate": 6.722155518043961e-06, "loss": 0.1534, "step": 13932 }, { "epoch": 0.40647062255674193, "grad_norm": 0.8136877659832364, "learning_rate": 6.721711983692174e-06, "loss": 0.1436, "step": 13933 }, { "epoch": 0.4064997957873855, "grad_norm": 0.7771830696086671, "learning_rate": 6.721268433969354e-06, "loss": 0.1349, "step": 13934 }, { "epoch": 0.40652896901802904, "grad_norm": 0.8047228295417147, "learning_rate": 6.720824868879461e-06, "loss": 0.1331, "step": 13935 }, { "epoch": 0.4065581422486726, "grad_norm": 1.1618088355115954, "learning_rate": 6.720381288426453e-06, "loss": 0.1345, "step": 13936 }, { "epoch": 0.4065873154793162, "grad_norm": 0.8696414737240781, "learning_rate": 6.719937692614291e-06, "loss": 0.13, "step": 13937 }, { "epoch": 0.40661648870995976, "grad_norm": 0.8758969354676212, "learning_rate": 6.719494081446938e-06, "loss": 0.1376, "step": 13938 }, { "epoch": 0.4066456619406033, "grad_norm": 0.9729964811323002, "learning_rate": 6.719050454928352e-06, "loss": 0.144, "step": 13939 }, { "epoch": 0.40667483517124686, "grad_norm": 0.7917882584987335, "learning_rate": 6.718606813062491e-06, "loss": 0.1451, "step": 13940 }, { "epoch": 0.4067040084018904, "grad_norm": 1.034237100355872, "learning_rate": 6.718163155853324e-06, "loss": 0.1478, "step": 13941 }, { "epoch": 0.40673318163253397, "grad_norm": 0.8205713409659606, "learning_rate": 6.717719483304802e-06, "loss": 0.131, "step": 13942 }, { "epoch": 0.4067623548631775, "grad_norm": 0.9173721626722815, "learning_rate": 6.717275795420891e-06, "loss": 0.1312, "step": 13943 }, { "epoch": 0.40679152809382113, "grad_norm": 0.8580739255366375, "learning_rate": 6.716832092205553e-06, "loss": 0.127, "step": 13944 }, { "epoch": 0.4068207013244647, "grad_norm": 0.7384111949999307, "learning_rate": 6.716388373662748e-06, "loss": 0.1455, "step": 13945 }, { "epoch": 0.40684987455510824, "grad_norm": 0.7810865620045411, "learning_rate": 6.7159446397964365e-06, "loss": 0.1337, "step": 13946 }, { "epoch": 0.4068790477857518, "grad_norm": 0.608604351621167, "learning_rate": 6.71550089061058e-06, "loss": 0.1504, "step": 13947 }, { "epoch": 0.40690822101639534, "grad_norm": 0.7273311664070611, "learning_rate": 6.715057126109144e-06, "loss": 0.1451, "step": 13948 }, { "epoch": 0.4069373942470389, "grad_norm": 0.8768176956995616, "learning_rate": 6.714613346296084e-06, "loss": 0.1259, "step": 13949 }, { "epoch": 0.4069665674776825, "grad_norm": 0.6964834610105543, "learning_rate": 6.7141695511753665e-06, "loss": 0.1361, "step": 13950 }, { "epoch": 0.40699574070832606, "grad_norm": 0.8443813847819621, "learning_rate": 6.7137257407509535e-06, "loss": 0.1344, "step": 13951 }, { "epoch": 0.4070249139389696, "grad_norm": 0.7819794995447436, "learning_rate": 6.7132819150268055e-06, "loss": 0.1478, "step": 13952 }, { "epoch": 0.40705408716961317, "grad_norm": 1.0522374776671786, "learning_rate": 6.712838074006886e-06, "loss": 0.1512, "step": 13953 }, { "epoch": 0.4070832604002567, "grad_norm": 0.7433847992382673, "learning_rate": 6.712394217695157e-06, "loss": 0.124, "step": 13954 }, { "epoch": 0.4071124336309003, "grad_norm": 0.8055643101783762, "learning_rate": 6.71195034609558e-06, "loss": 0.1505, "step": 13955 }, { "epoch": 0.4071416068615438, "grad_norm": 0.8761014103587407, "learning_rate": 6.711506459212121e-06, "loss": 0.1362, "step": 13956 }, { "epoch": 0.40717078009218743, "grad_norm": 0.8862245770951604, "learning_rate": 6.7110625570487396e-06, "loss": 0.1293, "step": 13957 }, { "epoch": 0.407199953322831, "grad_norm": 0.7771320225609402, "learning_rate": 6.7106186396094e-06, "loss": 0.1555, "step": 13958 }, { "epoch": 0.40722912655347454, "grad_norm": 0.8519494872743051, "learning_rate": 6.710174706898066e-06, "loss": 0.1249, "step": 13959 }, { "epoch": 0.4072582997841181, "grad_norm": 0.8705566898812466, "learning_rate": 6.7097307589187e-06, "loss": 0.141, "step": 13960 }, { "epoch": 0.40728747301476165, "grad_norm": 0.8819784237527349, "learning_rate": 6.709286795675267e-06, "loss": 0.1804, "step": 13961 }, { "epoch": 0.4073166462454052, "grad_norm": 0.8451911639438492, "learning_rate": 6.708842817171728e-06, "loss": 0.1475, "step": 13962 }, { "epoch": 0.40734581947604875, "grad_norm": 0.8277653220416917, "learning_rate": 6.708398823412048e-06, "loss": 0.1511, "step": 13963 }, { "epoch": 0.40737499270669236, "grad_norm": 0.7493702203092666, "learning_rate": 6.707954814400194e-06, "loss": 0.1412, "step": 13964 }, { "epoch": 0.4074041659373359, "grad_norm": 1.0245989458513236, "learning_rate": 6.707510790140125e-06, "loss": 0.1299, "step": 13965 }, { "epoch": 0.40743333916797947, "grad_norm": 0.9824661100612829, "learning_rate": 6.707066750635808e-06, "loss": 0.1336, "step": 13966 }, { "epoch": 0.407462512398623, "grad_norm": 0.6584938280855426, "learning_rate": 6.706622695891205e-06, "loss": 0.1147, "step": 13967 }, { "epoch": 0.4074916856292666, "grad_norm": 1.0934168843934498, "learning_rate": 6.7061786259102836e-06, "loss": 0.1276, "step": 13968 }, { "epoch": 0.40752085885991013, "grad_norm": 1.0310521905153107, "learning_rate": 6.705734540697007e-06, "loss": 0.1469, "step": 13969 }, { "epoch": 0.4075500320905537, "grad_norm": 0.8286522643462688, "learning_rate": 6.705290440255339e-06, "loss": 0.1515, "step": 13970 }, { "epoch": 0.4075792053211973, "grad_norm": 0.9236250938075553, "learning_rate": 6.704846324589245e-06, "loss": 0.1248, "step": 13971 }, { "epoch": 0.40760837855184084, "grad_norm": 1.0354918369542758, "learning_rate": 6.704402193702688e-06, "loss": 0.1536, "step": 13972 }, { "epoch": 0.4076375517824844, "grad_norm": 0.8376302250466713, "learning_rate": 6.703958047599638e-06, "loss": 0.1354, "step": 13973 }, { "epoch": 0.40766672501312795, "grad_norm": 0.8339055544052391, "learning_rate": 6.703513886284057e-06, "loss": 0.1143, "step": 13974 }, { "epoch": 0.4076958982437715, "grad_norm": 0.7567474029921512, "learning_rate": 6.703069709759908e-06, "loss": 0.1284, "step": 13975 }, { "epoch": 0.40772507147441506, "grad_norm": 0.9610262830773799, "learning_rate": 6.702625518031163e-06, "loss": 0.1504, "step": 13976 }, { "epoch": 0.4077542447050586, "grad_norm": 0.8865308435034972, "learning_rate": 6.702181311101782e-06, "loss": 0.1793, "step": 13977 }, { "epoch": 0.4077834179357022, "grad_norm": 0.6574608969593219, "learning_rate": 6.7017370889757316e-06, "loss": 0.1374, "step": 13978 }, { "epoch": 0.4078125911663458, "grad_norm": 1.0120752558987616, "learning_rate": 6.701292851656981e-06, "loss": 0.136, "step": 13979 }, { "epoch": 0.4078417643969893, "grad_norm": 1.9644773530936295, "learning_rate": 6.700848599149492e-06, "loss": 0.1435, "step": 13980 }, { "epoch": 0.4078709376276329, "grad_norm": 1.160227490228442, "learning_rate": 6.7004043314572334e-06, "loss": 0.1643, "step": 13981 }, { "epoch": 0.40790011085827643, "grad_norm": 0.7789937767422863, "learning_rate": 6.699960048584171e-06, "loss": 0.1537, "step": 13982 }, { "epoch": 0.40792928408892, "grad_norm": 0.8362555771905549, "learning_rate": 6.699515750534271e-06, "loss": 0.162, "step": 13983 }, { "epoch": 0.4079584573195636, "grad_norm": 1.1339844282933316, "learning_rate": 6.699071437311499e-06, "loss": 0.1443, "step": 13984 }, { "epoch": 0.40798763055020715, "grad_norm": 0.9865979361972813, "learning_rate": 6.6986271089198255e-06, "loss": 0.1389, "step": 13985 }, { "epoch": 0.4080168037808507, "grad_norm": 0.6972531061018157, "learning_rate": 6.698182765363213e-06, "loss": 0.1508, "step": 13986 }, { "epoch": 0.40804597701149425, "grad_norm": 0.6979498330401717, "learning_rate": 6.69773840664563e-06, "loss": 0.1378, "step": 13987 }, { "epoch": 0.4080751502421378, "grad_norm": 0.8524600649031473, "learning_rate": 6.697294032771044e-06, "loss": 0.1461, "step": 13988 }, { "epoch": 0.40810432347278136, "grad_norm": 0.7584244475405565, "learning_rate": 6.696849643743423e-06, "loss": 0.1661, "step": 13989 }, { "epoch": 0.4081334967034249, "grad_norm": 0.7086604018047018, "learning_rate": 6.69640523956673e-06, "loss": 0.1431, "step": 13990 }, { "epoch": 0.4081626699340685, "grad_norm": 0.829624793440343, "learning_rate": 6.69596082024494e-06, "loss": 0.1413, "step": 13991 }, { "epoch": 0.4081918431647121, "grad_norm": 0.891454242123997, "learning_rate": 6.695516385782015e-06, "loss": 0.156, "step": 13992 }, { "epoch": 0.40822101639535563, "grad_norm": 0.7184089098495967, "learning_rate": 6.6950719361819235e-06, "loss": 0.1315, "step": 13993 }, { "epoch": 0.4082501896259992, "grad_norm": 0.8348011185440816, "learning_rate": 6.694627471448637e-06, "loss": 0.14, "step": 13994 }, { "epoch": 0.40827936285664274, "grad_norm": 0.7968066873687387, "learning_rate": 6.694182991586119e-06, "loss": 0.1303, "step": 13995 }, { "epoch": 0.4083085360872863, "grad_norm": 0.824678742292664, "learning_rate": 6.69373849659834e-06, "loss": 0.1311, "step": 13996 }, { "epoch": 0.40833770931792984, "grad_norm": 1.0167257240538659, "learning_rate": 6.693293986489269e-06, "loss": 0.1352, "step": 13997 }, { "epoch": 0.40836688254857345, "grad_norm": 1.0375429848464421, "learning_rate": 6.692849461262871e-06, "loss": 0.1482, "step": 13998 }, { "epoch": 0.408396055779217, "grad_norm": 0.8255680474348037, "learning_rate": 6.692404920923119e-06, "loss": 0.1226, "step": 13999 }, { "epoch": 0.40842522900986056, "grad_norm": 1.3039704058943398, "learning_rate": 6.69196036547398e-06, "loss": 0.1601, "step": 14000 }, { "epoch": 0.4084544022405041, "grad_norm": 1.0278586590171908, "learning_rate": 6.6915157949194235e-06, "loss": 0.1416, "step": 14001 }, { "epoch": 0.40848357547114766, "grad_norm": 0.8303631477247092, "learning_rate": 6.691071209263416e-06, "loss": 0.1479, "step": 14002 }, { "epoch": 0.4085127487017912, "grad_norm": 1.3375067488748817, "learning_rate": 6.690626608509929e-06, "loss": 0.1316, "step": 14003 }, { "epoch": 0.40854192193243477, "grad_norm": 0.8226352275152343, "learning_rate": 6.690181992662932e-06, "loss": 0.1434, "step": 14004 }, { "epoch": 0.4085710951630784, "grad_norm": 0.8093347680830837, "learning_rate": 6.689737361726392e-06, "loss": 0.163, "step": 14005 }, { "epoch": 0.40860026839372193, "grad_norm": 1.2951534940925888, "learning_rate": 6.689292715704282e-06, "loss": 0.1386, "step": 14006 }, { "epoch": 0.4086294416243655, "grad_norm": 0.946998839121098, "learning_rate": 6.6888480546005695e-06, "loss": 0.1399, "step": 14007 }, { "epoch": 0.40865861485500904, "grad_norm": 0.8178770682230518, "learning_rate": 6.688403378419224e-06, "loss": 0.1551, "step": 14008 }, { "epoch": 0.4086877880856526, "grad_norm": 0.6966696986146201, "learning_rate": 6.687958687164217e-06, "loss": 0.1274, "step": 14009 }, { "epoch": 0.40871696131629615, "grad_norm": 0.7511313940706461, "learning_rate": 6.6875139808395175e-06, "loss": 0.1575, "step": 14010 }, { "epoch": 0.40874613454693975, "grad_norm": 0.9318682540544598, "learning_rate": 6.687069259449095e-06, "loss": 0.152, "step": 14011 }, { "epoch": 0.4087753077775833, "grad_norm": 0.8470311858234206, "learning_rate": 6.686624522996922e-06, "loss": 0.1517, "step": 14012 }, { "epoch": 0.40880448100822686, "grad_norm": 0.7094907056735673, "learning_rate": 6.686179771486967e-06, "loss": 0.1488, "step": 14013 }, { "epoch": 0.4088336542388704, "grad_norm": 0.9403389917136222, "learning_rate": 6.685735004923203e-06, "loss": 0.1317, "step": 14014 }, { "epoch": 0.40886282746951397, "grad_norm": 0.797529144309354, "learning_rate": 6.685290223309598e-06, "loss": 0.1355, "step": 14015 }, { "epoch": 0.4088920007001575, "grad_norm": 0.7167927773370429, "learning_rate": 6.684845426650126e-06, "loss": 0.1369, "step": 14016 }, { "epoch": 0.4089211739308011, "grad_norm": 0.6890553891716754, "learning_rate": 6.684400614948754e-06, "loss": 0.1257, "step": 14017 }, { "epoch": 0.4089503471614447, "grad_norm": 0.7739541831571997, "learning_rate": 6.683955788209455e-06, "loss": 0.1415, "step": 14018 }, { "epoch": 0.40897952039208824, "grad_norm": 1.0386203791338848, "learning_rate": 6.6835109464362035e-06, "loss": 0.1455, "step": 14019 }, { "epoch": 0.4090086936227318, "grad_norm": 0.8341902669434255, "learning_rate": 6.683066089632965e-06, "loss": 0.1272, "step": 14020 }, { "epoch": 0.40903786685337534, "grad_norm": 0.7716372682979363, "learning_rate": 6.682621217803718e-06, "loss": 0.123, "step": 14021 }, { "epoch": 0.4090670400840189, "grad_norm": 0.86523064711934, "learning_rate": 6.682176330952428e-06, "loss": 0.1428, "step": 14022 }, { "epoch": 0.40909621331466245, "grad_norm": 0.9150572348856957, "learning_rate": 6.681731429083068e-06, "loss": 0.1674, "step": 14023 }, { "epoch": 0.409125386545306, "grad_norm": 0.8572875063355055, "learning_rate": 6.681286512199614e-06, "loss": 0.1531, "step": 14024 }, { "epoch": 0.4091545597759496, "grad_norm": 0.8999123305672735, "learning_rate": 6.680841580306035e-06, "loss": 0.1212, "step": 14025 }, { "epoch": 0.40918373300659316, "grad_norm": 0.7772689415316842, "learning_rate": 6.6803966334063035e-06, "loss": 0.1492, "step": 14026 }, { "epoch": 0.4092129062372367, "grad_norm": 1.009921908207947, "learning_rate": 6.67995167150439e-06, "loss": 0.1471, "step": 14027 }, { "epoch": 0.40924207946788027, "grad_norm": 1.6112817363116887, "learning_rate": 6.679506694604271e-06, "loss": 0.1364, "step": 14028 }, { "epoch": 0.4092712526985238, "grad_norm": 1.0200856189028351, "learning_rate": 6.679061702709916e-06, "loss": 0.1575, "step": 14029 }, { "epoch": 0.4093004259291674, "grad_norm": 0.7929669231238768, "learning_rate": 6.6786166958253e-06, "loss": 0.1415, "step": 14030 }, { "epoch": 0.40932959915981093, "grad_norm": 0.7426867630516113, "learning_rate": 6.678171673954394e-06, "loss": 0.1445, "step": 14031 }, { "epoch": 0.40935877239045454, "grad_norm": 0.7855405902335418, "learning_rate": 6.677726637101172e-06, "loss": 0.1343, "step": 14032 }, { "epoch": 0.4093879456210981, "grad_norm": 0.9421841198000807, "learning_rate": 6.677281585269607e-06, "loss": 0.1424, "step": 14033 }, { "epoch": 0.40941711885174165, "grad_norm": 0.6584107554121822, "learning_rate": 6.676836518463674e-06, "loss": 0.1483, "step": 14034 }, { "epoch": 0.4094462920823852, "grad_norm": 0.8858738294690848, "learning_rate": 6.676391436687343e-06, "loss": 0.1567, "step": 14035 }, { "epoch": 0.40947546531302875, "grad_norm": 0.881739182410847, "learning_rate": 6.67594633994459e-06, "loss": 0.1418, "step": 14036 }, { "epoch": 0.4095046385436723, "grad_norm": 0.870955021038668, "learning_rate": 6.67550122823939e-06, "loss": 0.1675, "step": 14037 }, { "epoch": 0.4095338117743159, "grad_norm": 1.054631251684788, "learning_rate": 6.675056101575711e-06, "loss": 0.146, "step": 14038 }, { "epoch": 0.40956298500495947, "grad_norm": 0.7255648835603584, "learning_rate": 6.674610959957535e-06, "loss": 0.1225, "step": 14039 }, { "epoch": 0.409592158235603, "grad_norm": 0.9959082762794741, "learning_rate": 6.67416580338883e-06, "loss": 0.1515, "step": 14040 }, { "epoch": 0.4096213314662466, "grad_norm": 0.7235494032116029, "learning_rate": 6.673720631873572e-06, "loss": 0.1448, "step": 14041 }, { "epoch": 0.4096505046968901, "grad_norm": 0.8384810544456339, "learning_rate": 6.673275445415736e-06, "loss": 0.1511, "step": 14042 }, { "epoch": 0.4096796779275337, "grad_norm": 0.7629219454266113, "learning_rate": 6.672830244019297e-06, "loss": 0.1502, "step": 14043 }, { "epoch": 0.40970885115817723, "grad_norm": 0.7565425254724589, "learning_rate": 6.6723850276882285e-06, "loss": 0.1356, "step": 14044 }, { "epoch": 0.40973802438882084, "grad_norm": 0.8899515804339725, "learning_rate": 6.671939796426507e-06, "loss": 0.1403, "step": 14045 }, { "epoch": 0.4097671976194644, "grad_norm": 0.7793189627037008, "learning_rate": 6.671494550238105e-06, "loss": 0.1241, "step": 14046 }, { "epoch": 0.40979637085010795, "grad_norm": 0.6827247511238914, "learning_rate": 6.671049289126997e-06, "loss": 0.1348, "step": 14047 }, { "epoch": 0.4098255440807515, "grad_norm": 1.9911398154900386, "learning_rate": 6.670604013097162e-06, "loss": 0.1434, "step": 14048 }, { "epoch": 0.40985471731139506, "grad_norm": 0.9247532299116882, "learning_rate": 6.670158722152574e-06, "loss": 0.1661, "step": 14049 }, { "epoch": 0.4098838905420386, "grad_norm": 0.8381483537966381, "learning_rate": 6.669713416297205e-06, "loss": 0.1287, "step": 14050 }, { "epoch": 0.40991306377268216, "grad_norm": 0.8890645281254, "learning_rate": 6.669268095535035e-06, "loss": 0.1566, "step": 14051 }, { "epoch": 0.40994223700332577, "grad_norm": 0.7941016350632227, "learning_rate": 6.668822759870037e-06, "loss": 0.1402, "step": 14052 }, { "epoch": 0.4099714102339693, "grad_norm": 0.7829813544891336, "learning_rate": 6.668377409306188e-06, "loss": 0.1505, "step": 14053 }, { "epoch": 0.4100005834646129, "grad_norm": 0.98636950730738, "learning_rate": 6.6679320438474645e-06, "loss": 0.1397, "step": 14054 }, { "epoch": 0.41002975669525643, "grad_norm": 0.8465746512410242, "learning_rate": 6.667486663497842e-06, "loss": 0.1855, "step": 14055 }, { "epoch": 0.4100589299259, "grad_norm": 0.831950303805014, "learning_rate": 6.667041268261295e-06, "loss": 0.1486, "step": 14056 }, { "epoch": 0.41008810315654354, "grad_norm": 0.8592891237764462, "learning_rate": 6.6665958581418025e-06, "loss": 0.1418, "step": 14057 }, { "epoch": 0.4101172763871871, "grad_norm": 0.8936015891060176, "learning_rate": 6.66615043314334e-06, "loss": 0.1677, "step": 14058 }, { "epoch": 0.4101464496178307, "grad_norm": 0.7843285603489508, "learning_rate": 6.665704993269884e-06, "loss": 0.131, "step": 14059 }, { "epoch": 0.41017562284847425, "grad_norm": 1.0010218166116722, "learning_rate": 6.665259538525413e-06, "loss": 0.1392, "step": 14060 }, { "epoch": 0.4102047960791178, "grad_norm": 0.6198236187489929, "learning_rate": 6.664814068913901e-06, "loss": 0.1254, "step": 14061 }, { "epoch": 0.41023396930976136, "grad_norm": 0.8065200834730276, "learning_rate": 6.664368584439326e-06, "loss": 0.1636, "step": 14062 }, { "epoch": 0.4102631425404049, "grad_norm": 0.9592785676171229, "learning_rate": 6.663923085105666e-06, "loss": 0.1471, "step": 14063 }, { "epoch": 0.41029231577104847, "grad_norm": 0.829300522966816, "learning_rate": 6.663477570916898e-06, "loss": 0.1339, "step": 14064 }, { "epoch": 0.4103214890016921, "grad_norm": 0.685073210625489, "learning_rate": 6.663032041876999e-06, "loss": 0.127, "step": 14065 }, { "epoch": 0.41035066223233563, "grad_norm": 1.0905391998464318, "learning_rate": 6.662586497989948e-06, "loss": 0.1348, "step": 14066 }, { "epoch": 0.4103798354629792, "grad_norm": 0.8738693239342453, "learning_rate": 6.66214093925972e-06, "loss": 0.1454, "step": 14067 }, { "epoch": 0.41040900869362273, "grad_norm": 0.7284334938180899, "learning_rate": 6.661695365690295e-06, "loss": 0.1296, "step": 14068 }, { "epoch": 0.4104381819242663, "grad_norm": 0.8823968510654029, "learning_rate": 6.661249777285652e-06, "loss": 0.1495, "step": 14069 }, { "epoch": 0.41046735515490984, "grad_norm": 1.1569356128860702, "learning_rate": 6.6608041740497665e-06, "loss": 0.1479, "step": 14070 }, { "epoch": 0.4104965283855534, "grad_norm": 0.7709293904305606, "learning_rate": 6.660358555986617e-06, "loss": 0.1469, "step": 14071 }, { "epoch": 0.410525701616197, "grad_norm": 1.1563332843555503, "learning_rate": 6.659912923100184e-06, "loss": 0.1276, "step": 14072 }, { "epoch": 0.41055487484684056, "grad_norm": 1.0544475281449661, "learning_rate": 6.659467275394443e-06, "loss": 0.1611, "step": 14073 }, { "epoch": 0.4105840480774841, "grad_norm": 0.7960526210038408, "learning_rate": 6.659021612873375e-06, "loss": 0.1441, "step": 14074 }, { "epoch": 0.41061322130812766, "grad_norm": 0.6788930089530776, "learning_rate": 6.658575935540958e-06, "loss": 0.1473, "step": 14075 }, { "epoch": 0.4106423945387712, "grad_norm": 0.7889712563877609, "learning_rate": 6.658130243401173e-06, "loss": 0.1659, "step": 14076 }, { "epoch": 0.41067156776941477, "grad_norm": 0.9817376923813845, "learning_rate": 6.6576845364579946e-06, "loss": 0.1658, "step": 14077 }, { "epoch": 0.4107007410000583, "grad_norm": 0.8720322947938877, "learning_rate": 6.657238814715406e-06, "loss": 0.1385, "step": 14078 }, { "epoch": 0.41072991423070193, "grad_norm": 0.7252926333411631, "learning_rate": 6.656793078177384e-06, "loss": 0.1358, "step": 14079 }, { "epoch": 0.4107590874613455, "grad_norm": 0.9131611531639156, "learning_rate": 6.656347326847907e-06, "loss": 0.1743, "step": 14080 }, { "epoch": 0.41078826069198904, "grad_norm": 1.0655033481842031, "learning_rate": 6.65590156073096e-06, "loss": 0.134, "step": 14081 }, { "epoch": 0.4108174339226326, "grad_norm": 0.758471771459251, "learning_rate": 6.655455779830517e-06, "loss": 0.1321, "step": 14082 }, { "epoch": 0.41084660715327614, "grad_norm": 0.7336107284578407, "learning_rate": 6.65500998415056e-06, "loss": 0.1508, "step": 14083 }, { "epoch": 0.4108757803839197, "grad_norm": 0.9340191362850672, "learning_rate": 6.65456417369507e-06, "loss": 0.1252, "step": 14084 }, { "epoch": 0.41090495361456325, "grad_norm": 0.9134081318565378, "learning_rate": 6.654118348468026e-06, "loss": 0.1506, "step": 14085 }, { "epoch": 0.41093412684520686, "grad_norm": 0.8297543236724341, "learning_rate": 6.653672508473408e-06, "loss": 0.1541, "step": 14086 }, { "epoch": 0.4109633000758504, "grad_norm": 0.9345170815107622, "learning_rate": 6.653226653715197e-06, "loss": 0.1435, "step": 14087 }, { "epoch": 0.41099247330649397, "grad_norm": 0.9732913954697464, "learning_rate": 6.652780784197371e-06, "loss": 0.132, "step": 14088 }, { "epoch": 0.4110216465371375, "grad_norm": 0.8123372090511947, "learning_rate": 6.652334899923914e-06, "loss": 0.1371, "step": 14089 }, { "epoch": 0.4110508197677811, "grad_norm": 0.8663632495507103, "learning_rate": 6.651889000898807e-06, "loss": 0.1317, "step": 14090 }, { "epoch": 0.4110799929984246, "grad_norm": 0.7733858395994007, "learning_rate": 6.651443087126028e-06, "loss": 0.1443, "step": 14091 }, { "epoch": 0.41110916622906823, "grad_norm": 0.8989077850530516, "learning_rate": 6.650997158609559e-06, "loss": 0.1538, "step": 14092 }, { "epoch": 0.4111383394597118, "grad_norm": 0.8221939205733133, "learning_rate": 6.650551215353381e-06, "loss": 0.1374, "step": 14093 }, { "epoch": 0.41116751269035534, "grad_norm": 0.7128271885855784, "learning_rate": 6.650105257361478e-06, "loss": 0.1489, "step": 14094 }, { "epoch": 0.4111966859209989, "grad_norm": 0.8325895993213441, "learning_rate": 6.649659284637826e-06, "loss": 0.148, "step": 14095 }, { "epoch": 0.41122585915164245, "grad_norm": 0.7752183889670637, "learning_rate": 6.649213297186413e-06, "loss": 0.1383, "step": 14096 }, { "epoch": 0.411255032382286, "grad_norm": 1.1079170416632855, "learning_rate": 6.648767295011216e-06, "loss": 0.1705, "step": 14097 }, { "epoch": 0.41128420561292955, "grad_norm": 0.9474382692371365, "learning_rate": 6.648321278116216e-06, "loss": 0.1579, "step": 14098 }, { "epoch": 0.41131337884357316, "grad_norm": 1.0019968475051197, "learning_rate": 6.6478752465054005e-06, "loss": 0.1465, "step": 14099 }, { "epoch": 0.4113425520742167, "grad_norm": 0.8485983597306349, "learning_rate": 6.6474292001827475e-06, "loss": 0.127, "step": 14100 }, { "epoch": 0.41137172530486027, "grad_norm": 0.9700884182027817, "learning_rate": 6.646983139152239e-06, "loss": 0.1749, "step": 14101 }, { "epoch": 0.4114008985355038, "grad_norm": 1.0427712116214656, "learning_rate": 6.646537063417858e-06, "loss": 0.1512, "step": 14102 }, { "epoch": 0.4114300717661474, "grad_norm": 0.866105005706269, "learning_rate": 6.646090972983588e-06, "loss": 0.1433, "step": 14103 }, { "epoch": 0.41145924499679093, "grad_norm": 0.8438360414774141, "learning_rate": 6.64564486785341e-06, "loss": 0.1414, "step": 14104 }, { "epoch": 0.4114884182274345, "grad_norm": 1.0502680853296236, "learning_rate": 6.6451987480313085e-06, "loss": 0.1477, "step": 14105 }, { "epoch": 0.4115175914580781, "grad_norm": 0.8129688190336931, "learning_rate": 6.644752613521266e-06, "loss": 0.1629, "step": 14106 }, { "epoch": 0.41154676468872164, "grad_norm": 0.7820296113578517, "learning_rate": 6.644306464327261e-06, "loss": 0.1461, "step": 14107 }, { "epoch": 0.4115759379193652, "grad_norm": 0.7590526242828608, "learning_rate": 6.643860300453283e-06, "loss": 0.1413, "step": 14108 }, { "epoch": 0.41160511115000875, "grad_norm": 0.7411650521546309, "learning_rate": 6.643414121903313e-06, "loss": 0.1463, "step": 14109 }, { "epoch": 0.4116342843806523, "grad_norm": 0.8121287706621918, "learning_rate": 6.642967928681333e-06, "loss": 0.1591, "step": 14110 }, { "epoch": 0.41166345761129586, "grad_norm": 0.7481389329202898, "learning_rate": 6.64252172079133e-06, "loss": 0.1483, "step": 14111 }, { "epoch": 0.4116926308419394, "grad_norm": 0.7614692513847493, "learning_rate": 6.642075498237283e-06, "loss": 0.1315, "step": 14112 }, { "epoch": 0.411721804072583, "grad_norm": 0.7969165419936783, "learning_rate": 6.641629261023177e-06, "loss": 0.1459, "step": 14113 }, { "epoch": 0.4117509773032266, "grad_norm": 0.7107974912860358, "learning_rate": 6.6411830091529984e-06, "loss": 0.1466, "step": 14114 }, { "epoch": 0.4117801505338701, "grad_norm": 0.891300069827346, "learning_rate": 6.640736742630729e-06, "loss": 0.1385, "step": 14115 }, { "epoch": 0.4118093237645137, "grad_norm": 0.8454777967311086, "learning_rate": 6.6402904614603546e-06, "loss": 0.1702, "step": 14116 }, { "epoch": 0.41183849699515723, "grad_norm": 0.7000881618005538, "learning_rate": 6.639844165645858e-06, "loss": 0.149, "step": 14117 }, { "epoch": 0.4118676702258008, "grad_norm": 0.8146304024869583, "learning_rate": 6.639397855191223e-06, "loss": 0.1594, "step": 14118 }, { "epoch": 0.41189684345644434, "grad_norm": 0.7466521834770206, "learning_rate": 6.638951530100437e-06, "loss": 0.137, "step": 14119 }, { "epoch": 0.41192601668708795, "grad_norm": 0.8587020813308973, "learning_rate": 6.638505190377482e-06, "loss": 0.1641, "step": 14120 }, { "epoch": 0.4119551899177315, "grad_norm": 0.8324195692325999, "learning_rate": 6.6380588360263455e-06, "loss": 0.1492, "step": 14121 }, { "epoch": 0.41198436314837505, "grad_norm": 0.7463544353546875, "learning_rate": 6.637612467051008e-06, "loss": 0.1268, "step": 14122 }, { "epoch": 0.4120135363790186, "grad_norm": 0.8463558671027969, "learning_rate": 6.6371660834554586e-06, "loss": 0.1402, "step": 14123 }, { "epoch": 0.41204270960966216, "grad_norm": 0.8271932931175923, "learning_rate": 6.6367196852436826e-06, "loss": 0.1337, "step": 14124 }, { "epoch": 0.4120718828403057, "grad_norm": 0.7450770554727912, "learning_rate": 6.636273272419661e-06, "loss": 0.1491, "step": 14125 }, { "epoch": 0.4121010560709493, "grad_norm": 0.757882374639118, "learning_rate": 6.635826844987385e-06, "loss": 0.1154, "step": 14126 }, { "epoch": 0.4121302293015929, "grad_norm": 0.7802016560566676, "learning_rate": 6.6353804029508376e-06, "loss": 0.1556, "step": 14127 }, { "epoch": 0.41215940253223643, "grad_norm": 0.7780001687262968, "learning_rate": 6.634933946314002e-06, "loss": 0.1251, "step": 14128 }, { "epoch": 0.41218857576288, "grad_norm": 1.3313772094344312, "learning_rate": 6.634487475080867e-06, "loss": 0.1476, "step": 14129 }, { "epoch": 0.41221774899352354, "grad_norm": 1.0211085561052402, "learning_rate": 6.634040989255419e-06, "loss": 0.1289, "step": 14130 }, { "epoch": 0.4122469222241671, "grad_norm": 0.966672387384863, "learning_rate": 6.633594488841642e-06, "loss": 0.1565, "step": 14131 }, { "epoch": 0.41227609545481064, "grad_norm": 0.8019155350181045, "learning_rate": 6.633147973843525e-06, "loss": 0.1419, "step": 14132 }, { "epoch": 0.41230526868545425, "grad_norm": 0.8361654518518425, "learning_rate": 6.632701444265052e-06, "loss": 0.1281, "step": 14133 }, { "epoch": 0.4123344419160978, "grad_norm": 1.112056236003766, "learning_rate": 6.632254900110209e-06, "loss": 0.1575, "step": 14134 }, { "epoch": 0.41236361514674136, "grad_norm": 0.8007026380726106, "learning_rate": 6.631808341382986e-06, "loss": 0.1367, "step": 14135 }, { "epoch": 0.4123927883773849, "grad_norm": 0.8146151167565929, "learning_rate": 6.631361768087368e-06, "loss": 0.1343, "step": 14136 }, { "epoch": 0.41242196160802846, "grad_norm": 0.8897923106760964, "learning_rate": 6.630915180227338e-06, "loss": 0.1306, "step": 14137 }, { "epoch": 0.412451134838672, "grad_norm": 0.705878373741667, "learning_rate": 6.630468577806889e-06, "loss": 0.1419, "step": 14138 }, { "epoch": 0.41248030806931557, "grad_norm": 0.6555732784778344, "learning_rate": 6.630021960830007e-06, "loss": 0.1403, "step": 14139 }, { "epoch": 0.4125094812999592, "grad_norm": 0.7100573139655991, "learning_rate": 6.6295753293006745e-06, "loss": 0.1513, "step": 14140 }, { "epoch": 0.41253865453060273, "grad_norm": 0.7408558010398478, "learning_rate": 6.629128683222886e-06, "loss": 0.1658, "step": 14141 }, { "epoch": 0.4125678277612463, "grad_norm": 0.8762724704427244, "learning_rate": 6.628682022600624e-06, "loss": 0.1433, "step": 14142 }, { "epoch": 0.41259700099188984, "grad_norm": 0.8278080142316044, "learning_rate": 6.628235347437878e-06, "loss": 0.1414, "step": 14143 }, { "epoch": 0.4126261742225334, "grad_norm": 0.7474656768972003, "learning_rate": 6.627788657738635e-06, "loss": 0.1421, "step": 14144 }, { "epoch": 0.41265534745317695, "grad_norm": 0.8289650710922033, "learning_rate": 6.627341953506884e-06, "loss": 0.149, "step": 14145 }, { "epoch": 0.4126845206838205, "grad_norm": 0.7614300428490463, "learning_rate": 6.6268952347466124e-06, "loss": 0.1202, "step": 14146 }, { "epoch": 0.4127136939144641, "grad_norm": 0.7006325906898112, "learning_rate": 6.6264485014618086e-06, "loss": 0.1455, "step": 14147 }, { "epoch": 0.41274286714510766, "grad_norm": 0.7188461142267647, "learning_rate": 6.62600175365646e-06, "loss": 0.1263, "step": 14148 }, { "epoch": 0.4127720403757512, "grad_norm": 0.8691992835050434, "learning_rate": 6.6255549913345564e-06, "loss": 0.1604, "step": 14149 }, { "epoch": 0.41280121360639477, "grad_norm": 0.7837695638760147, "learning_rate": 6.625108214500086e-06, "loss": 0.1374, "step": 14150 }, { "epoch": 0.4128303868370383, "grad_norm": 0.7637990297262751, "learning_rate": 6.624661423157038e-06, "loss": 0.1407, "step": 14151 }, { "epoch": 0.4128595600676819, "grad_norm": 0.9888144909637261, "learning_rate": 6.624214617309399e-06, "loss": 0.1278, "step": 14152 }, { "epoch": 0.4128887332983255, "grad_norm": 0.9697521391268682, "learning_rate": 6.623767796961161e-06, "loss": 0.1527, "step": 14153 }, { "epoch": 0.41291790652896904, "grad_norm": 0.9551915536592764, "learning_rate": 6.623320962116312e-06, "loss": 0.152, "step": 14154 }, { "epoch": 0.4129470797596126, "grad_norm": 0.8308831093437837, "learning_rate": 6.62287411277884e-06, "loss": 0.1397, "step": 14155 }, { "epoch": 0.41297625299025614, "grad_norm": 1.2305136064429363, "learning_rate": 6.622427248952736e-06, "loss": 0.1501, "step": 14156 }, { "epoch": 0.4130054262208997, "grad_norm": 1.108882331570643, "learning_rate": 6.621980370641988e-06, "loss": 0.1297, "step": 14157 }, { "epoch": 0.41303459945154325, "grad_norm": 0.8715651073385032, "learning_rate": 6.621533477850588e-06, "loss": 0.1454, "step": 14158 }, { "epoch": 0.4130637726821868, "grad_norm": 0.9190812515719307, "learning_rate": 6.621086570582523e-06, "loss": 0.1424, "step": 14159 }, { "epoch": 0.4130929459128304, "grad_norm": 0.8071557816911274, "learning_rate": 6.6206396488417835e-06, "loss": 0.1179, "step": 14160 }, { "epoch": 0.41312211914347396, "grad_norm": 0.9930843256093073, "learning_rate": 6.620192712632361e-06, "loss": 0.1329, "step": 14161 }, { "epoch": 0.4131512923741175, "grad_norm": 0.7460166055800551, "learning_rate": 6.619745761958245e-06, "loss": 0.1516, "step": 14162 }, { "epoch": 0.41318046560476107, "grad_norm": 0.914321139545343, "learning_rate": 6.619298796823426e-06, "loss": 0.1269, "step": 14163 }, { "epoch": 0.4132096388354046, "grad_norm": 0.808775444438091, "learning_rate": 6.6188518172318925e-06, "loss": 0.1492, "step": 14164 }, { "epoch": 0.4132388120660482, "grad_norm": 0.8998984093268817, "learning_rate": 6.6184048231876375e-06, "loss": 0.1353, "step": 14165 }, { "epoch": 0.41326798529669173, "grad_norm": 0.8507902726391359, "learning_rate": 6.61795781469465e-06, "loss": 0.1577, "step": 14166 }, { "epoch": 0.41329715852733534, "grad_norm": 0.9064598430984236, "learning_rate": 6.61751079175692e-06, "loss": 0.1189, "step": 14167 }, { "epoch": 0.4133263317579789, "grad_norm": 0.7571406762091855, "learning_rate": 6.617063754378442e-06, "loss": 0.1302, "step": 14168 }, { "epoch": 0.41335550498862245, "grad_norm": 0.8348164600558217, "learning_rate": 6.616616702563204e-06, "loss": 0.147, "step": 14169 }, { "epoch": 0.413384678219266, "grad_norm": 0.933216345129164, "learning_rate": 6.6161696363151986e-06, "loss": 0.1358, "step": 14170 }, { "epoch": 0.41341385144990955, "grad_norm": 0.9152142623685499, "learning_rate": 6.615722555638416e-06, "loss": 0.1344, "step": 14171 }, { "epoch": 0.4134430246805531, "grad_norm": 0.9600133495594002, "learning_rate": 6.615275460536847e-06, "loss": 0.1343, "step": 14172 }, { "epoch": 0.41347219791119666, "grad_norm": 1.096416770061301, "learning_rate": 6.614828351014487e-06, "loss": 0.1331, "step": 14173 }, { "epoch": 0.41350137114184027, "grad_norm": 0.8320915241840282, "learning_rate": 6.614381227075323e-06, "loss": 0.1345, "step": 14174 }, { "epoch": 0.4135305443724838, "grad_norm": 0.88374329390917, "learning_rate": 6.613934088723349e-06, "loss": 0.1344, "step": 14175 }, { "epoch": 0.4135597176031274, "grad_norm": 1.157412923880576, "learning_rate": 6.613486935962556e-06, "loss": 0.142, "step": 14176 }, { "epoch": 0.41358889083377093, "grad_norm": 1.0243470901503362, "learning_rate": 6.613039768796938e-06, "loss": 0.1504, "step": 14177 }, { "epoch": 0.4136180640644145, "grad_norm": 0.8814405839444529, "learning_rate": 6.6125925872304865e-06, "loss": 0.1441, "step": 14178 }, { "epoch": 0.41364723729505803, "grad_norm": 1.2422971184799716, "learning_rate": 6.612145391267192e-06, "loss": 0.1579, "step": 14179 }, { "epoch": 0.41367641052570164, "grad_norm": 1.04550647310779, "learning_rate": 6.611698180911048e-06, "loss": 0.1321, "step": 14180 }, { "epoch": 0.4137055837563452, "grad_norm": 1.1812782521171592, "learning_rate": 6.611250956166049e-06, "loss": 0.1289, "step": 14181 }, { "epoch": 0.41373475698698875, "grad_norm": 0.8090381359730179, "learning_rate": 6.610803717036185e-06, "loss": 0.1316, "step": 14182 }, { "epoch": 0.4137639302176323, "grad_norm": 0.9203050921883699, "learning_rate": 6.6103564635254505e-06, "loss": 0.1409, "step": 14183 }, { "epoch": 0.41379310344827586, "grad_norm": 0.8137031224583076, "learning_rate": 6.609909195637837e-06, "loss": 0.1416, "step": 14184 }, { "epoch": 0.4138222766789194, "grad_norm": 0.8290285424710755, "learning_rate": 6.60946191337734e-06, "loss": 0.1514, "step": 14185 }, { "epoch": 0.41385144990956296, "grad_norm": 0.7854435898787894, "learning_rate": 6.609014616747951e-06, "loss": 0.1499, "step": 14186 }, { "epoch": 0.41388062314020657, "grad_norm": 0.7491527552813726, "learning_rate": 6.608567305753661e-06, "loss": 0.1494, "step": 14187 }, { "epoch": 0.4139097963708501, "grad_norm": 0.8043017259004319, "learning_rate": 6.60811998039847e-06, "loss": 0.1351, "step": 14188 }, { "epoch": 0.4139389696014937, "grad_norm": 0.6789911228331924, "learning_rate": 6.607672640686365e-06, "loss": 0.1221, "step": 14189 }, { "epoch": 0.41396814283213723, "grad_norm": 0.8191645199722037, "learning_rate": 6.607225286621342e-06, "loss": 0.1367, "step": 14190 }, { "epoch": 0.4139973160627808, "grad_norm": 0.7379204395749995, "learning_rate": 6.6067779182073974e-06, "loss": 0.1267, "step": 14191 }, { "epoch": 0.41402648929342434, "grad_norm": 0.951248367738476, "learning_rate": 6.606330535448523e-06, "loss": 0.1474, "step": 14192 }, { "epoch": 0.4140556625240679, "grad_norm": 0.7327792998003283, "learning_rate": 6.605883138348712e-06, "loss": 0.1197, "step": 14193 }, { "epoch": 0.4140848357547115, "grad_norm": 0.7894826381083477, "learning_rate": 6.605435726911959e-06, "loss": 0.1226, "step": 14194 }, { "epoch": 0.41411400898535505, "grad_norm": 0.9321632495459524, "learning_rate": 6.604988301142261e-06, "loss": 0.1353, "step": 14195 }, { "epoch": 0.4141431822159986, "grad_norm": 0.7572130205053703, "learning_rate": 6.604540861043609e-06, "loss": 0.1309, "step": 14196 }, { "epoch": 0.41417235544664216, "grad_norm": 0.7593881376595455, "learning_rate": 6.60409340662e-06, "loss": 0.1374, "step": 14197 }, { "epoch": 0.4142015286772857, "grad_norm": 0.7482341088492079, "learning_rate": 6.603645937875428e-06, "loss": 0.129, "step": 14198 }, { "epoch": 0.41423070190792927, "grad_norm": 0.8130791401643425, "learning_rate": 6.603198454813888e-06, "loss": 0.1208, "step": 14199 }, { "epoch": 0.4142598751385728, "grad_norm": 0.8647814715625625, "learning_rate": 6.602750957439374e-06, "loss": 0.151, "step": 14200 }, { "epoch": 0.41428904836921643, "grad_norm": 0.7238282092367369, "learning_rate": 6.6023034457558846e-06, "loss": 0.1615, "step": 14201 }, { "epoch": 0.41431822159986, "grad_norm": 0.6866369237153953, "learning_rate": 6.6018559197674094e-06, "loss": 0.1333, "step": 14202 }, { "epoch": 0.41434739483050353, "grad_norm": 0.7287680754131481, "learning_rate": 6.601408379477949e-06, "loss": 0.1366, "step": 14203 }, { "epoch": 0.4143765680611471, "grad_norm": 0.6889348026106697, "learning_rate": 6.600960824891496e-06, "loss": 0.1266, "step": 14204 }, { "epoch": 0.41440574129179064, "grad_norm": 0.7416949954049658, "learning_rate": 6.600513256012047e-06, "loss": 0.1528, "step": 14205 }, { "epoch": 0.4144349145224342, "grad_norm": 0.9181624808884554, "learning_rate": 6.600065672843597e-06, "loss": 0.1505, "step": 14206 }, { "epoch": 0.4144640877530778, "grad_norm": 0.7794913680494193, "learning_rate": 6.599618075390144e-06, "loss": 0.1306, "step": 14207 }, { "epoch": 0.41449326098372136, "grad_norm": 0.9401234257825831, "learning_rate": 6.599170463655682e-06, "loss": 0.1277, "step": 14208 }, { "epoch": 0.4145224342143649, "grad_norm": 0.7856146179718172, "learning_rate": 6.598722837644208e-06, "loss": 0.1561, "step": 14209 }, { "epoch": 0.41455160744500846, "grad_norm": 0.8748548482321112, "learning_rate": 6.5982751973597185e-06, "loss": 0.1282, "step": 14210 }, { "epoch": 0.414580780675652, "grad_norm": 1.0139285038810066, "learning_rate": 6.597827542806209e-06, "loss": 0.1159, "step": 14211 }, { "epoch": 0.41460995390629557, "grad_norm": 0.9835177770005753, "learning_rate": 6.597379873987677e-06, "loss": 0.1689, "step": 14212 }, { "epoch": 0.4146391271369391, "grad_norm": 0.8719525758696153, "learning_rate": 6.596932190908119e-06, "loss": 0.133, "step": 14213 }, { "epoch": 0.41466830036758273, "grad_norm": 0.9193789647211665, "learning_rate": 6.59648449357153e-06, "loss": 0.129, "step": 14214 }, { "epoch": 0.4146974735982263, "grad_norm": 0.8156922230844491, "learning_rate": 6.596036781981909e-06, "loss": 0.1466, "step": 14215 }, { "epoch": 0.41472664682886984, "grad_norm": 0.6906246025773246, "learning_rate": 6.595589056143255e-06, "loss": 0.1423, "step": 14216 }, { "epoch": 0.4147558200595134, "grad_norm": 0.777090503405836, "learning_rate": 6.59514131605956e-06, "loss": 0.1397, "step": 14217 }, { "epoch": 0.41478499329015694, "grad_norm": 0.8214239367704292, "learning_rate": 6.594693561734826e-06, "loss": 0.1652, "step": 14218 }, { "epoch": 0.4148141665208005, "grad_norm": 0.8431376353961676, "learning_rate": 6.594245793173049e-06, "loss": 0.1308, "step": 14219 }, { "epoch": 0.41484333975144405, "grad_norm": 0.7942742018087572, "learning_rate": 6.593798010378223e-06, "loss": 0.1349, "step": 14220 }, { "epoch": 0.41487251298208766, "grad_norm": 0.8673458762180498, "learning_rate": 6.593350213354353e-06, "loss": 0.1354, "step": 14221 }, { "epoch": 0.4149016862127312, "grad_norm": 0.7163674722069435, "learning_rate": 6.59290240210543e-06, "loss": 0.1356, "step": 14222 }, { "epoch": 0.41493085944337477, "grad_norm": 0.7939042595607231, "learning_rate": 6.592454576635454e-06, "loss": 0.1536, "step": 14223 }, { "epoch": 0.4149600326740183, "grad_norm": 0.7638234883573528, "learning_rate": 6.592006736948425e-06, "loss": 0.1406, "step": 14224 }, { "epoch": 0.4149892059046619, "grad_norm": 0.6860195885204661, "learning_rate": 6.59155888304834e-06, "loss": 0.1451, "step": 14225 }, { "epoch": 0.4150183791353054, "grad_norm": 0.6717922327951451, "learning_rate": 6.5911110149391976e-06, "loss": 0.1486, "step": 14226 }, { "epoch": 0.415047552365949, "grad_norm": 0.7714242591097691, "learning_rate": 6.590663132624995e-06, "loss": 0.1322, "step": 14227 }, { "epoch": 0.4150767255965926, "grad_norm": 0.71453038723994, "learning_rate": 6.590215236109731e-06, "loss": 0.1491, "step": 14228 }, { "epoch": 0.41510589882723614, "grad_norm": 0.8724219654697126, "learning_rate": 6.589767325397407e-06, "loss": 0.134, "step": 14229 }, { "epoch": 0.4151350720578797, "grad_norm": 0.9593656382652455, "learning_rate": 6.589319400492018e-06, "loss": 0.152, "step": 14230 }, { "epoch": 0.41516424528852325, "grad_norm": 0.9514974888951511, "learning_rate": 6.588871461397567e-06, "loss": 0.13, "step": 14231 }, { "epoch": 0.4151934185191668, "grad_norm": 1.1342312671634571, "learning_rate": 6.588423508118048e-06, "loss": 0.1552, "step": 14232 }, { "epoch": 0.41522259174981035, "grad_norm": 1.0483684124579047, "learning_rate": 6.587975540657465e-06, "loss": 0.142, "step": 14233 }, { "epoch": 0.4152517649804539, "grad_norm": 1.0360670981700808, "learning_rate": 6.587527559019815e-06, "loss": 0.1457, "step": 14234 }, { "epoch": 0.4152809382110975, "grad_norm": 0.9073539394442789, "learning_rate": 6.5870795632090965e-06, "loss": 0.1359, "step": 14235 }, { "epoch": 0.41531011144174107, "grad_norm": 1.0480011025979166, "learning_rate": 6.586631553229313e-06, "loss": 0.1496, "step": 14236 }, { "epoch": 0.4153392846723846, "grad_norm": 0.7141874492710977, "learning_rate": 6.5861835290844615e-06, "loss": 0.1516, "step": 14237 }, { "epoch": 0.4153684579030282, "grad_norm": 0.7419194638758977, "learning_rate": 6.585735490778541e-06, "loss": 0.136, "step": 14238 }, { "epoch": 0.41539763113367173, "grad_norm": 0.6361997929822755, "learning_rate": 6.585287438315553e-06, "loss": 0.1412, "step": 14239 }, { "epoch": 0.4154268043643153, "grad_norm": 1.7003927530082934, "learning_rate": 6.5848393716994966e-06, "loss": 0.1341, "step": 14240 }, { "epoch": 0.4154559775949589, "grad_norm": 0.8609963655650889, "learning_rate": 6.5843912909343734e-06, "loss": 0.1401, "step": 14241 }, { "epoch": 0.41548515082560245, "grad_norm": 0.8800828096276959, "learning_rate": 6.583943196024182e-06, "loss": 0.1525, "step": 14242 }, { "epoch": 0.415514324056246, "grad_norm": 0.735639549049615, "learning_rate": 6.583495086972924e-06, "loss": 0.1498, "step": 14243 }, { "epoch": 0.41554349728688955, "grad_norm": 1.018300855877259, "learning_rate": 6.5830469637846e-06, "loss": 0.1597, "step": 14244 }, { "epoch": 0.4155726705175331, "grad_norm": 0.79913859611663, "learning_rate": 6.582598826463211e-06, "loss": 0.1591, "step": 14245 }, { "epoch": 0.41560184374817666, "grad_norm": 0.827353692461927, "learning_rate": 6.58215067501276e-06, "loss": 0.1286, "step": 14246 }, { "epoch": 0.4156310169788202, "grad_norm": 0.67302947878698, "learning_rate": 6.5817025094372415e-06, "loss": 0.135, "step": 14247 }, { "epoch": 0.4156601902094638, "grad_norm": 1.0326698294250154, "learning_rate": 6.581254329740663e-06, "loss": 0.1242, "step": 14248 }, { "epoch": 0.4156893634401074, "grad_norm": 0.7923780509932484, "learning_rate": 6.580806135927021e-06, "loss": 0.1433, "step": 14249 }, { "epoch": 0.4157185366707509, "grad_norm": 0.8635468660717607, "learning_rate": 6.580357928000321e-06, "loss": 0.1393, "step": 14250 }, { "epoch": 0.4157477099013945, "grad_norm": 1.1185468153332023, "learning_rate": 6.579909705964562e-06, "loss": 0.1444, "step": 14251 }, { "epoch": 0.41577688313203803, "grad_norm": 0.9428650205713817, "learning_rate": 6.5794614698237465e-06, "loss": 0.1466, "step": 14252 }, { "epoch": 0.4158060563626816, "grad_norm": 0.7958016525600325, "learning_rate": 6.579013219581876e-06, "loss": 0.1425, "step": 14253 }, { "epoch": 0.41583522959332514, "grad_norm": 1.004094928846351, "learning_rate": 6.578564955242952e-06, "loss": 0.1507, "step": 14254 }, { "epoch": 0.41586440282396875, "grad_norm": 0.7654458813658859, "learning_rate": 6.578116676810979e-06, "loss": 0.1497, "step": 14255 }, { "epoch": 0.4158935760546123, "grad_norm": 1.0215519857910305, "learning_rate": 6.577668384289955e-06, "loss": 0.1243, "step": 14256 }, { "epoch": 0.41592274928525585, "grad_norm": 0.8578334702034305, "learning_rate": 6.577220077683884e-06, "loss": 0.13, "step": 14257 }, { "epoch": 0.4159519225158994, "grad_norm": 0.7564010945740879, "learning_rate": 6.57677175699677e-06, "loss": 0.1471, "step": 14258 }, { "epoch": 0.41598109574654296, "grad_norm": 0.9085508334254474, "learning_rate": 6.576323422232612e-06, "loss": 0.1234, "step": 14259 }, { "epoch": 0.4160102689771865, "grad_norm": 0.8234747830665352, "learning_rate": 6.575875073395417e-06, "loss": 0.1196, "step": 14260 }, { "epoch": 0.41603944220783007, "grad_norm": 0.7241153612141866, "learning_rate": 6.5754267104891855e-06, "loss": 0.1273, "step": 14261 }, { "epoch": 0.4160686154384737, "grad_norm": 0.696367475586553, "learning_rate": 6.574978333517918e-06, "loss": 0.1653, "step": 14262 }, { "epoch": 0.41609778866911723, "grad_norm": 0.7856989032450266, "learning_rate": 6.574529942485623e-06, "loss": 0.1326, "step": 14263 }, { "epoch": 0.4161269618997608, "grad_norm": 0.9594314337708004, "learning_rate": 6.574081537396299e-06, "loss": 0.1446, "step": 14264 }, { "epoch": 0.41615613513040434, "grad_norm": 0.7745130245349363, "learning_rate": 6.573633118253951e-06, "loss": 0.1201, "step": 14265 }, { "epoch": 0.4161853083610479, "grad_norm": 0.8539250642295578, "learning_rate": 6.5731846850625824e-06, "loss": 0.1628, "step": 14266 }, { "epoch": 0.41621448159169144, "grad_norm": 0.8183359594746717, "learning_rate": 6.572736237826196e-06, "loss": 0.1347, "step": 14267 }, { "epoch": 0.41624365482233505, "grad_norm": 0.8686788464309594, "learning_rate": 6.572287776548797e-06, "loss": 0.1516, "step": 14268 }, { "epoch": 0.4162728280529786, "grad_norm": 0.9002621558730742, "learning_rate": 6.571839301234386e-06, "loss": 0.1465, "step": 14269 }, { "epoch": 0.41630200128362216, "grad_norm": 0.8924838825693661, "learning_rate": 6.571390811886971e-06, "loss": 0.129, "step": 14270 }, { "epoch": 0.4163311745142657, "grad_norm": 0.7981411053326004, "learning_rate": 6.570942308510553e-06, "loss": 0.1241, "step": 14271 }, { "epoch": 0.41636034774490926, "grad_norm": 1.0778965375656522, "learning_rate": 6.570493791109137e-06, "loss": 0.1224, "step": 14272 }, { "epoch": 0.4163895209755528, "grad_norm": 1.0137178303005545, "learning_rate": 6.570045259686728e-06, "loss": 0.1176, "step": 14273 }, { "epoch": 0.41641869420619637, "grad_norm": 0.8599627669058896, "learning_rate": 6.569596714247328e-06, "loss": 0.1361, "step": 14274 }, { "epoch": 0.41644786743684, "grad_norm": 1.2350963687471905, "learning_rate": 6.569148154794945e-06, "loss": 0.1503, "step": 14275 }, { "epoch": 0.41647704066748353, "grad_norm": 1.1456342598343054, "learning_rate": 6.568699581333583e-06, "loss": 0.1484, "step": 14276 }, { "epoch": 0.4165062138981271, "grad_norm": 0.896590792772618, "learning_rate": 6.568250993867242e-06, "loss": 0.15, "step": 14277 }, { "epoch": 0.41653538712877064, "grad_norm": 0.9495093826242231, "learning_rate": 6.567802392399934e-06, "loss": 0.1306, "step": 14278 }, { "epoch": 0.4165645603594142, "grad_norm": 2.4680776091761025, "learning_rate": 6.567353776935659e-06, "loss": 0.117, "step": 14279 }, { "epoch": 0.41659373359005775, "grad_norm": 1.0618009436803515, "learning_rate": 6.566905147478422e-06, "loss": 0.1496, "step": 14280 }, { "epoch": 0.4166229068207013, "grad_norm": 0.9761818292036695, "learning_rate": 6.5664565040322325e-06, "loss": 0.1631, "step": 14281 }, { "epoch": 0.4166520800513449, "grad_norm": 0.8427616603059416, "learning_rate": 6.566007846601092e-06, "loss": 0.145, "step": 14282 }, { "epoch": 0.41668125328198846, "grad_norm": 1.1511932611745979, "learning_rate": 6.565559175189008e-06, "loss": 0.1511, "step": 14283 }, { "epoch": 0.416710426512632, "grad_norm": 1.165155613581096, "learning_rate": 6.565110489799985e-06, "loss": 0.1333, "step": 14284 }, { "epoch": 0.41673959974327557, "grad_norm": 0.8810995836888235, "learning_rate": 6.564661790438029e-06, "loss": 0.1482, "step": 14285 }, { "epoch": 0.4167687729739191, "grad_norm": 0.9295967316792687, "learning_rate": 6.564213077107147e-06, "loss": 0.1291, "step": 14286 }, { "epoch": 0.4167979462045627, "grad_norm": 1.298575314455602, "learning_rate": 6.563764349811342e-06, "loss": 0.1496, "step": 14287 }, { "epoch": 0.41682711943520623, "grad_norm": 0.8940517436562201, "learning_rate": 6.563315608554624e-06, "loss": 0.1503, "step": 14288 }, { "epoch": 0.41685629266584984, "grad_norm": 0.949590246522986, "learning_rate": 6.562866853340997e-06, "loss": 0.1398, "step": 14289 }, { "epoch": 0.4168854658964934, "grad_norm": 0.7319174477402393, "learning_rate": 6.562418084174467e-06, "loss": 0.1338, "step": 14290 }, { "epoch": 0.41691463912713694, "grad_norm": 0.8444589471790982, "learning_rate": 6.561969301059044e-06, "loss": 0.1373, "step": 14291 }, { "epoch": 0.4169438123577805, "grad_norm": 0.9852698717575947, "learning_rate": 6.561520503998728e-06, "loss": 0.1312, "step": 14292 }, { "epoch": 0.41697298558842405, "grad_norm": 0.8153453148167985, "learning_rate": 6.561071692997533e-06, "loss": 0.1603, "step": 14293 }, { "epoch": 0.4170021588190676, "grad_norm": 0.8232336892224774, "learning_rate": 6.560622868059461e-06, "loss": 0.1622, "step": 14294 }, { "epoch": 0.4170313320497112, "grad_norm": 0.912693790723682, "learning_rate": 6.56017402918852e-06, "loss": 0.176, "step": 14295 }, { "epoch": 0.41706050528035477, "grad_norm": 0.7135774955610511, "learning_rate": 6.559725176388719e-06, "loss": 0.1303, "step": 14296 }, { "epoch": 0.4170896785109983, "grad_norm": 0.8108366963627028, "learning_rate": 6.559276309664064e-06, "loss": 0.1459, "step": 14297 }, { "epoch": 0.41711885174164187, "grad_norm": 0.8494333748366347, "learning_rate": 6.558827429018562e-06, "loss": 0.1343, "step": 14298 }, { "epoch": 0.4171480249722854, "grad_norm": 0.66545406291422, "learning_rate": 6.5583785344562204e-06, "loss": 0.1463, "step": 14299 }, { "epoch": 0.417177198202929, "grad_norm": 1.0578226466632783, "learning_rate": 6.557929625981048e-06, "loss": 0.1539, "step": 14300 }, { "epoch": 0.41720637143357253, "grad_norm": 0.7599889971263503, "learning_rate": 6.557480703597051e-06, "loss": 0.1684, "step": 14301 }, { "epoch": 0.41723554466421614, "grad_norm": 0.7508250181418441, "learning_rate": 6.5570317673082385e-06, "loss": 0.1659, "step": 14302 }, { "epoch": 0.4172647178948597, "grad_norm": 0.8196772922829928, "learning_rate": 6.5565828171186175e-06, "loss": 0.1467, "step": 14303 }, { "epoch": 0.41729389112550325, "grad_norm": 0.8000356929982854, "learning_rate": 6.556133853032197e-06, "loss": 0.1399, "step": 14304 }, { "epoch": 0.4173230643561468, "grad_norm": 0.8587873979531085, "learning_rate": 6.555684875052985e-06, "loss": 0.1308, "step": 14305 }, { "epoch": 0.41735223758679035, "grad_norm": 0.7785555743013844, "learning_rate": 6.555235883184991e-06, "loss": 0.1305, "step": 14306 }, { "epoch": 0.4173814108174339, "grad_norm": 0.7203983435626452, "learning_rate": 6.55478687743222e-06, "loss": 0.1392, "step": 14307 }, { "epoch": 0.41741058404807746, "grad_norm": 0.8982224874390651, "learning_rate": 6.554337857798686e-06, "loss": 0.1395, "step": 14308 }, { "epoch": 0.41743975727872107, "grad_norm": 0.9033788887895698, "learning_rate": 6.553888824288393e-06, "loss": 0.1334, "step": 14309 }, { "epoch": 0.4174689305093646, "grad_norm": 0.880630137005619, "learning_rate": 6.55343977690535e-06, "loss": 0.1487, "step": 14310 }, { "epoch": 0.4174981037400082, "grad_norm": 0.7806417589295644, "learning_rate": 6.55299071565357e-06, "loss": 0.1225, "step": 14311 }, { "epoch": 0.41752727697065173, "grad_norm": 0.9294498284452497, "learning_rate": 6.552541640537058e-06, "loss": 0.1645, "step": 14312 }, { "epoch": 0.4175564502012953, "grad_norm": 0.9978673562227008, "learning_rate": 6.552092551559825e-06, "loss": 0.1535, "step": 14313 }, { "epoch": 0.41758562343193883, "grad_norm": 0.9282543665195457, "learning_rate": 6.55164344872588e-06, "loss": 0.1527, "step": 14314 }, { "epoch": 0.4176147966625824, "grad_norm": 0.8826576051342357, "learning_rate": 6.551194332039235e-06, "loss": 0.129, "step": 14315 }, { "epoch": 0.417643969893226, "grad_norm": 0.8600713731233143, "learning_rate": 6.550745201503894e-06, "loss": 0.1515, "step": 14316 }, { "epoch": 0.41767314312386955, "grad_norm": 1.198180508330796, "learning_rate": 6.550296057123872e-06, "loss": 0.1312, "step": 14317 }, { "epoch": 0.4177023163545131, "grad_norm": 0.94208104984621, "learning_rate": 6.549846898903176e-06, "loss": 0.1515, "step": 14318 }, { "epoch": 0.41773148958515666, "grad_norm": 0.8037888861901492, "learning_rate": 6.549397726845817e-06, "loss": 0.156, "step": 14319 }, { "epoch": 0.4177606628158002, "grad_norm": 0.8551748986067956, "learning_rate": 6.548948540955806e-06, "loss": 0.143, "step": 14320 }, { "epoch": 0.41778983604644376, "grad_norm": 0.9323192723203955, "learning_rate": 6.548499341237152e-06, "loss": 0.1407, "step": 14321 }, { "epoch": 0.41781900927708737, "grad_norm": 0.7603429502452196, "learning_rate": 6.548050127693865e-06, "loss": 0.1391, "step": 14322 }, { "epoch": 0.4178481825077309, "grad_norm": 1.425711060718987, "learning_rate": 6.547600900329957e-06, "loss": 0.1343, "step": 14323 }, { "epoch": 0.4178773557383745, "grad_norm": 0.9718586473008777, "learning_rate": 6.547151659149435e-06, "loss": 0.139, "step": 14324 }, { "epoch": 0.41790652896901803, "grad_norm": 0.9740842615198593, "learning_rate": 6.546702404156313e-06, "loss": 0.1285, "step": 14325 }, { "epoch": 0.4179357021996616, "grad_norm": 0.7313133104116794, "learning_rate": 6.546253135354603e-06, "loss": 0.1466, "step": 14326 }, { "epoch": 0.41796487543030514, "grad_norm": 0.64246601816848, "learning_rate": 6.545803852748314e-06, "loss": 0.1342, "step": 14327 }, { "epoch": 0.4179940486609487, "grad_norm": 0.805222785127508, "learning_rate": 6.545354556341457e-06, "loss": 0.1233, "step": 14328 }, { "epoch": 0.4180232218915923, "grad_norm": 0.7740098512065345, "learning_rate": 6.544905246138042e-06, "loss": 0.145, "step": 14329 }, { "epoch": 0.41805239512223585, "grad_norm": 0.8370453777839455, "learning_rate": 6.544455922142084e-06, "loss": 0.136, "step": 14330 }, { "epoch": 0.4180815683528794, "grad_norm": 0.855682185482877, "learning_rate": 6.54400658435759e-06, "loss": 0.1557, "step": 14331 }, { "epoch": 0.41811074158352296, "grad_norm": 1.056530004972702, "learning_rate": 6.543557232788574e-06, "loss": 0.1391, "step": 14332 }, { "epoch": 0.4181399148141665, "grad_norm": 0.9034832075931041, "learning_rate": 6.543107867439049e-06, "loss": 0.153, "step": 14333 }, { "epoch": 0.41816908804481007, "grad_norm": 0.6181331059510731, "learning_rate": 6.542658488313024e-06, "loss": 0.1275, "step": 14334 }, { "epoch": 0.4181982612754536, "grad_norm": 0.8372381582547589, "learning_rate": 6.542209095414512e-06, "loss": 0.1293, "step": 14335 }, { "epoch": 0.41822743450609723, "grad_norm": 1.1441327183955639, "learning_rate": 6.541759688747528e-06, "loss": 0.1261, "step": 14336 }, { "epoch": 0.4182566077367408, "grad_norm": 0.8883941768455438, "learning_rate": 6.541310268316079e-06, "loss": 0.1454, "step": 14337 }, { "epoch": 0.41828578096738434, "grad_norm": 1.0577726495234712, "learning_rate": 6.5408608341241805e-06, "loss": 0.1317, "step": 14338 }, { "epoch": 0.4183149541980279, "grad_norm": 1.3522374229605751, "learning_rate": 6.5404113861758446e-06, "loss": 0.1311, "step": 14339 }, { "epoch": 0.41834412742867144, "grad_norm": 0.8630830853602386, "learning_rate": 6.539961924475083e-06, "loss": 0.1569, "step": 14340 }, { "epoch": 0.418373300659315, "grad_norm": 0.7511730437186435, "learning_rate": 6.53951244902591e-06, "loss": 0.1538, "step": 14341 }, { "epoch": 0.41840247388995855, "grad_norm": 1.1997545421136582, "learning_rate": 6.539062959832337e-06, "loss": 0.1514, "step": 14342 }, { "epoch": 0.41843164712060216, "grad_norm": 0.7480991491496013, "learning_rate": 6.538613456898376e-06, "loss": 0.1259, "step": 14343 }, { "epoch": 0.4184608203512457, "grad_norm": 0.8950531223354506, "learning_rate": 6.538163940228043e-06, "loss": 0.1465, "step": 14344 }, { "epoch": 0.41848999358188926, "grad_norm": 1.5710228081205437, "learning_rate": 6.537714409825349e-06, "loss": 0.1447, "step": 14345 }, { "epoch": 0.4185191668125328, "grad_norm": 0.8818041009882807, "learning_rate": 6.537264865694307e-06, "loss": 0.1451, "step": 14346 }, { "epoch": 0.41854834004317637, "grad_norm": 0.7327222044131102, "learning_rate": 6.5368153078389315e-06, "loss": 0.148, "step": 14347 }, { "epoch": 0.4185775132738199, "grad_norm": 0.899954505310372, "learning_rate": 6.536365736263236e-06, "loss": 0.1388, "step": 14348 }, { "epoch": 0.4186066865044635, "grad_norm": 0.9258798806308532, "learning_rate": 6.535916150971234e-06, "loss": 0.1374, "step": 14349 }, { "epoch": 0.4186358597351071, "grad_norm": 0.8946088431754494, "learning_rate": 6.5354665519669405e-06, "loss": 0.1336, "step": 14350 }, { "epoch": 0.41866503296575064, "grad_norm": 0.9430193967223919, "learning_rate": 6.535016939254366e-06, "loss": 0.1608, "step": 14351 }, { "epoch": 0.4186942061963942, "grad_norm": 0.8612935527285908, "learning_rate": 6.534567312837528e-06, "loss": 0.1399, "step": 14352 }, { "epoch": 0.41872337942703775, "grad_norm": 0.7180210667748339, "learning_rate": 6.53411767272044e-06, "loss": 0.1291, "step": 14353 }, { "epoch": 0.4187525526576813, "grad_norm": 0.7933929550154948, "learning_rate": 6.5336680189071135e-06, "loss": 0.1484, "step": 14354 }, { "epoch": 0.41878172588832485, "grad_norm": 0.7875205272562983, "learning_rate": 6.533218351401567e-06, "loss": 0.1583, "step": 14355 }, { "epoch": 0.41881089911896846, "grad_norm": 0.739509735175548, "learning_rate": 6.532768670207813e-06, "loss": 0.1323, "step": 14356 }, { "epoch": 0.418840072349612, "grad_norm": 0.7523924236931829, "learning_rate": 6.532318975329864e-06, "loss": 0.1348, "step": 14357 }, { "epoch": 0.41886924558025557, "grad_norm": 0.8304539510543331, "learning_rate": 6.5318692667717395e-06, "loss": 0.1586, "step": 14358 }, { "epoch": 0.4188984188108991, "grad_norm": 0.8434055583480368, "learning_rate": 6.531419544537452e-06, "loss": 0.1478, "step": 14359 }, { "epoch": 0.4189275920415427, "grad_norm": 0.7399589595592506, "learning_rate": 6.530969808631014e-06, "loss": 0.1321, "step": 14360 }, { "epoch": 0.4189567652721862, "grad_norm": 0.9081734487926589, "learning_rate": 6.530520059056446e-06, "loss": 0.141, "step": 14361 }, { "epoch": 0.4189859385028298, "grad_norm": 0.703386302611474, "learning_rate": 6.5300702958177585e-06, "loss": 0.1296, "step": 14362 }, { "epoch": 0.4190151117334734, "grad_norm": 0.8498679100186433, "learning_rate": 6.529620518918969e-06, "loss": 0.1273, "step": 14363 }, { "epoch": 0.41904428496411694, "grad_norm": 0.833550905480528, "learning_rate": 6.529170728364092e-06, "loss": 0.1391, "step": 14364 }, { "epoch": 0.4190734581947605, "grad_norm": 4.407525355618035, "learning_rate": 6.528720924157144e-06, "loss": 0.1446, "step": 14365 }, { "epoch": 0.41910263142540405, "grad_norm": 0.8079687573902641, "learning_rate": 6.528271106302141e-06, "loss": 0.1194, "step": 14366 }, { "epoch": 0.4191318046560476, "grad_norm": 0.8849809924407699, "learning_rate": 6.527821274803098e-06, "loss": 0.1394, "step": 14367 }, { "epoch": 0.41916097788669116, "grad_norm": 0.8365329865325013, "learning_rate": 6.527371429664032e-06, "loss": 0.1374, "step": 14368 }, { "epoch": 0.4191901511173347, "grad_norm": 0.6877693104433702, "learning_rate": 6.526921570888958e-06, "loss": 0.1422, "step": 14369 }, { "epoch": 0.4192193243479783, "grad_norm": 1.159280137860826, "learning_rate": 6.526471698481892e-06, "loss": 0.1465, "step": 14370 }, { "epoch": 0.41924849757862187, "grad_norm": 0.7981103899872733, "learning_rate": 6.526021812446854e-06, "loss": 0.1379, "step": 14371 }, { "epoch": 0.4192776708092654, "grad_norm": 0.9183917751391264, "learning_rate": 6.525571912787854e-06, "loss": 0.1441, "step": 14372 }, { "epoch": 0.419306844039909, "grad_norm": 0.7898228443070926, "learning_rate": 6.525121999508915e-06, "loss": 0.1201, "step": 14373 }, { "epoch": 0.41933601727055253, "grad_norm": 1.109406773607781, "learning_rate": 6.524672072614048e-06, "loss": 0.1493, "step": 14374 }, { "epoch": 0.4193651905011961, "grad_norm": 0.7248682319379581, "learning_rate": 6.524222132107273e-06, "loss": 0.1129, "step": 14375 }, { "epoch": 0.41939436373183964, "grad_norm": 1.0096391272567418, "learning_rate": 6.5237721779926086e-06, "loss": 0.137, "step": 14376 }, { "epoch": 0.41942353696248325, "grad_norm": 0.7742284748243854, "learning_rate": 6.52332221027407e-06, "loss": 0.1327, "step": 14377 }, { "epoch": 0.4194527101931268, "grad_norm": 1.0158524719397997, "learning_rate": 6.522872228955672e-06, "loss": 0.1391, "step": 14378 }, { "epoch": 0.41948188342377035, "grad_norm": 0.7942257032163648, "learning_rate": 6.522422234041436e-06, "loss": 0.151, "step": 14379 }, { "epoch": 0.4195110566544139, "grad_norm": 0.9800663996007741, "learning_rate": 6.521972225535378e-06, "loss": 0.1466, "step": 14380 }, { "epoch": 0.41954022988505746, "grad_norm": 0.8590362545214911, "learning_rate": 6.5215222034415146e-06, "loss": 0.1458, "step": 14381 }, { "epoch": 0.419569403115701, "grad_norm": 0.7623873539684414, "learning_rate": 6.521072167763864e-06, "loss": 0.1116, "step": 14382 }, { "epoch": 0.4195985763463446, "grad_norm": 1.0187623040401035, "learning_rate": 6.520622118506446e-06, "loss": 0.1436, "step": 14383 }, { "epoch": 0.4196277495769882, "grad_norm": 0.8541720230399411, "learning_rate": 6.520172055673274e-06, "loss": 0.1534, "step": 14384 }, { "epoch": 0.4196569228076317, "grad_norm": 0.8024418784142257, "learning_rate": 6.5197219792683695e-06, "loss": 0.15, "step": 14385 }, { "epoch": 0.4196860960382753, "grad_norm": 0.7600298330378055, "learning_rate": 6.519271889295752e-06, "loss": 0.1366, "step": 14386 }, { "epoch": 0.41971526926891883, "grad_norm": 0.9907289182929588, "learning_rate": 6.518821785759435e-06, "loss": 0.1476, "step": 14387 }, { "epoch": 0.4197444424995624, "grad_norm": 0.7972380548786816, "learning_rate": 6.518371668663442e-06, "loss": 0.1517, "step": 14388 }, { "epoch": 0.41977361573020594, "grad_norm": 1.100186635034028, "learning_rate": 6.517921538011789e-06, "loss": 0.1425, "step": 14389 }, { "epoch": 0.41980278896084955, "grad_norm": 1.0191478368419349, "learning_rate": 6.517471393808492e-06, "loss": 0.1349, "step": 14390 }, { "epoch": 0.4198319621914931, "grad_norm": 0.750193984141269, "learning_rate": 6.517021236057575e-06, "loss": 0.1436, "step": 14391 }, { "epoch": 0.41986113542213666, "grad_norm": 0.8731682549347246, "learning_rate": 6.516571064763055e-06, "loss": 0.1573, "step": 14392 }, { "epoch": 0.4198903086527802, "grad_norm": 1.0952546377060774, "learning_rate": 6.51612087992895e-06, "loss": 0.1234, "step": 14393 }, { "epoch": 0.41991948188342376, "grad_norm": 0.9693942403539806, "learning_rate": 6.51567068155928e-06, "loss": 0.1414, "step": 14394 }, { "epoch": 0.4199486551140673, "grad_norm": 0.9917884406099452, "learning_rate": 6.515220469658062e-06, "loss": 0.1447, "step": 14395 }, { "epoch": 0.41997782834471087, "grad_norm": 1.0461003193772818, "learning_rate": 6.514770244229319e-06, "loss": 0.1443, "step": 14396 }, { "epoch": 0.4200070015753545, "grad_norm": 0.7730216763770943, "learning_rate": 6.51432000527707e-06, "loss": 0.1479, "step": 14397 }, { "epoch": 0.42003617480599803, "grad_norm": 0.7271575490953223, "learning_rate": 6.513869752805333e-06, "loss": 0.1598, "step": 14398 }, { "epoch": 0.4200653480366416, "grad_norm": 0.7973071694822216, "learning_rate": 6.513419486818125e-06, "loss": 0.152, "step": 14399 }, { "epoch": 0.42009452126728514, "grad_norm": 0.9407392053654383, "learning_rate": 6.512969207319472e-06, "loss": 0.1625, "step": 14400 }, { "epoch": 0.4201236944979287, "grad_norm": 0.807174221705208, "learning_rate": 6.512518914313392e-06, "loss": 0.1352, "step": 14401 }, { "epoch": 0.42015286772857224, "grad_norm": 0.7364112184600825, "learning_rate": 6.512068607803901e-06, "loss": 0.1457, "step": 14402 }, { "epoch": 0.4201820409592158, "grad_norm": 0.7971310429385652, "learning_rate": 6.5116182877950255e-06, "loss": 0.1461, "step": 14403 }, { "epoch": 0.4202112141898594, "grad_norm": 0.8979404807664068, "learning_rate": 6.511167954290781e-06, "loss": 0.138, "step": 14404 }, { "epoch": 0.42024038742050296, "grad_norm": 0.8636420617153241, "learning_rate": 6.5107176072951895e-06, "loss": 0.1515, "step": 14405 }, { "epoch": 0.4202695606511465, "grad_norm": 0.8233094600114146, "learning_rate": 6.510267246812274e-06, "loss": 0.1355, "step": 14406 }, { "epoch": 0.42029873388179007, "grad_norm": 0.8062573146011515, "learning_rate": 6.5098168728460505e-06, "loss": 0.1086, "step": 14407 }, { "epoch": 0.4203279071124336, "grad_norm": 0.7171526571763346, "learning_rate": 6.509366485400544e-06, "loss": 0.1325, "step": 14408 }, { "epoch": 0.42035708034307717, "grad_norm": 0.7503885416778169, "learning_rate": 6.508916084479774e-06, "loss": 0.1545, "step": 14409 }, { "epoch": 0.4203862535737208, "grad_norm": 0.8119253575625889, "learning_rate": 6.50846567008776e-06, "loss": 0.1856, "step": 14410 }, { "epoch": 0.42041542680436433, "grad_norm": 1.0800479061082418, "learning_rate": 6.5080152422285255e-06, "loss": 0.1501, "step": 14411 }, { "epoch": 0.4204446000350079, "grad_norm": 0.9561561455476036, "learning_rate": 6.507564800906091e-06, "loss": 0.1566, "step": 14412 }, { "epoch": 0.42047377326565144, "grad_norm": 0.7765485467964146, "learning_rate": 6.507114346124479e-06, "loss": 0.1282, "step": 14413 }, { "epoch": 0.420502946496295, "grad_norm": 0.7646190425731896, "learning_rate": 6.506663877887707e-06, "loss": 0.1249, "step": 14414 }, { "epoch": 0.42053211972693855, "grad_norm": 0.8564671391059466, "learning_rate": 6.506213396199801e-06, "loss": 0.1429, "step": 14415 }, { "epoch": 0.4205612929575821, "grad_norm": 1.058010102157088, "learning_rate": 6.505762901064782e-06, "loss": 0.1376, "step": 14416 }, { "epoch": 0.4205904661882257, "grad_norm": 0.82715533223091, "learning_rate": 6.50531239248667e-06, "loss": 0.1318, "step": 14417 }, { "epoch": 0.42061963941886926, "grad_norm": 0.7235873882819607, "learning_rate": 6.50486187046949e-06, "loss": 0.1342, "step": 14418 }, { "epoch": 0.4206488126495128, "grad_norm": 0.877946213738441, "learning_rate": 6.504411335017263e-06, "loss": 0.1493, "step": 14419 }, { "epoch": 0.42067798588015637, "grad_norm": 1.0487731356546257, "learning_rate": 6.503960786134007e-06, "loss": 0.1396, "step": 14420 }, { "epoch": 0.4207071591107999, "grad_norm": 0.7336313983219429, "learning_rate": 6.503510223823751e-06, "loss": 0.1184, "step": 14421 }, { "epoch": 0.4207363323414435, "grad_norm": 0.9128619470625778, "learning_rate": 6.503059648090514e-06, "loss": 0.1426, "step": 14422 }, { "epoch": 0.42076550557208703, "grad_norm": 0.9919108847115311, "learning_rate": 6.502609058938319e-06, "loss": 0.1635, "step": 14423 }, { "epoch": 0.42079467880273064, "grad_norm": 0.701259220471712, "learning_rate": 6.50215845637119e-06, "loss": 0.1226, "step": 14424 }, { "epoch": 0.4208238520333742, "grad_norm": 0.8996779412217872, "learning_rate": 6.501707840393147e-06, "loss": 0.1307, "step": 14425 }, { "epoch": 0.42085302526401774, "grad_norm": 0.7019403756544518, "learning_rate": 6.501257211008216e-06, "loss": 0.1554, "step": 14426 }, { "epoch": 0.4208821984946613, "grad_norm": 1.04852831197307, "learning_rate": 6.500806568220419e-06, "loss": 0.1465, "step": 14427 }, { "epoch": 0.42091137172530485, "grad_norm": 0.7910520105499192, "learning_rate": 6.500355912033781e-06, "loss": 0.1607, "step": 14428 }, { "epoch": 0.4209405449559484, "grad_norm": 0.8815956341747757, "learning_rate": 6.49990524245232e-06, "loss": 0.1665, "step": 14429 }, { "epoch": 0.42096971818659196, "grad_norm": 0.790428077954022, "learning_rate": 6.4994545594800655e-06, "loss": 0.128, "step": 14430 }, { "epoch": 0.42099889141723557, "grad_norm": 0.8413021886541057, "learning_rate": 6.499003863121039e-06, "loss": 0.1562, "step": 14431 }, { "epoch": 0.4210280646478791, "grad_norm": 0.8960692385578597, "learning_rate": 6.498553153379262e-06, "loss": 0.1362, "step": 14432 }, { "epoch": 0.42105723787852267, "grad_norm": 1.081311930001512, "learning_rate": 6.498102430258761e-06, "loss": 0.1454, "step": 14433 }, { "epoch": 0.4210864111091662, "grad_norm": 0.876389351039299, "learning_rate": 6.49765169376356e-06, "loss": 0.1357, "step": 14434 }, { "epoch": 0.4211155843398098, "grad_norm": 0.875251342324052, "learning_rate": 6.49720094389768e-06, "loss": 0.1289, "step": 14435 }, { "epoch": 0.42114475757045333, "grad_norm": 0.810552797688318, "learning_rate": 6.49675018066515e-06, "loss": 0.1378, "step": 14436 }, { "epoch": 0.42117393080109694, "grad_norm": 0.8625619733077718, "learning_rate": 6.496299404069991e-06, "loss": 0.1612, "step": 14437 }, { "epoch": 0.4212031040317405, "grad_norm": 1.0399405499631094, "learning_rate": 6.4958486141162266e-06, "loss": 0.1672, "step": 14438 }, { "epoch": 0.42123227726238405, "grad_norm": 0.9267043636752254, "learning_rate": 6.495397810807884e-06, "loss": 0.1235, "step": 14439 }, { "epoch": 0.4212614504930276, "grad_norm": 0.7273239851740533, "learning_rate": 6.4949469941489874e-06, "loss": 0.1309, "step": 14440 }, { "epoch": 0.42129062372367115, "grad_norm": 0.9197924239614961, "learning_rate": 6.49449616414356e-06, "loss": 0.1402, "step": 14441 }, { "epoch": 0.4213197969543147, "grad_norm": 1.041942525152656, "learning_rate": 6.4940453207956274e-06, "loss": 0.1296, "step": 14442 }, { "epoch": 0.42134897018495826, "grad_norm": 0.8788726373225624, "learning_rate": 6.493594464109217e-06, "loss": 0.1253, "step": 14443 }, { "epoch": 0.42137814341560187, "grad_norm": 0.7326265049777108, "learning_rate": 6.493143594088348e-06, "loss": 0.14, "step": 14444 }, { "epoch": 0.4214073166462454, "grad_norm": 0.9904664601160023, "learning_rate": 6.492692710737052e-06, "loss": 0.1328, "step": 14445 }, { "epoch": 0.421436489876889, "grad_norm": 0.9432998346701613, "learning_rate": 6.492241814059351e-06, "loss": 0.1454, "step": 14446 }, { "epoch": 0.42146566310753253, "grad_norm": 0.7541833112111631, "learning_rate": 6.491790904059271e-06, "loss": 0.1219, "step": 14447 }, { "epoch": 0.4214948363381761, "grad_norm": 0.9898427213653226, "learning_rate": 6.491339980740839e-06, "loss": 0.1599, "step": 14448 }, { "epoch": 0.42152400956881964, "grad_norm": 1.103395260444594, "learning_rate": 6.490889044108079e-06, "loss": 0.1725, "step": 14449 }, { "epoch": 0.4215531827994632, "grad_norm": 0.8698384097384274, "learning_rate": 6.490438094165017e-06, "loss": 0.1362, "step": 14450 }, { "epoch": 0.4215823560301068, "grad_norm": 0.7955909545275446, "learning_rate": 6.48998713091568e-06, "loss": 0.1436, "step": 14451 }, { "epoch": 0.42161152926075035, "grad_norm": 0.9972732986336026, "learning_rate": 6.4895361543640945e-06, "loss": 0.1428, "step": 14452 }, { "epoch": 0.4216407024913939, "grad_norm": 0.8758179208475545, "learning_rate": 6.489085164514285e-06, "loss": 0.1189, "step": 14453 }, { "epoch": 0.42166987572203746, "grad_norm": 0.8987846600605, "learning_rate": 6.4886341613702785e-06, "loss": 0.1247, "step": 14454 }, { "epoch": 0.421699048952681, "grad_norm": 0.8682519836965628, "learning_rate": 6.4881831449361025e-06, "loss": 0.1225, "step": 14455 }, { "epoch": 0.42172822218332456, "grad_norm": 1.0115723125029012, "learning_rate": 6.487732115215781e-06, "loss": 0.1617, "step": 14456 }, { "epoch": 0.4217573954139681, "grad_norm": 0.7916856556748872, "learning_rate": 6.487281072213343e-06, "loss": 0.1344, "step": 14457 }, { "epoch": 0.4217865686446117, "grad_norm": 0.9089706188771904, "learning_rate": 6.486830015932816e-06, "loss": 0.1487, "step": 14458 }, { "epoch": 0.4218157418752553, "grad_norm": 0.8129103123432081, "learning_rate": 6.486378946378222e-06, "loss": 0.1182, "step": 14459 }, { "epoch": 0.42184491510589883, "grad_norm": 0.8407381672554703, "learning_rate": 6.485927863553595e-06, "loss": 0.1509, "step": 14460 }, { "epoch": 0.4218740883365424, "grad_norm": 0.7697383827971408, "learning_rate": 6.485476767462958e-06, "loss": 0.1277, "step": 14461 }, { "epoch": 0.42190326156718594, "grad_norm": 0.7826646804597619, "learning_rate": 6.485025658110337e-06, "loss": 0.1479, "step": 14462 }, { "epoch": 0.4219324347978295, "grad_norm": 1.0353386333869485, "learning_rate": 6.484574535499766e-06, "loss": 0.1557, "step": 14463 }, { "epoch": 0.4219616080284731, "grad_norm": 0.8493185436058729, "learning_rate": 6.484123399635264e-06, "loss": 0.1557, "step": 14464 }, { "epoch": 0.42199078125911665, "grad_norm": 0.8082244009837359, "learning_rate": 6.483672250520863e-06, "loss": 0.1512, "step": 14465 }, { "epoch": 0.4220199544897602, "grad_norm": 1.1399350390091791, "learning_rate": 6.483221088160592e-06, "loss": 0.1488, "step": 14466 }, { "epoch": 0.42204912772040376, "grad_norm": 1.1131730149976795, "learning_rate": 6.482769912558475e-06, "loss": 0.1312, "step": 14467 }, { "epoch": 0.4220783009510473, "grad_norm": 0.8989369341205465, "learning_rate": 6.482318723718544e-06, "loss": 0.1116, "step": 14468 }, { "epoch": 0.42210747418169087, "grad_norm": 0.901983020841205, "learning_rate": 6.481867521644825e-06, "loss": 0.1527, "step": 14469 }, { "epoch": 0.4221366474123344, "grad_norm": 1.1364380199037103, "learning_rate": 6.481416306341346e-06, "loss": 0.161, "step": 14470 }, { "epoch": 0.42216582064297803, "grad_norm": 0.8151538578658158, "learning_rate": 6.480965077812136e-06, "loss": 0.1235, "step": 14471 }, { "epoch": 0.4221949938736216, "grad_norm": 0.7338103446092081, "learning_rate": 6.480513836061223e-06, "loss": 0.1301, "step": 14472 }, { "epoch": 0.42222416710426514, "grad_norm": 0.7493153678674694, "learning_rate": 6.480062581092638e-06, "loss": 0.1287, "step": 14473 }, { "epoch": 0.4222533403349087, "grad_norm": 0.9227149055644112, "learning_rate": 6.479611312910405e-06, "loss": 0.1377, "step": 14474 }, { "epoch": 0.42228251356555224, "grad_norm": 1.029601597535191, "learning_rate": 6.479160031518555e-06, "loss": 0.1528, "step": 14475 }, { "epoch": 0.4223116867961958, "grad_norm": 0.8191931311640929, "learning_rate": 6.47870873692112e-06, "loss": 0.1482, "step": 14476 }, { "epoch": 0.42234086002683935, "grad_norm": 0.7787834985340119, "learning_rate": 6.4782574291221234e-06, "loss": 0.1396, "step": 14477 }, { "epoch": 0.42237003325748296, "grad_norm": 1.0113480106310186, "learning_rate": 6.4778061081256e-06, "loss": 0.1599, "step": 14478 }, { "epoch": 0.4223992064881265, "grad_norm": 0.8803704920654022, "learning_rate": 6.477354773935576e-06, "loss": 0.131, "step": 14479 }, { "epoch": 0.42242837971877006, "grad_norm": 0.8194151344060884, "learning_rate": 6.476903426556079e-06, "loss": 0.1369, "step": 14480 }, { "epoch": 0.4224575529494136, "grad_norm": 1.0973736802943928, "learning_rate": 6.4764520659911436e-06, "loss": 0.1289, "step": 14481 }, { "epoch": 0.42248672618005717, "grad_norm": 0.7909811108443876, "learning_rate": 6.476000692244795e-06, "loss": 0.1429, "step": 14482 }, { "epoch": 0.4225158994107007, "grad_norm": 0.8807801299318434, "learning_rate": 6.475549305321065e-06, "loss": 0.1467, "step": 14483 }, { "epoch": 0.4225450726413443, "grad_norm": 0.8934675700711427, "learning_rate": 6.475097905223984e-06, "loss": 0.1458, "step": 14484 }, { "epoch": 0.4225742458719879, "grad_norm": 1.0066656807002248, "learning_rate": 6.474646491957579e-06, "loss": 0.1418, "step": 14485 }, { "epoch": 0.42260341910263144, "grad_norm": 0.7798560813803963, "learning_rate": 6.474195065525884e-06, "loss": 0.1267, "step": 14486 }, { "epoch": 0.422632592333275, "grad_norm": 0.8455626917926337, "learning_rate": 6.473743625932926e-06, "loss": 0.1555, "step": 14487 }, { "epoch": 0.42266176556391855, "grad_norm": 1.2057873204756218, "learning_rate": 6.473292173182738e-06, "loss": 0.1396, "step": 14488 }, { "epoch": 0.4226909387945621, "grad_norm": 0.7783230851033105, "learning_rate": 6.472840707279348e-06, "loss": 0.1434, "step": 14489 }, { "epoch": 0.42272011202520565, "grad_norm": 0.9383578860812518, "learning_rate": 6.4723892282267875e-06, "loss": 0.1389, "step": 14490 }, { "epoch": 0.4227492852558492, "grad_norm": 0.9187106361483238, "learning_rate": 6.47193773602909e-06, "loss": 0.139, "step": 14491 }, { "epoch": 0.4227784584864928, "grad_norm": 1.0385874121281264, "learning_rate": 6.47148623069028e-06, "loss": 0.1322, "step": 14492 }, { "epoch": 0.42280763171713637, "grad_norm": 1.0229260498219677, "learning_rate": 6.471034712214396e-06, "loss": 0.1401, "step": 14493 }, { "epoch": 0.4228368049477799, "grad_norm": 0.7925077642592967, "learning_rate": 6.470583180605463e-06, "loss": 0.1258, "step": 14494 }, { "epoch": 0.4228659781784235, "grad_norm": 1.0985345433187705, "learning_rate": 6.470131635867515e-06, "loss": 0.1682, "step": 14495 }, { "epoch": 0.422895151409067, "grad_norm": 0.9135708400307052, "learning_rate": 6.4696800780045825e-06, "loss": 0.1395, "step": 14496 }, { "epoch": 0.4229243246397106, "grad_norm": 1.0214609418602645, "learning_rate": 6.469228507020697e-06, "loss": 0.1427, "step": 14497 }, { "epoch": 0.4229534978703542, "grad_norm": 2.079836989571655, "learning_rate": 6.46877692291989e-06, "loss": 0.1438, "step": 14498 }, { "epoch": 0.42298267110099774, "grad_norm": 0.9745922787004977, "learning_rate": 6.468325325706194e-06, "loss": 0.1397, "step": 14499 }, { "epoch": 0.4230118443316413, "grad_norm": 0.8420242500977787, "learning_rate": 6.467873715383639e-06, "loss": 0.1297, "step": 14500 }, { "epoch": 0.42304101756228485, "grad_norm": 0.9924336625137476, "learning_rate": 6.4674220919562594e-06, "loss": 0.1465, "step": 14501 }, { "epoch": 0.4230701907929284, "grad_norm": 0.9787686309765826, "learning_rate": 6.466970455428085e-06, "loss": 0.1493, "step": 14502 }, { "epoch": 0.42309936402357196, "grad_norm": 0.91428197702099, "learning_rate": 6.466518805803148e-06, "loss": 0.161, "step": 14503 }, { "epoch": 0.4231285372542155, "grad_norm": 1.0981507514878934, "learning_rate": 6.466067143085481e-06, "loss": 0.1687, "step": 14504 }, { "epoch": 0.4231577104848591, "grad_norm": 0.8314098051543142, "learning_rate": 6.465615467279116e-06, "loss": 0.159, "step": 14505 }, { "epoch": 0.42318688371550267, "grad_norm": 0.9551817398368962, "learning_rate": 6.4651637783880885e-06, "loss": 0.141, "step": 14506 }, { "epoch": 0.4232160569461462, "grad_norm": 0.6840919919862214, "learning_rate": 6.464712076416426e-06, "loss": 0.113, "step": 14507 }, { "epoch": 0.4232452301767898, "grad_norm": 0.7784800829644861, "learning_rate": 6.464260361368165e-06, "loss": 0.1543, "step": 14508 }, { "epoch": 0.42327440340743333, "grad_norm": 1.1386175511507357, "learning_rate": 6.463808633247337e-06, "loss": 0.1512, "step": 14509 }, { "epoch": 0.4233035766380769, "grad_norm": 0.8722414067316532, "learning_rate": 6.463356892057975e-06, "loss": 0.1558, "step": 14510 }, { "epoch": 0.42333274986872044, "grad_norm": 0.7786503128579964, "learning_rate": 6.462905137804112e-06, "loss": 0.1371, "step": 14511 }, { "epoch": 0.42336192309936405, "grad_norm": 0.9231178955877767, "learning_rate": 6.462453370489781e-06, "loss": 0.1693, "step": 14512 }, { "epoch": 0.4233910963300076, "grad_norm": 1.0801509375056284, "learning_rate": 6.462001590119015e-06, "loss": 0.1686, "step": 14513 }, { "epoch": 0.42342026956065115, "grad_norm": 0.9346433310084615, "learning_rate": 6.461549796695847e-06, "loss": 0.1345, "step": 14514 }, { "epoch": 0.4234494427912947, "grad_norm": 0.9262236702603318, "learning_rate": 6.461097990224313e-06, "loss": 0.1633, "step": 14515 }, { "epoch": 0.42347861602193826, "grad_norm": 0.8092654480157292, "learning_rate": 6.460646170708445e-06, "loss": 0.1399, "step": 14516 }, { "epoch": 0.4235077892525818, "grad_norm": 0.9282686039633216, "learning_rate": 6.460194338152276e-06, "loss": 0.1293, "step": 14517 }, { "epoch": 0.42353696248322537, "grad_norm": 0.7488744299677202, "learning_rate": 6.459742492559842e-06, "loss": 0.1208, "step": 14518 }, { "epoch": 0.423566135713869, "grad_norm": 0.919009376017385, "learning_rate": 6.459290633935172e-06, "loss": 0.1464, "step": 14519 }, { "epoch": 0.4235953089445125, "grad_norm": 0.8989568180018619, "learning_rate": 6.458838762282306e-06, "loss": 0.1453, "step": 14520 }, { "epoch": 0.4236244821751561, "grad_norm": 0.7095194367777338, "learning_rate": 6.458386877605276e-06, "loss": 0.1225, "step": 14521 }, { "epoch": 0.42365365540579963, "grad_norm": 0.8215149469125433, "learning_rate": 6.457934979908115e-06, "loss": 0.1393, "step": 14522 }, { "epoch": 0.4236828286364432, "grad_norm": 0.7256366165278979, "learning_rate": 6.45748306919486e-06, "loss": 0.1368, "step": 14523 }, { "epoch": 0.42371200186708674, "grad_norm": 0.7054796777177985, "learning_rate": 6.457031145469543e-06, "loss": 0.1318, "step": 14524 }, { "epoch": 0.42374117509773035, "grad_norm": 0.8383542805823619, "learning_rate": 6.4565792087362e-06, "loss": 0.1462, "step": 14525 }, { "epoch": 0.4237703483283739, "grad_norm": 0.6398447224104609, "learning_rate": 6.456127258998866e-06, "loss": 0.1517, "step": 14526 }, { "epoch": 0.42379952155901746, "grad_norm": 0.7920031033802036, "learning_rate": 6.455675296261574e-06, "loss": 0.1441, "step": 14527 }, { "epoch": 0.423828694789661, "grad_norm": 0.8937732273513436, "learning_rate": 6.455223320528361e-06, "loss": 0.1362, "step": 14528 }, { "epoch": 0.42385786802030456, "grad_norm": 0.9071346261385165, "learning_rate": 6.454771331803262e-06, "loss": 0.1426, "step": 14529 }, { "epoch": 0.4238870412509481, "grad_norm": 0.734115501364661, "learning_rate": 6.454319330090313e-06, "loss": 0.1337, "step": 14530 }, { "epoch": 0.42391621448159167, "grad_norm": 0.9098881502519791, "learning_rate": 6.453867315393546e-06, "loss": 0.1339, "step": 14531 }, { "epoch": 0.4239453877122353, "grad_norm": 1.0463011771926354, "learning_rate": 6.453415287717e-06, "loss": 0.11, "step": 14532 }, { "epoch": 0.42397456094287883, "grad_norm": 0.8406661020750411, "learning_rate": 6.45296324706471e-06, "loss": 0.1252, "step": 14533 }, { "epoch": 0.4240037341735224, "grad_norm": 0.7746094591515468, "learning_rate": 6.452511193440708e-06, "loss": 0.1392, "step": 14534 }, { "epoch": 0.42403290740416594, "grad_norm": 1.0186412532074003, "learning_rate": 6.452059126849035e-06, "loss": 0.1379, "step": 14535 }, { "epoch": 0.4240620806348095, "grad_norm": 0.9899846271410743, "learning_rate": 6.451607047293726e-06, "loss": 0.1355, "step": 14536 }, { "epoch": 0.42409125386545304, "grad_norm": 0.7079457275795795, "learning_rate": 6.451154954778813e-06, "loss": 0.1294, "step": 14537 }, { "epoch": 0.4241204270960966, "grad_norm": 0.8899821635349057, "learning_rate": 6.4507028493083365e-06, "loss": 0.1384, "step": 14538 }, { "epoch": 0.4241496003267402, "grad_norm": 1.3216708054820439, "learning_rate": 6.4502507308863316e-06, "loss": 0.1489, "step": 14539 }, { "epoch": 0.42417877355738376, "grad_norm": 0.7976546901755325, "learning_rate": 6.449798599516833e-06, "loss": 0.1283, "step": 14540 }, { "epoch": 0.4242079467880273, "grad_norm": 0.8192831887327774, "learning_rate": 6.44934645520388e-06, "loss": 0.1446, "step": 14541 }, { "epoch": 0.42423712001867087, "grad_norm": 1.0154037535246627, "learning_rate": 6.448894297951507e-06, "loss": 0.1393, "step": 14542 }, { "epoch": 0.4242662932493144, "grad_norm": 0.8159730911855505, "learning_rate": 6.448442127763752e-06, "loss": 0.1407, "step": 14543 }, { "epoch": 0.424295466479958, "grad_norm": 1.1149308617173088, "learning_rate": 6.447989944644651e-06, "loss": 0.1503, "step": 14544 }, { "epoch": 0.4243246397106015, "grad_norm": 0.6467155633479809, "learning_rate": 6.447537748598241e-06, "loss": 0.1495, "step": 14545 }, { "epoch": 0.42435381294124513, "grad_norm": 1.0370129458736717, "learning_rate": 6.447085539628562e-06, "loss": 0.141, "step": 14546 }, { "epoch": 0.4243829861718887, "grad_norm": 0.8784085698016646, "learning_rate": 6.446633317739646e-06, "loss": 0.1542, "step": 14547 }, { "epoch": 0.42441215940253224, "grad_norm": 0.8093690792754681, "learning_rate": 6.446181082935534e-06, "loss": 0.1434, "step": 14548 }, { "epoch": 0.4244413326331758, "grad_norm": 1.4952964465652518, "learning_rate": 6.445728835220262e-06, "loss": 0.1478, "step": 14549 }, { "epoch": 0.42447050586381935, "grad_norm": 0.6586631728253155, "learning_rate": 6.44527657459787e-06, "loss": 0.1148, "step": 14550 }, { "epoch": 0.4244996790944629, "grad_norm": 0.7988990831046745, "learning_rate": 6.444824301072391e-06, "loss": 0.1425, "step": 14551 }, { "epoch": 0.4245288523251065, "grad_norm": 0.7756335620708912, "learning_rate": 6.4443720146478675e-06, "loss": 0.149, "step": 14552 }, { "epoch": 0.42455802555575006, "grad_norm": 0.7363998507458365, "learning_rate": 6.443919715328336e-06, "loss": 0.1458, "step": 14553 }, { "epoch": 0.4245871987863936, "grad_norm": 0.8815222354786677, "learning_rate": 6.4434674031178314e-06, "loss": 0.1527, "step": 14554 }, { "epoch": 0.42461637201703717, "grad_norm": 0.924317890620181, "learning_rate": 6.443015078020397e-06, "loss": 0.1362, "step": 14555 }, { "epoch": 0.4246455452476807, "grad_norm": 0.6482306932868144, "learning_rate": 6.442562740040067e-06, "loss": 0.1226, "step": 14556 }, { "epoch": 0.4246747184783243, "grad_norm": 0.8306777726432536, "learning_rate": 6.442110389180881e-06, "loss": 0.1348, "step": 14557 }, { "epoch": 0.42470389170896783, "grad_norm": 0.8132825556688807, "learning_rate": 6.4416580254468795e-06, "loss": 0.1212, "step": 14558 }, { "epoch": 0.42473306493961144, "grad_norm": 0.8170276126492356, "learning_rate": 6.441205648842097e-06, "loss": 0.1524, "step": 14559 }, { "epoch": 0.424762238170255, "grad_norm": 0.8744322554012237, "learning_rate": 6.440753259370575e-06, "loss": 0.1311, "step": 14560 }, { "epoch": 0.42479141140089854, "grad_norm": 1.0156791192303587, "learning_rate": 6.440300857036354e-06, "loss": 0.1267, "step": 14561 }, { "epoch": 0.4248205846315421, "grad_norm": 0.977216342020889, "learning_rate": 6.439848441843469e-06, "loss": 0.1473, "step": 14562 }, { "epoch": 0.42484975786218565, "grad_norm": 0.9484923266452084, "learning_rate": 6.439396013795961e-06, "loss": 0.1433, "step": 14563 }, { "epoch": 0.4248789310928292, "grad_norm": 0.9721265502642883, "learning_rate": 6.438943572897869e-06, "loss": 0.1459, "step": 14564 }, { "epoch": 0.42490810432347276, "grad_norm": 0.8066516855911635, "learning_rate": 6.4384911191532316e-06, "loss": 0.169, "step": 14565 }, { "epoch": 0.42493727755411637, "grad_norm": 1.0033952960392376, "learning_rate": 6.43803865256609e-06, "loss": 0.1259, "step": 14566 }, { "epoch": 0.4249664507847599, "grad_norm": 1.0417315083259715, "learning_rate": 6.437586173140482e-06, "loss": 0.165, "step": 14567 }, { "epoch": 0.4249956240154035, "grad_norm": 0.9037656702375, "learning_rate": 6.43713368088045e-06, "loss": 0.1449, "step": 14568 }, { "epoch": 0.425024797246047, "grad_norm": 0.7885990323875087, "learning_rate": 6.436681175790028e-06, "loss": 0.1257, "step": 14569 }, { "epoch": 0.4250539704766906, "grad_norm": 0.8877062285620676, "learning_rate": 6.4362286578732626e-06, "loss": 0.1671, "step": 14570 }, { "epoch": 0.42508314370733413, "grad_norm": 0.9317443602253012, "learning_rate": 6.4357761271341876e-06, "loss": 0.1491, "step": 14571 }, { "epoch": 0.4251123169379777, "grad_norm": 1.0402581857014654, "learning_rate": 6.435323583576847e-06, "loss": 0.1796, "step": 14572 }, { "epoch": 0.4251414901686213, "grad_norm": 0.9707473246841538, "learning_rate": 6.434871027205282e-06, "loss": 0.1152, "step": 14573 }, { "epoch": 0.42517066339926485, "grad_norm": 0.8164273297113386, "learning_rate": 6.434418458023529e-06, "loss": 0.1311, "step": 14574 }, { "epoch": 0.4251998366299084, "grad_norm": 0.8475827553944394, "learning_rate": 6.433965876035631e-06, "loss": 0.1492, "step": 14575 }, { "epoch": 0.42522900986055195, "grad_norm": 0.7173770494645649, "learning_rate": 6.433513281245628e-06, "loss": 0.1274, "step": 14576 }, { "epoch": 0.4252581830911955, "grad_norm": 0.8127496231813877, "learning_rate": 6.43306067365756e-06, "loss": 0.1228, "step": 14577 }, { "epoch": 0.42528735632183906, "grad_norm": 0.6928186794485005, "learning_rate": 6.43260805327547e-06, "loss": 0.1198, "step": 14578 }, { "epoch": 0.42531652955248267, "grad_norm": 0.7726298241645552, "learning_rate": 6.432155420103396e-06, "loss": 0.1304, "step": 14579 }, { "epoch": 0.4253457027831262, "grad_norm": 0.7400268183665162, "learning_rate": 6.431702774145381e-06, "loss": 0.1569, "step": 14580 }, { "epoch": 0.4253748760137698, "grad_norm": 0.9906871144526957, "learning_rate": 6.4312501154054655e-06, "loss": 0.1326, "step": 14581 }, { "epoch": 0.42540404924441333, "grad_norm": 0.7263777764166814, "learning_rate": 6.430797443887689e-06, "loss": 0.132, "step": 14582 }, { "epoch": 0.4254332224750569, "grad_norm": 0.7966484002152375, "learning_rate": 6.430344759596096e-06, "loss": 0.1349, "step": 14583 }, { "epoch": 0.42546239570570044, "grad_norm": 0.8534298329644914, "learning_rate": 6.429892062534726e-06, "loss": 0.15, "step": 14584 }, { "epoch": 0.425491568936344, "grad_norm": 1.0236583563235566, "learning_rate": 6.429439352707623e-06, "loss": 0.1386, "step": 14585 }, { "epoch": 0.4255207421669876, "grad_norm": 0.7415576548091419, "learning_rate": 6.428986630118824e-06, "loss": 0.1505, "step": 14586 }, { "epoch": 0.42554991539763115, "grad_norm": 0.7181493695752493, "learning_rate": 6.428533894772373e-06, "loss": 0.133, "step": 14587 }, { "epoch": 0.4255790886282747, "grad_norm": 0.7921312425054876, "learning_rate": 6.428081146672315e-06, "loss": 0.1403, "step": 14588 }, { "epoch": 0.42560826185891826, "grad_norm": 0.8902890918129877, "learning_rate": 6.427628385822688e-06, "loss": 0.1396, "step": 14589 }, { "epoch": 0.4256374350895618, "grad_norm": 0.8471636344004445, "learning_rate": 6.427175612227535e-06, "loss": 0.1617, "step": 14590 }, { "epoch": 0.42566660832020536, "grad_norm": 0.9145104986876964, "learning_rate": 6.4267228258909e-06, "loss": 0.1448, "step": 14591 }, { "epoch": 0.4256957815508489, "grad_norm": 1.274479869068229, "learning_rate": 6.426270026816824e-06, "loss": 0.1206, "step": 14592 }, { "epoch": 0.4257249547814925, "grad_norm": 0.6782055025083349, "learning_rate": 6.425817215009349e-06, "loss": 0.1352, "step": 14593 }, { "epoch": 0.4257541280121361, "grad_norm": 0.8963187453287486, "learning_rate": 6.425364390472518e-06, "loss": 0.1496, "step": 14594 }, { "epoch": 0.42578330124277963, "grad_norm": 0.7295762114599001, "learning_rate": 6.424911553210376e-06, "loss": 0.1437, "step": 14595 }, { "epoch": 0.4258124744734232, "grad_norm": 0.7534925172133695, "learning_rate": 6.4244587032269615e-06, "loss": 0.122, "step": 14596 }, { "epoch": 0.42584164770406674, "grad_norm": 0.7328190284111709, "learning_rate": 6.424005840526321e-06, "loss": 0.1509, "step": 14597 }, { "epoch": 0.4258708209347103, "grad_norm": 0.7813073881683321, "learning_rate": 6.423552965112496e-06, "loss": 0.1511, "step": 14598 }, { "epoch": 0.42589999416535385, "grad_norm": 0.9991050549661694, "learning_rate": 6.42310007698953e-06, "loss": 0.1505, "step": 14599 }, { "epoch": 0.42592916739599745, "grad_norm": 0.8524195923266077, "learning_rate": 6.4226471761614675e-06, "loss": 0.1314, "step": 14600 }, { "epoch": 0.425958340626641, "grad_norm": 0.9428984302763539, "learning_rate": 6.422194262632349e-06, "loss": 0.1393, "step": 14601 }, { "epoch": 0.42598751385728456, "grad_norm": 0.7506115387752303, "learning_rate": 6.421741336406218e-06, "loss": 0.1408, "step": 14602 }, { "epoch": 0.4260166870879281, "grad_norm": 0.8425262154277137, "learning_rate": 6.4212883974871236e-06, "loss": 0.1451, "step": 14603 }, { "epoch": 0.42604586031857167, "grad_norm": 0.8511437934435212, "learning_rate": 6.4208354458791035e-06, "loss": 0.1551, "step": 14604 }, { "epoch": 0.4260750335492152, "grad_norm": 0.881979714026643, "learning_rate": 6.420382481586203e-06, "loss": 0.1641, "step": 14605 }, { "epoch": 0.4261042067798588, "grad_norm": 0.9978972110053544, "learning_rate": 6.419929504612469e-06, "loss": 0.1662, "step": 14606 }, { "epoch": 0.4261333800105024, "grad_norm": 0.8786849002723329, "learning_rate": 6.419476514961942e-06, "loss": 0.1579, "step": 14607 }, { "epoch": 0.42616255324114594, "grad_norm": 1.0140316415914195, "learning_rate": 6.419023512638667e-06, "loss": 0.1448, "step": 14608 }, { "epoch": 0.4261917264717895, "grad_norm": 0.9005428459177359, "learning_rate": 6.41857049764669e-06, "loss": 0.1406, "step": 14609 }, { "epoch": 0.42622089970243304, "grad_norm": 0.7864340508166872, "learning_rate": 6.418117469990053e-06, "loss": 0.1507, "step": 14610 }, { "epoch": 0.4262500729330766, "grad_norm": 0.8805637404277392, "learning_rate": 6.417664429672803e-06, "loss": 0.1246, "step": 14611 }, { "epoch": 0.42627924616372015, "grad_norm": 0.8653322469341848, "learning_rate": 6.417211376698982e-06, "loss": 0.157, "step": 14612 }, { "epoch": 0.42630841939436376, "grad_norm": 0.7611503716150947, "learning_rate": 6.416758311072638e-06, "loss": 0.1357, "step": 14613 }, { "epoch": 0.4263375926250073, "grad_norm": 0.7678749486288232, "learning_rate": 6.416305232797813e-06, "loss": 0.1477, "step": 14614 }, { "epoch": 0.42636676585565086, "grad_norm": 0.7823079024797566, "learning_rate": 6.415852141878553e-06, "loss": 0.1613, "step": 14615 }, { "epoch": 0.4263959390862944, "grad_norm": 0.8640111274829947, "learning_rate": 6.415399038318903e-06, "loss": 0.1425, "step": 14616 }, { "epoch": 0.42642511231693797, "grad_norm": 0.9216988519330479, "learning_rate": 6.414945922122908e-06, "loss": 0.1532, "step": 14617 }, { "epoch": 0.4264542855475815, "grad_norm": 0.791367072581288, "learning_rate": 6.414492793294615e-06, "loss": 0.1167, "step": 14618 }, { "epoch": 0.4264834587782251, "grad_norm": 0.879186370276052, "learning_rate": 6.414039651838066e-06, "loss": 0.1591, "step": 14619 }, { "epoch": 0.4265126320088687, "grad_norm": 0.976453014064819, "learning_rate": 6.41358649775731e-06, "loss": 0.1443, "step": 14620 }, { "epoch": 0.42654180523951224, "grad_norm": 0.7046471432503829, "learning_rate": 6.413133331056391e-06, "loss": 0.1367, "step": 14621 }, { "epoch": 0.4265709784701558, "grad_norm": 1.0266429249007463, "learning_rate": 6.412680151739354e-06, "loss": 0.1399, "step": 14622 }, { "epoch": 0.42660015170079935, "grad_norm": 0.7657002313823391, "learning_rate": 6.412226959810246e-06, "loss": 0.1357, "step": 14623 }, { "epoch": 0.4266293249314429, "grad_norm": 0.9034638933677264, "learning_rate": 6.411773755273114e-06, "loss": 0.126, "step": 14624 }, { "epoch": 0.42665849816208645, "grad_norm": 0.7096191941575555, "learning_rate": 6.411320538132002e-06, "loss": 0.1174, "step": 14625 }, { "epoch": 0.42668767139273, "grad_norm": 0.6205600714182188, "learning_rate": 6.410867308390958e-06, "loss": 0.1377, "step": 14626 }, { "epoch": 0.4267168446233736, "grad_norm": 0.830338251694489, "learning_rate": 6.410414066054026e-06, "loss": 0.1359, "step": 14627 }, { "epoch": 0.42674601785401717, "grad_norm": 0.8140714428121494, "learning_rate": 6.409960811125256e-06, "loss": 0.1316, "step": 14628 }, { "epoch": 0.4267751910846607, "grad_norm": 1.0873122870007383, "learning_rate": 6.40950754360869e-06, "loss": 0.1445, "step": 14629 }, { "epoch": 0.4268043643153043, "grad_norm": 0.7581253703358832, "learning_rate": 6.40905426350838e-06, "loss": 0.132, "step": 14630 }, { "epoch": 0.4268335375459478, "grad_norm": 0.8040443130093091, "learning_rate": 6.408600970828367e-06, "loss": 0.1509, "step": 14631 }, { "epoch": 0.4268627107765914, "grad_norm": 0.8831884676715346, "learning_rate": 6.408147665572701e-06, "loss": 0.1342, "step": 14632 }, { "epoch": 0.42689188400723493, "grad_norm": 0.9123740565031518, "learning_rate": 6.407694347745431e-06, "loss": 0.1393, "step": 14633 }, { "epoch": 0.42692105723787854, "grad_norm": 2.6416471712195997, "learning_rate": 6.407241017350601e-06, "loss": 0.1462, "step": 14634 }, { "epoch": 0.4269502304685221, "grad_norm": 0.8061560738824025, "learning_rate": 6.406787674392259e-06, "loss": 0.1611, "step": 14635 }, { "epoch": 0.42697940369916565, "grad_norm": 0.9363533775038286, "learning_rate": 6.406334318874452e-06, "loss": 0.1451, "step": 14636 }, { "epoch": 0.4270085769298092, "grad_norm": 0.867880207892632, "learning_rate": 6.4058809508012285e-06, "loss": 0.1448, "step": 14637 }, { "epoch": 0.42703775016045276, "grad_norm": 0.8735382344438637, "learning_rate": 6.405427570176635e-06, "loss": 0.1204, "step": 14638 }, { "epoch": 0.4270669233910963, "grad_norm": 0.9445295126908109, "learning_rate": 6.40497417700472e-06, "loss": 0.145, "step": 14639 }, { "epoch": 0.4270960966217399, "grad_norm": 0.8897433233589293, "learning_rate": 6.404520771289531e-06, "loss": 0.1316, "step": 14640 }, { "epoch": 0.42712526985238347, "grad_norm": 1.2586564701146228, "learning_rate": 6.404067353035115e-06, "loss": 0.1382, "step": 14641 }, { "epoch": 0.427154443083027, "grad_norm": 0.9972580081860094, "learning_rate": 6.403613922245522e-06, "loss": 0.1609, "step": 14642 }, { "epoch": 0.4271836163136706, "grad_norm": 0.8452344557746346, "learning_rate": 6.403160478924799e-06, "loss": 0.1524, "step": 14643 }, { "epoch": 0.42721278954431413, "grad_norm": 1.3583397959402321, "learning_rate": 6.402707023076993e-06, "loss": 0.1462, "step": 14644 }, { "epoch": 0.4272419627749577, "grad_norm": 1.063121484670113, "learning_rate": 6.402253554706155e-06, "loss": 0.1483, "step": 14645 }, { "epoch": 0.42727113600560124, "grad_norm": 0.8858529375043536, "learning_rate": 6.401800073816331e-06, "loss": 0.1359, "step": 14646 }, { "epoch": 0.42730030923624485, "grad_norm": 1.106632331475958, "learning_rate": 6.401346580411571e-06, "loss": 0.1426, "step": 14647 }, { "epoch": 0.4273294824668884, "grad_norm": 1.1434067015461586, "learning_rate": 6.400893074495923e-06, "loss": 0.1442, "step": 14648 }, { "epoch": 0.42735865569753195, "grad_norm": 0.961129580601906, "learning_rate": 6.4004395560734366e-06, "loss": 0.1307, "step": 14649 }, { "epoch": 0.4273878289281755, "grad_norm": 1.217218499848346, "learning_rate": 6.39998602514816e-06, "loss": 0.1172, "step": 14650 }, { "epoch": 0.42741700215881906, "grad_norm": 0.9946967120904634, "learning_rate": 6.399532481724142e-06, "loss": 0.1245, "step": 14651 }, { "epoch": 0.4274461753894626, "grad_norm": 0.7041848791125103, "learning_rate": 6.399078925805432e-06, "loss": 0.123, "step": 14652 }, { "epoch": 0.42747534862010617, "grad_norm": 1.0551097008424255, "learning_rate": 6.398625357396079e-06, "loss": 0.1329, "step": 14653 }, { "epoch": 0.4275045218507498, "grad_norm": 1.2890936070936188, "learning_rate": 6.398171776500132e-06, "loss": 0.1304, "step": 14654 }, { "epoch": 0.42753369508139333, "grad_norm": 0.7244104729640806, "learning_rate": 6.397718183121644e-06, "loss": 0.1334, "step": 14655 }, { "epoch": 0.4275628683120369, "grad_norm": 0.7180189347348244, "learning_rate": 6.397264577264659e-06, "loss": 0.1486, "step": 14656 }, { "epoch": 0.42759204154268043, "grad_norm": 0.7370659794570515, "learning_rate": 6.396810958933231e-06, "loss": 0.1218, "step": 14657 }, { "epoch": 0.427621214773324, "grad_norm": 1.2249162954026172, "learning_rate": 6.396357328131408e-06, "loss": 0.1288, "step": 14658 }, { "epoch": 0.42765038800396754, "grad_norm": 0.7801917318751426, "learning_rate": 6.3959036848632395e-06, "loss": 0.1374, "step": 14659 }, { "epoch": 0.4276795612346111, "grad_norm": 0.6327083136322517, "learning_rate": 6.395450029132777e-06, "loss": 0.1112, "step": 14660 }, { "epoch": 0.4277087344652547, "grad_norm": 0.9239410078834523, "learning_rate": 6.39499636094407e-06, "loss": 0.1371, "step": 14661 }, { "epoch": 0.42773790769589826, "grad_norm": 0.7501179303141055, "learning_rate": 6.394542680301165e-06, "loss": 0.1303, "step": 14662 }, { "epoch": 0.4277670809265418, "grad_norm": 0.7814820705795176, "learning_rate": 6.3940889872081205e-06, "loss": 0.1485, "step": 14663 }, { "epoch": 0.42779625415718536, "grad_norm": 0.7109996794925011, "learning_rate": 6.39363528166898e-06, "loss": 0.1244, "step": 14664 }, { "epoch": 0.4278254273878289, "grad_norm": 0.907689957657468, "learning_rate": 6.393181563687798e-06, "loss": 0.1306, "step": 14665 }, { "epoch": 0.42785460061847247, "grad_norm": 0.7765637906615048, "learning_rate": 6.3927278332686215e-06, "loss": 0.1283, "step": 14666 }, { "epoch": 0.4278837738491161, "grad_norm": 0.8711063592658128, "learning_rate": 6.392274090415505e-06, "loss": 0.1428, "step": 14667 }, { "epoch": 0.42791294707975963, "grad_norm": 0.9415437304505929, "learning_rate": 6.391820335132497e-06, "loss": 0.1452, "step": 14668 }, { "epoch": 0.4279421203104032, "grad_norm": 0.6453696198741923, "learning_rate": 6.391366567423649e-06, "loss": 0.1427, "step": 14669 }, { "epoch": 0.42797129354104674, "grad_norm": 1.0611456114369375, "learning_rate": 6.390912787293012e-06, "loss": 0.1602, "step": 14670 }, { "epoch": 0.4280004667716903, "grad_norm": 0.8196304221773512, "learning_rate": 6.390458994744638e-06, "loss": 0.131, "step": 14671 }, { "epoch": 0.42802964000233384, "grad_norm": 0.9389071162102353, "learning_rate": 6.390005189782579e-06, "loss": 0.1619, "step": 14672 }, { "epoch": 0.4280588132329774, "grad_norm": 0.920338543674002, "learning_rate": 6.389551372410886e-06, "loss": 0.1505, "step": 14673 }, { "epoch": 0.428087986463621, "grad_norm": 0.8812668029713562, "learning_rate": 6.389097542633608e-06, "loss": 0.1366, "step": 14674 }, { "epoch": 0.42811715969426456, "grad_norm": 0.8265439211426261, "learning_rate": 6.388643700454801e-06, "loss": 0.1411, "step": 14675 }, { "epoch": 0.4281463329249081, "grad_norm": 1.0629184186352492, "learning_rate": 6.388189845878513e-06, "loss": 0.1625, "step": 14676 }, { "epoch": 0.42817550615555167, "grad_norm": 0.9247386992323844, "learning_rate": 6.387735978908797e-06, "loss": 0.1404, "step": 14677 }, { "epoch": 0.4282046793861952, "grad_norm": 0.9480840655829462, "learning_rate": 6.387282099549707e-06, "loss": 0.1414, "step": 14678 }, { "epoch": 0.4282338526168388, "grad_norm": 0.6551231763411762, "learning_rate": 6.386828207805292e-06, "loss": 0.1297, "step": 14679 }, { "epoch": 0.4282630258474823, "grad_norm": 0.7151633617029399, "learning_rate": 6.386374303679607e-06, "loss": 0.1209, "step": 14680 }, { "epoch": 0.42829219907812593, "grad_norm": 1.0121783239850635, "learning_rate": 6.385920387176703e-06, "loss": 0.1431, "step": 14681 }, { "epoch": 0.4283213723087695, "grad_norm": 1.028630375965957, "learning_rate": 6.385466458300632e-06, "loss": 0.1454, "step": 14682 }, { "epoch": 0.42835054553941304, "grad_norm": 0.9145442814616266, "learning_rate": 6.385012517055448e-06, "loss": 0.1477, "step": 14683 }, { "epoch": 0.4283797187700566, "grad_norm": 0.8999939217874624, "learning_rate": 6.384558563445203e-06, "loss": 0.17, "step": 14684 }, { "epoch": 0.42840889200070015, "grad_norm": 0.8934297591872845, "learning_rate": 6.384104597473948e-06, "loss": 0.1426, "step": 14685 }, { "epoch": 0.4284380652313437, "grad_norm": 0.9772242512820827, "learning_rate": 6.383650619145738e-06, "loss": 0.1505, "step": 14686 }, { "epoch": 0.42846723846198725, "grad_norm": 0.7706815869965222, "learning_rate": 6.383196628464627e-06, "loss": 0.1528, "step": 14687 }, { "epoch": 0.42849641169263086, "grad_norm": 0.8005157520564964, "learning_rate": 6.382742625434667e-06, "loss": 0.1521, "step": 14688 }, { "epoch": 0.4285255849232744, "grad_norm": 1.0033328094096647, "learning_rate": 6.382288610059908e-06, "loss": 0.1391, "step": 14689 }, { "epoch": 0.42855475815391797, "grad_norm": 0.8924927853595284, "learning_rate": 6.3818345823444094e-06, "loss": 0.1515, "step": 14690 }, { "epoch": 0.4285839313845615, "grad_norm": 0.8280280209418701, "learning_rate": 6.38138054229222e-06, "loss": 0.1419, "step": 14691 }, { "epoch": 0.4286131046152051, "grad_norm": 0.8792603523785534, "learning_rate": 6.380926489907394e-06, "loss": 0.1614, "step": 14692 }, { "epoch": 0.42864227784584863, "grad_norm": 0.9324739732655796, "learning_rate": 6.380472425193989e-06, "loss": 0.1259, "step": 14693 }, { "epoch": 0.42867145107649224, "grad_norm": 0.7579055524898174, "learning_rate": 6.380018348156054e-06, "loss": 0.1331, "step": 14694 }, { "epoch": 0.4287006243071358, "grad_norm": 0.7546100364855088, "learning_rate": 6.379564258797644e-06, "loss": 0.1603, "step": 14695 }, { "epoch": 0.42872979753777934, "grad_norm": 0.9605596683417, "learning_rate": 6.379110157122815e-06, "loss": 0.1404, "step": 14696 }, { "epoch": 0.4287589707684229, "grad_norm": 0.9019800145363126, "learning_rate": 6.378656043135618e-06, "loss": 0.1459, "step": 14697 }, { "epoch": 0.42878814399906645, "grad_norm": 1.2597422167241183, "learning_rate": 6.37820191684011e-06, "loss": 0.1395, "step": 14698 }, { "epoch": 0.42881731722971, "grad_norm": 0.7761258828569346, "learning_rate": 6.377747778240344e-06, "loss": 0.1407, "step": 14699 }, { "epoch": 0.42884649046035356, "grad_norm": 0.6569514069900847, "learning_rate": 6.377293627340374e-06, "loss": 0.1284, "step": 14700 }, { "epoch": 0.42887566369099717, "grad_norm": 0.7491732766116863, "learning_rate": 6.376839464144257e-06, "loss": 0.1438, "step": 14701 }, { "epoch": 0.4289048369216407, "grad_norm": 0.9238875908563786, "learning_rate": 6.376385288656044e-06, "loss": 0.1654, "step": 14702 }, { "epoch": 0.4289340101522843, "grad_norm": 0.7969852178845906, "learning_rate": 6.3759311008797945e-06, "loss": 0.1444, "step": 14703 }, { "epoch": 0.4289631833829278, "grad_norm": 0.7045271765828216, "learning_rate": 6.3754769008195576e-06, "loss": 0.1431, "step": 14704 }, { "epoch": 0.4289923566135714, "grad_norm": 1.0154308113320514, "learning_rate": 6.375022688479393e-06, "loss": 0.151, "step": 14705 }, { "epoch": 0.42902152984421493, "grad_norm": 0.9667412064910351, "learning_rate": 6.374568463863353e-06, "loss": 0.1422, "step": 14706 }, { "epoch": 0.4290507030748585, "grad_norm": 0.7824217836663594, "learning_rate": 6.374114226975494e-06, "loss": 0.1273, "step": 14707 }, { "epoch": 0.4290798763055021, "grad_norm": 1.1880867361015324, "learning_rate": 6.3736599778198725e-06, "loss": 0.1203, "step": 14708 }, { "epoch": 0.42910904953614565, "grad_norm": 1.0002914702130004, "learning_rate": 6.373205716400543e-06, "loss": 0.1455, "step": 14709 }, { "epoch": 0.4291382227667892, "grad_norm": 1.0439607342833814, "learning_rate": 6.372751442721559e-06, "loss": 0.1555, "step": 14710 }, { "epoch": 0.42916739599743275, "grad_norm": 1.0342263842925121, "learning_rate": 6.372297156786978e-06, "loss": 0.1154, "step": 14711 }, { "epoch": 0.4291965692280763, "grad_norm": 0.9284556281035707, "learning_rate": 6.371842858600856e-06, "loss": 0.1457, "step": 14712 }, { "epoch": 0.42922574245871986, "grad_norm": 0.9488620328940125, "learning_rate": 6.3713885481672476e-06, "loss": 0.135, "step": 14713 }, { "epoch": 0.4292549156893634, "grad_norm": 0.9318273083737971, "learning_rate": 6.37093422549021e-06, "loss": 0.1406, "step": 14714 }, { "epoch": 0.429284088920007, "grad_norm": 0.6390769448191116, "learning_rate": 6.3704798905737995e-06, "loss": 0.1315, "step": 14715 }, { "epoch": 0.4293132621506506, "grad_norm": 0.8955550043126647, "learning_rate": 6.3700255434220714e-06, "loss": 0.144, "step": 14716 }, { "epoch": 0.42934243538129413, "grad_norm": 0.8678171250181997, "learning_rate": 6.3695711840390826e-06, "loss": 0.1457, "step": 14717 }, { "epoch": 0.4293716086119377, "grad_norm": 0.797499210866375, "learning_rate": 6.36911681242889e-06, "loss": 0.1627, "step": 14718 }, { "epoch": 0.42940078184258124, "grad_norm": 0.9453525502046212, "learning_rate": 6.368662428595548e-06, "loss": 0.1451, "step": 14719 }, { "epoch": 0.4294299550732248, "grad_norm": 0.6540759317187861, "learning_rate": 6.368208032543115e-06, "loss": 0.1609, "step": 14720 }, { "epoch": 0.42945912830386834, "grad_norm": 0.8733203342358636, "learning_rate": 6.367753624275648e-06, "loss": 0.1464, "step": 14721 }, { "epoch": 0.42948830153451195, "grad_norm": 0.8924426659836293, "learning_rate": 6.367299203797202e-06, "loss": 0.1104, "step": 14722 }, { "epoch": 0.4295174747651555, "grad_norm": 0.7593034303139613, "learning_rate": 6.366844771111835e-06, "loss": 0.1509, "step": 14723 }, { "epoch": 0.42954664799579906, "grad_norm": 0.8137304725602087, "learning_rate": 6.366390326223605e-06, "loss": 0.1425, "step": 14724 }, { "epoch": 0.4295758212264426, "grad_norm": 1.0327459850613934, "learning_rate": 6.365935869136568e-06, "loss": 0.1279, "step": 14725 }, { "epoch": 0.42960499445708616, "grad_norm": 0.6340026185915157, "learning_rate": 6.365481399854782e-06, "loss": 0.1454, "step": 14726 }, { "epoch": 0.4296341676877297, "grad_norm": 0.7648429592184687, "learning_rate": 6.365026918382303e-06, "loss": 0.1585, "step": 14727 }, { "epoch": 0.4296633409183733, "grad_norm": 1.2101507521140271, "learning_rate": 6.36457242472319e-06, "loss": 0.1483, "step": 14728 }, { "epoch": 0.4296925141490169, "grad_norm": 0.7692745981042659, "learning_rate": 6.3641179188815e-06, "loss": 0.1363, "step": 14729 }, { "epoch": 0.42972168737966043, "grad_norm": 0.6070084951386207, "learning_rate": 6.363663400861291e-06, "loss": 0.1166, "step": 14730 }, { "epoch": 0.429750860610304, "grad_norm": 0.9497120536580956, "learning_rate": 6.363208870666621e-06, "loss": 0.1697, "step": 14731 }, { "epoch": 0.42978003384094754, "grad_norm": 0.9847011205316754, "learning_rate": 6.362754328301548e-06, "loss": 0.133, "step": 14732 }, { "epoch": 0.4298092070715911, "grad_norm": 0.7350037064593803, "learning_rate": 6.36229977377013e-06, "loss": 0.1421, "step": 14733 }, { "epoch": 0.42983838030223465, "grad_norm": 0.8404109498205895, "learning_rate": 6.361845207076423e-06, "loss": 0.1665, "step": 14734 }, { "epoch": 0.42986755353287825, "grad_norm": 0.829169797032689, "learning_rate": 6.361390628224488e-06, "loss": 0.1252, "step": 14735 }, { "epoch": 0.4298967267635218, "grad_norm": 0.9767726647577462, "learning_rate": 6.3609360372183834e-06, "loss": 0.141, "step": 14736 }, { "epoch": 0.42992589999416536, "grad_norm": 1.0249799083053788, "learning_rate": 6.360481434062164e-06, "loss": 0.1345, "step": 14737 }, { "epoch": 0.4299550732248089, "grad_norm": 1.0543286085668138, "learning_rate": 6.360026818759894e-06, "loss": 0.1477, "step": 14738 }, { "epoch": 0.42998424645545247, "grad_norm": 0.8235070795515166, "learning_rate": 6.359572191315629e-06, "loss": 0.1285, "step": 14739 }, { "epoch": 0.430013419686096, "grad_norm": 0.7234196639753296, "learning_rate": 6.359117551733427e-06, "loss": 0.1303, "step": 14740 }, { "epoch": 0.4300425929167396, "grad_norm": 0.7910670223734967, "learning_rate": 6.358662900017348e-06, "loss": 0.1565, "step": 14741 }, { "epoch": 0.4300717661473832, "grad_norm": 1.1594702431343762, "learning_rate": 6.358208236171451e-06, "loss": 0.1157, "step": 14742 }, { "epoch": 0.43010093937802674, "grad_norm": 0.7541241378096104, "learning_rate": 6.357753560199795e-06, "loss": 0.1321, "step": 14743 }, { "epoch": 0.4301301126086703, "grad_norm": 0.7988039846351058, "learning_rate": 6.35729887210644e-06, "loss": 0.1454, "step": 14744 }, { "epoch": 0.43015928583931384, "grad_norm": 0.8143874563860272, "learning_rate": 6.356844171895444e-06, "loss": 0.1423, "step": 14745 }, { "epoch": 0.4301884590699574, "grad_norm": 0.8809231822137538, "learning_rate": 6.356389459570868e-06, "loss": 0.1291, "step": 14746 }, { "epoch": 0.43021763230060095, "grad_norm": 0.7402927413175663, "learning_rate": 6.35593473513677e-06, "loss": 0.1326, "step": 14747 }, { "epoch": 0.4302468055312445, "grad_norm": 0.8641214382003821, "learning_rate": 6.355479998597211e-06, "loss": 0.1348, "step": 14748 }, { "epoch": 0.4302759787618881, "grad_norm": 1.1837464571437748, "learning_rate": 6.355025249956249e-06, "loss": 0.1471, "step": 14749 }, { "epoch": 0.43030515199253166, "grad_norm": 0.7054205118269635, "learning_rate": 6.354570489217946e-06, "loss": 0.1292, "step": 14750 }, { "epoch": 0.4303343252231752, "grad_norm": 0.9815155821544417, "learning_rate": 6.35411571638636e-06, "loss": 0.1482, "step": 14751 }, { "epoch": 0.43036349845381877, "grad_norm": 0.7517246662679853, "learning_rate": 6.353660931465553e-06, "loss": 0.1495, "step": 14752 }, { "epoch": 0.4303926716844623, "grad_norm": 0.8042249174324039, "learning_rate": 6.353206134459585e-06, "loss": 0.1376, "step": 14753 }, { "epoch": 0.4304218449151059, "grad_norm": 0.7419311244258114, "learning_rate": 6.352751325372515e-06, "loss": 0.1348, "step": 14754 }, { "epoch": 0.4304510181457495, "grad_norm": 0.7243243445421994, "learning_rate": 6.352296504208404e-06, "loss": 0.1289, "step": 14755 }, { "epoch": 0.43048019137639304, "grad_norm": 0.7399725796332371, "learning_rate": 6.351841670971313e-06, "loss": 0.1236, "step": 14756 }, { "epoch": 0.4305093646070366, "grad_norm": 0.7262352627221309, "learning_rate": 6.3513868256653e-06, "loss": 0.1465, "step": 14757 }, { "epoch": 0.43053853783768015, "grad_norm": 0.982726738830854, "learning_rate": 6.350931968294432e-06, "loss": 0.1353, "step": 14758 }, { "epoch": 0.4305677110683237, "grad_norm": 0.8203260531758041, "learning_rate": 6.3504770988627625e-06, "loss": 0.1286, "step": 14759 }, { "epoch": 0.43059688429896725, "grad_norm": 0.8721819696136724, "learning_rate": 6.350022217374358e-06, "loss": 0.1239, "step": 14760 }, { "epoch": 0.4306260575296108, "grad_norm": 0.924114420605603, "learning_rate": 6.349567323833277e-06, "loss": 0.125, "step": 14761 }, { "epoch": 0.4306552307602544, "grad_norm": 0.9240299533530848, "learning_rate": 6.349112418243579e-06, "loss": 0.143, "step": 14762 }, { "epoch": 0.43068440399089797, "grad_norm": 0.7282105079622296, "learning_rate": 6.3486575006093295e-06, "loss": 0.133, "step": 14763 }, { "epoch": 0.4307135772215415, "grad_norm": 0.8709789885973791, "learning_rate": 6.348202570934588e-06, "loss": 0.1231, "step": 14764 }, { "epoch": 0.4307427504521851, "grad_norm": 1.1936752809308744, "learning_rate": 6.347747629223415e-06, "loss": 0.1609, "step": 14765 }, { "epoch": 0.43077192368282863, "grad_norm": 0.9580558806630077, "learning_rate": 6.347292675479872e-06, "loss": 0.1489, "step": 14766 }, { "epoch": 0.4308010969134722, "grad_norm": 0.8337521198576897, "learning_rate": 6.346837709708023e-06, "loss": 0.1415, "step": 14767 }, { "epoch": 0.43083027014411573, "grad_norm": 0.9152428122636086, "learning_rate": 6.34638273191193e-06, "loss": 0.1323, "step": 14768 }, { "epoch": 0.43085944337475934, "grad_norm": 1.162065846525339, "learning_rate": 6.34592774209565e-06, "loss": 0.1692, "step": 14769 }, { "epoch": 0.4308886166054029, "grad_norm": 0.8715189540513543, "learning_rate": 6.345472740263251e-06, "loss": 0.1455, "step": 14770 }, { "epoch": 0.43091778983604645, "grad_norm": 0.9425556467260457, "learning_rate": 6.345017726418792e-06, "loss": 0.1382, "step": 14771 }, { "epoch": 0.43094696306669, "grad_norm": 1.1258620194234759, "learning_rate": 6.344562700566334e-06, "loss": 0.1311, "step": 14772 }, { "epoch": 0.43097613629733356, "grad_norm": 0.9633974929046847, "learning_rate": 6.344107662709943e-06, "loss": 0.1332, "step": 14773 }, { "epoch": 0.4310053095279771, "grad_norm": 0.7794259895237825, "learning_rate": 6.343652612853679e-06, "loss": 0.1058, "step": 14774 }, { "epoch": 0.43103448275862066, "grad_norm": 1.0422691345915542, "learning_rate": 6.343197551001605e-06, "loss": 0.1167, "step": 14775 }, { "epoch": 0.43106365598926427, "grad_norm": 0.9600283141702454, "learning_rate": 6.342742477157784e-06, "loss": 0.1456, "step": 14776 }, { "epoch": 0.4310928292199078, "grad_norm": 0.8345360247156994, "learning_rate": 6.3422873913262796e-06, "loss": 0.1537, "step": 14777 }, { "epoch": 0.4311220024505514, "grad_norm": 0.9786213226755879, "learning_rate": 6.341832293511152e-06, "loss": 0.1425, "step": 14778 }, { "epoch": 0.43115117568119493, "grad_norm": 0.9059194181708891, "learning_rate": 6.341377183716469e-06, "loss": 0.1461, "step": 14779 }, { "epoch": 0.4311803489118385, "grad_norm": 0.6796709303229915, "learning_rate": 6.340922061946288e-06, "loss": 0.1464, "step": 14780 }, { "epoch": 0.43120952214248204, "grad_norm": 0.788282698161525, "learning_rate": 6.3404669282046745e-06, "loss": 0.1427, "step": 14781 }, { "epoch": 0.43123869537312565, "grad_norm": 0.9207172393032127, "learning_rate": 6.340011782495694e-06, "loss": 0.1189, "step": 14782 }, { "epoch": 0.4312678686037692, "grad_norm": 0.7501648475307601, "learning_rate": 6.339556624823409e-06, "loss": 0.1365, "step": 14783 }, { "epoch": 0.43129704183441275, "grad_norm": 0.7625215407292547, "learning_rate": 6.339101455191881e-06, "loss": 0.1503, "step": 14784 }, { "epoch": 0.4313262150650563, "grad_norm": 0.8133516307255715, "learning_rate": 6.338646273605175e-06, "loss": 0.1318, "step": 14785 }, { "epoch": 0.43135538829569986, "grad_norm": 0.6745848108498254, "learning_rate": 6.338191080067354e-06, "loss": 0.1322, "step": 14786 }, { "epoch": 0.4313845615263434, "grad_norm": 0.6268664280390904, "learning_rate": 6.337735874582482e-06, "loss": 0.1542, "step": 14787 }, { "epoch": 0.43141373475698697, "grad_norm": 0.7997227880190497, "learning_rate": 6.337280657154625e-06, "loss": 0.1309, "step": 14788 }, { "epoch": 0.4314429079876306, "grad_norm": 0.703119308180586, "learning_rate": 6.336825427787845e-06, "loss": 0.1286, "step": 14789 }, { "epoch": 0.43147208121827413, "grad_norm": 0.7206278096767809, "learning_rate": 6.336370186486207e-06, "loss": 0.1321, "step": 14790 }, { "epoch": 0.4315012544489177, "grad_norm": 0.7961418270278572, "learning_rate": 6.335914933253775e-06, "loss": 0.1137, "step": 14791 }, { "epoch": 0.43153042767956123, "grad_norm": 1.242814905356728, "learning_rate": 6.335459668094612e-06, "loss": 0.1316, "step": 14792 }, { "epoch": 0.4315596009102048, "grad_norm": 0.774466874132768, "learning_rate": 6.335004391012786e-06, "loss": 0.1186, "step": 14793 }, { "epoch": 0.43158877414084834, "grad_norm": 2.4898668528798806, "learning_rate": 6.334549102012357e-06, "loss": 0.135, "step": 14794 }, { "epoch": 0.4316179473714919, "grad_norm": 0.8596545447807837, "learning_rate": 6.334093801097395e-06, "loss": 0.1519, "step": 14795 }, { "epoch": 0.4316471206021355, "grad_norm": 0.844686921865426, "learning_rate": 6.333638488271961e-06, "loss": 0.1436, "step": 14796 }, { "epoch": 0.43167629383277906, "grad_norm": 0.9872144068219241, "learning_rate": 6.33318316354012e-06, "loss": 0.1603, "step": 14797 }, { "epoch": 0.4317054670634226, "grad_norm": 0.749878854173997, "learning_rate": 6.332727826905939e-06, "loss": 0.1258, "step": 14798 }, { "epoch": 0.43173464029406616, "grad_norm": 0.7914791480108483, "learning_rate": 6.33227247837348e-06, "loss": 0.156, "step": 14799 }, { "epoch": 0.4317638135247097, "grad_norm": 0.7703284842657782, "learning_rate": 6.331817117946814e-06, "loss": 0.1167, "step": 14800 }, { "epoch": 0.43179298675535327, "grad_norm": 0.7347550592703608, "learning_rate": 6.33136174563e-06, "loss": 0.1318, "step": 14801 }, { "epoch": 0.4318221599859968, "grad_norm": 0.8049458505427863, "learning_rate": 6.330906361427106e-06, "loss": 0.1351, "step": 14802 }, { "epoch": 0.43185133321664043, "grad_norm": 0.8281217131378312, "learning_rate": 6.330450965342199e-06, "loss": 0.1359, "step": 14803 }, { "epoch": 0.431880506447284, "grad_norm": 0.8203016360463499, "learning_rate": 6.329995557379344e-06, "loss": 0.1542, "step": 14804 }, { "epoch": 0.43190967967792754, "grad_norm": 0.748088965862053, "learning_rate": 6.329540137542605e-06, "loss": 0.152, "step": 14805 }, { "epoch": 0.4319388529085711, "grad_norm": 0.8582098892345322, "learning_rate": 6.329084705836049e-06, "loss": 0.1433, "step": 14806 }, { "epoch": 0.43196802613921464, "grad_norm": 0.8470891127566365, "learning_rate": 6.328629262263741e-06, "loss": 0.1301, "step": 14807 }, { "epoch": 0.4319971993698582, "grad_norm": 0.7133027560523214, "learning_rate": 6.328173806829751e-06, "loss": 0.1521, "step": 14808 }, { "epoch": 0.4320263726005018, "grad_norm": 0.9062881346168233, "learning_rate": 6.3277183395381405e-06, "loss": 0.1244, "step": 14809 }, { "epoch": 0.43205554583114536, "grad_norm": 0.7081085915181456, "learning_rate": 6.3272628603929775e-06, "loss": 0.123, "step": 14810 }, { "epoch": 0.4320847190617889, "grad_norm": 0.7425128005166141, "learning_rate": 6.3268073693983275e-06, "loss": 0.1323, "step": 14811 }, { "epoch": 0.43211389229243247, "grad_norm": 0.8403564542402145, "learning_rate": 6.3263518665582606e-06, "loss": 0.1224, "step": 14812 }, { "epoch": 0.432143065523076, "grad_norm": 0.7591884647532012, "learning_rate": 6.32589635187684e-06, "loss": 0.1504, "step": 14813 }, { "epoch": 0.4321722387537196, "grad_norm": 0.7700852059272205, "learning_rate": 6.325440825358131e-06, "loss": 0.1317, "step": 14814 }, { "epoch": 0.4322014119843631, "grad_norm": 0.79680827071898, "learning_rate": 6.324985287006206e-06, "loss": 0.1345, "step": 14815 }, { "epoch": 0.43223058521500674, "grad_norm": 0.9241577630626693, "learning_rate": 6.324529736825127e-06, "loss": 0.125, "step": 14816 }, { "epoch": 0.4322597584456503, "grad_norm": 0.8675621657450447, "learning_rate": 6.324074174818961e-06, "loss": 0.1365, "step": 14817 }, { "epoch": 0.43228893167629384, "grad_norm": 0.709867319033397, "learning_rate": 6.323618600991781e-06, "loss": 0.1414, "step": 14818 }, { "epoch": 0.4323181049069374, "grad_norm": 0.7159572321632133, "learning_rate": 6.323163015347648e-06, "loss": 0.1476, "step": 14819 }, { "epoch": 0.43234727813758095, "grad_norm": 0.7680476626555585, "learning_rate": 6.322707417890631e-06, "loss": 0.1443, "step": 14820 }, { "epoch": 0.4323764513682245, "grad_norm": 0.8288063399065261, "learning_rate": 6.322251808624799e-06, "loss": 0.1141, "step": 14821 }, { "epoch": 0.43240562459886805, "grad_norm": 0.6191127988079769, "learning_rate": 6.321796187554217e-06, "loss": 0.1378, "step": 14822 }, { "epoch": 0.43243479782951166, "grad_norm": 1.4845953736268853, "learning_rate": 6.321340554682955e-06, "loss": 0.1489, "step": 14823 }, { "epoch": 0.4324639710601552, "grad_norm": 1.2885937859330068, "learning_rate": 6.320884910015079e-06, "loss": 0.1437, "step": 14824 }, { "epoch": 0.43249314429079877, "grad_norm": 0.8138087163816917, "learning_rate": 6.320429253554661e-06, "loss": 0.1098, "step": 14825 }, { "epoch": 0.4325223175214423, "grad_norm": 0.8907509065064294, "learning_rate": 6.319973585305762e-06, "loss": 0.1451, "step": 14826 }, { "epoch": 0.4325514907520859, "grad_norm": 0.8669764743772568, "learning_rate": 6.319517905272455e-06, "loss": 0.1552, "step": 14827 }, { "epoch": 0.43258066398272943, "grad_norm": 1.0586071764642637, "learning_rate": 6.319062213458808e-06, "loss": 0.1477, "step": 14828 }, { "epoch": 0.432609837213373, "grad_norm": 0.8915242741557635, "learning_rate": 6.318606509868888e-06, "loss": 0.1427, "step": 14829 }, { "epoch": 0.4326390104440166, "grad_norm": 0.7399806161453074, "learning_rate": 6.318150794506765e-06, "loss": 0.13, "step": 14830 }, { "epoch": 0.43266818367466015, "grad_norm": 0.939475842359072, "learning_rate": 6.317695067376506e-06, "loss": 0.1449, "step": 14831 }, { "epoch": 0.4326973569053037, "grad_norm": 0.7400291112559871, "learning_rate": 6.3172393284821775e-06, "loss": 0.114, "step": 14832 }, { "epoch": 0.43272653013594725, "grad_norm": 0.7331330322028462, "learning_rate": 6.316783577827854e-06, "loss": 0.1268, "step": 14833 }, { "epoch": 0.4327557033665908, "grad_norm": 1.0497645154772755, "learning_rate": 6.3163278154176e-06, "loss": 0.1552, "step": 14834 }, { "epoch": 0.43278487659723436, "grad_norm": 0.9624530482482117, "learning_rate": 6.315872041255484e-06, "loss": 0.1213, "step": 14835 }, { "epoch": 0.43281404982787797, "grad_norm": 1.0323126643004623, "learning_rate": 6.3154162553455775e-06, "loss": 0.142, "step": 14836 }, { "epoch": 0.4328432230585215, "grad_norm": 0.980677544834582, "learning_rate": 6.31496045769195e-06, "loss": 0.1549, "step": 14837 }, { "epoch": 0.4328723962891651, "grad_norm": 0.8966870514937749, "learning_rate": 6.314504648298667e-06, "loss": 0.1512, "step": 14838 }, { "epoch": 0.4329015695198086, "grad_norm": 0.7708401591962507, "learning_rate": 6.3140488271698015e-06, "loss": 0.1196, "step": 14839 }, { "epoch": 0.4329307427504522, "grad_norm": 0.9611765463011959, "learning_rate": 6.3135929943094235e-06, "loss": 0.152, "step": 14840 }, { "epoch": 0.43295991598109573, "grad_norm": 0.7808004179677782, "learning_rate": 6.313137149721597e-06, "loss": 0.1158, "step": 14841 }, { "epoch": 0.4329890892117393, "grad_norm": 0.7507308527471663, "learning_rate": 6.312681293410399e-06, "loss": 0.135, "step": 14842 }, { "epoch": 0.4330182624423829, "grad_norm": 0.7333275014877074, "learning_rate": 6.312225425379896e-06, "loss": 0.1319, "step": 14843 }, { "epoch": 0.43304743567302645, "grad_norm": 0.7817531313835508, "learning_rate": 6.311769545634154e-06, "loss": 0.1585, "step": 14844 }, { "epoch": 0.43307660890367, "grad_norm": 0.8548138312330888, "learning_rate": 6.311313654177249e-06, "loss": 0.1431, "step": 14845 }, { "epoch": 0.43310578213431355, "grad_norm": 0.7211573466060593, "learning_rate": 6.310857751013248e-06, "loss": 0.1541, "step": 14846 }, { "epoch": 0.4331349553649571, "grad_norm": 0.645913438630516, "learning_rate": 6.3104018361462225e-06, "loss": 0.1229, "step": 14847 }, { "epoch": 0.43316412859560066, "grad_norm": 0.9447276864129122, "learning_rate": 6.309945909580243e-06, "loss": 0.139, "step": 14848 }, { "epoch": 0.4331933018262442, "grad_norm": 0.7900679300978004, "learning_rate": 6.309489971319378e-06, "loss": 0.1307, "step": 14849 }, { "epoch": 0.4332224750568878, "grad_norm": 0.8623032168405524, "learning_rate": 6.309034021367699e-06, "loss": 0.156, "step": 14850 }, { "epoch": 0.4332516482875314, "grad_norm": 1.346691040609426, "learning_rate": 6.308578059729278e-06, "loss": 0.1723, "step": 14851 }, { "epoch": 0.43328082151817493, "grad_norm": 1.0181550369805636, "learning_rate": 6.308122086408184e-06, "loss": 0.1391, "step": 14852 }, { "epoch": 0.4333099947488185, "grad_norm": 0.66617601220997, "learning_rate": 6.307666101408487e-06, "loss": 0.1405, "step": 14853 }, { "epoch": 0.43333916797946204, "grad_norm": 1.0803410521516597, "learning_rate": 6.30721010473426e-06, "loss": 0.1717, "step": 14854 }, { "epoch": 0.4333683412101056, "grad_norm": 0.9935671643146751, "learning_rate": 6.306754096389575e-06, "loss": 0.1304, "step": 14855 }, { "epoch": 0.43339751444074914, "grad_norm": 0.9995445759687146, "learning_rate": 6.306298076378499e-06, "loss": 0.1719, "step": 14856 }, { "epoch": 0.43342668767139275, "grad_norm": 0.8930670840373487, "learning_rate": 6.305842044705105e-06, "loss": 0.1458, "step": 14857 }, { "epoch": 0.4334558609020363, "grad_norm": 0.729476831124679, "learning_rate": 6.305386001373468e-06, "loss": 0.1315, "step": 14858 }, { "epoch": 0.43348503413267986, "grad_norm": 0.8435005806158672, "learning_rate": 6.3049299463876535e-06, "loss": 0.1395, "step": 14859 }, { "epoch": 0.4335142073633234, "grad_norm": 0.9560165677874352, "learning_rate": 6.304473879751738e-06, "loss": 0.1497, "step": 14860 }, { "epoch": 0.43354338059396696, "grad_norm": 0.8345507564547471, "learning_rate": 6.3040178014697905e-06, "loss": 0.1444, "step": 14861 }, { "epoch": 0.4335725538246105, "grad_norm": 1.1152359866359134, "learning_rate": 6.303561711545883e-06, "loss": 0.1392, "step": 14862 }, { "epoch": 0.43360172705525407, "grad_norm": 0.8206332388495481, "learning_rate": 6.303105609984087e-06, "loss": 0.1363, "step": 14863 }, { "epoch": 0.4336309002858977, "grad_norm": 0.7987801003957105, "learning_rate": 6.302649496788476e-06, "loss": 0.141, "step": 14864 }, { "epoch": 0.43366007351654123, "grad_norm": 0.8427239224657411, "learning_rate": 6.3021933719631215e-06, "loss": 0.1303, "step": 14865 }, { "epoch": 0.4336892467471848, "grad_norm": 0.851809512482157, "learning_rate": 6.301737235512096e-06, "loss": 0.1272, "step": 14866 }, { "epoch": 0.43371841997782834, "grad_norm": 0.760481169540387, "learning_rate": 6.301281087439469e-06, "loss": 0.1356, "step": 14867 }, { "epoch": 0.4337475932084719, "grad_norm": 0.8702583160884054, "learning_rate": 6.300824927749317e-06, "loss": 0.1262, "step": 14868 }, { "epoch": 0.43377676643911545, "grad_norm": 0.9830728223049329, "learning_rate": 6.300368756445709e-06, "loss": 0.1411, "step": 14869 }, { "epoch": 0.43380593966975906, "grad_norm": 0.7699137388317029, "learning_rate": 6.299912573532723e-06, "loss": 0.1248, "step": 14870 }, { "epoch": 0.4338351129004026, "grad_norm": 0.8193503189857513, "learning_rate": 6.299456379014424e-06, "loss": 0.1584, "step": 14871 }, { "epoch": 0.43386428613104616, "grad_norm": 0.7466214930730316, "learning_rate": 6.299000172894889e-06, "loss": 0.1465, "step": 14872 }, { "epoch": 0.4338934593616897, "grad_norm": 0.9512890462432595, "learning_rate": 6.298543955178192e-06, "loss": 0.1463, "step": 14873 }, { "epoch": 0.43392263259233327, "grad_norm": 0.9847601613888907, "learning_rate": 6.298087725868403e-06, "loss": 0.1606, "step": 14874 }, { "epoch": 0.4339518058229768, "grad_norm": 0.678758495208975, "learning_rate": 6.2976314849695985e-06, "loss": 0.1286, "step": 14875 }, { "epoch": 0.4339809790536204, "grad_norm": 0.8780937813226559, "learning_rate": 6.297175232485849e-06, "loss": 0.132, "step": 14876 }, { "epoch": 0.434010152284264, "grad_norm": 1.015683950220094, "learning_rate": 6.296718968421228e-06, "loss": 0.1433, "step": 14877 }, { "epoch": 0.43403932551490754, "grad_norm": 0.7916238415866884, "learning_rate": 6.296262692779811e-06, "loss": 0.1223, "step": 14878 }, { "epoch": 0.4340684987455511, "grad_norm": 0.5392514728974622, "learning_rate": 6.295806405565668e-06, "loss": 0.1095, "step": 14879 }, { "epoch": 0.43409767197619464, "grad_norm": 0.8330809840747785, "learning_rate": 6.295350106782877e-06, "loss": 0.1337, "step": 14880 }, { "epoch": 0.4341268452068382, "grad_norm": 1.262089795742695, "learning_rate": 6.294893796435508e-06, "loss": 0.14, "step": 14881 }, { "epoch": 0.43415601843748175, "grad_norm": 0.866794026353123, "learning_rate": 6.294437474527637e-06, "loss": 0.1564, "step": 14882 }, { "epoch": 0.4341851916681253, "grad_norm": 0.7113868590294488, "learning_rate": 6.293981141063336e-06, "loss": 0.1326, "step": 14883 }, { "epoch": 0.4342143648987689, "grad_norm": 0.9040129207199873, "learning_rate": 6.293524796046683e-06, "loss": 0.1393, "step": 14884 }, { "epoch": 0.43424353812941247, "grad_norm": 0.9098606553163996, "learning_rate": 6.293068439481749e-06, "loss": 0.1279, "step": 14885 }, { "epoch": 0.434272711360056, "grad_norm": 0.9560544364486673, "learning_rate": 6.2926120713726055e-06, "loss": 0.1355, "step": 14886 }, { "epoch": 0.43430188459069957, "grad_norm": 0.9017628328457967, "learning_rate": 6.292155691723331e-06, "loss": 0.1274, "step": 14887 }, { "epoch": 0.4343310578213431, "grad_norm": 1.0174906176820488, "learning_rate": 6.291699300538001e-06, "loss": 0.1381, "step": 14888 }, { "epoch": 0.4343602310519867, "grad_norm": 0.7896600965768121, "learning_rate": 6.291242897820686e-06, "loss": 0.1125, "step": 14889 }, { "epoch": 0.43438940428263023, "grad_norm": 0.8736816177417097, "learning_rate": 6.290786483575465e-06, "loss": 0.141, "step": 14890 }, { "epoch": 0.43441857751327384, "grad_norm": 0.8666296258123337, "learning_rate": 6.290330057806408e-06, "loss": 0.1431, "step": 14891 }, { "epoch": 0.4344477507439174, "grad_norm": 0.9022593673584454, "learning_rate": 6.289873620517594e-06, "loss": 0.1285, "step": 14892 }, { "epoch": 0.43447692397456095, "grad_norm": 0.7726111119294767, "learning_rate": 6.289417171713095e-06, "loss": 0.1192, "step": 14893 }, { "epoch": 0.4345060972052045, "grad_norm": 0.9764752659765082, "learning_rate": 6.288960711396987e-06, "loss": 0.1531, "step": 14894 }, { "epoch": 0.43453527043584805, "grad_norm": 1.1802016135698505, "learning_rate": 6.288504239573348e-06, "loss": 0.1348, "step": 14895 }, { "epoch": 0.4345644436664916, "grad_norm": 0.7652544979387879, "learning_rate": 6.2880477562462475e-06, "loss": 0.1306, "step": 14896 }, { "epoch": 0.4345936168971352, "grad_norm": 0.9221428203868074, "learning_rate": 6.287591261419765e-06, "loss": 0.1353, "step": 14897 }, { "epoch": 0.43462279012777877, "grad_norm": 0.9333945401115743, "learning_rate": 6.287134755097977e-06, "loss": 0.1694, "step": 14898 }, { "epoch": 0.4346519633584223, "grad_norm": 0.997088890826675, "learning_rate": 6.2866782372849555e-06, "loss": 0.1511, "step": 14899 }, { "epoch": 0.4346811365890659, "grad_norm": 0.7862373800922435, "learning_rate": 6.286221707984778e-06, "loss": 0.1322, "step": 14900 }, { "epoch": 0.43471030981970943, "grad_norm": 0.8032756680491813, "learning_rate": 6.28576516720152e-06, "loss": 0.1375, "step": 14901 }, { "epoch": 0.434739483050353, "grad_norm": 0.8915256699252233, "learning_rate": 6.285308614939259e-06, "loss": 0.1441, "step": 14902 }, { "epoch": 0.43476865628099653, "grad_norm": 0.8143586950469169, "learning_rate": 6.284852051202069e-06, "loss": 0.1562, "step": 14903 }, { "epoch": 0.43479782951164014, "grad_norm": 0.8097365289970733, "learning_rate": 6.284395475994024e-06, "loss": 0.1588, "step": 14904 }, { "epoch": 0.4348270027422837, "grad_norm": 0.6316064871648317, "learning_rate": 6.283938889319205e-06, "loss": 0.1258, "step": 14905 }, { "epoch": 0.43485617597292725, "grad_norm": 0.7199241791446711, "learning_rate": 6.283482291181686e-06, "loss": 0.1332, "step": 14906 }, { "epoch": 0.4348853492035708, "grad_norm": 0.9549830821817287, "learning_rate": 6.283025681585544e-06, "loss": 0.1295, "step": 14907 }, { "epoch": 0.43491452243421436, "grad_norm": 0.5998824774171313, "learning_rate": 6.282569060534854e-06, "loss": 0.132, "step": 14908 }, { "epoch": 0.4349436956648579, "grad_norm": 0.8522325645392712, "learning_rate": 6.2821124280336934e-06, "loss": 0.1778, "step": 14909 }, { "epoch": 0.43497286889550146, "grad_norm": 0.9510984597363902, "learning_rate": 6.28165578408614e-06, "loss": 0.1334, "step": 14910 }, { "epoch": 0.43500204212614507, "grad_norm": 0.7994132652116095, "learning_rate": 6.281199128696269e-06, "loss": 0.1362, "step": 14911 }, { "epoch": 0.4350312153567886, "grad_norm": 0.7392170295309389, "learning_rate": 6.280742461868159e-06, "loss": 0.1265, "step": 14912 }, { "epoch": 0.4350603885874322, "grad_norm": 0.7661820989637711, "learning_rate": 6.280285783605885e-06, "loss": 0.1386, "step": 14913 }, { "epoch": 0.43508956181807573, "grad_norm": 0.815470895969181, "learning_rate": 6.279829093913525e-06, "loss": 0.1496, "step": 14914 }, { "epoch": 0.4351187350487193, "grad_norm": 0.9107718400772307, "learning_rate": 6.2793723927951575e-06, "loss": 0.1439, "step": 14915 }, { "epoch": 0.43514790827936284, "grad_norm": 0.9852545037025268, "learning_rate": 6.278915680254858e-06, "loss": 0.1523, "step": 14916 }, { "epoch": 0.4351770815100064, "grad_norm": 0.844753075866623, "learning_rate": 6.2784589562967045e-06, "loss": 0.1344, "step": 14917 }, { "epoch": 0.43520625474065, "grad_norm": 0.7164684216736642, "learning_rate": 6.278002220924776e-06, "loss": 0.1148, "step": 14918 }, { "epoch": 0.43523542797129355, "grad_norm": 0.8886839598601174, "learning_rate": 6.277545474143146e-06, "loss": 0.1262, "step": 14919 }, { "epoch": 0.4352646012019371, "grad_norm": 0.9947075280271497, "learning_rate": 6.277088715955898e-06, "loss": 0.1341, "step": 14920 }, { "epoch": 0.43529377443258066, "grad_norm": 1.2448171324132684, "learning_rate": 6.276631946367106e-06, "loss": 0.1653, "step": 14921 }, { "epoch": 0.4353229476632242, "grad_norm": 0.9888601040175079, "learning_rate": 6.276175165380847e-06, "loss": 0.1308, "step": 14922 }, { "epoch": 0.43535212089386777, "grad_norm": 0.9215285039816433, "learning_rate": 6.275718373001203e-06, "loss": 0.1321, "step": 14923 }, { "epoch": 0.4353812941245114, "grad_norm": 0.7205768417382196, "learning_rate": 6.2752615692322485e-06, "loss": 0.1367, "step": 14924 }, { "epoch": 0.43541046735515493, "grad_norm": 0.7417452238793568, "learning_rate": 6.274804754078063e-06, "loss": 0.1131, "step": 14925 }, { "epoch": 0.4354396405857985, "grad_norm": 1.022941663769502, "learning_rate": 6.2743479275427255e-06, "loss": 0.1343, "step": 14926 }, { "epoch": 0.43546881381644204, "grad_norm": 1.143967356093502, "learning_rate": 6.273891089630313e-06, "loss": 0.1701, "step": 14927 }, { "epoch": 0.4354979870470856, "grad_norm": 0.7368982907513386, "learning_rate": 6.273434240344906e-06, "loss": 0.1265, "step": 14928 }, { "epoch": 0.43552716027772914, "grad_norm": 0.9170885634221387, "learning_rate": 6.272977379690583e-06, "loss": 0.128, "step": 14929 }, { "epoch": 0.4355563335083727, "grad_norm": 1.0144873266327827, "learning_rate": 6.2725205076714215e-06, "loss": 0.1514, "step": 14930 }, { "epoch": 0.4355855067390163, "grad_norm": 0.8171141688562251, "learning_rate": 6.272063624291498e-06, "loss": 0.1571, "step": 14931 }, { "epoch": 0.43561467996965986, "grad_norm": 1.1620975943386103, "learning_rate": 6.271606729554897e-06, "loss": 0.1408, "step": 14932 }, { "epoch": 0.4356438532003034, "grad_norm": 1.1871283517654905, "learning_rate": 6.271149823465693e-06, "loss": 0.1349, "step": 14933 }, { "epoch": 0.43567302643094696, "grad_norm": 0.847428614997877, "learning_rate": 6.270692906027968e-06, "loss": 0.1352, "step": 14934 }, { "epoch": 0.4357021996615905, "grad_norm": 1.0326812778059362, "learning_rate": 6.2702359772458e-06, "loss": 0.1473, "step": 14935 }, { "epoch": 0.43573137289223407, "grad_norm": 0.9176456457317715, "learning_rate": 6.269779037123267e-06, "loss": 0.1451, "step": 14936 }, { "epoch": 0.4357605461228776, "grad_norm": 0.7759409629178708, "learning_rate": 6.269322085664452e-06, "loss": 0.1432, "step": 14937 }, { "epoch": 0.43578971935352123, "grad_norm": 1.2262564725803873, "learning_rate": 6.268865122873431e-06, "loss": 0.171, "step": 14938 }, { "epoch": 0.4358188925841648, "grad_norm": 0.829829756502998, "learning_rate": 6.268408148754285e-06, "loss": 0.1361, "step": 14939 }, { "epoch": 0.43584806581480834, "grad_norm": 0.8094543078533318, "learning_rate": 6.267951163311095e-06, "loss": 0.1466, "step": 14940 }, { "epoch": 0.4358772390454519, "grad_norm": 0.7789330195323289, "learning_rate": 6.267494166547938e-06, "loss": 0.1413, "step": 14941 }, { "epoch": 0.43590641227609545, "grad_norm": 0.8812221258860311, "learning_rate": 6.267037158468897e-06, "loss": 0.1695, "step": 14942 }, { "epoch": 0.435935585506739, "grad_norm": 0.9116048139269639, "learning_rate": 6.266580139078051e-06, "loss": 0.1426, "step": 14943 }, { "epoch": 0.43596475873738255, "grad_norm": 0.7102903023901466, "learning_rate": 6.266123108379478e-06, "loss": 0.1274, "step": 14944 }, { "epoch": 0.43599393196802616, "grad_norm": 0.8306328790161557, "learning_rate": 6.265666066377262e-06, "loss": 0.1406, "step": 14945 }, { "epoch": 0.4360231051986697, "grad_norm": 1.0817500740617516, "learning_rate": 6.265209013075481e-06, "loss": 0.1409, "step": 14946 }, { "epoch": 0.43605227842931327, "grad_norm": 0.8373492525705621, "learning_rate": 6.264751948478216e-06, "loss": 0.141, "step": 14947 }, { "epoch": 0.4360814516599568, "grad_norm": 1.006839374873911, "learning_rate": 6.264294872589547e-06, "loss": 0.1501, "step": 14948 }, { "epoch": 0.4361106248906004, "grad_norm": 1.2613639955868652, "learning_rate": 6.263837785413556e-06, "loss": 0.1353, "step": 14949 }, { "epoch": 0.4361397981212439, "grad_norm": 0.844220750536089, "learning_rate": 6.263380686954324e-06, "loss": 0.1376, "step": 14950 }, { "epoch": 0.43616897135188754, "grad_norm": 0.8256806551009317, "learning_rate": 6.2629235772159266e-06, "loss": 0.137, "step": 14951 }, { "epoch": 0.4361981445825311, "grad_norm": 0.8015607269478686, "learning_rate": 6.262466456202453e-06, "loss": 0.1512, "step": 14952 }, { "epoch": 0.43622731781317464, "grad_norm": 0.8865343343345692, "learning_rate": 6.262009323917979e-06, "loss": 0.1394, "step": 14953 }, { "epoch": 0.4362564910438182, "grad_norm": 0.7404050238691539, "learning_rate": 6.261552180366586e-06, "loss": 0.1637, "step": 14954 }, { "epoch": 0.43628566427446175, "grad_norm": 0.8766041674094689, "learning_rate": 6.261095025552359e-06, "loss": 0.1636, "step": 14955 }, { "epoch": 0.4363148375051053, "grad_norm": 0.9401005495181551, "learning_rate": 6.260637859479374e-06, "loss": 0.1458, "step": 14956 }, { "epoch": 0.43634401073574886, "grad_norm": 0.7294483064104669, "learning_rate": 6.260180682151716e-06, "loss": 0.1086, "step": 14957 }, { "epoch": 0.43637318396639246, "grad_norm": 0.8767594795626354, "learning_rate": 6.259723493573467e-06, "loss": 0.1393, "step": 14958 }, { "epoch": 0.436402357197036, "grad_norm": 0.8694135397666469, "learning_rate": 6.259266293748705e-06, "loss": 0.1594, "step": 14959 }, { "epoch": 0.43643153042767957, "grad_norm": 1.0462151480352384, "learning_rate": 6.258809082681515e-06, "loss": 0.1464, "step": 14960 }, { "epoch": 0.4364607036583231, "grad_norm": 0.9366276317687989, "learning_rate": 6.258351860375979e-06, "loss": 0.109, "step": 14961 }, { "epoch": 0.4364898768889667, "grad_norm": 0.9656065547459961, "learning_rate": 6.257894626836176e-06, "loss": 0.1393, "step": 14962 }, { "epoch": 0.43651905011961023, "grad_norm": 0.9458351499044808, "learning_rate": 6.257437382066191e-06, "loss": 0.1341, "step": 14963 }, { "epoch": 0.4365482233502538, "grad_norm": 0.7728687151152849, "learning_rate": 6.256980126070107e-06, "loss": 0.1462, "step": 14964 }, { "epoch": 0.4365773965808974, "grad_norm": 0.7792620701017975, "learning_rate": 6.256522858852003e-06, "loss": 0.1398, "step": 14965 }, { "epoch": 0.43660656981154095, "grad_norm": 0.795409314591477, "learning_rate": 6.256065580415962e-06, "loss": 0.164, "step": 14966 }, { "epoch": 0.4366357430421845, "grad_norm": 0.9598838014143705, "learning_rate": 6.2556082907660685e-06, "loss": 0.1268, "step": 14967 }, { "epoch": 0.43666491627282805, "grad_norm": 0.8031767049728477, "learning_rate": 6.255150989906405e-06, "loss": 0.1431, "step": 14968 }, { "epoch": 0.4366940895034716, "grad_norm": 1.1100118664682679, "learning_rate": 6.254693677841051e-06, "loss": 0.145, "step": 14969 }, { "epoch": 0.43672326273411516, "grad_norm": 0.7209447122471424, "learning_rate": 6.254236354574092e-06, "loss": 0.1401, "step": 14970 }, { "epoch": 0.4367524359647587, "grad_norm": 0.7933848661987428, "learning_rate": 6.25377902010961e-06, "loss": 0.1217, "step": 14971 }, { "epoch": 0.4367816091954023, "grad_norm": 0.8697372108140659, "learning_rate": 6.253321674451689e-06, "loss": 0.1393, "step": 14972 }, { "epoch": 0.4368107824260459, "grad_norm": 0.8834304271180028, "learning_rate": 6.252864317604411e-06, "loss": 0.1579, "step": 14973 }, { "epoch": 0.4368399556566894, "grad_norm": 0.8534089619224059, "learning_rate": 6.252406949571858e-06, "loss": 0.1436, "step": 14974 }, { "epoch": 0.436869128887333, "grad_norm": 0.8595474014569607, "learning_rate": 6.2519495703581165e-06, "loss": 0.1305, "step": 14975 }, { "epoch": 0.43689830211797653, "grad_norm": 1.116247587712783, "learning_rate": 6.2514921799672675e-06, "loss": 0.1445, "step": 14976 }, { "epoch": 0.4369274753486201, "grad_norm": 1.06846417250696, "learning_rate": 6.251034778403396e-06, "loss": 0.1363, "step": 14977 }, { "epoch": 0.43695664857926364, "grad_norm": 0.9021355009698474, "learning_rate": 6.250577365670584e-06, "loss": 0.1361, "step": 14978 }, { "epoch": 0.43698582180990725, "grad_norm": 0.878252703424149, "learning_rate": 6.250119941772915e-06, "loss": 0.1362, "step": 14979 }, { "epoch": 0.4370149950405508, "grad_norm": 0.961434453542702, "learning_rate": 6.2496625067144755e-06, "loss": 0.1505, "step": 14980 }, { "epoch": 0.43704416827119436, "grad_norm": 0.6862118257027884, "learning_rate": 6.249205060499345e-06, "loss": 0.1371, "step": 14981 }, { "epoch": 0.4370733415018379, "grad_norm": 0.9255086249462283, "learning_rate": 6.248747603131612e-06, "loss": 0.1137, "step": 14982 }, { "epoch": 0.43710251473248146, "grad_norm": 0.9150855053642007, "learning_rate": 6.2482901346153575e-06, "loss": 0.1457, "step": 14983 }, { "epoch": 0.437131687963125, "grad_norm": 0.8467982806994376, "learning_rate": 6.247832654954666e-06, "loss": 0.1751, "step": 14984 }, { "epoch": 0.4371608611937686, "grad_norm": 0.8952665039433834, "learning_rate": 6.247375164153624e-06, "loss": 0.1263, "step": 14985 }, { "epoch": 0.4371900344244122, "grad_norm": 0.698666395867167, "learning_rate": 6.246917662216314e-06, "loss": 0.1576, "step": 14986 }, { "epoch": 0.43721920765505573, "grad_norm": 0.9601414635502714, "learning_rate": 6.24646014914682e-06, "loss": 0.1541, "step": 14987 }, { "epoch": 0.4372483808856993, "grad_norm": 0.8327388951772458, "learning_rate": 6.246002624949228e-06, "loss": 0.1543, "step": 14988 }, { "epoch": 0.43727755411634284, "grad_norm": 0.7883870737055048, "learning_rate": 6.245545089627622e-06, "loss": 0.1337, "step": 14989 }, { "epoch": 0.4373067273469864, "grad_norm": 0.7564155832898626, "learning_rate": 6.2450875431860855e-06, "loss": 0.1703, "step": 14990 }, { "epoch": 0.43733590057762994, "grad_norm": 0.8815126459544893, "learning_rate": 6.244629985628706e-06, "loss": 0.1471, "step": 14991 }, { "epoch": 0.43736507380827355, "grad_norm": 0.8876949536195939, "learning_rate": 6.2441724169595665e-06, "loss": 0.1385, "step": 14992 }, { "epoch": 0.4373942470389171, "grad_norm": 0.7380826998864822, "learning_rate": 6.243714837182753e-06, "loss": 0.148, "step": 14993 }, { "epoch": 0.43742342026956066, "grad_norm": 0.8595716256583968, "learning_rate": 6.24325724630235e-06, "loss": 0.1449, "step": 14994 }, { "epoch": 0.4374525935002042, "grad_norm": 0.8102512183599935, "learning_rate": 6.242799644322445e-06, "loss": 0.1439, "step": 14995 }, { "epoch": 0.43748176673084777, "grad_norm": 0.7982252801663071, "learning_rate": 6.2423420312471185e-06, "loss": 0.1231, "step": 14996 }, { "epoch": 0.4375109399614913, "grad_norm": 0.7994109582589674, "learning_rate": 6.241884407080461e-06, "loss": 0.1628, "step": 14997 }, { "epoch": 0.43754011319213487, "grad_norm": 0.7772701160558678, "learning_rate": 6.241426771826555e-06, "loss": 0.1458, "step": 14998 }, { "epoch": 0.4375692864227785, "grad_norm": 0.8368242603943941, "learning_rate": 6.240969125489486e-06, "loss": 0.1534, "step": 14999 }, { "epoch": 0.43759845965342203, "grad_norm": 0.8074603876020473, "learning_rate": 6.240511468073343e-06, "loss": 0.1478, "step": 15000 }, { "epoch": 0.4376276328840656, "grad_norm": 0.7153335338167677, "learning_rate": 6.2400537995822085e-06, "loss": 0.1383, "step": 15001 }, { "epoch": 0.43765680611470914, "grad_norm": 0.7662948335841253, "learning_rate": 6.23959612002017e-06, "loss": 0.1434, "step": 15002 }, { "epoch": 0.4376859793453527, "grad_norm": 0.9111369640201166, "learning_rate": 6.239138429391314e-06, "loss": 0.1436, "step": 15003 }, { "epoch": 0.43771515257599625, "grad_norm": 0.8663095010290788, "learning_rate": 6.238680727699726e-06, "loss": 0.1317, "step": 15004 }, { "epoch": 0.4377443258066398, "grad_norm": 1.058155303477025, "learning_rate": 6.2382230149494906e-06, "loss": 0.1717, "step": 15005 }, { "epoch": 0.4377734990372834, "grad_norm": 0.8210054234280479, "learning_rate": 6.237765291144696e-06, "loss": 0.1501, "step": 15006 }, { "epoch": 0.43780267226792696, "grad_norm": 0.8199647565725456, "learning_rate": 6.237307556289429e-06, "loss": 0.1314, "step": 15007 }, { "epoch": 0.4378318454985705, "grad_norm": 1.223085026269827, "learning_rate": 6.236849810387776e-06, "loss": 0.1625, "step": 15008 }, { "epoch": 0.43786101872921407, "grad_norm": 1.1614711613469009, "learning_rate": 6.236392053443822e-06, "loss": 0.1234, "step": 15009 }, { "epoch": 0.4378901919598576, "grad_norm": 0.9369550405747571, "learning_rate": 6.235934285461656e-06, "loss": 0.1158, "step": 15010 }, { "epoch": 0.4379193651905012, "grad_norm": 0.8493194095409426, "learning_rate": 6.235476506445362e-06, "loss": 0.1246, "step": 15011 }, { "epoch": 0.4379485384211448, "grad_norm": 1.3030959463256946, "learning_rate": 6.2350187163990314e-06, "loss": 0.1556, "step": 15012 }, { "epoch": 0.43797771165178834, "grad_norm": 0.9238553779865656, "learning_rate": 6.234560915326747e-06, "loss": 0.1468, "step": 15013 }, { "epoch": 0.4380068848824319, "grad_norm": 0.9696903616097787, "learning_rate": 6.234103103232597e-06, "loss": 0.1363, "step": 15014 }, { "epoch": 0.43803605811307544, "grad_norm": 1.2887541279913717, "learning_rate": 6.233645280120671e-06, "loss": 0.1583, "step": 15015 }, { "epoch": 0.438065231343719, "grad_norm": 1.113477237477694, "learning_rate": 6.233187445995053e-06, "loss": 0.1368, "step": 15016 }, { "epoch": 0.43809440457436255, "grad_norm": 0.9901409573743811, "learning_rate": 6.232729600859832e-06, "loss": 0.1444, "step": 15017 }, { "epoch": 0.4381235778050061, "grad_norm": 0.9425912909861479, "learning_rate": 6.232271744719094e-06, "loss": 0.1431, "step": 15018 }, { "epoch": 0.4381527510356497, "grad_norm": 0.8408452784656811, "learning_rate": 6.23181387757693e-06, "loss": 0.1307, "step": 15019 }, { "epoch": 0.43818192426629327, "grad_norm": 0.8291973248767233, "learning_rate": 6.231355999437425e-06, "loss": 0.1542, "step": 15020 }, { "epoch": 0.4382110974969368, "grad_norm": 0.887432534102003, "learning_rate": 6.230898110304668e-06, "loss": 0.1206, "step": 15021 }, { "epoch": 0.43824027072758037, "grad_norm": 0.8618449996123551, "learning_rate": 6.230440210182745e-06, "loss": 0.1462, "step": 15022 }, { "epoch": 0.4382694439582239, "grad_norm": 0.8350368418007432, "learning_rate": 6.2299822990757475e-06, "loss": 0.1338, "step": 15023 }, { "epoch": 0.4382986171888675, "grad_norm": 0.7501682509218951, "learning_rate": 6.22952437698776e-06, "loss": 0.1292, "step": 15024 }, { "epoch": 0.43832779041951103, "grad_norm": 0.7891398829649721, "learning_rate": 6.229066443922874e-06, "loss": 0.1189, "step": 15025 }, { "epoch": 0.43835696365015464, "grad_norm": 0.8053139780836996, "learning_rate": 6.228608499885174e-06, "loss": 0.148, "step": 15026 }, { "epoch": 0.4383861368807982, "grad_norm": 0.7746703807058757, "learning_rate": 6.228150544878754e-06, "loss": 0.1347, "step": 15027 }, { "epoch": 0.43841531011144175, "grad_norm": 0.7696282133756824, "learning_rate": 6.227692578907697e-06, "loss": 0.1438, "step": 15028 }, { "epoch": 0.4384444833420853, "grad_norm": 0.7889312279455797, "learning_rate": 6.2272346019760936e-06, "loss": 0.149, "step": 15029 }, { "epoch": 0.43847365657272885, "grad_norm": 0.8572841859126313, "learning_rate": 6.2267766140880325e-06, "loss": 0.1316, "step": 15030 }, { "epoch": 0.4385028298033724, "grad_norm": 0.9308072763704206, "learning_rate": 6.226318615247604e-06, "loss": 0.1376, "step": 15031 }, { "epoch": 0.43853200303401596, "grad_norm": 0.9821851734638208, "learning_rate": 6.225860605458895e-06, "loss": 0.1418, "step": 15032 }, { "epoch": 0.43856117626465957, "grad_norm": 0.6586206659882248, "learning_rate": 6.225402584725993e-06, "loss": 0.1475, "step": 15033 }, { "epoch": 0.4385903494953031, "grad_norm": 0.8719938041849924, "learning_rate": 6.224944553052992e-06, "loss": 0.1403, "step": 15034 }, { "epoch": 0.4386195227259467, "grad_norm": 0.7686687697514415, "learning_rate": 6.224486510443978e-06, "loss": 0.1354, "step": 15035 }, { "epoch": 0.43864869595659023, "grad_norm": 0.7872009238631145, "learning_rate": 6.2240284569030395e-06, "loss": 0.1254, "step": 15036 }, { "epoch": 0.4386778691872338, "grad_norm": 0.6871719476135176, "learning_rate": 6.223570392434268e-06, "loss": 0.1316, "step": 15037 }, { "epoch": 0.43870704241787734, "grad_norm": 1.059686281698016, "learning_rate": 6.223112317041751e-06, "loss": 0.1475, "step": 15038 }, { "epoch": 0.43873621564852094, "grad_norm": 0.7529910432543049, "learning_rate": 6.222654230729582e-06, "loss": 0.1137, "step": 15039 }, { "epoch": 0.4387653888791645, "grad_norm": 1.4046174304789645, "learning_rate": 6.2221961335018464e-06, "loss": 0.1392, "step": 15040 }, { "epoch": 0.43879456210980805, "grad_norm": 0.7354673971840441, "learning_rate": 6.2217380253626346e-06, "loss": 0.1434, "step": 15041 }, { "epoch": 0.4388237353404516, "grad_norm": 0.7909726582936715, "learning_rate": 6.221279906316039e-06, "loss": 0.129, "step": 15042 }, { "epoch": 0.43885290857109516, "grad_norm": 0.7945483450078731, "learning_rate": 6.220821776366146e-06, "loss": 0.1347, "step": 15043 }, { "epoch": 0.4388820818017387, "grad_norm": 0.7010766125587274, "learning_rate": 6.2203636355170485e-06, "loss": 0.1185, "step": 15044 }, { "epoch": 0.43891125503238226, "grad_norm": 0.77055252530375, "learning_rate": 6.219905483772837e-06, "loss": 0.1544, "step": 15045 }, { "epoch": 0.4389404282630259, "grad_norm": 0.6051215517554468, "learning_rate": 6.2194473211376e-06, "loss": 0.1216, "step": 15046 }, { "epoch": 0.4389696014936694, "grad_norm": 0.7614440524907657, "learning_rate": 6.218989147615426e-06, "loss": 0.1486, "step": 15047 }, { "epoch": 0.438998774724313, "grad_norm": 0.7310503216162159, "learning_rate": 6.218530963210411e-06, "loss": 0.1438, "step": 15048 }, { "epoch": 0.43902794795495653, "grad_norm": 0.8824424699031387, "learning_rate": 6.21807276792664e-06, "loss": 0.15, "step": 15049 }, { "epoch": 0.4390571211856001, "grad_norm": 1.1171730097733568, "learning_rate": 6.217614561768208e-06, "loss": 0.1269, "step": 15050 }, { "epoch": 0.43908629441624364, "grad_norm": 0.6888003281689934, "learning_rate": 6.217156344739203e-06, "loss": 0.1411, "step": 15051 }, { "epoch": 0.4391154676468872, "grad_norm": 0.6657787649455748, "learning_rate": 6.2166981168437165e-06, "loss": 0.1229, "step": 15052 }, { "epoch": 0.4391446408775308, "grad_norm": 0.7578786141444442, "learning_rate": 6.21623987808584e-06, "loss": 0.1442, "step": 15053 }, { "epoch": 0.43917381410817435, "grad_norm": 0.8051741284438174, "learning_rate": 6.215781628469663e-06, "loss": 0.1484, "step": 15054 }, { "epoch": 0.4392029873388179, "grad_norm": 0.8568323410142273, "learning_rate": 6.2153233679992805e-06, "loss": 0.1431, "step": 15055 }, { "epoch": 0.43923216056946146, "grad_norm": 0.7701121083595855, "learning_rate": 6.214865096678779e-06, "loss": 0.1355, "step": 15056 }, { "epoch": 0.439261333800105, "grad_norm": 0.9624893288209205, "learning_rate": 6.214406814512254e-06, "loss": 0.1419, "step": 15057 }, { "epoch": 0.43929050703074857, "grad_norm": 0.7941603210473188, "learning_rate": 6.213948521503793e-06, "loss": 0.1653, "step": 15058 }, { "epoch": 0.4393196802613921, "grad_norm": 0.7443878015701055, "learning_rate": 6.2134902176574884e-06, "loss": 0.1404, "step": 15059 }, { "epoch": 0.43934885349203573, "grad_norm": 0.8533354162846383, "learning_rate": 6.213031902977436e-06, "loss": 0.1166, "step": 15060 }, { "epoch": 0.4393780267226793, "grad_norm": 0.9405274831200605, "learning_rate": 6.212573577467722e-06, "loss": 0.1437, "step": 15061 }, { "epoch": 0.43940719995332284, "grad_norm": 0.7169491241073812, "learning_rate": 6.212115241132441e-06, "loss": 0.1453, "step": 15062 }, { "epoch": 0.4394363731839664, "grad_norm": 1.0823337820424184, "learning_rate": 6.211656893975685e-06, "loss": 0.1327, "step": 15063 }, { "epoch": 0.43946554641460994, "grad_norm": 0.8252949874601155, "learning_rate": 6.211198536001545e-06, "loss": 0.1303, "step": 15064 }, { "epoch": 0.4394947196452535, "grad_norm": 1.031682793059374, "learning_rate": 6.210740167214114e-06, "loss": 0.1398, "step": 15065 }, { "epoch": 0.4395238928758971, "grad_norm": 0.7706924572131718, "learning_rate": 6.210281787617483e-06, "loss": 0.15, "step": 15066 }, { "epoch": 0.43955306610654066, "grad_norm": 0.832935142749437, "learning_rate": 6.209823397215746e-06, "loss": 0.1258, "step": 15067 }, { "epoch": 0.4395822393371842, "grad_norm": 0.9778245354120296, "learning_rate": 6.209364996012994e-06, "loss": 0.1274, "step": 15068 }, { "epoch": 0.43961141256782776, "grad_norm": 0.89526369817899, "learning_rate": 6.20890658401332e-06, "loss": 0.1301, "step": 15069 }, { "epoch": 0.4396405857984713, "grad_norm": 0.6959084421482379, "learning_rate": 6.208448161220818e-06, "loss": 0.1338, "step": 15070 }, { "epoch": 0.43966975902911487, "grad_norm": 1.5306102217470179, "learning_rate": 6.207989727639577e-06, "loss": 0.1257, "step": 15071 }, { "epoch": 0.4396989322597584, "grad_norm": 0.9210846705347121, "learning_rate": 6.2075312832736945e-06, "loss": 0.157, "step": 15072 }, { "epoch": 0.43972810549040203, "grad_norm": 0.8404251767950718, "learning_rate": 6.2070728281272594e-06, "loss": 0.1411, "step": 15073 }, { "epoch": 0.4397572787210456, "grad_norm": 0.763211868240203, "learning_rate": 6.206614362204366e-06, "loss": 0.1296, "step": 15074 }, { "epoch": 0.43978645195168914, "grad_norm": 0.883705407748208, "learning_rate": 6.206155885509108e-06, "loss": 0.1219, "step": 15075 }, { "epoch": 0.4398156251823327, "grad_norm": 0.9266157241004488, "learning_rate": 6.2056973980455795e-06, "loss": 0.1237, "step": 15076 }, { "epoch": 0.43984479841297625, "grad_norm": 0.9700898960791888, "learning_rate": 6.2052388998178705e-06, "loss": 0.1742, "step": 15077 }, { "epoch": 0.4398739716436198, "grad_norm": 0.9618272210553835, "learning_rate": 6.2047803908300776e-06, "loss": 0.1336, "step": 15078 }, { "epoch": 0.43990314487426335, "grad_norm": 0.863116055024015, "learning_rate": 6.204321871086292e-06, "loss": 0.133, "step": 15079 }, { "epoch": 0.43993231810490696, "grad_norm": 1.0521571374995566, "learning_rate": 6.203863340590609e-06, "loss": 0.1354, "step": 15080 }, { "epoch": 0.4399614913355505, "grad_norm": 0.9063771646807343, "learning_rate": 6.203404799347122e-06, "loss": 0.1264, "step": 15081 }, { "epoch": 0.43999066456619407, "grad_norm": 0.9149711879709472, "learning_rate": 6.202946247359922e-06, "loss": 0.1477, "step": 15082 }, { "epoch": 0.4400198377968376, "grad_norm": 0.979774442979104, "learning_rate": 6.202487684633107e-06, "loss": 0.141, "step": 15083 }, { "epoch": 0.4400490110274812, "grad_norm": 0.9123487969392395, "learning_rate": 6.202029111170769e-06, "loss": 0.1393, "step": 15084 }, { "epoch": 0.4400781842581247, "grad_norm": 0.8849756322792716, "learning_rate": 6.201570526977001e-06, "loss": 0.1287, "step": 15085 }, { "epoch": 0.4401073574887683, "grad_norm": 0.8109589019185897, "learning_rate": 6.2011119320558986e-06, "loss": 0.1462, "step": 15086 }, { "epoch": 0.4401365307194119, "grad_norm": 0.8302390639730577, "learning_rate": 6.2006533264115564e-06, "loss": 0.1255, "step": 15087 }, { "epoch": 0.44016570395005544, "grad_norm": 0.7300103239122315, "learning_rate": 6.2001947100480675e-06, "loss": 0.1481, "step": 15088 }, { "epoch": 0.440194877180699, "grad_norm": 0.9699959274058704, "learning_rate": 6.199736082969525e-06, "loss": 0.1314, "step": 15089 }, { "epoch": 0.44022405041134255, "grad_norm": 0.7641958932511366, "learning_rate": 6.199277445180028e-06, "loss": 0.1622, "step": 15090 }, { "epoch": 0.4402532236419861, "grad_norm": 0.7577420220487728, "learning_rate": 6.198818796683666e-06, "loss": 0.1461, "step": 15091 }, { "epoch": 0.44028239687262966, "grad_norm": 0.7869450527540598, "learning_rate": 6.198360137484537e-06, "loss": 0.1396, "step": 15092 }, { "epoch": 0.4403115701032732, "grad_norm": 0.7217383889627569, "learning_rate": 6.1979014675867345e-06, "loss": 0.1396, "step": 15093 }, { "epoch": 0.4403407433339168, "grad_norm": 0.7495436122585886, "learning_rate": 6.197442786994354e-06, "loss": 0.1183, "step": 15094 }, { "epoch": 0.44036991656456037, "grad_norm": 0.8044067727692116, "learning_rate": 6.1969840957114904e-06, "loss": 0.157, "step": 15095 }, { "epoch": 0.4403990897952039, "grad_norm": 0.7255909826283716, "learning_rate": 6.196525393742238e-06, "loss": 0.1701, "step": 15096 }, { "epoch": 0.4404282630258475, "grad_norm": 0.8239930130991369, "learning_rate": 6.196066681090692e-06, "loss": 0.1358, "step": 15097 }, { "epoch": 0.44045743625649103, "grad_norm": 0.8686482362650536, "learning_rate": 6.1956079577609485e-06, "loss": 0.143, "step": 15098 }, { "epoch": 0.4404866094871346, "grad_norm": 0.8572453656024628, "learning_rate": 6.195149223757103e-06, "loss": 0.1392, "step": 15099 }, { "epoch": 0.4405157827177782, "grad_norm": 0.7974897270986314, "learning_rate": 6.194690479083251e-06, "loss": 0.1489, "step": 15100 }, { "epoch": 0.44054495594842175, "grad_norm": 1.0348902345146171, "learning_rate": 6.194231723743486e-06, "loss": 0.1479, "step": 15101 }, { "epoch": 0.4405741291790653, "grad_norm": 0.9820780283957675, "learning_rate": 6.193772957741907e-06, "loss": 0.1756, "step": 15102 }, { "epoch": 0.44060330240970885, "grad_norm": 0.8434465511246421, "learning_rate": 6.193314181082607e-06, "loss": 0.14, "step": 15103 }, { "epoch": 0.4406324756403524, "grad_norm": 0.6533601682398729, "learning_rate": 6.192855393769683e-06, "loss": 0.1337, "step": 15104 }, { "epoch": 0.44066164887099596, "grad_norm": 0.8235956027060124, "learning_rate": 6.192396595807231e-06, "loss": 0.1576, "step": 15105 }, { "epoch": 0.4406908221016395, "grad_norm": 1.0340137000191478, "learning_rate": 6.191937787199347e-06, "loss": 0.1479, "step": 15106 }, { "epoch": 0.4407199953322831, "grad_norm": 0.9728982450315318, "learning_rate": 6.1914789679501266e-06, "loss": 0.1302, "step": 15107 }, { "epoch": 0.4407491685629267, "grad_norm": 0.8074227064702215, "learning_rate": 6.191020138063666e-06, "loss": 0.1589, "step": 15108 }, { "epoch": 0.4407783417935702, "grad_norm": 0.9898810044190635, "learning_rate": 6.190561297544063e-06, "loss": 0.1296, "step": 15109 }, { "epoch": 0.4408075150242138, "grad_norm": 0.7838370141602546, "learning_rate": 6.190102446395412e-06, "loss": 0.1513, "step": 15110 }, { "epoch": 0.44083668825485733, "grad_norm": 0.8351440670730054, "learning_rate": 6.189643584621811e-06, "loss": 0.1408, "step": 15111 }, { "epoch": 0.4408658614855009, "grad_norm": 0.9429585268779143, "learning_rate": 6.189184712227356e-06, "loss": 0.1598, "step": 15112 }, { "epoch": 0.44089503471614444, "grad_norm": 0.8872677081948187, "learning_rate": 6.1887258292161435e-06, "loss": 0.1627, "step": 15113 }, { "epoch": 0.44092420794678805, "grad_norm": 0.7317606862709696, "learning_rate": 6.1882669355922706e-06, "loss": 0.1216, "step": 15114 }, { "epoch": 0.4409533811774316, "grad_norm": 0.8599470645369262, "learning_rate": 6.187808031359835e-06, "loss": 0.1377, "step": 15115 }, { "epoch": 0.44098255440807516, "grad_norm": 0.9091108097425813, "learning_rate": 6.187349116522932e-06, "loss": 0.1308, "step": 15116 }, { "epoch": 0.4410117276387187, "grad_norm": 0.6868528400456547, "learning_rate": 6.186890191085659e-06, "loss": 0.1224, "step": 15117 }, { "epoch": 0.44104090086936226, "grad_norm": 0.7895111323825614, "learning_rate": 6.1864312550521156e-06, "loss": 0.1134, "step": 15118 }, { "epoch": 0.4410700741000058, "grad_norm": 0.9170317658962038, "learning_rate": 6.185972308426394e-06, "loss": 0.1425, "step": 15119 }, { "epoch": 0.44109924733064937, "grad_norm": 0.8898762215953443, "learning_rate": 6.185513351212599e-06, "loss": 0.1339, "step": 15120 }, { "epoch": 0.441128420561293, "grad_norm": 0.7120990645692094, "learning_rate": 6.185054383414821e-06, "loss": 0.128, "step": 15121 }, { "epoch": 0.44115759379193653, "grad_norm": 0.9098797685642345, "learning_rate": 6.18459540503716e-06, "loss": 0.1396, "step": 15122 }, { "epoch": 0.4411867670225801, "grad_norm": 0.9099002629450361, "learning_rate": 6.184136416083716e-06, "loss": 0.1754, "step": 15123 }, { "epoch": 0.44121594025322364, "grad_norm": 0.8491899787753415, "learning_rate": 6.1836774165585835e-06, "loss": 0.1255, "step": 15124 }, { "epoch": 0.4412451134838672, "grad_norm": 1.0133237976787148, "learning_rate": 6.183218406465861e-06, "loss": 0.1295, "step": 15125 }, { "epoch": 0.44127428671451074, "grad_norm": 0.937482166158476, "learning_rate": 6.182759385809648e-06, "loss": 0.1598, "step": 15126 }, { "epoch": 0.44130345994515435, "grad_norm": 0.8495937070330164, "learning_rate": 6.182300354594041e-06, "loss": 0.1587, "step": 15127 }, { "epoch": 0.4413326331757979, "grad_norm": 0.9365884837859367, "learning_rate": 6.181841312823139e-06, "loss": 0.1495, "step": 15128 }, { "epoch": 0.44136180640644146, "grad_norm": 0.828917931816457, "learning_rate": 6.18138226050104e-06, "loss": 0.1354, "step": 15129 }, { "epoch": 0.441390979637085, "grad_norm": 1.2121680427116477, "learning_rate": 6.1809231976318414e-06, "loss": 0.1461, "step": 15130 }, { "epoch": 0.44142015286772857, "grad_norm": 0.8267442464136356, "learning_rate": 6.1804641242196435e-06, "loss": 0.1297, "step": 15131 }, { "epoch": 0.4414493260983721, "grad_norm": 0.8669047273309362, "learning_rate": 6.180005040268544e-06, "loss": 0.1639, "step": 15132 }, { "epoch": 0.4414784993290157, "grad_norm": 0.9900346816148035, "learning_rate": 6.179545945782639e-06, "loss": 0.1352, "step": 15133 }, { "epoch": 0.4415076725596593, "grad_norm": 0.8302777317742639, "learning_rate": 6.179086840766031e-06, "loss": 0.1545, "step": 15134 }, { "epoch": 0.44153684579030283, "grad_norm": 0.8540477899928897, "learning_rate": 6.178627725222819e-06, "loss": 0.1528, "step": 15135 }, { "epoch": 0.4415660190209464, "grad_norm": 0.7911456172678053, "learning_rate": 6.178168599157096e-06, "loss": 0.1047, "step": 15136 }, { "epoch": 0.44159519225158994, "grad_norm": 0.9700265681787416, "learning_rate": 6.177709462572969e-06, "loss": 0.1471, "step": 15137 }, { "epoch": 0.4416243654822335, "grad_norm": 0.7729885089326664, "learning_rate": 6.17725031547453e-06, "loss": 0.157, "step": 15138 }, { "epoch": 0.44165353871287705, "grad_norm": 1.0104582272958682, "learning_rate": 6.176791157865881e-06, "loss": 0.1337, "step": 15139 }, { "epoch": 0.4416827119435206, "grad_norm": 0.775961171060146, "learning_rate": 6.176331989751125e-06, "loss": 0.1253, "step": 15140 }, { "epoch": 0.4417118851741642, "grad_norm": 1.0101315435786071, "learning_rate": 6.175872811134355e-06, "loss": 0.1335, "step": 15141 }, { "epoch": 0.44174105840480776, "grad_norm": 0.756935795113428, "learning_rate": 6.175413622019674e-06, "loss": 0.1401, "step": 15142 }, { "epoch": 0.4417702316354513, "grad_norm": 0.8103766556796341, "learning_rate": 6.1749544224111805e-06, "loss": 0.1261, "step": 15143 }, { "epoch": 0.44179940486609487, "grad_norm": 0.7480765217196009, "learning_rate": 6.174495212312974e-06, "loss": 0.1561, "step": 15144 }, { "epoch": 0.4418285780967384, "grad_norm": 0.7150165037411563, "learning_rate": 6.174035991729155e-06, "loss": 0.1752, "step": 15145 }, { "epoch": 0.441857751327382, "grad_norm": 0.9834870920906477, "learning_rate": 6.173576760663823e-06, "loss": 0.15, "step": 15146 }, { "epoch": 0.44188692455802553, "grad_norm": 0.9306883206375008, "learning_rate": 6.173117519121079e-06, "loss": 0.1472, "step": 15147 }, { "epoch": 0.44191609778866914, "grad_norm": 0.6708625275650458, "learning_rate": 6.172658267105019e-06, "loss": 0.1182, "step": 15148 }, { "epoch": 0.4419452710193127, "grad_norm": 0.6782386383396968, "learning_rate": 6.172199004619748e-06, "loss": 0.1478, "step": 15149 }, { "epoch": 0.44197444424995624, "grad_norm": 0.7840701456064665, "learning_rate": 6.171739731669365e-06, "loss": 0.1354, "step": 15150 }, { "epoch": 0.4420036174805998, "grad_norm": 0.8921163707375518, "learning_rate": 6.171280448257967e-06, "loss": 0.1115, "step": 15151 }, { "epoch": 0.44203279071124335, "grad_norm": 0.7350397465064393, "learning_rate": 6.170821154389659e-06, "loss": 0.1397, "step": 15152 }, { "epoch": 0.4420619639418869, "grad_norm": 0.8706135341382112, "learning_rate": 6.170361850068538e-06, "loss": 0.1366, "step": 15153 }, { "epoch": 0.4420911371725305, "grad_norm": 0.8786752834643791, "learning_rate": 6.169902535298704e-06, "loss": 0.1148, "step": 15154 }, { "epoch": 0.44212031040317407, "grad_norm": 0.9376217221559763, "learning_rate": 6.169443210084262e-06, "loss": 0.1354, "step": 15155 }, { "epoch": 0.4421494836338176, "grad_norm": 0.7440517010432058, "learning_rate": 6.1689838744293105e-06, "loss": 0.1375, "step": 15156 }, { "epoch": 0.4421786568644612, "grad_norm": 0.7252953866774031, "learning_rate": 6.168524528337949e-06, "loss": 0.1439, "step": 15157 }, { "epoch": 0.4422078300951047, "grad_norm": 0.8294662727384108, "learning_rate": 6.168065171814279e-06, "loss": 0.1416, "step": 15158 }, { "epoch": 0.4422370033257483, "grad_norm": 1.006490711539657, "learning_rate": 6.1676058048624035e-06, "loss": 0.1264, "step": 15159 }, { "epoch": 0.44226617655639183, "grad_norm": 0.8004298319702237, "learning_rate": 6.167146427486421e-06, "loss": 0.1325, "step": 15160 }, { "epoch": 0.44229534978703544, "grad_norm": 0.6268073336564143, "learning_rate": 6.166687039690433e-06, "loss": 0.1251, "step": 15161 }, { "epoch": 0.442324523017679, "grad_norm": 0.8880811293595925, "learning_rate": 6.166227641478544e-06, "loss": 0.1372, "step": 15162 }, { "epoch": 0.44235369624832255, "grad_norm": 0.9086839478739694, "learning_rate": 6.1657682328548505e-06, "loss": 0.1435, "step": 15163 }, { "epoch": 0.4423828694789661, "grad_norm": 0.873657569457763, "learning_rate": 6.165308813823457e-06, "loss": 0.1293, "step": 15164 }, { "epoch": 0.44241204270960965, "grad_norm": 0.833198728873711, "learning_rate": 6.164849384388467e-06, "loss": 0.141, "step": 15165 }, { "epoch": 0.4424412159402532, "grad_norm": 0.7669766276065918, "learning_rate": 6.164389944553977e-06, "loss": 0.1584, "step": 15166 }, { "epoch": 0.44247038917089676, "grad_norm": 1.2068899831680615, "learning_rate": 6.163930494324093e-06, "loss": 0.1478, "step": 15167 }, { "epoch": 0.44249956240154037, "grad_norm": 0.7778010498215501, "learning_rate": 6.163471033702914e-06, "loss": 0.1153, "step": 15168 }, { "epoch": 0.4425287356321839, "grad_norm": 0.770797837270307, "learning_rate": 6.1630115626945445e-06, "loss": 0.1552, "step": 15169 }, { "epoch": 0.4425579088628275, "grad_norm": 1.055067195964919, "learning_rate": 6.1625520813030855e-06, "loss": 0.1304, "step": 15170 }, { "epoch": 0.44258708209347103, "grad_norm": 0.790305789887117, "learning_rate": 6.162092589532639e-06, "loss": 0.1389, "step": 15171 }, { "epoch": 0.4426162553241146, "grad_norm": 0.7926830874783952, "learning_rate": 6.1616330873873065e-06, "loss": 0.1296, "step": 15172 }, { "epoch": 0.44264542855475814, "grad_norm": 0.8228868513259496, "learning_rate": 6.161173574871192e-06, "loss": 0.1313, "step": 15173 }, { "epoch": 0.4426746017854017, "grad_norm": 0.7739986899820893, "learning_rate": 6.160714051988396e-06, "loss": 0.1407, "step": 15174 }, { "epoch": 0.4427037750160453, "grad_norm": 0.7870682635859886, "learning_rate": 6.160254518743023e-06, "loss": 0.1022, "step": 15175 }, { "epoch": 0.44273294824668885, "grad_norm": 0.8602714911088228, "learning_rate": 6.159794975139174e-06, "loss": 0.1296, "step": 15176 }, { "epoch": 0.4427621214773324, "grad_norm": 0.8641677714563666, "learning_rate": 6.159335421180954e-06, "loss": 0.1355, "step": 15177 }, { "epoch": 0.44279129470797596, "grad_norm": 0.8789323711322151, "learning_rate": 6.158875856872462e-06, "loss": 0.1198, "step": 15178 }, { "epoch": 0.4428204679386195, "grad_norm": 0.948326178954136, "learning_rate": 6.158416282217803e-06, "loss": 0.1767, "step": 15179 }, { "epoch": 0.44284964116926306, "grad_norm": 1.11965728484572, "learning_rate": 6.157956697221082e-06, "loss": 0.1376, "step": 15180 }, { "epoch": 0.4428788143999067, "grad_norm": 0.8302308190233277, "learning_rate": 6.157497101886397e-06, "loss": 0.1307, "step": 15181 }, { "epoch": 0.4429079876305502, "grad_norm": 1.045664337871547, "learning_rate": 6.157037496217857e-06, "loss": 0.1194, "step": 15182 }, { "epoch": 0.4429371608611938, "grad_norm": 0.7982028729623004, "learning_rate": 6.156577880219561e-06, "loss": 0.1474, "step": 15183 }, { "epoch": 0.44296633409183733, "grad_norm": 0.9772078210105002, "learning_rate": 6.156118253895613e-06, "loss": 0.1386, "step": 15184 }, { "epoch": 0.4429955073224809, "grad_norm": 0.8324946152115834, "learning_rate": 6.15565861725012e-06, "loss": 0.1312, "step": 15185 }, { "epoch": 0.44302468055312444, "grad_norm": 0.8117971938902022, "learning_rate": 6.155198970287181e-06, "loss": 0.1317, "step": 15186 }, { "epoch": 0.443053853783768, "grad_norm": 0.9119895557336309, "learning_rate": 6.154739313010901e-06, "loss": 0.1308, "step": 15187 }, { "epoch": 0.4430830270144116, "grad_norm": 0.8644022003527182, "learning_rate": 6.154279645425385e-06, "loss": 0.1339, "step": 15188 }, { "epoch": 0.44311220024505515, "grad_norm": 0.6803353772697368, "learning_rate": 6.153819967534734e-06, "loss": 0.1451, "step": 15189 }, { "epoch": 0.4431413734756987, "grad_norm": 0.879070882971226, "learning_rate": 6.153360279343056e-06, "loss": 0.1267, "step": 15190 }, { "epoch": 0.44317054670634226, "grad_norm": 0.766632869208845, "learning_rate": 6.152900580854452e-06, "loss": 0.1361, "step": 15191 }, { "epoch": 0.4431997199369858, "grad_norm": 0.9171909654778313, "learning_rate": 6.1524408720730276e-06, "loss": 0.1592, "step": 15192 }, { "epoch": 0.44322889316762937, "grad_norm": 1.0116900483386435, "learning_rate": 6.1519811530028836e-06, "loss": 0.1347, "step": 15193 }, { "epoch": 0.4432580663982729, "grad_norm": 0.9236343921387395, "learning_rate": 6.151521423648129e-06, "loss": 0.1317, "step": 15194 }, { "epoch": 0.44328723962891653, "grad_norm": 0.8200229888697481, "learning_rate": 6.151061684012867e-06, "loss": 0.1476, "step": 15195 }, { "epoch": 0.4433164128595601, "grad_norm": 1.0263558504613906, "learning_rate": 6.150601934101198e-06, "loss": 0.1367, "step": 15196 }, { "epoch": 0.44334558609020364, "grad_norm": 1.0179915279890752, "learning_rate": 6.150142173917233e-06, "loss": 0.1562, "step": 15197 }, { "epoch": 0.4433747593208472, "grad_norm": 0.8193260872849141, "learning_rate": 6.1496824034650715e-06, "loss": 0.1457, "step": 15198 }, { "epoch": 0.44340393255149074, "grad_norm": 0.8125875373992769, "learning_rate": 6.149222622748818e-06, "loss": 0.1672, "step": 15199 }, { "epoch": 0.4434331057821343, "grad_norm": 0.9401759401314863, "learning_rate": 6.148762831772582e-06, "loss": 0.1409, "step": 15200 }, { "epoch": 0.44346227901277785, "grad_norm": 1.061541647071786, "learning_rate": 6.148303030540466e-06, "loss": 0.1317, "step": 15201 }, { "epoch": 0.44349145224342146, "grad_norm": 0.9208910349135103, "learning_rate": 6.1478432190565725e-06, "loss": 0.1472, "step": 15202 }, { "epoch": 0.443520625474065, "grad_norm": 0.7130989810499107, "learning_rate": 6.14738339732501e-06, "loss": 0.1403, "step": 15203 }, { "epoch": 0.44354979870470856, "grad_norm": 1.1571797061398832, "learning_rate": 6.146923565349882e-06, "loss": 0.163, "step": 15204 }, { "epoch": 0.4435789719353521, "grad_norm": 0.8491187005356661, "learning_rate": 6.146463723135295e-06, "loss": 0.1467, "step": 15205 }, { "epoch": 0.44360814516599567, "grad_norm": 0.7907530667174092, "learning_rate": 6.146003870685353e-06, "loss": 0.1215, "step": 15206 }, { "epoch": 0.4436373183966392, "grad_norm": 0.8576470547447197, "learning_rate": 6.145544008004163e-06, "loss": 0.1469, "step": 15207 }, { "epoch": 0.44366649162728283, "grad_norm": 0.6210393668457042, "learning_rate": 6.145084135095827e-06, "loss": 0.13, "step": 15208 }, { "epoch": 0.4436956648579264, "grad_norm": 0.9519110825112453, "learning_rate": 6.144624251964455e-06, "loss": 0.1293, "step": 15209 }, { "epoch": 0.44372483808856994, "grad_norm": 0.8792011471834411, "learning_rate": 6.144164358614152e-06, "loss": 0.1432, "step": 15210 }, { "epoch": 0.4437540113192135, "grad_norm": 0.7657075243259581, "learning_rate": 6.14370445504902e-06, "loss": 0.1162, "step": 15211 }, { "epoch": 0.44378318454985705, "grad_norm": 0.8340099443319311, "learning_rate": 6.14324454127317e-06, "loss": 0.144, "step": 15212 }, { "epoch": 0.4438123577805006, "grad_norm": 0.8752137407083753, "learning_rate": 6.1427846172907045e-06, "loss": 0.1488, "step": 15213 }, { "epoch": 0.44384153101114415, "grad_norm": 0.9318068773304626, "learning_rate": 6.14232468310573e-06, "loss": 0.1284, "step": 15214 }, { "epoch": 0.44387070424178776, "grad_norm": 0.8736516655055431, "learning_rate": 6.141864738722356e-06, "loss": 0.1607, "step": 15215 }, { "epoch": 0.4438998774724313, "grad_norm": 0.8076426652809794, "learning_rate": 6.141404784144685e-06, "loss": 0.1443, "step": 15216 }, { "epoch": 0.44392905070307487, "grad_norm": 0.9168196815559632, "learning_rate": 6.140944819376824e-06, "loss": 0.1482, "step": 15217 }, { "epoch": 0.4439582239337184, "grad_norm": 0.7397749370047907, "learning_rate": 6.140484844422879e-06, "loss": 0.1244, "step": 15218 }, { "epoch": 0.443987397164362, "grad_norm": 0.7476313378941652, "learning_rate": 6.14002485928696e-06, "loss": 0.1526, "step": 15219 }, { "epoch": 0.4440165703950055, "grad_norm": 0.6667299942179602, "learning_rate": 6.139564863973169e-06, "loss": 0.1326, "step": 15220 }, { "epoch": 0.4440457436256491, "grad_norm": 0.9066358944844822, "learning_rate": 6.139104858485616e-06, "loss": 0.1348, "step": 15221 }, { "epoch": 0.4440749168562927, "grad_norm": 0.7812964435545219, "learning_rate": 6.138644842828407e-06, "loss": 0.138, "step": 15222 }, { "epoch": 0.44410409008693624, "grad_norm": 0.8349967115734126, "learning_rate": 6.138184817005648e-06, "loss": 0.1524, "step": 15223 }, { "epoch": 0.4441332633175798, "grad_norm": 1.1972323678106125, "learning_rate": 6.1377247810214466e-06, "loss": 0.1452, "step": 15224 }, { "epoch": 0.44416243654822335, "grad_norm": 0.9094728032049704, "learning_rate": 6.137264734879912e-06, "loss": 0.132, "step": 15225 }, { "epoch": 0.4441916097788669, "grad_norm": 0.8020759584277014, "learning_rate": 6.136804678585146e-06, "loss": 0.1341, "step": 15226 }, { "epoch": 0.44422078300951046, "grad_norm": 1.1431151200320264, "learning_rate": 6.136344612141262e-06, "loss": 0.1435, "step": 15227 }, { "epoch": 0.444249956240154, "grad_norm": 0.9889349789349048, "learning_rate": 6.135884535552363e-06, "loss": 0.1479, "step": 15228 }, { "epoch": 0.4442791294707976, "grad_norm": 0.7478560976135962, "learning_rate": 6.135424448822559e-06, "loss": 0.1271, "step": 15229 }, { "epoch": 0.44430830270144117, "grad_norm": 0.7186266917342702, "learning_rate": 6.134964351955955e-06, "loss": 0.1181, "step": 15230 }, { "epoch": 0.4443374759320847, "grad_norm": 0.8726330738217674, "learning_rate": 6.134504244956662e-06, "loss": 0.1172, "step": 15231 }, { "epoch": 0.4443666491627283, "grad_norm": 0.9163240605568302, "learning_rate": 6.134044127828785e-06, "loss": 0.124, "step": 15232 }, { "epoch": 0.44439582239337183, "grad_norm": 0.96480251333729, "learning_rate": 6.133584000576433e-06, "loss": 0.1463, "step": 15233 }, { "epoch": 0.4444249956240154, "grad_norm": 0.7747088266092722, "learning_rate": 6.133123863203714e-06, "loss": 0.13, "step": 15234 }, { "epoch": 0.44445416885465894, "grad_norm": 1.0229218733197905, "learning_rate": 6.132663715714735e-06, "loss": 0.131, "step": 15235 }, { "epoch": 0.44448334208530255, "grad_norm": 0.8384492580311871, "learning_rate": 6.132203558113604e-06, "loss": 0.1064, "step": 15236 }, { "epoch": 0.4445125153159461, "grad_norm": 0.7906416335983961, "learning_rate": 6.131743390404432e-06, "loss": 0.1413, "step": 15237 }, { "epoch": 0.44454168854658965, "grad_norm": 0.7826541865543741, "learning_rate": 6.131283212591324e-06, "loss": 0.1527, "step": 15238 }, { "epoch": 0.4445708617772332, "grad_norm": 0.9590447953863227, "learning_rate": 6.130823024678388e-06, "loss": 0.1454, "step": 15239 }, { "epoch": 0.44460003500787676, "grad_norm": 0.9056538663663815, "learning_rate": 6.1303628266697365e-06, "loss": 0.1467, "step": 15240 }, { "epoch": 0.4446292082385203, "grad_norm": 0.8059288224470611, "learning_rate": 6.129902618569474e-06, "loss": 0.1477, "step": 15241 }, { "epoch": 0.4446583814691639, "grad_norm": 0.8979235488551794, "learning_rate": 6.129442400381712e-06, "loss": 0.1207, "step": 15242 }, { "epoch": 0.4446875546998075, "grad_norm": 0.8475314209268942, "learning_rate": 6.128982172110558e-06, "loss": 0.1386, "step": 15243 }, { "epoch": 0.44471672793045103, "grad_norm": 0.5986386792810503, "learning_rate": 6.128521933760119e-06, "loss": 0.1282, "step": 15244 }, { "epoch": 0.4447459011610946, "grad_norm": 0.8971110629639041, "learning_rate": 6.1280616853345065e-06, "loss": 0.1489, "step": 15245 }, { "epoch": 0.44477507439173813, "grad_norm": 0.9085603557417934, "learning_rate": 6.127601426837828e-06, "loss": 0.1624, "step": 15246 }, { "epoch": 0.4448042476223817, "grad_norm": 0.8271277986371625, "learning_rate": 6.127141158274194e-06, "loss": 0.1638, "step": 15247 }, { "epoch": 0.44483342085302524, "grad_norm": 1.1073249084395338, "learning_rate": 6.126680879647712e-06, "loss": 0.1424, "step": 15248 }, { "epoch": 0.44486259408366885, "grad_norm": 0.9205586686240007, "learning_rate": 6.126220590962493e-06, "loss": 0.1486, "step": 15249 }, { "epoch": 0.4448917673143124, "grad_norm": 0.7290816364217954, "learning_rate": 6.1257602922226445e-06, "loss": 0.1311, "step": 15250 }, { "epoch": 0.44492094054495596, "grad_norm": 1.2122255502811066, "learning_rate": 6.1252999834322766e-06, "loss": 0.16, "step": 15251 }, { "epoch": 0.4449501137755995, "grad_norm": 1.0996832928390419, "learning_rate": 6.124839664595501e-06, "loss": 0.1589, "step": 15252 }, { "epoch": 0.44497928700624306, "grad_norm": 0.9662578582152325, "learning_rate": 6.1243793357164224e-06, "loss": 0.1348, "step": 15253 }, { "epoch": 0.4450084602368866, "grad_norm": 0.8472291855870548, "learning_rate": 6.123918996799155e-06, "loss": 0.1675, "step": 15254 }, { "epoch": 0.44503763346753017, "grad_norm": 1.1205126844670723, "learning_rate": 6.123458647847808e-06, "loss": 0.1689, "step": 15255 }, { "epoch": 0.4450668066981738, "grad_norm": 0.9057560283475344, "learning_rate": 6.1229982888664895e-06, "loss": 0.161, "step": 15256 }, { "epoch": 0.44509597992881733, "grad_norm": 0.8111224915309679, "learning_rate": 6.122537919859312e-06, "loss": 0.1204, "step": 15257 }, { "epoch": 0.4451251531594609, "grad_norm": 0.752264863469283, "learning_rate": 6.1220775408303825e-06, "loss": 0.1355, "step": 15258 }, { "epoch": 0.44515432639010444, "grad_norm": 0.9918192620855522, "learning_rate": 6.121617151783812e-06, "loss": 0.1696, "step": 15259 }, { "epoch": 0.445183499620748, "grad_norm": 0.7428392567570353, "learning_rate": 6.1211567527237115e-06, "loss": 0.1471, "step": 15260 }, { "epoch": 0.44521267285139154, "grad_norm": 0.8730197701897332, "learning_rate": 6.120696343654191e-06, "loss": 0.1305, "step": 15261 }, { "epoch": 0.4452418460820351, "grad_norm": 0.7914013776401143, "learning_rate": 6.120235924579362e-06, "loss": 0.1259, "step": 15262 }, { "epoch": 0.4452710193126787, "grad_norm": 0.7101655196438709, "learning_rate": 6.119775495503334e-06, "loss": 0.142, "step": 15263 }, { "epoch": 0.44530019254332226, "grad_norm": 0.8127695382551665, "learning_rate": 6.119315056430217e-06, "loss": 0.1323, "step": 15264 }, { "epoch": 0.4453293657739658, "grad_norm": 0.8752157719510197, "learning_rate": 6.118854607364122e-06, "loss": 0.1405, "step": 15265 }, { "epoch": 0.44535853900460937, "grad_norm": 0.8859459423157212, "learning_rate": 6.118394148309161e-06, "loss": 0.1511, "step": 15266 }, { "epoch": 0.4453877122352529, "grad_norm": 0.9430807259674541, "learning_rate": 6.117933679269446e-06, "loss": 0.1602, "step": 15267 }, { "epoch": 0.4454168854658965, "grad_norm": 0.8101264308156917, "learning_rate": 6.117473200249082e-06, "loss": 0.1251, "step": 15268 }, { "epoch": 0.4454460586965401, "grad_norm": 0.6454273830947004, "learning_rate": 6.117012711252186e-06, "loss": 0.1311, "step": 15269 }, { "epoch": 0.44547523192718363, "grad_norm": 1.0266229742097204, "learning_rate": 6.116552212282868e-06, "loss": 0.1252, "step": 15270 }, { "epoch": 0.4455044051578272, "grad_norm": 0.8443968042739745, "learning_rate": 6.116091703345236e-06, "loss": 0.1326, "step": 15271 }, { "epoch": 0.44553357838847074, "grad_norm": 0.7291060015392791, "learning_rate": 6.1156311844434065e-06, "loss": 0.1311, "step": 15272 }, { "epoch": 0.4455627516191143, "grad_norm": 0.8763559814127485, "learning_rate": 6.115170655581486e-06, "loss": 0.1279, "step": 15273 }, { "epoch": 0.44559192484975785, "grad_norm": 0.7304809459449753, "learning_rate": 6.114710116763589e-06, "loss": 0.1284, "step": 15274 }, { "epoch": 0.4456210980804014, "grad_norm": 0.788802497349413, "learning_rate": 6.114249567993826e-06, "loss": 0.1394, "step": 15275 }, { "epoch": 0.445650271311045, "grad_norm": 1.0010644044930095, "learning_rate": 6.11378900927631e-06, "loss": 0.1526, "step": 15276 }, { "epoch": 0.44567944454168856, "grad_norm": 0.9800887845963783, "learning_rate": 6.1133284406151494e-06, "loss": 0.1468, "step": 15277 }, { "epoch": 0.4457086177723321, "grad_norm": 0.8837972629123051, "learning_rate": 6.11286786201446e-06, "loss": 0.1246, "step": 15278 }, { "epoch": 0.44573779100297567, "grad_norm": 0.756446040358204, "learning_rate": 6.112407273478351e-06, "loss": 0.1459, "step": 15279 }, { "epoch": 0.4457669642336192, "grad_norm": 0.8495668524065141, "learning_rate": 6.111946675010936e-06, "loss": 0.1448, "step": 15280 }, { "epoch": 0.4457961374642628, "grad_norm": 1.01471428766146, "learning_rate": 6.111486066616326e-06, "loss": 0.1664, "step": 15281 }, { "epoch": 0.44582531069490633, "grad_norm": 0.916121638870755, "learning_rate": 6.1110254482986354e-06, "loss": 0.1574, "step": 15282 }, { "epoch": 0.44585448392554994, "grad_norm": 0.7760158580921451, "learning_rate": 6.110564820061972e-06, "loss": 0.1525, "step": 15283 }, { "epoch": 0.4458836571561935, "grad_norm": 0.853142412517895, "learning_rate": 6.110104181910452e-06, "loss": 0.1295, "step": 15284 }, { "epoch": 0.44591283038683704, "grad_norm": 0.9426743939597242, "learning_rate": 6.1096435338481885e-06, "loss": 0.1505, "step": 15285 }, { "epoch": 0.4459420036174806, "grad_norm": 0.7478221107434188, "learning_rate": 6.10918287587929e-06, "loss": 0.1502, "step": 15286 }, { "epoch": 0.44597117684812415, "grad_norm": 0.7782289003441114, "learning_rate": 6.108722208007875e-06, "loss": 0.1607, "step": 15287 }, { "epoch": 0.4460003500787677, "grad_norm": 0.904992190235612, "learning_rate": 6.10826153023805e-06, "loss": 0.1362, "step": 15288 }, { "epoch": 0.44602952330941126, "grad_norm": 0.8852852530556897, "learning_rate": 6.107800842573931e-06, "loss": 0.1436, "step": 15289 }, { "epoch": 0.44605869654005487, "grad_norm": 1.429744324549573, "learning_rate": 6.10734014501963e-06, "loss": 0.1426, "step": 15290 }, { "epoch": 0.4460878697706984, "grad_norm": 0.9704718485114926, "learning_rate": 6.106879437579262e-06, "loss": 0.1535, "step": 15291 }, { "epoch": 0.446117043001342, "grad_norm": 0.866394347707187, "learning_rate": 6.106418720256938e-06, "loss": 0.1261, "step": 15292 }, { "epoch": 0.4461462162319855, "grad_norm": 0.8273542824415847, "learning_rate": 6.105957993056772e-06, "loss": 0.1243, "step": 15293 }, { "epoch": 0.4461753894626291, "grad_norm": 1.5233395354131498, "learning_rate": 6.105497255982876e-06, "loss": 0.1381, "step": 15294 }, { "epoch": 0.44620456269327263, "grad_norm": 1.0572255648397941, "learning_rate": 6.105036509039365e-06, "loss": 0.1579, "step": 15295 }, { "epoch": 0.44623373592391624, "grad_norm": 0.7290005488993977, "learning_rate": 6.1045757522303516e-06, "loss": 0.154, "step": 15296 }, { "epoch": 0.4462629091545598, "grad_norm": 0.7555598067273647, "learning_rate": 6.104114985559952e-06, "loss": 0.1504, "step": 15297 }, { "epoch": 0.44629208238520335, "grad_norm": 1.0388390205532618, "learning_rate": 6.1036542090322736e-06, "loss": 0.1504, "step": 15298 }, { "epoch": 0.4463212556158469, "grad_norm": 0.890318762127317, "learning_rate": 6.103193422651436e-06, "loss": 0.144, "step": 15299 }, { "epoch": 0.44635042884649045, "grad_norm": 0.6257069102607349, "learning_rate": 6.102732626421552e-06, "loss": 0.129, "step": 15300 }, { "epoch": 0.446379602077134, "grad_norm": 0.8375940792515587, "learning_rate": 6.102271820346731e-06, "loss": 0.1485, "step": 15301 }, { "epoch": 0.44640877530777756, "grad_norm": 1.1803825090593871, "learning_rate": 6.101811004431093e-06, "loss": 0.1412, "step": 15302 }, { "epoch": 0.44643794853842117, "grad_norm": 1.0694821347337167, "learning_rate": 6.101350178678749e-06, "loss": 0.1196, "step": 15303 }, { "epoch": 0.4464671217690647, "grad_norm": 0.7030966777568456, "learning_rate": 6.100889343093812e-06, "loss": 0.1313, "step": 15304 }, { "epoch": 0.4464962949997083, "grad_norm": 0.8758459983348651, "learning_rate": 6.1004284976804e-06, "loss": 0.1426, "step": 15305 }, { "epoch": 0.44652546823035183, "grad_norm": 0.8054310547956597, "learning_rate": 6.099967642442623e-06, "loss": 0.1436, "step": 15306 }, { "epoch": 0.4465546414609954, "grad_norm": 0.9332237157358919, "learning_rate": 6.099506777384598e-06, "loss": 0.1381, "step": 15307 }, { "epoch": 0.44658381469163894, "grad_norm": 0.8506058449099817, "learning_rate": 6.09904590251044e-06, "loss": 0.1403, "step": 15308 }, { "epoch": 0.4466129879222825, "grad_norm": 0.7043375782536109, "learning_rate": 6.098585017824261e-06, "loss": 0.1346, "step": 15309 }, { "epoch": 0.4466421611529261, "grad_norm": 0.9915756333596731, "learning_rate": 6.098124123330178e-06, "loss": 0.1271, "step": 15310 }, { "epoch": 0.44667133438356965, "grad_norm": 0.7555900798427305, "learning_rate": 6.097663219032306e-06, "loss": 0.1277, "step": 15311 }, { "epoch": 0.4467005076142132, "grad_norm": 0.7811465571733777, "learning_rate": 6.097202304934758e-06, "loss": 0.1369, "step": 15312 }, { "epoch": 0.44672968084485676, "grad_norm": 0.7760424626545825, "learning_rate": 6.096741381041649e-06, "loss": 0.1393, "step": 15313 }, { "epoch": 0.4467588540755003, "grad_norm": 0.8137295964514707, "learning_rate": 6.096280447357095e-06, "loss": 0.1242, "step": 15314 }, { "epoch": 0.44678802730614386, "grad_norm": 0.8190671346470227, "learning_rate": 6.0958195038852115e-06, "loss": 0.1736, "step": 15315 }, { "epoch": 0.4468172005367874, "grad_norm": 0.7415349402083039, "learning_rate": 6.095358550630113e-06, "loss": 0.1358, "step": 15316 }, { "epoch": 0.446846373767431, "grad_norm": 0.7279555472391306, "learning_rate": 6.0948975875959145e-06, "loss": 0.1279, "step": 15317 }, { "epoch": 0.4468755469980746, "grad_norm": 0.8911639321268853, "learning_rate": 6.094436614786733e-06, "loss": 0.1519, "step": 15318 }, { "epoch": 0.44690472022871813, "grad_norm": 0.9726188669800133, "learning_rate": 6.093975632206681e-06, "loss": 0.1252, "step": 15319 }, { "epoch": 0.4469338934593617, "grad_norm": 0.8049216253395035, "learning_rate": 6.093514639859877e-06, "loss": 0.1671, "step": 15320 }, { "epoch": 0.44696306669000524, "grad_norm": 0.8248019570433789, "learning_rate": 6.093053637750433e-06, "loss": 0.1335, "step": 15321 }, { "epoch": 0.4469922399206488, "grad_norm": 0.9190140038585972, "learning_rate": 6.09259262588247e-06, "loss": 0.1481, "step": 15322 }, { "epoch": 0.4470214131512924, "grad_norm": 1.0802199683662481, "learning_rate": 6.092131604260099e-06, "loss": 0.1237, "step": 15323 }, { "epoch": 0.44705058638193595, "grad_norm": 1.9020566396685108, "learning_rate": 6.091670572887438e-06, "loss": 0.1477, "step": 15324 }, { "epoch": 0.4470797596125795, "grad_norm": 0.7611700628300063, "learning_rate": 6.091209531768603e-06, "loss": 0.1403, "step": 15325 }, { "epoch": 0.44710893284322306, "grad_norm": 0.8753853301393958, "learning_rate": 6.09074848090771e-06, "loss": 0.13, "step": 15326 }, { "epoch": 0.4471381060738666, "grad_norm": 0.9155815837711926, "learning_rate": 6.0902874203088744e-06, "loss": 0.1358, "step": 15327 }, { "epoch": 0.44716727930451017, "grad_norm": 1.1288010289736823, "learning_rate": 6.089826349976213e-06, "loss": 0.1342, "step": 15328 }, { "epoch": 0.4471964525351537, "grad_norm": 0.7041431192759933, "learning_rate": 6.0893652699138425e-06, "loss": 0.135, "step": 15329 }, { "epoch": 0.44722562576579733, "grad_norm": 0.916718058298528, "learning_rate": 6.088904180125878e-06, "loss": 0.1359, "step": 15330 }, { "epoch": 0.4472547989964409, "grad_norm": 0.8518468216049682, "learning_rate": 6.088443080616439e-06, "loss": 0.1593, "step": 15331 }, { "epoch": 0.44728397222708444, "grad_norm": 1.1255361767521337, "learning_rate": 6.087981971389639e-06, "loss": 0.1362, "step": 15332 }, { "epoch": 0.447313145457728, "grad_norm": 0.6877519479258729, "learning_rate": 6.0875208524495945e-06, "loss": 0.1262, "step": 15333 }, { "epoch": 0.44734231868837154, "grad_norm": 0.8847023564377745, "learning_rate": 6.087059723800426e-06, "loss": 0.1368, "step": 15334 }, { "epoch": 0.4473714919190151, "grad_norm": 0.7625876272799125, "learning_rate": 6.086598585446245e-06, "loss": 0.1436, "step": 15335 }, { "epoch": 0.44740066514965865, "grad_norm": 0.78945406976093, "learning_rate": 6.086137437391172e-06, "loss": 0.1253, "step": 15336 }, { "epoch": 0.44742983838030226, "grad_norm": 0.7537447445197407, "learning_rate": 6.0856762796393244e-06, "loss": 0.149, "step": 15337 }, { "epoch": 0.4474590116109458, "grad_norm": 0.7916157298811274, "learning_rate": 6.085215112194818e-06, "loss": 0.1312, "step": 15338 }, { "epoch": 0.44748818484158936, "grad_norm": 0.9057345129733425, "learning_rate": 6.084753935061769e-06, "loss": 0.1543, "step": 15339 }, { "epoch": 0.4475173580722329, "grad_norm": 0.6811350085174357, "learning_rate": 6.084292748244296e-06, "loss": 0.1302, "step": 15340 }, { "epoch": 0.44754653130287647, "grad_norm": 0.7061628425165356, "learning_rate": 6.083831551746516e-06, "loss": 0.1418, "step": 15341 }, { "epoch": 0.44757570453352, "grad_norm": 0.9942950514244173, "learning_rate": 6.083370345572548e-06, "loss": 0.1333, "step": 15342 }, { "epoch": 0.4476048777641636, "grad_norm": 0.8480920884999015, "learning_rate": 6.082909129726506e-06, "loss": 0.1311, "step": 15343 }, { "epoch": 0.4476340509948072, "grad_norm": 0.7649499289265241, "learning_rate": 6.082447904212512e-06, "loss": 0.1371, "step": 15344 }, { "epoch": 0.44766322422545074, "grad_norm": 0.7993715485804653, "learning_rate": 6.081986669034681e-06, "loss": 0.14, "step": 15345 }, { "epoch": 0.4476923974560943, "grad_norm": 0.8205791858500433, "learning_rate": 6.08152542419713e-06, "loss": 0.1245, "step": 15346 }, { "epoch": 0.44772157068673785, "grad_norm": 0.7218935117331445, "learning_rate": 6.081064169703981e-06, "loss": 0.1305, "step": 15347 }, { "epoch": 0.4477507439173814, "grad_norm": 0.8263799578193723, "learning_rate": 6.080602905559346e-06, "loss": 0.1597, "step": 15348 }, { "epoch": 0.44777991714802495, "grad_norm": 0.8179836136294776, "learning_rate": 6.080141631767349e-06, "loss": 0.1604, "step": 15349 }, { "epoch": 0.4478090903786685, "grad_norm": 0.9099487724906116, "learning_rate": 6.079680348332103e-06, "loss": 0.1421, "step": 15350 }, { "epoch": 0.4478382636093121, "grad_norm": 1.0850840366906596, "learning_rate": 6.079219055257729e-06, "loss": 0.1333, "step": 15351 }, { "epoch": 0.44786743683995567, "grad_norm": 0.789915599834623, "learning_rate": 6.078757752548346e-06, "loss": 0.132, "step": 15352 }, { "epoch": 0.4478966100705992, "grad_norm": 0.8338508408773504, "learning_rate": 6.07829644020807e-06, "loss": 0.1281, "step": 15353 }, { "epoch": 0.4479257833012428, "grad_norm": 0.7019238584494384, "learning_rate": 6.0778351182410226e-06, "loss": 0.109, "step": 15354 }, { "epoch": 0.44795495653188633, "grad_norm": 0.8044855984225002, "learning_rate": 6.077373786651319e-06, "loss": 0.1535, "step": 15355 }, { "epoch": 0.4479841297625299, "grad_norm": 0.7373770750377117, "learning_rate": 6.076912445443079e-06, "loss": 0.1124, "step": 15356 }, { "epoch": 0.4480133029931735, "grad_norm": 0.7168772588691699, "learning_rate": 6.076451094620424e-06, "loss": 0.1466, "step": 15357 }, { "epoch": 0.44804247622381704, "grad_norm": 0.95791986581113, "learning_rate": 6.075989734187469e-06, "loss": 0.1563, "step": 15358 }, { "epoch": 0.4480716494544606, "grad_norm": 0.7106563143561379, "learning_rate": 6.075528364148335e-06, "loss": 0.1061, "step": 15359 }, { "epoch": 0.44810082268510415, "grad_norm": 0.8339195536507266, "learning_rate": 6.07506698450714e-06, "loss": 0.1729, "step": 15360 }, { "epoch": 0.4481299959157477, "grad_norm": 0.8239540782182683, "learning_rate": 6.074605595268002e-06, "loss": 0.1284, "step": 15361 }, { "epoch": 0.44815916914639126, "grad_norm": 0.7366551213719733, "learning_rate": 6.074144196435045e-06, "loss": 0.1367, "step": 15362 }, { "epoch": 0.4481883423770348, "grad_norm": 0.7667253677875338, "learning_rate": 6.073682788012384e-06, "loss": 0.1103, "step": 15363 }, { "epoch": 0.4482175156076784, "grad_norm": 0.7472561908775801, "learning_rate": 6.073221370004139e-06, "loss": 0.1373, "step": 15364 }, { "epoch": 0.44824668883832197, "grad_norm": 0.9541954510364338, "learning_rate": 6.07275994241443e-06, "loss": 0.1223, "step": 15365 }, { "epoch": 0.4482758620689655, "grad_norm": 0.8713491107684442, "learning_rate": 6.072298505247376e-06, "loss": 0.1183, "step": 15366 }, { "epoch": 0.4483050352996091, "grad_norm": 1.3063701456978865, "learning_rate": 6.071837058507097e-06, "loss": 0.1942, "step": 15367 }, { "epoch": 0.44833420853025263, "grad_norm": 0.8573021583863663, "learning_rate": 6.071375602197713e-06, "loss": 0.1376, "step": 15368 }, { "epoch": 0.4483633817608962, "grad_norm": 0.9923513666614028, "learning_rate": 6.070914136323342e-06, "loss": 0.1429, "step": 15369 }, { "epoch": 0.44839255499153974, "grad_norm": 0.9075965650298964, "learning_rate": 6.070452660888108e-06, "loss": 0.1554, "step": 15370 }, { "epoch": 0.44842172822218335, "grad_norm": 0.7643302118184612, "learning_rate": 6.069991175896126e-06, "loss": 0.1473, "step": 15371 }, { "epoch": 0.4484509014528269, "grad_norm": 1.0499260942359514, "learning_rate": 6.069529681351518e-06, "loss": 0.16, "step": 15372 }, { "epoch": 0.44848007468347045, "grad_norm": 0.8892435689751551, "learning_rate": 6.069068177258406e-06, "loss": 0.1542, "step": 15373 }, { "epoch": 0.448509247914114, "grad_norm": 0.8745325863367627, "learning_rate": 6.068606663620907e-06, "loss": 0.1316, "step": 15374 }, { "epoch": 0.44853842114475756, "grad_norm": 1.2117473936439547, "learning_rate": 6.068145140443143e-06, "loss": 0.1457, "step": 15375 }, { "epoch": 0.4485675943754011, "grad_norm": 0.9842029551190973, "learning_rate": 6.067683607729234e-06, "loss": 0.1585, "step": 15376 }, { "epoch": 0.44859676760604467, "grad_norm": 0.942666837965468, "learning_rate": 6.067222065483303e-06, "loss": 0.1406, "step": 15377 }, { "epoch": 0.4486259408366883, "grad_norm": 0.8178630689461366, "learning_rate": 6.066760513709466e-06, "loss": 0.1281, "step": 15378 }, { "epoch": 0.44865511406733183, "grad_norm": 0.7872626468120314, "learning_rate": 6.066298952411846e-06, "loss": 0.14, "step": 15379 }, { "epoch": 0.4486842872979754, "grad_norm": 0.9733597246995307, "learning_rate": 6.065837381594563e-06, "loss": 0.1416, "step": 15380 }, { "epoch": 0.44871346052861893, "grad_norm": 0.8273228500542338, "learning_rate": 6.065375801261739e-06, "loss": 0.153, "step": 15381 }, { "epoch": 0.4487426337592625, "grad_norm": 1.01081477485419, "learning_rate": 6.064914211417495e-06, "loss": 0.1465, "step": 15382 }, { "epoch": 0.44877180698990604, "grad_norm": 0.7620129358509248, "learning_rate": 6.06445261206595e-06, "loss": 0.1337, "step": 15383 }, { "epoch": 0.44880098022054965, "grad_norm": 0.875167098626487, "learning_rate": 6.063991003211227e-06, "loss": 0.1402, "step": 15384 }, { "epoch": 0.4488301534511932, "grad_norm": 0.8485254629721719, "learning_rate": 6.063529384857445e-06, "loss": 0.1427, "step": 15385 }, { "epoch": 0.44885932668183676, "grad_norm": 0.874625989164001, "learning_rate": 6.063067757008727e-06, "loss": 0.1343, "step": 15386 }, { "epoch": 0.4488884999124803, "grad_norm": 0.8749592803137143, "learning_rate": 6.062606119669194e-06, "loss": 0.1499, "step": 15387 }, { "epoch": 0.44891767314312386, "grad_norm": 0.9409782433805018, "learning_rate": 6.0621444728429675e-06, "loss": 0.1678, "step": 15388 }, { "epoch": 0.4489468463737674, "grad_norm": 0.9233812981784898, "learning_rate": 6.061682816534169e-06, "loss": 0.1501, "step": 15389 }, { "epoch": 0.44897601960441097, "grad_norm": 0.9928736377413834, "learning_rate": 6.061221150746919e-06, "loss": 0.1468, "step": 15390 }, { "epoch": 0.4490051928350546, "grad_norm": 0.8676091112081428, "learning_rate": 6.060759475485341e-06, "loss": 0.1581, "step": 15391 }, { "epoch": 0.44903436606569813, "grad_norm": 0.8149235286502625, "learning_rate": 6.060297790753555e-06, "loss": 0.1564, "step": 15392 }, { "epoch": 0.4490635392963417, "grad_norm": 0.7952708965899153, "learning_rate": 6.059836096555682e-06, "loss": 0.1651, "step": 15393 }, { "epoch": 0.44909271252698524, "grad_norm": 0.774273371444286, "learning_rate": 6.059374392895847e-06, "loss": 0.1432, "step": 15394 }, { "epoch": 0.4491218857576288, "grad_norm": 1.064555635610097, "learning_rate": 6.0589126797781705e-06, "loss": 0.166, "step": 15395 }, { "epoch": 0.44915105898827234, "grad_norm": 0.8378755092136079, "learning_rate": 6.058450957206773e-06, "loss": 0.1344, "step": 15396 }, { "epoch": 0.4491802322189159, "grad_norm": 0.8085456034294426, "learning_rate": 6.057989225185779e-06, "loss": 0.1393, "step": 15397 }, { "epoch": 0.4492094054495595, "grad_norm": 0.7754635374313775, "learning_rate": 6.0575274837193096e-06, "loss": 0.1318, "step": 15398 }, { "epoch": 0.44923857868020306, "grad_norm": 0.8527890777382867, "learning_rate": 6.057065732811488e-06, "loss": 0.1442, "step": 15399 }, { "epoch": 0.4492677519108466, "grad_norm": 0.9897797469581112, "learning_rate": 6.056603972466435e-06, "loss": 0.1585, "step": 15400 }, { "epoch": 0.44929692514149017, "grad_norm": 0.7479714463271162, "learning_rate": 6.0561422026882735e-06, "loss": 0.169, "step": 15401 }, { "epoch": 0.4493260983721337, "grad_norm": 0.8982032075909928, "learning_rate": 6.0556804234811276e-06, "loss": 0.1232, "step": 15402 }, { "epoch": 0.4493552716027773, "grad_norm": 0.7703823612855346, "learning_rate": 6.055218634849118e-06, "loss": 0.1442, "step": 15403 }, { "epoch": 0.4493844448334208, "grad_norm": 0.9600556990766124, "learning_rate": 6.054756836796369e-06, "loss": 0.1059, "step": 15404 }, { "epoch": 0.44941361806406444, "grad_norm": 1.0048974647856885, "learning_rate": 6.054295029327002e-06, "loss": 0.1536, "step": 15405 }, { "epoch": 0.449442791294708, "grad_norm": 0.7310083617419373, "learning_rate": 6.053833212445141e-06, "loss": 0.1471, "step": 15406 }, { "epoch": 0.44947196452535154, "grad_norm": 0.7867274235152153, "learning_rate": 6.05337138615491e-06, "loss": 0.1298, "step": 15407 }, { "epoch": 0.4495011377559951, "grad_norm": 0.7945308302893132, "learning_rate": 6.052909550460429e-06, "loss": 0.1391, "step": 15408 }, { "epoch": 0.44953031098663865, "grad_norm": 0.8331005019096529, "learning_rate": 6.052447705365824e-06, "loss": 0.1677, "step": 15409 }, { "epoch": 0.4495594842172822, "grad_norm": 0.763065824096036, "learning_rate": 6.051985850875216e-06, "loss": 0.1639, "step": 15410 }, { "epoch": 0.4495886574479258, "grad_norm": 0.6980665358403694, "learning_rate": 6.0515239869927285e-06, "loss": 0.1118, "step": 15411 }, { "epoch": 0.44961783067856936, "grad_norm": 0.8724358091090383, "learning_rate": 6.051062113722489e-06, "loss": 0.1447, "step": 15412 }, { "epoch": 0.4496470039092129, "grad_norm": 0.6836028132907334, "learning_rate": 6.050600231068616e-06, "loss": 0.1302, "step": 15413 }, { "epoch": 0.44967617713985647, "grad_norm": 0.7380583610704688, "learning_rate": 6.050138339035235e-06, "loss": 0.1333, "step": 15414 }, { "epoch": 0.4497053503705, "grad_norm": 0.7250387437012618, "learning_rate": 6.0496764376264705e-06, "loss": 0.1405, "step": 15415 }, { "epoch": 0.4497345236011436, "grad_norm": 0.7031926994293658, "learning_rate": 6.049214526846444e-06, "loss": 0.147, "step": 15416 }, { "epoch": 0.44976369683178713, "grad_norm": 0.8165137002136926, "learning_rate": 6.048752606699282e-06, "loss": 0.135, "step": 15417 }, { "epoch": 0.44979287006243074, "grad_norm": 0.7543408656527488, "learning_rate": 6.048290677189106e-06, "loss": 0.1364, "step": 15418 }, { "epoch": 0.4498220432930743, "grad_norm": 0.6675726524186767, "learning_rate": 6.047828738320041e-06, "loss": 0.1324, "step": 15419 }, { "epoch": 0.44985121652371785, "grad_norm": 0.9602264692579462, "learning_rate": 6.047366790096212e-06, "loss": 0.169, "step": 15420 }, { "epoch": 0.4498803897543614, "grad_norm": 0.8502253876920634, "learning_rate": 6.046904832521742e-06, "loss": 0.1487, "step": 15421 }, { "epoch": 0.44990956298500495, "grad_norm": 0.7192279901324837, "learning_rate": 6.046442865600756e-06, "loss": 0.1191, "step": 15422 }, { "epoch": 0.4499387362156485, "grad_norm": 0.9532467202734655, "learning_rate": 6.0459808893373764e-06, "loss": 0.1439, "step": 15423 }, { "epoch": 0.44996790944629206, "grad_norm": 0.7877536771062995, "learning_rate": 6.045518903735731e-06, "loss": 0.1374, "step": 15424 }, { "epoch": 0.44999708267693567, "grad_norm": 0.763803614463936, "learning_rate": 6.045056908799941e-06, "loss": 0.1274, "step": 15425 }, { "epoch": 0.4500262559075792, "grad_norm": 0.684652609615203, "learning_rate": 6.044594904534132e-06, "loss": 0.1227, "step": 15426 }, { "epoch": 0.4500554291382228, "grad_norm": 0.9331517680282322, "learning_rate": 6.044132890942432e-06, "loss": 0.1402, "step": 15427 }, { "epoch": 0.4500846023688663, "grad_norm": 0.8575296634774847, "learning_rate": 6.04367086802896e-06, "loss": 0.1065, "step": 15428 }, { "epoch": 0.4501137755995099, "grad_norm": 0.8340571847404953, "learning_rate": 6.043208835797845e-06, "loss": 0.1389, "step": 15429 }, { "epoch": 0.45014294883015343, "grad_norm": 0.805375479888196, "learning_rate": 6.042746794253209e-06, "loss": 0.1324, "step": 15430 }, { "epoch": 0.450172122060797, "grad_norm": 0.7207822573273216, "learning_rate": 6.0422847433991795e-06, "loss": 0.1489, "step": 15431 }, { "epoch": 0.4502012952914406, "grad_norm": 0.911818155425748, "learning_rate": 6.041822683239881e-06, "loss": 0.1296, "step": 15432 }, { "epoch": 0.45023046852208415, "grad_norm": 0.674039306822005, "learning_rate": 6.041360613779438e-06, "loss": 0.1305, "step": 15433 }, { "epoch": 0.4502596417527277, "grad_norm": 0.8040425280619382, "learning_rate": 6.040898535021975e-06, "loss": 0.1427, "step": 15434 }, { "epoch": 0.45028881498337125, "grad_norm": 0.8042811515528271, "learning_rate": 6.040436446971619e-06, "loss": 0.1344, "step": 15435 }, { "epoch": 0.4503179882140148, "grad_norm": 0.8620850881804475, "learning_rate": 6.039974349632496e-06, "loss": 0.1272, "step": 15436 }, { "epoch": 0.45034716144465836, "grad_norm": 0.842513368101869, "learning_rate": 6.03951224300873e-06, "loss": 0.1622, "step": 15437 }, { "epoch": 0.45037633467530197, "grad_norm": 0.7177630781376846, "learning_rate": 6.0390501271044455e-06, "loss": 0.1204, "step": 15438 }, { "epoch": 0.4504055079059455, "grad_norm": 0.7727972733442917, "learning_rate": 6.038588001923771e-06, "loss": 0.1323, "step": 15439 }, { "epoch": 0.4504346811365891, "grad_norm": 1.0554661299495012, "learning_rate": 6.03812586747083e-06, "loss": 0.1363, "step": 15440 }, { "epoch": 0.45046385436723263, "grad_norm": 0.7942799378197438, "learning_rate": 6.0376637237497474e-06, "loss": 0.1341, "step": 15441 }, { "epoch": 0.4504930275978762, "grad_norm": 0.8396266384549794, "learning_rate": 6.037201570764654e-06, "loss": 0.1325, "step": 15442 }, { "epoch": 0.45052220082851974, "grad_norm": 0.805519465081484, "learning_rate": 6.036739408519671e-06, "loss": 0.1034, "step": 15443 }, { "epoch": 0.4505513740591633, "grad_norm": 0.8864674384806074, "learning_rate": 6.036277237018926e-06, "loss": 0.119, "step": 15444 }, { "epoch": 0.4505805472898069, "grad_norm": 0.9880335570734815, "learning_rate": 6.0358150562665455e-06, "loss": 0.1416, "step": 15445 }, { "epoch": 0.45060972052045045, "grad_norm": 1.2902869860420179, "learning_rate": 6.035352866266655e-06, "loss": 0.1648, "step": 15446 }, { "epoch": 0.450638893751094, "grad_norm": 1.052955135958026, "learning_rate": 6.034890667023381e-06, "loss": 0.1499, "step": 15447 }, { "epoch": 0.45066806698173756, "grad_norm": 0.9574731079353955, "learning_rate": 6.034428458540851e-06, "loss": 0.1356, "step": 15448 }, { "epoch": 0.4506972402123811, "grad_norm": 1.2152176449831125, "learning_rate": 6.03396624082319e-06, "loss": 0.1404, "step": 15449 }, { "epoch": 0.45072641344302466, "grad_norm": 1.2169092351149746, "learning_rate": 6.033504013874525e-06, "loss": 0.1334, "step": 15450 }, { "epoch": 0.4507555866736682, "grad_norm": 0.7316672657289102, "learning_rate": 6.033041777698983e-06, "loss": 0.1448, "step": 15451 }, { "epoch": 0.4507847599043118, "grad_norm": 0.8576953274679225, "learning_rate": 6.032579532300693e-06, "loss": 0.1356, "step": 15452 }, { "epoch": 0.4508139331349554, "grad_norm": 1.192208792342409, "learning_rate": 6.032117277683776e-06, "loss": 0.1185, "step": 15453 }, { "epoch": 0.45084310636559893, "grad_norm": 0.6967262956605862, "learning_rate": 6.0316550138523646e-06, "loss": 0.1477, "step": 15454 }, { "epoch": 0.4508722795962425, "grad_norm": 0.6481728147418251, "learning_rate": 6.031192740810583e-06, "loss": 0.1276, "step": 15455 }, { "epoch": 0.45090145282688604, "grad_norm": 0.8024521820210737, "learning_rate": 6.030730458562557e-06, "loss": 0.117, "step": 15456 }, { "epoch": 0.4509306260575296, "grad_norm": 0.9684911976346853, "learning_rate": 6.030268167112419e-06, "loss": 0.1476, "step": 15457 }, { "epoch": 0.45095979928817315, "grad_norm": 0.7871994175419291, "learning_rate": 6.02980586646429e-06, "loss": 0.1215, "step": 15458 }, { "epoch": 0.45098897251881676, "grad_norm": 0.8654542181243517, "learning_rate": 6.0293435566223e-06, "loss": 0.134, "step": 15459 }, { "epoch": 0.4510181457494603, "grad_norm": 1.2935168187353963, "learning_rate": 6.028881237590578e-06, "loss": 0.1403, "step": 15460 }, { "epoch": 0.45104731898010386, "grad_norm": 0.9120213480978883, "learning_rate": 6.028418909373249e-06, "loss": 0.135, "step": 15461 }, { "epoch": 0.4510764922107474, "grad_norm": 0.8344562021187523, "learning_rate": 6.027956571974442e-06, "loss": 0.1437, "step": 15462 }, { "epoch": 0.45110566544139097, "grad_norm": 0.8154161122264343, "learning_rate": 6.0274942253982825e-06, "loss": 0.1398, "step": 15463 }, { "epoch": 0.4511348386720345, "grad_norm": 0.9741081672251926, "learning_rate": 6.027031869648901e-06, "loss": 0.1248, "step": 15464 }, { "epoch": 0.4511640119026781, "grad_norm": 0.8620278803679651, "learning_rate": 6.026569504730425e-06, "loss": 0.1573, "step": 15465 }, { "epoch": 0.4511931851333217, "grad_norm": 0.9282741921915446, "learning_rate": 6.026107130646981e-06, "loss": 0.1241, "step": 15466 }, { "epoch": 0.45122235836396524, "grad_norm": 1.0156400767179663, "learning_rate": 6.025644747402698e-06, "loss": 0.1425, "step": 15467 }, { "epoch": 0.4512515315946088, "grad_norm": 0.9510747137714097, "learning_rate": 6.025182355001702e-06, "loss": 0.1329, "step": 15468 }, { "epoch": 0.45128070482525234, "grad_norm": 0.7098683350119727, "learning_rate": 6.024719953448124e-06, "loss": 0.1367, "step": 15469 }, { "epoch": 0.4513098780558959, "grad_norm": 0.9384021723867909, "learning_rate": 6.02425754274609e-06, "loss": 0.1586, "step": 15470 }, { "epoch": 0.45133905128653945, "grad_norm": 1.0093393459392084, "learning_rate": 6.023795122899729e-06, "loss": 0.1176, "step": 15471 }, { "epoch": 0.45136822451718306, "grad_norm": 0.736763983910399, "learning_rate": 6.023332693913171e-06, "loss": 0.1392, "step": 15472 }, { "epoch": 0.4513973977478266, "grad_norm": 1.0968694045293141, "learning_rate": 6.0228702557905415e-06, "loss": 0.1444, "step": 15473 }, { "epoch": 0.45142657097847017, "grad_norm": 1.007510007736189, "learning_rate": 6.022407808535972e-06, "loss": 0.142, "step": 15474 }, { "epoch": 0.4514557442091137, "grad_norm": 0.7977697991769906, "learning_rate": 6.0219453521535875e-06, "loss": 0.1291, "step": 15475 }, { "epoch": 0.45148491743975727, "grad_norm": 0.9444930378608256, "learning_rate": 6.021482886647521e-06, "loss": 0.1555, "step": 15476 }, { "epoch": 0.4515140906704008, "grad_norm": 0.7666298926815919, "learning_rate": 6.021020412021897e-06, "loss": 0.1339, "step": 15477 }, { "epoch": 0.4515432639010444, "grad_norm": 0.9371666251757395, "learning_rate": 6.020557928280848e-06, "loss": 0.1174, "step": 15478 }, { "epoch": 0.451572437131688, "grad_norm": 0.7594950748927057, "learning_rate": 6.020095435428501e-06, "loss": 0.1194, "step": 15479 }, { "epoch": 0.45160161036233154, "grad_norm": 0.8172497911967067, "learning_rate": 6.019632933468986e-06, "loss": 0.1396, "step": 15480 }, { "epoch": 0.4516307835929751, "grad_norm": 0.7670550947442615, "learning_rate": 6.0191704224064305e-06, "loss": 0.1405, "step": 15481 }, { "epoch": 0.45165995682361865, "grad_norm": 0.7604196341090894, "learning_rate": 6.018707902244967e-06, "loss": 0.1279, "step": 15482 }, { "epoch": 0.4516891300542622, "grad_norm": 0.8268719505091074, "learning_rate": 6.0182453729887205e-06, "loss": 0.1349, "step": 15483 }, { "epoch": 0.45171830328490575, "grad_norm": 0.7436341402224729, "learning_rate": 6.0177828346418235e-06, "loss": 0.1178, "step": 15484 }, { "epoch": 0.4517474765155493, "grad_norm": 0.8496856208825444, "learning_rate": 6.0173202872084035e-06, "loss": 0.1266, "step": 15485 }, { "epoch": 0.4517766497461929, "grad_norm": 0.7986128202583314, "learning_rate": 6.01685773069259e-06, "loss": 0.1241, "step": 15486 }, { "epoch": 0.45180582297683647, "grad_norm": 0.7725657820741384, "learning_rate": 6.016395165098516e-06, "loss": 0.1323, "step": 15487 }, { "epoch": 0.45183499620748, "grad_norm": 0.7673420715874265, "learning_rate": 6.0159325904303064e-06, "loss": 0.1361, "step": 15488 }, { "epoch": 0.4518641694381236, "grad_norm": 0.8167424355679127, "learning_rate": 6.015470006692095e-06, "loss": 0.1518, "step": 15489 }, { "epoch": 0.45189334266876713, "grad_norm": 0.9149063800333057, "learning_rate": 6.015007413888008e-06, "loss": 0.1391, "step": 15490 }, { "epoch": 0.4519225158994107, "grad_norm": 0.7660604220663534, "learning_rate": 6.014544812022177e-06, "loss": 0.1487, "step": 15491 }, { "epoch": 0.45195168913005423, "grad_norm": 0.7932425885698281, "learning_rate": 6.014082201098733e-06, "loss": 0.13, "step": 15492 }, { "epoch": 0.45198086236069784, "grad_norm": 0.8012751187484937, "learning_rate": 6.013619581121806e-06, "loss": 0.1417, "step": 15493 }, { "epoch": 0.4520100355913414, "grad_norm": 0.8094050479653967, "learning_rate": 6.013156952095523e-06, "loss": 0.1362, "step": 15494 }, { "epoch": 0.45203920882198495, "grad_norm": 0.8730183129639572, "learning_rate": 6.012694314024018e-06, "loss": 0.1355, "step": 15495 }, { "epoch": 0.4520683820526285, "grad_norm": 0.7518413162940071, "learning_rate": 6.01223166691142e-06, "loss": 0.1276, "step": 15496 }, { "epoch": 0.45209755528327206, "grad_norm": 0.855937178667111, "learning_rate": 6.011769010761861e-06, "loss": 0.13, "step": 15497 }, { "epoch": 0.4521267285139156, "grad_norm": 0.7668697609911654, "learning_rate": 6.011306345579466e-06, "loss": 0.1356, "step": 15498 }, { "epoch": 0.4521559017445592, "grad_norm": 0.8047188374888111, "learning_rate": 6.010843671368373e-06, "loss": 0.1378, "step": 15499 }, { "epoch": 0.45218507497520277, "grad_norm": 0.8493618000262269, "learning_rate": 6.0103809881327065e-06, "loss": 0.1479, "step": 15500 }, { "epoch": 0.4522142482058463, "grad_norm": 0.6987696427403324, "learning_rate": 6.0099182958766e-06, "loss": 0.1301, "step": 15501 }, { "epoch": 0.4522434214364899, "grad_norm": 0.6967339943179401, "learning_rate": 6.0094555946041855e-06, "loss": 0.1226, "step": 15502 }, { "epoch": 0.45227259466713343, "grad_norm": 0.7669631552971107, "learning_rate": 6.008992884319591e-06, "loss": 0.154, "step": 15503 }, { "epoch": 0.452301767897777, "grad_norm": 0.6731993615570382, "learning_rate": 6.00853016502695e-06, "loss": 0.1545, "step": 15504 }, { "epoch": 0.45233094112842054, "grad_norm": 7.223912458215729, "learning_rate": 6.008067436730392e-06, "loss": 0.1505, "step": 15505 }, { "epoch": 0.45236011435906415, "grad_norm": 1.0159335650326058, "learning_rate": 6.0076046994340486e-06, "loss": 0.1614, "step": 15506 }, { "epoch": 0.4523892875897077, "grad_norm": 0.886501189483512, "learning_rate": 6.0071419531420505e-06, "loss": 0.1175, "step": 15507 }, { "epoch": 0.45241846082035125, "grad_norm": 0.6803573396212004, "learning_rate": 6.006679197858529e-06, "loss": 0.1308, "step": 15508 }, { "epoch": 0.4524476340509948, "grad_norm": 0.7105691467647167, "learning_rate": 6.006216433587617e-06, "loss": 0.1336, "step": 15509 }, { "epoch": 0.45247680728163836, "grad_norm": 1.0094452391803512, "learning_rate": 6.005753660333446e-06, "loss": 0.1536, "step": 15510 }, { "epoch": 0.4525059805122819, "grad_norm": 0.8377821686328103, "learning_rate": 6.005290878100144e-06, "loss": 0.1503, "step": 15511 }, { "epoch": 0.45253515374292547, "grad_norm": 0.889393134243688, "learning_rate": 6.004828086891847e-06, "loss": 0.1549, "step": 15512 }, { "epoch": 0.4525643269735691, "grad_norm": 0.8069271483880135, "learning_rate": 6.0043652867126835e-06, "loss": 0.1406, "step": 15513 }, { "epoch": 0.45259350020421263, "grad_norm": 0.9104707480265944, "learning_rate": 6.003902477566788e-06, "loss": 0.1286, "step": 15514 }, { "epoch": 0.4526226734348562, "grad_norm": 0.9262352416055278, "learning_rate": 6.003439659458288e-06, "loss": 0.147, "step": 15515 }, { "epoch": 0.45265184666549974, "grad_norm": 0.8160715212386537, "learning_rate": 6.00297683239132e-06, "loss": 0.1469, "step": 15516 }, { "epoch": 0.4526810198961433, "grad_norm": 0.9547087787979797, "learning_rate": 6.002513996370014e-06, "loss": 0.1373, "step": 15517 }, { "epoch": 0.45271019312678684, "grad_norm": 0.78882374032233, "learning_rate": 6.002051151398503e-06, "loss": 0.1406, "step": 15518 }, { "epoch": 0.4527393663574304, "grad_norm": 0.8128759934170584, "learning_rate": 6.001588297480918e-06, "loss": 0.146, "step": 15519 }, { "epoch": 0.452768539588074, "grad_norm": 0.8322631443153432, "learning_rate": 6.0011254346213924e-06, "loss": 0.1505, "step": 15520 }, { "epoch": 0.45279771281871756, "grad_norm": 0.6798064602547464, "learning_rate": 6.000662562824056e-06, "loss": 0.1224, "step": 15521 }, { "epoch": 0.4528268860493611, "grad_norm": 0.8639071412444207, "learning_rate": 6.000199682093045e-06, "loss": 0.1521, "step": 15522 }, { "epoch": 0.45285605928000466, "grad_norm": 0.8478110353943761, "learning_rate": 5.999736792432489e-06, "loss": 0.1428, "step": 15523 }, { "epoch": 0.4528852325106482, "grad_norm": 0.704310892555951, "learning_rate": 5.9992738938465226e-06, "loss": 0.1189, "step": 15524 }, { "epoch": 0.45291440574129177, "grad_norm": 0.6830878620903953, "learning_rate": 5.998810986339276e-06, "loss": 0.1302, "step": 15525 }, { "epoch": 0.4529435789719354, "grad_norm": 0.6755613588284448, "learning_rate": 5.998348069914884e-06, "loss": 0.1466, "step": 15526 }, { "epoch": 0.45297275220257893, "grad_norm": 0.754726292745728, "learning_rate": 5.99788514457748e-06, "loss": 0.1368, "step": 15527 }, { "epoch": 0.4530019254332225, "grad_norm": 0.8819122489878344, "learning_rate": 5.997422210331194e-06, "loss": 0.1646, "step": 15528 }, { "epoch": 0.45303109866386604, "grad_norm": 0.9737874095863003, "learning_rate": 5.996959267180162e-06, "loss": 0.1405, "step": 15529 }, { "epoch": 0.4530602718945096, "grad_norm": 0.7185403255339305, "learning_rate": 5.996496315128514e-06, "loss": 0.1293, "step": 15530 }, { "epoch": 0.45308944512515315, "grad_norm": 0.808329124481926, "learning_rate": 5.996033354180386e-06, "loss": 0.1442, "step": 15531 }, { "epoch": 0.4531186183557967, "grad_norm": 0.876900716347898, "learning_rate": 5.99557038433991e-06, "loss": 0.1421, "step": 15532 }, { "epoch": 0.4531477915864403, "grad_norm": 0.9997237214924847, "learning_rate": 5.995107405611218e-06, "loss": 0.1238, "step": 15533 }, { "epoch": 0.45317696481708386, "grad_norm": 0.8329400431096406, "learning_rate": 5.994644417998447e-06, "loss": 0.1166, "step": 15534 }, { "epoch": 0.4532061380477274, "grad_norm": 0.9260749046529769, "learning_rate": 5.994181421505726e-06, "loss": 0.121, "step": 15535 }, { "epoch": 0.45323531127837097, "grad_norm": 0.9736342829476802, "learning_rate": 5.993718416137191e-06, "loss": 0.1253, "step": 15536 }, { "epoch": 0.4532644845090145, "grad_norm": 1.1189328167263761, "learning_rate": 5.993255401896976e-06, "loss": 0.1448, "step": 15537 }, { "epoch": 0.4532936577396581, "grad_norm": 1.0059683400281096, "learning_rate": 5.9927923787892125e-06, "loss": 0.137, "step": 15538 }, { "epoch": 0.4533228309703016, "grad_norm": 0.8726715932419563, "learning_rate": 5.992329346818036e-06, "loss": 0.1438, "step": 15539 }, { "epoch": 0.45335200420094524, "grad_norm": 0.9672239353443517, "learning_rate": 5.991866305987581e-06, "loss": 0.1713, "step": 15540 }, { "epoch": 0.4533811774315888, "grad_norm": 1.1397650317352954, "learning_rate": 5.99140325630198e-06, "loss": 0.1413, "step": 15541 }, { "epoch": 0.45341035066223234, "grad_norm": 0.7483326709503136, "learning_rate": 5.990940197765367e-06, "loss": 0.141, "step": 15542 }, { "epoch": 0.4534395238928759, "grad_norm": 0.7700113396276715, "learning_rate": 5.990477130381877e-06, "loss": 0.1207, "step": 15543 }, { "epoch": 0.45346869712351945, "grad_norm": 1.1356080666950594, "learning_rate": 5.990014054155644e-06, "loss": 0.128, "step": 15544 }, { "epoch": 0.453497870354163, "grad_norm": 0.9938686487025937, "learning_rate": 5.989550969090801e-06, "loss": 0.1549, "step": 15545 }, { "epoch": 0.45352704358480656, "grad_norm": 1.2927614805086236, "learning_rate": 5.989087875191481e-06, "loss": 0.131, "step": 15546 }, { "epoch": 0.45355621681545016, "grad_norm": 0.7277551171650158, "learning_rate": 5.9886247724618255e-06, "loss": 0.1438, "step": 15547 }, { "epoch": 0.4535853900460937, "grad_norm": 0.9748458948991484, "learning_rate": 5.98816166090596e-06, "loss": 0.1412, "step": 15548 }, { "epoch": 0.45361456327673727, "grad_norm": 0.8023881371892535, "learning_rate": 5.987698540528026e-06, "loss": 0.1378, "step": 15549 }, { "epoch": 0.4536437365073808, "grad_norm": 0.8176600544026086, "learning_rate": 5.987235411332153e-06, "loss": 0.1076, "step": 15550 }, { "epoch": 0.4536729097380244, "grad_norm": 0.8899976618069858, "learning_rate": 5.986772273322478e-06, "loss": 0.138, "step": 15551 }, { "epoch": 0.45370208296866793, "grad_norm": 0.7308522708829461, "learning_rate": 5.986309126503137e-06, "loss": 0.1376, "step": 15552 }, { "epoch": 0.45373125619931154, "grad_norm": 0.7756023275048722, "learning_rate": 5.985845970878263e-06, "loss": 0.1553, "step": 15553 }, { "epoch": 0.4537604294299551, "grad_norm": 1.15371829833908, "learning_rate": 5.985382806451991e-06, "loss": 0.1332, "step": 15554 }, { "epoch": 0.45378960266059865, "grad_norm": 0.6294588425665543, "learning_rate": 5.984919633228458e-06, "loss": 0.1331, "step": 15555 }, { "epoch": 0.4538187758912422, "grad_norm": 0.9856300304573363, "learning_rate": 5.984456451211795e-06, "loss": 0.1383, "step": 15556 }, { "epoch": 0.45384794912188575, "grad_norm": 0.8435417059113256, "learning_rate": 5.9839932604061425e-06, "loss": 0.1154, "step": 15557 }, { "epoch": 0.4538771223525293, "grad_norm": 0.8038887465016826, "learning_rate": 5.983530060815631e-06, "loss": 0.1333, "step": 15558 }, { "epoch": 0.45390629558317286, "grad_norm": 0.7613533098142548, "learning_rate": 5.9830668524444e-06, "loss": 0.1672, "step": 15559 }, { "epoch": 0.45393546881381647, "grad_norm": 0.8701557236316875, "learning_rate": 5.982603635296581e-06, "loss": 0.1397, "step": 15560 }, { "epoch": 0.45396464204446, "grad_norm": 0.9238254563999214, "learning_rate": 5.9821404093763116e-06, "loss": 0.1065, "step": 15561 }, { "epoch": 0.4539938152751036, "grad_norm": 0.8083320936044646, "learning_rate": 5.981677174687729e-06, "loss": 0.1197, "step": 15562 }, { "epoch": 0.4540229885057471, "grad_norm": 0.8540124986521128, "learning_rate": 5.981213931234964e-06, "loss": 0.1431, "step": 15563 }, { "epoch": 0.4540521617363907, "grad_norm": 1.0437493845420331, "learning_rate": 5.980750679022158e-06, "loss": 0.1303, "step": 15564 }, { "epoch": 0.45408133496703423, "grad_norm": 0.6804187898118352, "learning_rate": 5.980287418053442e-06, "loss": 0.1369, "step": 15565 }, { "epoch": 0.4541105081976778, "grad_norm": 0.7692563005271972, "learning_rate": 5.979824148332954e-06, "loss": 0.1495, "step": 15566 }, { "epoch": 0.4541396814283214, "grad_norm": 0.800288155652702, "learning_rate": 5.979360869864832e-06, "loss": 0.1512, "step": 15567 }, { "epoch": 0.45416885465896495, "grad_norm": 1.003529682542251, "learning_rate": 5.9788975826532085e-06, "loss": 0.1341, "step": 15568 }, { "epoch": 0.4541980278896085, "grad_norm": 0.7516609503566002, "learning_rate": 5.97843428670222e-06, "loss": 0.1413, "step": 15569 }, { "epoch": 0.45422720112025206, "grad_norm": 0.7483244607450641, "learning_rate": 5.977970982016006e-06, "loss": 0.1375, "step": 15570 }, { "epoch": 0.4542563743508956, "grad_norm": 0.6541581501805274, "learning_rate": 5.977507668598699e-06, "loss": 0.1139, "step": 15571 }, { "epoch": 0.45428554758153916, "grad_norm": 0.6464670584708954, "learning_rate": 5.977044346454437e-06, "loss": 0.1312, "step": 15572 }, { "epoch": 0.4543147208121827, "grad_norm": 0.9645176954342236, "learning_rate": 5.976581015587357e-06, "loss": 0.1507, "step": 15573 }, { "epoch": 0.4543438940428263, "grad_norm": 0.8301075641835433, "learning_rate": 5.9761176760015945e-06, "loss": 0.1429, "step": 15574 }, { "epoch": 0.4543730672734699, "grad_norm": 1.0723481299694118, "learning_rate": 5.975654327701286e-06, "loss": 0.1478, "step": 15575 }, { "epoch": 0.45440224050411343, "grad_norm": 0.8724914626221386, "learning_rate": 5.975190970690568e-06, "loss": 0.1429, "step": 15576 }, { "epoch": 0.454431413734757, "grad_norm": 0.8844472149878354, "learning_rate": 5.97472760497358e-06, "loss": 0.1346, "step": 15577 }, { "epoch": 0.45446058696540054, "grad_norm": 0.8083720723703682, "learning_rate": 5.974264230554454e-06, "loss": 0.1345, "step": 15578 }, { "epoch": 0.4544897601960441, "grad_norm": 1.3204416064588236, "learning_rate": 5.973800847437332e-06, "loss": 0.1251, "step": 15579 }, { "epoch": 0.4545189334266877, "grad_norm": 1.0738716931241754, "learning_rate": 5.973337455626348e-06, "loss": 0.1452, "step": 15580 }, { "epoch": 0.45454810665733125, "grad_norm": 1.0675443266092903, "learning_rate": 5.972874055125637e-06, "loss": 0.1411, "step": 15581 }, { "epoch": 0.4545772798879748, "grad_norm": 0.8243831145326032, "learning_rate": 5.972410645939342e-06, "loss": 0.1254, "step": 15582 }, { "epoch": 0.45460645311861836, "grad_norm": 0.7569276271537585, "learning_rate": 5.971947228071595e-06, "loss": 0.1528, "step": 15583 }, { "epoch": 0.4546356263492619, "grad_norm": 0.8531415261963556, "learning_rate": 5.971483801526536e-06, "loss": 0.1391, "step": 15584 }, { "epoch": 0.45466479957990547, "grad_norm": 0.9787891705695544, "learning_rate": 5.971020366308301e-06, "loss": 0.1334, "step": 15585 }, { "epoch": 0.454693972810549, "grad_norm": 0.7507445483480693, "learning_rate": 5.970556922421028e-06, "loss": 0.1465, "step": 15586 }, { "epoch": 0.4547231460411926, "grad_norm": 0.8299327822734983, "learning_rate": 5.970093469868855e-06, "loss": 0.1207, "step": 15587 }, { "epoch": 0.4547523192718362, "grad_norm": 0.8162947581409953, "learning_rate": 5.969630008655919e-06, "loss": 0.1715, "step": 15588 }, { "epoch": 0.45478149250247973, "grad_norm": 0.7519473468992576, "learning_rate": 5.969166538786357e-06, "loss": 0.1527, "step": 15589 }, { "epoch": 0.4548106657331233, "grad_norm": 0.8550610823679656, "learning_rate": 5.968703060264308e-06, "loss": 0.1403, "step": 15590 }, { "epoch": 0.45483983896376684, "grad_norm": 0.9089281538646847, "learning_rate": 5.968239573093909e-06, "loss": 0.1542, "step": 15591 }, { "epoch": 0.4548690121944104, "grad_norm": 0.8927215201020315, "learning_rate": 5.967776077279299e-06, "loss": 0.1516, "step": 15592 }, { "epoch": 0.45489818542505395, "grad_norm": 0.8438451780909817, "learning_rate": 5.9673125728246136e-06, "loss": 0.1618, "step": 15593 }, { "epoch": 0.45492735865569756, "grad_norm": 0.8108699552061513, "learning_rate": 5.966849059733994e-06, "loss": 0.14, "step": 15594 }, { "epoch": 0.4549565318863411, "grad_norm": 0.9168909298375204, "learning_rate": 5.966385538011577e-06, "loss": 0.1678, "step": 15595 }, { "epoch": 0.45498570511698466, "grad_norm": 0.684513498308056, "learning_rate": 5.9659220076614995e-06, "loss": 0.1509, "step": 15596 }, { "epoch": 0.4550148783476282, "grad_norm": 0.7329521844052996, "learning_rate": 5.965458468687902e-06, "loss": 0.125, "step": 15597 }, { "epoch": 0.45504405157827177, "grad_norm": 0.7877975127804708, "learning_rate": 5.964994921094921e-06, "loss": 0.1397, "step": 15598 }, { "epoch": 0.4550732248089153, "grad_norm": 0.8390761540958495, "learning_rate": 5.964531364886696e-06, "loss": 0.1449, "step": 15599 }, { "epoch": 0.4551023980395589, "grad_norm": 0.8375526024404093, "learning_rate": 5.964067800067366e-06, "loss": 0.1434, "step": 15600 }, { "epoch": 0.4551315712702025, "grad_norm": 0.784226838130666, "learning_rate": 5.9636042266410666e-06, "loss": 0.1398, "step": 15601 }, { "epoch": 0.45516074450084604, "grad_norm": 1.188310705158349, "learning_rate": 5.96314064461194e-06, "loss": 0.1507, "step": 15602 }, { "epoch": 0.4551899177314896, "grad_norm": 0.8884153249430453, "learning_rate": 5.962677053984124e-06, "loss": 0.1318, "step": 15603 }, { "epoch": 0.45521909096213314, "grad_norm": 0.8591894126257077, "learning_rate": 5.962213454761758e-06, "loss": 0.1347, "step": 15604 }, { "epoch": 0.4552482641927767, "grad_norm": 0.8814817104144005, "learning_rate": 5.961749846948977e-06, "loss": 0.1266, "step": 15605 }, { "epoch": 0.45527743742342025, "grad_norm": 0.8035971967177175, "learning_rate": 5.961286230549925e-06, "loss": 0.1422, "step": 15606 }, { "epoch": 0.4553066106540638, "grad_norm": 0.8688263422836926, "learning_rate": 5.96082260556874e-06, "loss": 0.1463, "step": 15607 }, { "epoch": 0.4553357838847074, "grad_norm": 0.868205180530539, "learning_rate": 5.9603589720095575e-06, "loss": 0.1334, "step": 15608 }, { "epoch": 0.45536495711535097, "grad_norm": 0.7593555479635258, "learning_rate": 5.959895329876521e-06, "loss": 0.1627, "step": 15609 }, { "epoch": 0.4553941303459945, "grad_norm": 0.9070878175146634, "learning_rate": 5.959431679173768e-06, "loss": 0.1438, "step": 15610 }, { "epoch": 0.45542330357663807, "grad_norm": 0.7284378881575929, "learning_rate": 5.958968019905438e-06, "loss": 0.1518, "step": 15611 }, { "epoch": 0.4554524768072816, "grad_norm": 0.8233408714281342, "learning_rate": 5.95850435207567e-06, "loss": 0.1225, "step": 15612 }, { "epoch": 0.4554816500379252, "grad_norm": 0.6495865762672907, "learning_rate": 5.9580406756886046e-06, "loss": 0.1332, "step": 15613 }, { "epoch": 0.4555108232685688, "grad_norm": 0.8377952270274468, "learning_rate": 5.957576990748381e-06, "loss": 0.1348, "step": 15614 }, { "epoch": 0.45553999649921234, "grad_norm": 0.8555159658834381, "learning_rate": 5.957113297259137e-06, "loss": 0.1294, "step": 15615 }, { "epoch": 0.4555691697298559, "grad_norm": 0.9373924405724049, "learning_rate": 5.956649595225015e-06, "loss": 0.1473, "step": 15616 }, { "epoch": 0.45559834296049945, "grad_norm": 0.8757514150161813, "learning_rate": 5.956185884650154e-06, "loss": 0.1538, "step": 15617 }, { "epoch": 0.455627516191143, "grad_norm": 0.6024764240720053, "learning_rate": 5.955722165538693e-06, "loss": 0.1267, "step": 15618 }, { "epoch": 0.45565668942178655, "grad_norm": 0.9658590661050219, "learning_rate": 5.9552584378947746e-06, "loss": 0.1291, "step": 15619 }, { "epoch": 0.4556858626524301, "grad_norm": 0.8764901413759552, "learning_rate": 5.954794701722534e-06, "loss": 0.1311, "step": 15620 }, { "epoch": 0.4557150358830737, "grad_norm": 0.780645758995893, "learning_rate": 5.954330957026115e-06, "loss": 0.1202, "step": 15621 }, { "epoch": 0.45574420911371727, "grad_norm": 0.8320018159178345, "learning_rate": 5.953867203809659e-06, "loss": 0.1486, "step": 15622 }, { "epoch": 0.4557733823443608, "grad_norm": 0.7268077402865062, "learning_rate": 5.953403442077302e-06, "loss": 0.1322, "step": 15623 }, { "epoch": 0.4558025555750044, "grad_norm": 0.770748963245814, "learning_rate": 5.952939671833189e-06, "loss": 0.1121, "step": 15624 }, { "epoch": 0.45583172880564793, "grad_norm": 0.7652247459678828, "learning_rate": 5.9524758930814565e-06, "loss": 0.1243, "step": 15625 }, { "epoch": 0.4558609020362915, "grad_norm": 0.846546008252921, "learning_rate": 5.952012105826247e-06, "loss": 0.1317, "step": 15626 }, { "epoch": 0.45589007526693504, "grad_norm": 0.6920386970271211, "learning_rate": 5.9515483100716994e-06, "loss": 0.1261, "step": 15627 }, { "epoch": 0.45591924849757864, "grad_norm": 0.8808606251055751, "learning_rate": 5.951084505821957e-06, "loss": 0.1725, "step": 15628 }, { "epoch": 0.4559484217282222, "grad_norm": 0.6640518970320024, "learning_rate": 5.950620693081159e-06, "loss": 0.123, "step": 15629 }, { "epoch": 0.45597759495886575, "grad_norm": 0.8132760484031268, "learning_rate": 5.950156871853446e-06, "loss": 0.1639, "step": 15630 }, { "epoch": 0.4560067681895093, "grad_norm": 0.8180047225855253, "learning_rate": 5.94969304214296e-06, "loss": 0.1458, "step": 15631 }, { "epoch": 0.45603594142015286, "grad_norm": 0.7903443282540956, "learning_rate": 5.94922920395384e-06, "loss": 0.1364, "step": 15632 }, { "epoch": 0.4560651146507964, "grad_norm": 0.7793112046755744, "learning_rate": 5.948765357290229e-06, "loss": 0.1368, "step": 15633 }, { "epoch": 0.45609428788143996, "grad_norm": 0.7479085223042332, "learning_rate": 5.94830150215627e-06, "loss": 0.1462, "step": 15634 }, { "epoch": 0.4561234611120836, "grad_norm": 0.7091872238254096, "learning_rate": 5.947837638556096e-06, "loss": 0.1318, "step": 15635 }, { "epoch": 0.4561526343427271, "grad_norm": 0.9630085534828409, "learning_rate": 5.947373766493858e-06, "loss": 0.1337, "step": 15636 }, { "epoch": 0.4561818075733707, "grad_norm": 0.9419381849430555, "learning_rate": 5.946909885973693e-06, "loss": 0.1222, "step": 15637 }, { "epoch": 0.45621098080401423, "grad_norm": 0.8737622678445661, "learning_rate": 5.94644599699974e-06, "loss": 0.1392, "step": 15638 }, { "epoch": 0.4562401540346578, "grad_norm": 0.7275142853586954, "learning_rate": 5.945982099576147e-06, "loss": 0.1472, "step": 15639 }, { "epoch": 0.45626932726530134, "grad_norm": 0.8130554862107516, "learning_rate": 5.945518193707048e-06, "loss": 0.1315, "step": 15640 }, { "epoch": 0.45629850049594495, "grad_norm": 0.6153955434577857, "learning_rate": 5.945054279396589e-06, "loss": 0.1294, "step": 15641 }, { "epoch": 0.4563276737265885, "grad_norm": 1.0260667782830692, "learning_rate": 5.944590356648913e-06, "loss": 0.161, "step": 15642 }, { "epoch": 0.45635684695723205, "grad_norm": 0.6957108945306344, "learning_rate": 5.944126425468158e-06, "loss": 0.123, "step": 15643 }, { "epoch": 0.4563860201878756, "grad_norm": 0.7159912722493332, "learning_rate": 5.943662485858468e-06, "loss": 0.1206, "step": 15644 }, { "epoch": 0.45641519341851916, "grad_norm": 1.1752349501025643, "learning_rate": 5.9431985378239845e-06, "loss": 0.1787, "step": 15645 }, { "epoch": 0.4564443666491627, "grad_norm": 0.9045465807891037, "learning_rate": 5.94273458136885e-06, "loss": 0.1486, "step": 15646 }, { "epoch": 0.45647353987980627, "grad_norm": 0.6638173725878435, "learning_rate": 5.942270616497206e-06, "loss": 0.1113, "step": 15647 }, { "epoch": 0.4565027131104499, "grad_norm": 1.0599652223154115, "learning_rate": 5.941806643213194e-06, "loss": 0.1386, "step": 15648 }, { "epoch": 0.45653188634109343, "grad_norm": 1.108494409632205, "learning_rate": 5.941342661520959e-06, "loss": 0.1446, "step": 15649 }, { "epoch": 0.456561059571737, "grad_norm": 0.8771978366714083, "learning_rate": 5.940878671424639e-06, "loss": 0.1388, "step": 15650 }, { "epoch": 0.45659023280238054, "grad_norm": 0.8269749943895522, "learning_rate": 5.940414672928381e-06, "loss": 0.1236, "step": 15651 }, { "epoch": 0.4566194060330241, "grad_norm": 0.8759997309413657, "learning_rate": 5.9399506660363244e-06, "loss": 0.1394, "step": 15652 }, { "epoch": 0.45664857926366764, "grad_norm": 0.9488383640892364, "learning_rate": 5.939486650752612e-06, "loss": 0.136, "step": 15653 }, { "epoch": 0.4566777524943112, "grad_norm": 0.6794040859282786, "learning_rate": 5.939022627081389e-06, "loss": 0.123, "step": 15654 }, { "epoch": 0.4567069257249548, "grad_norm": 0.7957088297622816, "learning_rate": 5.938558595026794e-06, "loss": 0.1195, "step": 15655 }, { "epoch": 0.45673609895559836, "grad_norm": 0.8379369819921373, "learning_rate": 5.938094554592973e-06, "loss": 0.1637, "step": 15656 }, { "epoch": 0.4567652721862419, "grad_norm": 0.8238980349361563, "learning_rate": 5.937630505784068e-06, "loss": 0.1355, "step": 15657 }, { "epoch": 0.45679444541688546, "grad_norm": 0.8688347774364377, "learning_rate": 5.9371664486042216e-06, "loss": 0.132, "step": 15658 }, { "epoch": 0.456823618647529, "grad_norm": 1.095856481281604, "learning_rate": 5.936702383057576e-06, "loss": 0.1301, "step": 15659 }, { "epoch": 0.45685279187817257, "grad_norm": 0.8897872583146305, "learning_rate": 5.936238309148276e-06, "loss": 0.1311, "step": 15660 }, { "epoch": 0.4568819651088161, "grad_norm": 0.7323225377747237, "learning_rate": 5.935774226880463e-06, "loss": 0.1243, "step": 15661 }, { "epoch": 0.45691113833945973, "grad_norm": 0.8574268429481138, "learning_rate": 5.9353101362582825e-06, "loss": 0.1275, "step": 15662 }, { "epoch": 0.4569403115701033, "grad_norm": 1.0035717491324825, "learning_rate": 5.934846037285875e-06, "loss": 0.1274, "step": 15663 }, { "epoch": 0.45696948480074684, "grad_norm": 0.6340515572348439, "learning_rate": 5.9343819299673865e-06, "loss": 0.1032, "step": 15664 }, { "epoch": 0.4569986580313904, "grad_norm": 0.9438652048879773, "learning_rate": 5.933917814306958e-06, "loss": 0.1508, "step": 15665 }, { "epoch": 0.45702783126203395, "grad_norm": 0.9742379780115008, "learning_rate": 5.933453690308734e-06, "loss": 0.1412, "step": 15666 }, { "epoch": 0.4570570044926775, "grad_norm": 0.8079002900725311, "learning_rate": 5.93298955797686e-06, "loss": 0.1339, "step": 15667 }, { "epoch": 0.4570861777233211, "grad_norm": 1.1181298450483161, "learning_rate": 5.9325254173154754e-06, "loss": 0.1351, "step": 15668 }, { "epoch": 0.45711535095396466, "grad_norm": 1.2047272065936931, "learning_rate": 5.932061268328729e-06, "loss": 0.126, "step": 15669 }, { "epoch": 0.4571445241846082, "grad_norm": 0.9596218433201393, "learning_rate": 5.931597111020762e-06, "loss": 0.1295, "step": 15670 }, { "epoch": 0.45717369741525177, "grad_norm": 1.0413839676500838, "learning_rate": 5.931132945395717e-06, "loss": 0.1467, "step": 15671 }, { "epoch": 0.4572028706458953, "grad_norm": 1.3019729448171455, "learning_rate": 5.930668771457739e-06, "loss": 0.1244, "step": 15672 }, { "epoch": 0.4572320438765389, "grad_norm": 0.8658591039474676, "learning_rate": 5.930204589210974e-06, "loss": 0.1481, "step": 15673 }, { "epoch": 0.4572612171071824, "grad_norm": 0.7759193174158013, "learning_rate": 5.929740398659563e-06, "loss": 0.1408, "step": 15674 }, { "epoch": 0.45729039033782604, "grad_norm": 0.8641358135923433, "learning_rate": 5.929276199807652e-06, "loss": 0.151, "step": 15675 }, { "epoch": 0.4573195635684696, "grad_norm": 0.7434838348542243, "learning_rate": 5.928811992659386e-06, "loss": 0.1453, "step": 15676 }, { "epoch": 0.45734873679911314, "grad_norm": 0.9680447752688965, "learning_rate": 5.928347777218907e-06, "loss": 0.1489, "step": 15677 }, { "epoch": 0.4573779100297567, "grad_norm": 0.7862911694994094, "learning_rate": 5.927883553490361e-06, "loss": 0.1265, "step": 15678 }, { "epoch": 0.45740708326040025, "grad_norm": 1.045671477489225, "learning_rate": 5.927419321477893e-06, "loss": 0.1559, "step": 15679 }, { "epoch": 0.4574362564910438, "grad_norm": 0.8921272670391884, "learning_rate": 5.926955081185646e-06, "loss": 0.1695, "step": 15680 }, { "epoch": 0.45746542972168736, "grad_norm": 0.7146774925535057, "learning_rate": 5.926490832617764e-06, "loss": 0.1307, "step": 15681 }, { "epoch": 0.45749460295233096, "grad_norm": 0.9267503736800597, "learning_rate": 5.926026575778396e-06, "loss": 0.1105, "step": 15682 }, { "epoch": 0.4575237761829745, "grad_norm": 0.7048558503763448, "learning_rate": 5.9255623106716805e-06, "loss": 0.1403, "step": 15683 }, { "epoch": 0.45755294941361807, "grad_norm": 0.9110532148569493, "learning_rate": 5.925098037301769e-06, "loss": 0.1727, "step": 15684 }, { "epoch": 0.4575821226442616, "grad_norm": 1.1119534675833322, "learning_rate": 5.9246337556728005e-06, "loss": 0.1407, "step": 15685 }, { "epoch": 0.4576112958749052, "grad_norm": 0.6629367221488919, "learning_rate": 5.9241694657889236e-06, "loss": 0.1429, "step": 15686 }, { "epoch": 0.45764046910554873, "grad_norm": 0.6451570080377812, "learning_rate": 5.9237051676542825e-06, "loss": 0.1166, "step": 15687 }, { "epoch": 0.4576696423361923, "grad_norm": 0.8167856611009412, "learning_rate": 5.923240861273021e-06, "loss": 0.1632, "step": 15688 }, { "epoch": 0.4576988155668359, "grad_norm": 1.0186215312827995, "learning_rate": 5.922776546649287e-06, "loss": 0.1269, "step": 15689 }, { "epoch": 0.45772798879747945, "grad_norm": 0.7110533075572291, "learning_rate": 5.922312223787223e-06, "loss": 0.1395, "step": 15690 }, { "epoch": 0.457757162028123, "grad_norm": 0.7325905083465877, "learning_rate": 5.921847892690976e-06, "loss": 0.1408, "step": 15691 }, { "epoch": 0.45778633525876655, "grad_norm": 1.1060383046376248, "learning_rate": 5.9213835533646914e-06, "loss": 0.1358, "step": 15692 }, { "epoch": 0.4578155084894101, "grad_norm": 0.8946239487513469, "learning_rate": 5.920919205812514e-06, "loss": 0.1249, "step": 15693 }, { "epoch": 0.45784468172005366, "grad_norm": 0.8759308957921003, "learning_rate": 5.920454850038591e-06, "loss": 0.1302, "step": 15694 }, { "epoch": 0.45787385495069727, "grad_norm": 1.1367852553850144, "learning_rate": 5.919990486047065e-06, "loss": 0.1378, "step": 15695 }, { "epoch": 0.4579030281813408, "grad_norm": 0.7844432128423668, "learning_rate": 5.919526113842085e-06, "loss": 0.1208, "step": 15696 }, { "epoch": 0.4579322014119844, "grad_norm": 0.8387595450575478, "learning_rate": 5.9190617334277955e-06, "loss": 0.1366, "step": 15697 }, { "epoch": 0.4579613746426279, "grad_norm": 0.9436747595291688, "learning_rate": 5.91859734480834e-06, "loss": 0.1487, "step": 15698 }, { "epoch": 0.4579905478732715, "grad_norm": 0.8176453567039192, "learning_rate": 5.9181329479878694e-06, "loss": 0.1326, "step": 15699 }, { "epoch": 0.45801972110391503, "grad_norm": 0.8249520224108696, "learning_rate": 5.917668542970525e-06, "loss": 0.1346, "step": 15700 }, { "epoch": 0.4580488943345586, "grad_norm": 1.1771261961324997, "learning_rate": 5.917204129760457e-06, "loss": 0.1258, "step": 15701 }, { "epoch": 0.4580780675652022, "grad_norm": 0.922435303808003, "learning_rate": 5.916739708361807e-06, "loss": 0.1468, "step": 15702 }, { "epoch": 0.45810724079584575, "grad_norm": 0.7783907730184846, "learning_rate": 5.916275278778725e-06, "loss": 0.1308, "step": 15703 }, { "epoch": 0.4581364140264893, "grad_norm": 0.8477210818925518, "learning_rate": 5.915810841015356e-06, "loss": 0.1288, "step": 15704 }, { "epoch": 0.45816558725713286, "grad_norm": 0.9263544934521192, "learning_rate": 5.9153463950758465e-06, "loss": 0.1392, "step": 15705 }, { "epoch": 0.4581947604877764, "grad_norm": 1.1589657547471184, "learning_rate": 5.914881940964343e-06, "loss": 0.1556, "step": 15706 }, { "epoch": 0.45822393371841996, "grad_norm": 0.7878431455735818, "learning_rate": 5.914417478684992e-06, "loss": 0.1244, "step": 15707 }, { "epoch": 0.4582531069490635, "grad_norm": 1.048494819227112, "learning_rate": 5.913953008241939e-06, "loss": 0.1429, "step": 15708 }, { "epoch": 0.4582822801797071, "grad_norm": 1.0324088380933156, "learning_rate": 5.913488529639334e-06, "loss": 0.1603, "step": 15709 }, { "epoch": 0.4583114534103507, "grad_norm": 0.7533032521400135, "learning_rate": 5.913024042881319e-06, "loss": 0.1267, "step": 15710 }, { "epoch": 0.45834062664099423, "grad_norm": 0.7113361607782436, "learning_rate": 5.912559547972043e-06, "loss": 0.1301, "step": 15711 }, { "epoch": 0.4583697998716378, "grad_norm": 0.9468588685317267, "learning_rate": 5.912095044915655e-06, "loss": 0.1429, "step": 15712 }, { "epoch": 0.45839897310228134, "grad_norm": 1.0343857302662343, "learning_rate": 5.911630533716299e-06, "loss": 0.1347, "step": 15713 }, { "epoch": 0.4584281463329249, "grad_norm": 0.7442153617635787, "learning_rate": 5.911166014378126e-06, "loss": 0.134, "step": 15714 }, { "epoch": 0.45845731956356844, "grad_norm": 1.0059846109109405, "learning_rate": 5.910701486905277e-06, "loss": 0.1327, "step": 15715 }, { "epoch": 0.45848649279421205, "grad_norm": 0.8944237191137988, "learning_rate": 5.910236951301904e-06, "loss": 0.1669, "step": 15716 }, { "epoch": 0.4585156660248556, "grad_norm": 0.7624367383167346, "learning_rate": 5.909772407572153e-06, "loss": 0.136, "step": 15717 }, { "epoch": 0.45854483925549916, "grad_norm": 0.84192067433435, "learning_rate": 5.90930785572017e-06, "loss": 0.1112, "step": 15718 }, { "epoch": 0.4585740124861427, "grad_norm": 0.7057893376643394, "learning_rate": 5.908843295750104e-06, "loss": 0.1616, "step": 15719 }, { "epoch": 0.45860318571678627, "grad_norm": 0.8740791571990643, "learning_rate": 5.908378727666103e-06, "loss": 0.1485, "step": 15720 }, { "epoch": 0.4586323589474298, "grad_norm": 0.8135321402168436, "learning_rate": 5.907914151472312e-06, "loss": 0.1325, "step": 15721 }, { "epoch": 0.4586615321780734, "grad_norm": 0.848164326095268, "learning_rate": 5.9074495671728814e-06, "loss": 0.1317, "step": 15722 }, { "epoch": 0.458690705408717, "grad_norm": 0.704790213205819, "learning_rate": 5.9069849747719565e-06, "loss": 0.1221, "step": 15723 }, { "epoch": 0.45871987863936053, "grad_norm": 0.7766729929604375, "learning_rate": 5.906520374273688e-06, "loss": 0.1364, "step": 15724 }, { "epoch": 0.4587490518700041, "grad_norm": 0.7079896371676835, "learning_rate": 5.90605576568222e-06, "loss": 0.1366, "step": 15725 }, { "epoch": 0.45877822510064764, "grad_norm": 0.7778515559501419, "learning_rate": 5.905591149001704e-06, "loss": 0.1276, "step": 15726 }, { "epoch": 0.4588073983312912, "grad_norm": 0.9684472097472696, "learning_rate": 5.9051265242362854e-06, "loss": 0.1346, "step": 15727 }, { "epoch": 0.45883657156193475, "grad_norm": 0.676124760181125, "learning_rate": 5.904661891390114e-06, "loss": 0.1197, "step": 15728 }, { "epoch": 0.45886574479257836, "grad_norm": 0.6906927877819188, "learning_rate": 5.904197250467339e-06, "loss": 0.1425, "step": 15729 }, { "epoch": 0.4588949180232219, "grad_norm": 1.2108509483220204, "learning_rate": 5.903732601472102e-06, "loss": 0.1395, "step": 15730 }, { "epoch": 0.45892409125386546, "grad_norm": 1.0535901221585215, "learning_rate": 5.903267944408561e-06, "loss": 0.1449, "step": 15731 }, { "epoch": 0.458953264484509, "grad_norm": 0.730594325168327, "learning_rate": 5.902803279280857e-06, "loss": 0.106, "step": 15732 }, { "epoch": 0.45898243771515257, "grad_norm": 0.9053460795820518, "learning_rate": 5.902338606093139e-06, "loss": 0.1285, "step": 15733 }, { "epoch": 0.4590116109457961, "grad_norm": 0.7754003592857962, "learning_rate": 5.9018739248495605e-06, "loss": 0.1299, "step": 15734 }, { "epoch": 0.4590407841764397, "grad_norm": 0.7470854924206519, "learning_rate": 5.901409235554265e-06, "loss": 0.1078, "step": 15735 }, { "epoch": 0.4590699574070833, "grad_norm": 0.8360333458535162, "learning_rate": 5.900944538211404e-06, "loss": 0.1367, "step": 15736 }, { "epoch": 0.45909913063772684, "grad_norm": 0.8861071433531529, "learning_rate": 5.9004798328251255e-06, "loss": 0.1485, "step": 15737 }, { "epoch": 0.4591283038683704, "grad_norm": 0.9182952557822152, "learning_rate": 5.900015119399577e-06, "loss": 0.1466, "step": 15738 }, { "epoch": 0.45915747709901394, "grad_norm": 1.1191553678586899, "learning_rate": 5.899550397938909e-06, "loss": 0.148, "step": 15739 }, { "epoch": 0.4591866503296575, "grad_norm": 0.816610906007468, "learning_rate": 5.89908566844727e-06, "loss": 0.1262, "step": 15740 }, { "epoch": 0.45921582356030105, "grad_norm": 1.1109664468265683, "learning_rate": 5.898620930928808e-06, "loss": 0.1432, "step": 15741 }, { "epoch": 0.4592449967909446, "grad_norm": 0.8793888636473848, "learning_rate": 5.898156185387674e-06, "loss": 0.156, "step": 15742 }, { "epoch": 0.4592741700215882, "grad_norm": 0.7624539990976663, "learning_rate": 5.897691431828014e-06, "loss": 0.135, "step": 15743 }, { "epoch": 0.45930334325223177, "grad_norm": 0.9667460169531502, "learning_rate": 5.897226670253982e-06, "loss": 0.1476, "step": 15744 }, { "epoch": 0.4593325164828753, "grad_norm": 0.8293488957500059, "learning_rate": 5.896761900669722e-06, "loss": 0.1422, "step": 15745 }, { "epoch": 0.4593616897135189, "grad_norm": 0.8085583621733674, "learning_rate": 5.896297123079388e-06, "loss": 0.1448, "step": 15746 }, { "epoch": 0.4593908629441624, "grad_norm": 0.9251861458750373, "learning_rate": 5.895832337487126e-06, "loss": 0.1357, "step": 15747 }, { "epoch": 0.459420036174806, "grad_norm": 0.773105611322147, "learning_rate": 5.895367543897086e-06, "loss": 0.1397, "step": 15748 }, { "epoch": 0.45944920940544953, "grad_norm": 0.9358682107421241, "learning_rate": 5.89490274231342e-06, "loss": 0.1257, "step": 15749 }, { "epoch": 0.45947838263609314, "grad_norm": 0.7510929156606305, "learning_rate": 5.894437932740274e-06, "loss": 0.127, "step": 15750 }, { "epoch": 0.4595075558667367, "grad_norm": 0.8084065674036928, "learning_rate": 5.893973115181801e-06, "loss": 0.1441, "step": 15751 }, { "epoch": 0.45953672909738025, "grad_norm": 0.822699837033059, "learning_rate": 5.8935082896421495e-06, "loss": 0.1429, "step": 15752 }, { "epoch": 0.4595659023280238, "grad_norm": 0.8620453745010047, "learning_rate": 5.893043456125469e-06, "loss": 0.1112, "step": 15753 }, { "epoch": 0.45959507555866735, "grad_norm": 1.0328251337959462, "learning_rate": 5.892578614635909e-06, "loss": 0.1393, "step": 15754 }, { "epoch": 0.4596242487893109, "grad_norm": 0.8911042434095463, "learning_rate": 5.892113765177621e-06, "loss": 0.1269, "step": 15755 }, { "epoch": 0.4596534220199545, "grad_norm": 0.7718290472463595, "learning_rate": 5.891648907754753e-06, "loss": 0.136, "step": 15756 }, { "epoch": 0.45968259525059807, "grad_norm": 0.9180483729049965, "learning_rate": 5.891184042371459e-06, "loss": 0.1252, "step": 15757 }, { "epoch": 0.4597117684812416, "grad_norm": 0.9859209708795096, "learning_rate": 5.890719169031885e-06, "loss": 0.1557, "step": 15758 }, { "epoch": 0.4597409417118852, "grad_norm": 0.8682703837180158, "learning_rate": 5.890254287740183e-06, "loss": 0.1461, "step": 15759 }, { "epoch": 0.45977011494252873, "grad_norm": 0.9547434965349666, "learning_rate": 5.889789398500503e-06, "loss": 0.1371, "step": 15760 }, { "epoch": 0.4597992881731723, "grad_norm": 1.1981820087067303, "learning_rate": 5.8893245013169965e-06, "loss": 0.1411, "step": 15761 }, { "epoch": 0.45982846140381584, "grad_norm": 1.1017722517492872, "learning_rate": 5.888859596193812e-06, "loss": 0.1496, "step": 15762 }, { "epoch": 0.45985763463445944, "grad_norm": 0.7807452502563147, "learning_rate": 5.8883946831351014e-06, "loss": 0.1224, "step": 15763 }, { "epoch": 0.459886807865103, "grad_norm": 0.8802133211082914, "learning_rate": 5.887929762145016e-06, "loss": 0.1343, "step": 15764 }, { "epoch": 0.45991598109574655, "grad_norm": 1.0183444332824991, "learning_rate": 5.887464833227705e-06, "loss": 0.1534, "step": 15765 }, { "epoch": 0.4599451543263901, "grad_norm": 1.0514384318453445, "learning_rate": 5.8869998963873195e-06, "loss": 0.1323, "step": 15766 }, { "epoch": 0.45997432755703366, "grad_norm": 0.8659569271433172, "learning_rate": 5.886534951628011e-06, "loss": 0.1539, "step": 15767 }, { "epoch": 0.4600035007876772, "grad_norm": 0.969923767980383, "learning_rate": 5.88606999895393e-06, "loss": 0.1455, "step": 15768 }, { "epoch": 0.46003267401832076, "grad_norm": 2.026311164005376, "learning_rate": 5.885605038369228e-06, "loss": 0.1345, "step": 15769 }, { "epoch": 0.4600618472489644, "grad_norm": 1.0163049865030611, "learning_rate": 5.885140069878056e-06, "loss": 0.1313, "step": 15770 }, { "epoch": 0.4600910204796079, "grad_norm": 0.7852708379640772, "learning_rate": 5.884675093484565e-06, "loss": 0.1119, "step": 15771 }, { "epoch": 0.4601201937102515, "grad_norm": 0.8553733755474103, "learning_rate": 5.884210109192904e-06, "loss": 0.1698, "step": 15772 }, { "epoch": 0.46014936694089503, "grad_norm": 0.9825994654228479, "learning_rate": 5.883745117007227e-06, "loss": 0.1414, "step": 15773 }, { "epoch": 0.4601785401715386, "grad_norm": 1.1105769379837276, "learning_rate": 5.883280116931687e-06, "loss": 0.1795, "step": 15774 }, { "epoch": 0.46020771340218214, "grad_norm": 0.7570744073486113, "learning_rate": 5.882815108970429e-06, "loss": 0.1167, "step": 15775 }, { "epoch": 0.4602368866328257, "grad_norm": 0.9461345232593066, "learning_rate": 5.882350093127611e-06, "loss": 0.1551, "step": 15776 }, { "epoch": 0.4602660598634693, "grad_norm": 0.9622667746723423, "learning_rate": 5.881885069407382e-06, "loss": 0.138, "step": 15777 }, { "epoch": 0.46029523309411285, "grad_norm": 0.7238455267781404, "learning_rate": 5.881420037813892e-06, "loss": 0.1303, "step": 15778 }, { "epoch": 0.4603244063247564, "grad_norm": 0.7767999021164391, "learning_rate": 5.880954998351296e-06, "loss": 0.1333, "step": 15779 }, { "epoch": 0.46035357955539996, "grad_norm": 0.7106336440672024, "learning_rate": 5.8804899510237435e-06, "loss": 0.1248, "step": 15780 }, { "epoch": 0.4603827527860435, "grad_norm": 0.7356084204781619, "learning_rate": 5.880024895835387e-06, "loss": 0.161, "step": 15781 }, { "epoch": 0.46041192601668707, "grad_norm": 0.7885342508501595, "learning_rate": 5.879559832790378e-06, "loss": 0.1349, "step": 15782 }, { "epoch": 0.4604410992473307, "grad_norm": 0.8843470982483292, "learning_rate": 5.8790947618928686e-06, "loss": 0.1549, "step": 15783 }, { "epoch": 0.46047027247797423, "grad_norm": 0.7950865713698176, "learning_rate": 5.878629683147011e-06, "loss": 0.1552, "step": 15784 }, { "epoch": 0.4604994457086178, "grad_norm": 0.76882081982006, "learning_rate": 5.878164596556958e-06, "loss": 0.1415, "step": 15785 }, { "epoch": 0.46052861893926134, "grad_norm": 0.7126205467770146, "learning_rate": 5.87769950212686e-06, "loss": 0.1221, "step": 15786 }, { "epoch": 0.4605577921699049, "grad_norm": 0.9999807887006671, "learning_rate": 5.877234399860872e-06, "loss": 0.1513, "step": 15787 }, { "epoch": 0.46058696540054844, "grad_norm": 0.8897769518034591, "learning_rate": 5.876769289763144e-06, "loss": 0.1587, "step": 15788 }, { "epoch": 0.460616138631192, "grad_norm": 0.8672343051128001, "learning_rate": 5.876304171837829e-06, "loss": 0.1574, "step": 15789 }, { "epoch": 0.4606453118618356, "grad_norm": 0.9092971735426206, "learning_rate": 5.875839046089078e-06, "loss": 0.1332, "step": 15790 }, { "epoch": 0.46067448509247916, "grad_norm": 0.8162992516912753, "learning_rate": 5.875373912521047e-06, "loss": 0.1321, "step": 15791 }, { "epoch": 0.4607036583231227, "grad_norm": 0.9867222396381891, "learning_rate": 5.874908771137887e-06, "loss": 0.1341, "step": 15792 }, { "epoch": 0.46073283155376626, "grad_norm": 0.7682594536658461, "learning_rate": 5.874443621943749e-06, "loss": 0.1305, "step": 15793 }, { "epoch": 0.4607620047844098, "grad_norm": 0.7447781676970837, "learning_rate": 5.873978464942788e-06, "loss": 0.1393, "step": 15794 }, { "epoch": 0.46079117801505337, "grad_norm": 0.8220847614500845, "learning_rate": 5.873513300139155e-06, "loss": 0.1376, "step": 15795 }, { "epoch": 0.4608203512456969, "grad_norm": 0.9003115680649544, "learning_rate": 5.873048127537005e-06, "loss": 0.1283, "step": 15796 }, { "epoch": 0.46084952447634053, "grad_norm": 0.6516482520777607, "learning_rate": 5.8725829471404884e-06, "loss": 0.1268, "step": 15797 }, { "epoch": 0.4608786977069841, "grad_norm": 0.8948338886054641, "learning_rate": 5.87211775895376e-06, "loss": 0.1359, "step": 15798 }, { "epoch": 0.46090787093762764, "grad_norm": 0.7926542584453049, "learning_rate": 5.871652562980973e-06, "loss": 0.1421, "step": 15799 }, { "epoch": 0.4609370441682712, "grad_norm": 1.140870125403056, "learning_rate": 5.871187359226279e-06, "loss": 0.1334, "step": 15800 }, { "epoch": 0.46096621739891475, "grad_norm": 0.7507135948161748, "learning_rate": 5.870722147693832e-06, "loss": 0.1395, "step": 15801 }, { "epoch": 0.4609953906295583, "grad_norm": 0.8021253975166392, "learning_rate": 5.870256928387788e-06, "loss": 0.1351, "step": 15802 }, { "epoch": 0.46102456386020185, "grad_norm": 0.7941479253720344, "learning_rate": 5.8697917013122955e-06, "loss": 0.1427, "step": 15803 }, { "epoch": 0.46105373709084546, "grad_norm": 0.8310011848931662, "learning_rate": 5.869326466471512e-06, "loss": 0.1563, "step": 15804 }, { "epoch": 0.461082910321489, "grad_norm": 0.7490702873320659, "learning_rate": 5.868861223869587e-06, "loss": 0.1321, "step": 15805 }, { "epoch": 0.46111208355213257, "grad_norm": 0.7299876823540122, "learning_rate": 5.868395973510679e-06, "loss": 0.1423, "step": 15806 }, { "epoch": 0.4611412567827761, "grad_norm": 0.7715733570698626, "learning_rate": 5.867930715398938e-06, "loss": 0.132, "step": 15807 }, { "epoch": 0.4611704300134197, "grad_norm": 0.7824107871791222, "learning_rate": 5.867465449538518e-06, "loss": 0.1356, "step": 15808 }, { "epoch": 0.4611996032440632, "grad_norm": 0.7929137437316226, "learning_rate": 5.8670001759335745e-06, "loss": 0.1374, "step": 15809 }, { "epoch": 0.46122877647470684, "grad_norm": 0.787361562331058, "learning_rate": 5.86653489458826e-06, "loss": 0.1274, "step": 15810 }, { "epoch": 0.4612579497053504, "grad_norm": 0.6777553009520738, "learning_rate": 5.866069605506729e-06, "loss": 0.1245, "step": 15811 }, { "epoch": 0.46128712293599394, "grad_norm": 0.7695877637104159, "learning_rate": 5.865604308693136e-06, "loss": 0.1516, "step": 15812 }, { "epoch": 0.4613162961666375, "grad_norm": 1.0323711240579274, "learning_rate": 5.865139004151633e-06, "loss": 0.1448, "step": 15813 }, { "epoch": 0.46134546939728105, "grad_norm": 0.799632101319446, "learning_rate": 5.864673691886375e-06, "loss": 0.1519, "step": 15814 }, { "epoch": 0.4613746426279246, "grad_norm": 0.7439077035334605, "learning_rate": 5.864208371901519e-06, "loss": 0.1462, "step": 15815 }, { "epoch": 0.46140381585856816, "grad_norm": 0.9327709860995128, "learning_rate": 5.863743044201215e-06, "loss": 0.1285, "step": 15816 }, { "epoch": 0.46143298908921176, "grad_norm": 0.998590255210444, "learning_rate": 5.8632777087896205e-06, "loss": 0.1448, "step": 15817 }, { "epoch": 0.4614621623198553, "grad_norm": 0.8592407937248154, "learning_rate": 5.862812365670888e-06, "loss": 0.1331, "step": 15818 }, { "epoch": 0.46149133555049887, "grad_norm": 0.9755159497312703, "learning_rate": 5.862347014849174e-06, "loss": 0.1505, "step": 15819 }, { "epoch": 0.4615205087811424, "grad_norm": 0.9038505585913441, "learning_rate": 5.861881656328629e-06, "loss": 0.135, "step": 15820 }, { "epoch": 0.461549682011786, "grad_norm": 0.7637412982771709, "learning_rate": 5.861416290113413e-06, "loss": 0.1406, "step": 15821 }, { "epoch": 0.46157885524242953, "grad_norm": 0.86363035493597, "learning_rate": 5.860950916207677e-06, "loss": 0.1316, "step": 15822 }, { "epoch": 0.4616080284730731, "grad_norm": 0.9496228424623425, "learning_rate": 5.8604855346155756e-06, "loss": 0.1151, "step": 15823 }, { "epoch": 0.4616372017037167, "grad_norm": 0.9982435813688407, "learning_rate": 5.860020145341267e-06, "loss": 0.1306, "step": 15824 }, { "epoch": 0.46166637493436025, "grad_norm": 1.0682495955333833, "learning_rate": 5.859554748388903e-06, "loss": 0.1191, "step": 15825 }, { "epoch": 0.4616955481650038, "grad_norm": 0.9238371193535599, "learning_rate": 5.859089343762638e-06, "loss": 0.1472, "step": 15826 }, { "epoch": 0.46172472139564735, "grad_norm": 0.8926023227567703, "learning_rate": 5.85862393146663e-06, "loss": 0.1351, "step": 15827 }, { "epoch": 0.4617538946262909, "grad_norm": 0.7104826765634306, "learning_rate": 5.858158511505032e-06, "loss": 0.1103, "step": 15828 }, { "epoch": 0.46178306785693446, "grad_norm": 1.5810407758998741, "learning_rate": 5.857693083881999e-06, "loss": 0.1561, "step": 15829 }, { "epoch": 0.461812241087578, "grad_norm": 0.9074428050151642, "learning_rate": 5.857227648601688e-06, "loss": 0.1765, "step": 15830 }, { "epoch": 0.4618414143182216, "grad_norm": 0.8134479443437905, "learning_rate": 5.856762205668253e-06, "loss": 0.1403, "step": 15831 }, { "epoch": 0.4618705875488652, "grad_norm": 1.0476413221189405, "learning_rate": 5.856296755085849e-06, "loss": 0.1617, "step": 15832 }, { "epoch": 0.46189976077950873, "grad_norm": 0.7207947215252731, "learning_rate": 5.855831296858631e-06, "loss": 0.1565, "step": 15833 }, { "epoch": 0.4619289340101523, "grad_norm": 0.8456209194298886, "learning_rate": 5.855365830990759e-06, "loss": 0.1342, "step": 15834 }, { "epoch": 0.46195810724079583, "grad_norm": 1.099463485103391, "learning_rate": 5.8549003574863815e-06, "loss": 0.1646, "step": 15835 }, { "epoch": 0.4619872804714394, "grad_norm": 0.7539161030504511, "learning_rate": 5.85443487634966e-06, "loss": 0.1418, "step": 15836 }, { "epoch": 0.46201645370208294, "grad_norm": 1.2446236246928624, "learning_rate": 5.853969387584747e-06, "loss": 0.1406, "step": 15837 }, { "epoch": 0.46204562693272655, "grad_norm": 1.01262732191191, "learning_rate": 5.853503891195797e-06, "loss": 0.1418, "step": 15838 }, { "epoch": 0.4620748001633701, "grad_norm": 1.0295086226658106, "learning_rate": 5.8530383871869725e-06, "loss": 0.1275, "step": 15839 }, { "epoch": 0.46210397339401366, "grad_norm": 0.8157928167959317, "learning_rate": 5.852572875562422e-06, "loss": 0.1301, "step": 15840 }, { "epoch": 0.4621331466246572, "grad_norm": 1.2160861930921467, "learning_rate": 5.852107356326305e-06, "loss": 0.1339, "step": 15841 }, { "epoch": 0.46216231985530076, "grad_norm": 1.0446637605268678, "learning_rate": 5.851641829482777e-06, "loss": 0.1403, "step": 15842 }, { "epoch": 0.4621914930859443, "grad_norm": 0.9811265382880493, "learning_rate": 5.851176295035994e-06, "loss": 0.1238, "step": 15843 }, { "epoch": 0.4622206663165879, "grad_norm": 0.9901597261186631, "learning_rate": 5.850710752990112e-06, "loss": 0.1242, "step": 15844 }, { "epoch": 0.4622498395472315, "grad_norm": 0.7981800484178719, "learning_rate": 5.850245203349288e-06, "loss": 0.1528, "step": 15845 }, { "epoch": 0.46227901277787503, "grad_norm": 0.7895154486982267, "learning_rate": 5.849779646117677e-06, "loss": 0.1567, "step": 15846 }, { "epoch": 0.4623081860085186, "grad_norm": 0.745081575095745, "learning_rate": 5.849314081299436e-06, "loss": 0.1386, "step": 15847 }, { "epoch": 0.46233735923916214, "grad_norm": 0.8394289508340748, "learning_rate": 5.848848508898722e-06, "loss": 0.1277, "step": 15848 }, { "epoch": 0.4623665324698057, "grad_norm": 0.8546716452311085, "learning_rate": 5.848382928919693e-06, "loss": 0.1425, "step": 15849 }, { "epoch": 0.46239570570044924, "grad_norm": 0.8334257354863847, "learning_rate": 5.847917341366501e-06, "loss": 0.1315, "step": 15850 }, { "epoch": 0.46242487893109285, "grad_norm": 0.9946812920536485, "learning_rate": 5.847451746243306e-06, "loss": 0.1282, "step": 15851 }, { "epoch": 0.4624540521617364, "grad_norm": 1.0914693586102404, "learning_rate": 5.846986143554265e-06, "loss": 0.1525, "step": 15852 }, { "epoch": 0.46248322539237996, "grad_norm": 1.0420059449555357, "learning_rate": 5.846520533303532e-06, "loss": 0.1485, "step": 15853 }, { "epoch": 0.4625123986230235, "grad_norm": 0.7094138955921574, "learning_rate": 5.846054915495269e-06, "loss": 0.1142, "step": 15854 }, { "epoch": 0.46254157185366707, "grad_norm": 1.051887299073645, "learning_rate": 5.845589290133627e-06, "loss": 0.1377, "step": 15855 }, { "epoch": 0.4625707450843106, "grad_norm": 1.0753670585455644, "learning_rate": 5.845123657222768e-06, "loss": 0.1548, "step": 15856 }, { "epoch": 0.4625999183149542, "grad_norm": 0.8494958539932758, "learning_rate": 5.844658016766845e-06, "loss": 0.1188, "step": 15857 }, { "epoch": 0.4626290915455978, "grad_norm": 0.6343566290332142, "learning_rate": 5.844192368770017e-06, "loss": 0.1272, "step": 15858 }, { "epoch": 0.46265826477624133, "grad_norm": 0.8215137434270036, "learning_rate": 5.843726713236442e-06, "loss": 0.1326, "step": 15859 }, { "epoch": 0.4626874380068849, "grad_norm": 1.092692961779195, "learning_rate": 5.843261050170274e-06, "loss": 0.1321, "step": 15860 }, { "epoch": 0.46271661123752844, "grad_norm": 0.7652329509303649, "learning_rate": 5.842795379575675e-06, "loss": 0.1259, "step": 15861 }, { "epoch": 0.462745784468172, "grad_norm": 0.6855271583271109, "learning_rate": 5.842329701456799e-06, "loss": 0.1304, "step": 15862 }, { "epoch": 0.46277495769881555, "grad_norm": 0.7693759880466009, "learning_rate": 5.841864015817804e-06, "loss": 0.1292, "step": 15863 }, { "epoch": 0.4628041309294591, "grad_norm": 1.107998901306329, "learning_rate": 5.84139832266285e-06, "loss": 0.1294, "step": 15864 }, { "epoch": 0.4628333041601027, "grad_norm": 0.7082320486190519, "learning_rate": 5.84093262199609e-06, "loss": 0.125, "step": 15865 }, { "epoch": 0.46286247739074626, "grad_norm": 0.6618581406917674, "learning_rate": 5.840466913821687e-06, "loss": 0.139, "step": 15866 }, { "epoch": 0.4628916506213898, "grad_norm": 0.9890547647006038, "learning_rate": 5.840001198143795e-06, "loss": 0.1392, "step": 15867 }, { "epoch": 0.46292082385203337, "grad_norm": 0.8354134840361568, "learning_rate": 5.8395354749665725e-06, "loss": 0.1412, "step": 15868 }, { "epoch": 0.4629499970826769, "grad_norm": 0.6994827796111657, "learning_rate": 5.839069744294178e-06, "loss": 0.1183, "step": 15869 }, { "epoch": 0.4629791703133205, "grad_norm": 0.7988385418865582, "learning_rate": 5.838604006130769e-06, "loss": 0.1315, "step": 15870 }, { "epoch": 0.4630083435439641, "grad_norm": 0.8536960901789566, "learning_rate": 5.8381382604805035e-06, "loss": 0.1253, "step": 15871 }, { "epoch": 0.46303751677460764, "grad_norm": 0.8348528895425051, "learning_rate": 5.83767250734754e-06, "loss": 0.1369, "step": 15872 }, { "epoch": 0.4630666900052512, "grad_norm": 0.703700913853645, "learning_rate": 5.837206746736036e-06, "loss": 0.1319, "step": 15873 }, { "epoch": 0.46309586323589474, "grad_norm": 0.721868231071233, "learning_rate": 5.836740978650149e-06, "loss": 0.1438, "step": 15874 }, { "epoch": 0.4631250364665383, "grad_norm": 0.9297929393124746, "learning_rate": 5.83627520309404e-06, "loss": 0.1312, "step": 15875 }, { "epoch": 0.46315420969718185, "grad_norm": 0.7933117896441769, "learning_rate": 5.835809420071865e-06, "loss": 0.1378, "step": 15876 }, { "epoch": 0.4631833829278254, "grad_norm": 0.8277036736973242, "learning_rate": 5.835343629587783e-06, "loss": 0.1574, "step": 15877 }, { "epoch": 0.463212556158469, "grad_norm": 0.8011762285766284, "learning_rate": 5.834877831645952e-06, "loss": 0.1508, "step": 15878 }, { "epoch": 0.46324172938911257, "grad_norm": 0.8365521808430085, "learning_rate": 5.8344120262505335e-06, "loss": 0.1466, "step": 15879 }, { "epoch": 0.4632709026197561, "grad_norm": 0.8445417585011479, "learning_rate": 5.8339462134056805e-06, "loss": 0.1448, "step": 15880 }, { "epoch": 0.4633000758503997, "grad_norm": 0.8004943882549624, "learning_rate": 5.833480393115556e-06, "loss": 0.1441, "step": 15881 }, { "epoch": 0.4633292490810432, "grad_norm": 0.7248928591422803, "learning_rate": 5.833014565384318e-06, "loss": 0.1138, "step": 15882 }, { "epoch": 0.4633584223116868, "grad_norm": 1.2993545420646264, "learning_rate": 5.832548730216123e-06, "loss": 0.1358, "step": 15883 }, { "epoch": 0.46338759554233033, "grad_norm": 0.888070530529126, "learning_rate": 5.832082887615134e-06, "loss": 0.128, "step": 15884 }, { "epoch": 0.46341676877297394, "grad_norm": 0.8489878461255642, "learning_rate": 5.8316170375855065e-06, "loss": 0.1354, "step": 15885 }, { "epoch": 0.4634459420036175, "grad_norm": 0.7668677989178522, "learning_rate": 5.8311511801314e-06, "loss": 0.1479, "step": 15886 }, { "epoch": 0.46347511523426105, "grad_norm": 0.8060949317504428, "learning_rate": 5.8306853152569755e-06, "loss": 0.1384, "step": 15887 }, { "epoch": 0.4635042884649046, "grad_norm": 0.7966606036787572, "learning_rate": 5.83021944296639e-06, "loss": 0.1749, "step": 15888 }, { "epoch": 0.46353346169554815, "grad_norm": 0.7777750236694937, "learning_rate": 5.829753563263803e-06, "loss": 0.1172, "step": 15889 }, { "epoch": 0.4635626349261917, "grad_norm": 0.830723371055366, "learning_rate": 5.829287676153375e-06, "loss": 0.1285, "step": 15890 }, { "epoch": 0.46359180815683526, "grad_norm": 0.8333860237145468, "learning_rate": 5.828821781639264e-06, "loss": 0.1498, "step": 15891 }, { "epoch": 0.46362098138747887, "grad_norm": 0.8372865646905908, "learning_rate": 5.828355879725632e-06, "loss": 0.1189, "step": 15892 }, { "epoch": 0.4636501546181224, "grad_norm": 0.8435699699315544, "learning_rate": 5.827889970416634e-06, "loss": 0.1346, "step": 15893 }, { "epoch": 0.463679327848766, "grad_norm": 0.7458663290715156, "learning_rate": 5.827424053716434e-06, "loss": 0.1518, "step": 15894 }, { "epoch": 0.46370850107940953, "grad_norm": 0.7838148068917841, "learning_rate": 5.826958129629187e-06, "loss": 0.1417, "step": 15895 }, { "epoch": 0.4637376743100531, "grad_norm": 0.888477035667017, "learning_rate": 5.826492198159058e-06, "loss": 0.126, "step": 15896 }, { "epoch": 0.46376684754069664, "grad_norm": 0.8729074064593524, "learning_rate": 5.826026259310202e-06, "loss": 0.1457, "step": 15897 }, { "epoch": 0.46379602077134024, "grad_norm": 0.9131639863595894, "learning_rate": 5.825560313086781e-06, "loss": 0.1425, "step": 15898 }, { "epoch": 0.4638251940019838, "grad_norm": 0.9122556029639202, "learning_rate": 5.825094359492955e-06, "loss": 0.1552, "step": 15899 }, { "epoch": 0.46385436723262735, "grad_norm": 0.8976113489352935, "learning_rate": 5.8246283985328845e-06, "loss": 0.1385, "step": 15900 }, { "epoch": 0.4638835404632709, "grad_norm": 0.8362597188678967, "learning_rate": 5.824162430210727e-06, "loss": 0.1447, "step": 15901 }, { "epoch": 0.46391271369391446, "grad_norm": 0.9641924428098306, "learning_rate": 5.823696454530645e-06, "loss": 0.1453, "step": 15902 }, { "epoch": 0.463941886924558, "grad_norm": 0.975611475874682, "learning_rate": 5.823230471496797e-06, "loss": 0.1274, "step": 15903 }, { "epoch": 0.46397106015520156, "grad_norm": 0.7284168076885339, "learning_rate": 5.822764481113345e-06, "loss": 0.122, "step": 15904 }, { "epoch": 0.4640002333858452, "grad_norm": 0.8004637172072524, "learning_rate": 5.822298483384446e-06, "loss": 0.1305, "step": 15905 }, { "epoch": 0.4640294066164887, "grad_norm": 1.3252533327640181, "learning_rate": 5.821832478314265e-06, "loss": 0.1376, "step": 15906 }, { "epoch": 0.4640585798471323, "grad_norm": 0.9785839268351734, "learning_rate": 5.821366465906958e-06, "loss": 0.1339, "step": 15907 }, { "epoch": 0.46408775307777583, "grad_norm": 0.8941719513667673, "learning_rate": 5.820900446166687e-06, "loss": 0.1448, "step": 15908 }, { "epoch": 0.4641169263084194, "grad_norm": 0.9483706041460341, "learning_rate": 5.820434419097614e-06, "loss": 0.1568, "step": 15909 }, { "epoch": 0.46414609953906294, "grad_norm": 1.0202428273220814, "learning_rate": 5.819968384703898e-06, "loss": 0.115, "step": 15910 }, { "epoch": 0.4641752727697065, "grad_norm": 0.7549637289971923, "learning_rate": 5.819502342989701e-06, "loss": 0.1394, "step": 15911 }, { "epoch": 0.4642044460003501, "grad_norm": 0.8714901820124208, "learning_rate": 5.81903629395918e-06, "loss": 0.1595, "step": 15912 }, { "epoch": 0.46423361923099365, "grad_norm": 0.9732469182318602, "learning_rate": 5.818570237616501e-06, "loss": 0.152, "step": 15913 }, { "epoch": 0.4642627924616372, "grad_norm": 0.842016257223515, "learning_rate": 5.818104173965822e-06, "loss": 0.1264, "step": 15914 }, { "epoch": 0.46429196569228076, "grad_norm": 0.8294549762141147, "learning_rate": 5.817638103011303e-06, "loss": 0.1548, "step": 15915 }, { "epoch": 0.4643211389229243, "grad_norm": 1.1116113134622982, "learning_rate": 5.817172024757107e-06, "loss": 0.1276, "step": 15916 }, { "epoch": 0.46435031215356787, "grad_norm": 0.8217614550077912, "learning_rate": 5.8167059392073945e-06, "loss": 0.153, "step": 15917 }, { "epoch": 0.4643794853842114, "grad_norm": 1.0098753202766408, "learning_rate": 5.816239846366325e-06, "loss": 0.1522, "step": 15918 }, { "epoch": 0.46440865861485503, "grad_norm": 0.9738587566490842, "learning_rate": 5.815773746238063e-06, "loss": 0.138, "step": 15919 }, { "epoch": 0.4644378318454986, "grad_norm": 0.7493340709827934, "learning_rate": 5.815307638826767e-06, "loss": 0.1279, "step": 15920 }, { "epoch": 0.46446700507614214, "grad_norm": 0.8826322638455888, "learning_rate": 5.8148415241365985e-06, "loss": 0.1448, "step": 15921 }, { "epoch": 0.4644961783067857, "grad_norm": 0.7779789604266115, "learning_rate": 5.81437540217172e-06, "loss": 0.1314, "step": 15922 }, { "epoch": 0.46452535153742924, "grad_norm": 0.9722819435858137, "learning_rate": 5.8139092729362925e-06, "loss": 0.1548, "step": 15923 }, { "epoch": 0.4645545247680728, "grad_norm": 0.8050178260881279, "learning_rate": 5.813443136434475e-06, "loss": 0.1209, "step": 15924 }, { "epoch": 0.4645836979987164, "grad_norm": 0.9171370105747304, "learning_rate": 5.812976992670434e-06, "loss": 0.1433, "step": 15925 }, { "epoch": 0.46461287122935996, "grad_norm": 0.8076803104860456, "learning_rate": 5.812510841648329e-06, "loss": 0.1394, "step": 15926 }, { "epoch": 0.4646420444600035, "grad_norm": 0.9973335139882092, "learning_rate": 5.812044683372318e-06, "loss": 0.1444, "step": 15927 }, { "epoch": 0.46467121769064706, "grad_norm": 0.7753992461313428, "learning_rate": 5.811578517846567e-06, "loss": 0.1407, "step": 15928 }, { "epoch": 0.4647003909212906, "grad_norm": 0.9103740934767576, "learning_rate": 5.81111234507524e-06, "loss": 0.1246, "step": 15929 }, { "epoch": 0.46472956415193417, "grad_norm": 0.9864108966598794, "learning_rate": 5.810646165062491e-06, "loss": 0.1285, "step": 15930 }, { "epoch": 0.4647587373825777, "grad_norm": 0.9102732017805926, "learning_rate": 5.8101799778124905e-06, "loss": 0.1346, "step": 15931 }, { "epoch": 0.46478791061322133, "grad_norm": 0.9376487685418402, "learning_rate": 5.809713783329395e-06, "loss": 0.1317, "step": 15932 }, { "epoch": 0.4648170838438649, "grad_norm": 1.0161822282457287, "learning_rate": 5.809247581617366e-06, "loss": 0.1367, "step": 15933 }, { "epoch": 0.46484625707450844, "grad_norm": 0.7920617594439303, "learning_rate": 5.808781372680571e-06, "loss": 0.1301, "step": 15934 }, { "epoch": 0.464875430305152, "grad_norm": 1.4013070796833542, "learning_rate": 5.808315156523168e-06, "loss": 0.1397, "step": 15935 }, { "epoch": 0.46490460353579555, "grad_norm": 0.9982127411340717, "learning_rate": 5.807848933149319e-06, "loss": 0.1309, "step": 15936 }, { "epoch": 0.4649337767664391, "grad_norm": 0.8397415543048476, "learning_rate": 5.807382702563188e-06, "loss": 0.1414, "step": 15937 }, { "epoch": 0.46496294999708265, "grad_norm": 1.347567736516576, "learning_rate": 5.806916464768938e-06, "loss": 0.1287, "step": 15938 }, { "epoch": 0.46499212322772626, "grad_norm": 1.4014554496035387, "learning_rate": 5.80645021977073e-06, "loss": 0.128, "step": 15939 }, { "epoch": 0.4650212964583698, "grad_norm": 0.8452629014948132, "learning_rate": 5.8059839675727255e-06, "loss": 0.1406, "step": 15940 }, { "epoch": 0.46505046968901337, "grad_norm": 0.8116111608551361, "learning_rate": 5.8055177081790916e-06, "loss": 0.1409, "step": 15941 }, { "epoch": 0.4650796429196569, "grad_norm": 1.0389772590121729, "learning_rate": 5.805051441593985e-06, "loss": 0.1388, "step": 15942 }, { "epoch": 0.4651088161503005, "grad_norm": 0.8691310836913704, "learning_rate": 5.804585167821572e-06, "loss": 0.118, "step": 15943 }, { "epoch": 0.46513798938094403, "grad_norm": 0.7957873510033092, "learning_rate": 5.804118886866016e-06, "loss": 0.1363, "step": 15944 }, { "epoch": 0.4651671626115876, "grad_norm": 0.9212943282096638, "learning_rate": 5.803652598731476e-06, "loss": 0.1336, "step": 15945 }, { "epoch": 0.4651963358422312, "grad_norm": 0.7731884868494172, "learning_rate": 5.80318630342212e-06, "loss": 0.1247, "step": 15946 }, { "epoch": 0.46522550907287474, "grad_norm": 0.7845451088379048, "learning_rate": 5.802720000942108e-06, "loss": 0.1445, "step": 15947 }, { "epoch": 0.4652546823035183, "grad_norm": 0.8924213228390518, "learning_rate": 5.802253691295602e-06, "loss": 0.145, "step": 15948 }, { "epoch": 0.46528385553416185, "grad_norm": 0.8146316307558824, "learning_rate": 5.801787374486768e-06, "loss": 0.1665, "step": 15949 }, { "epoch": 0.4653130287648054, "grad_norm": 0.7357533913377574, "learning_rate": 5.801321050519768e-06, "loss": 0.1299, "step": 15950 }, { "epoch": 0.46534220199544896, "grad_norm": 1.0523115059686823, "learning_rate": 5.800854719398764e-06, "loss": 0.1381, "step": 15951 }, { "epoch": 0.46537137522609257, "grad_norm": 1.0395106948844963, "learning_rate": 5.80038838112792e-06, "loss": 0.1508, "step": 15952 }, { "epoch": 0.4654005484567361, "grad_norm": 0.7198719787956999, "learning_rate": 5.799922035711401e-06, "loss": 0.1473, "step": 15953 }, { "epoch": 0.46542972168737967, "grad_norm": 0.7866665469816593, "learning_rate": 5.799455683153367e-06, "loss": 0.1229, "step": 15954 }, { "epoch": 0.4654588949180232, "grad_norm": 0.822350925080411, "learning_rate": 5.798989323457984e-06, "loss": 0.1405, "step": 15955 }, { "epoch": 0.4654880681486668, "grad_norm": 0.6539063365948917, "learning_rate": 5.798522956629418e-06, "loss": 0.1279, "step": 15956 }, { "epoch": 0.46551724137931033, "grad_norm": 0.9022568533322931, "learning_rate": 5.798056582671825e-06, "loss": 0.1308, "step": 15957 }, { "epoch": 0.4655464146099539, "grad_norm": 0.8149328748712105, "learning_rate": 5.797590201589376e-06, "loss": 0.1523, "step": 15958 }, { "epoch": 0.4655755878405975, "grad_norm": 0.8771860608393286, "learning_rate": 5.7971238133862324e-06, "loss": 0.1322, "step": 15959 }, { "epoch": 0.46560476107124105, "grad_norm": 0.9190749951306041, "learning_rate": 5.796657418066556e-06, "loss": 0.1511, "step": 15960 }, { "epoch": 0.4656339343018846, "grad_norm": 0.8041082549146434, "learning_rate": 5.796191015634515e-06, "loss": 0.1421, "step": 15961 }, { "epoch": 0.46566310753252815, "grad_norm": 0.8021631778731418, "learning_rate": 5.795724606094269e-06, "loss": 0.1412, "step": 15962 }, { "epoch": 0.4656922807631717, "grad_norm": 0.972627726391673, "learning_rate": 5.795258189449983e-06, "loss": 0.1599, "step": 15963 }, { "epoch": 0.46572145399381526, "grad_norm": 0.911331091011921, "learning_rate": 5.794791765705823e-06, "loss": 0.1253, "step": 15964 }, { "epoch": 0.4657506272244588, "grad_norm": 0.7905016224581348, "learning_rate": 5.79432533486595e-06, "loss": 0.1567, "step": 15965 }, { "epoch": 0.4657798004551024, "grad_norm": 1.025032660651501, "learning_rate": 5.793858896934532e-06, "loss": 0.1376, "step": 15966 }, { "epoch": 0.465808973685746, "grad_norm": 0.9245182905301442, "learning_rate": 5.79339245191573e-06, "loss": 0.1552, "step": 15967 }, { "epoch": 0.46583814691638953, "grad_norm": 1.1764974665898509, "learning_rate": 5.79292599981371e-06, "loss": 0.1534, "step": 15968 }, { "epoch": 0.4658673201470331, "grad_norm": 0.8612890491522468, "learning_rate": 5.792459540632636e-06, "loss": 0.1402, "step": 15969 }, { "epoch": 0.46589649337767663, "grad_norm": 0.7767768190759483, "learning_rate": 5.791993074376673e-06, "loss": 0.1549, "step": 15970 }, { "epoch": 0.4659256666083202, "grad_norm": 0.7311189609185839, "learning_rate": 5.791526601049985e-06, "loss": 0.1436, "step": 15971 }, { "epoch": 0.46595483983896374, "grad_norm": 0.7177933757195226, "learning_rate": 5.791060120656735e-06, "loss": 0.1271, "step": 15972 }, { "epoch": 0.46598401306960735, "grad_norm": 0.7610377232259685, "learning_rate": 5.790593633201089e-06, "loss": 0.1503, "step": 15973 }, { "epoch": 0.4660131863002509, "grad_norm": 0.8474659506844789, "learning_rate": 5.790127138687215e-06, "loss": 0.1269, "step": 15974 }, { "epoch": 0.46604235953089446, "grad_norm": 0.7254915854900958, "learning_rate": 5.789660637119271e-06, "loss": 0.122, "step": 15975 }, { "epoch": 0.466071532761538, "grad_norm": 0.7989884598295096, "learning_rate": 5.789194128501428e-06, "loss": 0.147, "step": 15976 }, { "epoch": 0.46610070599218156, "grad_norm": 0.8491314139759364, "learning_rate": 5.788727612837846e-06, "loss": 0.1355, "step": 15977 }, { "epoch": 0.4661298792228251, "grad_norm": 0.761607299632363, "learning_rate": 5.788261090132693e-06, "loss": 0.1218, "step": 15978 }, { "epoch": 0.46615905245346867, "grad_norm": 0.6986230239896588, "learning_rate": 5.787794560390133e-06, "loss": 0.1347, "step": 15979 }, { "epoch": 0.4661882256841123, "grad_norm": 1.054838554348645, "learning_rate": 5.787328023614331e-06, "loss": 0.144, "step": 15980 }, { "epoch": 0.46621739891475583, "grad_norm": 0.8017458873753452, "learning_rate": 5.786861479809453e-06, "loss": 0.1497, "step": 15981 }, { "epoch": 0.4662465721453994, "grad_norm": 0.7515159455250477, "learning_rate": 5.786394928979663e-06, "loss": 0.1215, "step": 15982 }, { "epoch": 0.46627574537604294, "grad_norm": 0.7126228538215411, "learning_rate": 5.785928371129127e-06, "loss": 0.131, "step": 15983 }, { "epoch": 0.4663049186066865, "grad_norm": 0.7843490446497038, "learning_rate": 5.785461806262011e-06, "loss": 0.1183, "step": 15984 }, { "epoch": 0.46633409183733004, "grad_norm": 0.6464048953221874, "learning_rate": 5.784995234382478e-06, "loss": 0.1194, "step": 15985 }, { "epoch": 0.46636326506797365, "grad_norm": 0.6520631001183158, "learning_rate": 5.784528655494697e-06, "loss": 0.1371, "step": 15986 }, { "epoch": 0.4663924382986172, "grad_norm": 0.9026076849639647, "learning_rate": 5.784062069602828e-06, "loss": 0.1751, "step": 15987 }, { "epoch": 0.46642161152926076, "grad_norm": 0.8416307734789469, "learning_rate": 5.783595476711043e-06, "loss": 0.135, "step": 15988 }, { "epoch": 0.4664507847599043, "grad_norm": 0.6961788531818385, "learning_rate": 5.783128876823504e-06, "loss": 0.118, "step": 15989 }, { "epoch": 0.46647995799054787, "grad_norm": 0.8461906207423968, "learning_rate": 5.782662269944376e-06, "loss": 0.1398, "step": 15990 }, { "epoch": 0.4665091312211914, "grad_norm": 0.8957956304247597, "learning_rate": 5.782195656077828e-06, "loss": 0.1219, "step": 15991 }, { "epoch": 0.466538304451835, "grad_norm": 0.8668098247405077, "learning_rate": 5.781729035228023e-06, "loss": 0.1477, "step": 15992 }, { "epoch": 0.4665674776824786, "grad_norm": 0.7105224761898491, "learning_rate": 5.7812624073991276e-06, "loss": 0.1321, "step": 15993 }, { "epoch": 0.46659665091312214, "grad_norm": 1.012134838403854, "learning_rate": 5.7807957725953076e-06, "loss": 0.1468, "step": 15994 }, { "epoch": 0.4666258241437657, "grad_norm": 0.8740211865235363, "learning_rate": 5.78032913082073e-06, "loss": 0.1372, "step": 15995 }, { "epoch": 0.46665499737440924, "grad_norm": 0.8097992390360006, "learning_rate": 5.7798624820795605e-06, "loss": 0.1398, "step": 15996 }, { "epoch": 0.4666841706050528, "grad_norm": 0.8794296185515897, "learning_rate": 5.779395826375964e-06, "loss": 0.148, "step": 15997 }, { "epoch": 0.46671334383569635, "grad_norm": 0.9570177270508721, "learning_rate": 5.778929163714109e-06, "loss": 0.1562, "step": 15998 }, { "epoch": 0.4667425170663399, "grad_norm": 0.8375361115858077, "learning_rate": 5.77846249409816e-06, "loss": 0.1143, "step": 15999 }, { "epoch": 0.4667716902969835, "grad_norm": 0.9187618810046481, "learning_rate": 5.777995817532282e-06, "loss": 0.1292, "step": 16000 }, { "epoch": 0.46680086352762706, "grad_norm": 0.6914638111304797, "learning_rate": 5.777529134020645e-06, "loss": 0.1346, "step": 16001 }, { "epoch": 0.4668300367582706, "grad_norm": 0.7145671042787071, "learning_rate": 5.777062443567412e-06, "loss": 0.1461, "step": 16002 }, { "epoch": 0.46685920998891417, "grad_norm": 0.8079655492792002, "learning_rate": 5.7765957461767515e-06, "loss": 0.1321, "step": 16003 }, { "epoch": 0.4668883832195577, "grad_norm": 0.8192383471776307, "learning_rate": 5.776129041852831e-06, "loss": 0.1348, "step": 16004 }, { "epoch": 0.4669175564502013, "grad_norm": 0.8083178474820759, "learning_rate": 5.775662330599814e-06, "loss": 0.1405, "step": 16005 }, { "epoch": 0.46694672968084483, "grad_norm": 0.8795955703771724, "learning_rate": 5.77519561242187e-06, "loss": 0.157, "step": 16006 }, { "epoch": 0.46697590291148844, "grad_norm": 0.7997922708958513, "learning_rate": 5.7747288873231645e-06, "loss": 0.1226, "step": 16007 }, { "epoch": 0.467005076142132, "grad_norm": 0.776315606649705, "learning_rate": 5.774262155307863e-06, "loss": 0.1233, "step": 16008 }, { "epoch": 0.46703424937277555, "grad_norm": 0.8044015518766559, "learning_rate": 5.773795416380135e-06, "loss": 0.1416, "step": 16009 }, { "epoch": 0.4670634226034191, "grad_norm": 0.8374210098741632, "learning_rate": 5.773328670544146e-06, "loss": 0.1358, "step": 16010 }, { "epoch": 0.46709259583406265, "grad_norm": 0.8489569119467483, "learning_rate": 5.772861917804064e-06, "loss": 0.1274, "step": 16011 }, { "epoch": 0.4671217690647062, "grad_norm": 0.6620216492087456, "learning_rate": 5.772395158164054e-06, "loss": 0.1254, "step": 16012 }, { "epoch": 0.4671509422953498, "grad_norm": 1.1379218990602393, "learning_rate": 5.771928391628284e-06, "loss": 0.1233, "step": 16013 }, { "epoch": 0.46718011552599337, "grad_norm": 0.9407417004271419, "learning_rate": 5.771461618200923e-06, "loss": 0.1327, "step": 16014 }, { "epoch": 0.4672092887566369, "grad_norm": 0.8260911501985518, "learning_rate": 5.770994837886137e-06, "loss": 0.1374, "step": 16015 }, { "epoch": 0.4672384619872805, "grad_norm": 0.8446010490366636, "learning_rate": 5.770528050688093e-06, "loss": 0.1131, "step": 16016 }, { "epoch": 0.467267635217924, "grad_norm": 0.7865372069291242, "learning_rate": 5.770061256610957e-06, "loss": 0.1366, "step": 16017 }, { "epoch": 0.4672968084485676, "grad_norm": 0.8856937467203092, "learning_rate": 5.769594455658899e-06, "loss": 0.0975, "step": 16018 }, { "epoch": 0.46732598167921113, "grad_norm": 1.0385318123528824, "learning_rate": 5.7691276478360854e-06, "loss": 0.1515, "step": 16019 }, { "epoch": 0.46735515490985474, "grad_norm": 0.8525272348093069, "learning_rate": 5.768660833146683e-06, "loss": 0.1296, "step": 16020 }, { "epoch": 0.4673843281404983, "grad_norm": 0.7564119644539686, "learning_rate": 5.7681940115948624e-06, "loss": 0.1005, "step": 16021 }, { "epoch": 0.46741350137114185, "grad_norm": 1.032340142288582, "learning_rate": 5.767727183184787e-06, "loss": 0.1272, "step": 16022 }, { "epoch": 0.4674426746017854, "grad_norm": 0.9099153002462543, "learning_rate": 5.767260347920627e-06, "loss": 0.1337, "step": 16023 }, { "epoch": 0.46747184783242896, "grad_norm": 0.846177825244513, "learning_rate": 5.766793505806551e-06, "loss": 0.1474, "step": 16024 }, { "epoch": 0.4675010210630725, "grad_norm": 0.9650979193854139, "learning_rate": 5.766326656846723e-06, "loss": 0.1163, "step": 16025 }, { "epoch": 0.46753019429371606, "grad_norm": 1.1737999601585896, "learning_rate": 5.765859801045316e-06, "loss": 0.1446, "step": 16026 }, { "epoch": 0.46755936752435967, "grad_norm": 0.7507372097227764, "learning_rate": 5.765392938406494e-06, "loss": 0.144, "step": 16027 }, { "epoch": 0.4675885407550032, "grad_norm": 1.0289732575570227, "learning_rate": 5.764926068934428e-06, "loss": 0.1553, "step": 16028 }, { "epoch": 0.4676177139856468, "grad_norm": 0.7891455298453132, "learning_rate": 5.764459192633282e-06, "loss": 0.1426, "step": 16029 }, { "epoch": 0.46764688721629033, "grad_norm": 0.8292319534656433, "learning_rate": 5.763992309507229e-06, "loss": 0.1417, "step": 16030 }, { "epoch": 0.4676760604469339, "grad_norm": 0.9021045559202837, "learning_rate": 5.763525419560436e-06, "loss": 0.1526, "step": 16031 }, { "epoch": 0.46770523367757744, "grad_norm": 0.8264634081061879, "learning_rate": 5.763058522797068e-06, "loss": 0.1202, "step": 16032 }, { "epoch": 0.467734406908221, "grad_norm": 0.652515074592792, "learning_rate": 5.762591619221297e-06, "loss": 0.1367, "step": 16033 }, { "epoch": 0.4677635801388646, "grad_norm": 0.7340814911279222, "learning_rate": 5.762124708837291e-06, "loss": 0.1712, "step": 16034 }, { "epoch": 0.46779275336950815, "grad_norm": 0.8506201084744504, "learning_rate": 5.7616577916492145e-06, "loss": 0.124, "step": 16035 }, { "epoch": 0.4678219266001517, "grad_norm": 0.7813046536314805, "learning_rate": 5.761190867661243e-06, "loss": 0.1523, "step": 16036 }, { "epoch": 0.46785109983079526, "grad_norm": 0.7246186837938973, "learning_rate": 5.760723936877538e-06, "loss": 0.1585, "step": 16037 }, { "epoch": 0.4678802730614388, "grad_norm": 0.8265533346747397, "learning_rate": 5.760256999302273e-06, "loss": 0.1685, "step": 16038 }, { "epoch": 0.46790944629208236, "grad_norm": 0.7512918950003025, "learning_rate": 5.759790054939614e-06, "loss": 0.132, "step": 16039 }, { "epoch": 0.467938619522726, "grad_norm": 0.6419272741135773, "learning_rate": 5.7593231037937306e-06, "loss": 0.1321, "step": 16040 }, { "epoch": 0.4679677927533695, "grad_norm": 1.0469292057265425, "learning_rate": 5.758856145868792e-06, "loss": 0.135, "step": 16041 }, { "epoch": 0.4679969659840131, "grad_norm": 0.844045201941579, "learning_rate": 5.758389181168967e-06, "loss": 0.1423, "step": 16042 }, { "epoch": 0.46802613921465663, "grad_norm": 0.7167638581889434, "learning_rate": 5.757922209698424e-06, "loss": 0.1267, "step": 16043 }, { "epoch": 0.4680553124453002, "grad_norm": 0.7103636203539796, "learning_rate": 5.757455231461334e-06, "loss": 0.146, "step": 16044 }, { "epoch": 0.46808448567594374, "grad_norm": 0.6696736563358334, "learning_rate": 5.756988246461863e-06, "loss": 0.1391, "step": 16045 }, { "epoch": 0.4681136589065873, "grad_norm": 0.7317317777456038, "learning_rate": 5.7565212547041835e-06, "loss": 0.1278, "step": 16046 }, { "epoch": 0.4681428321372309, "grad_norm": 0.637411729797091, "learning_rate": 5.75605425619246e-06, "loss": 0.1503, "step": 16047 }, { "epoch": 0.46817200536787446, "grad_norm": 1.0490510121574332, "learning_rate": 5.755587250930866e-06, "loss": 0.1418, "step": 16048 }, { "epoch": 0.468201178598518, "grad_norm": 0.8138231282346315, "learning_rate": 5.75512023892357e-06, "loss": 0.1409, "step": 16049 }, { "epoch": 0.46823035182916156, "grad_norm": 0.7919611992138462, "learning_rate": 5.75465322017474e-06, "loss": 0.124, "step": 16050 }, { "epoch": 0.4682595250598051, "grad_norm": 1.0982884992699131, "learning_rate": 5.754186194688547e-06, "loss": 0.1767, "step": 16051 }, { "epoch": 0.46828869829044867, "grad_norm": 1.0299722455438751, "learning_rate": 5.753719162469159e-06, "loss": 0.1439, "step": 16052 }, { "epoch": 0.4683178715210922, "grad_norm": 0.6987058633317436, "learning_rate": 5.753252123520746e-06, "loss": 0.1132, "step": 16053 }, { "epoch": 0.46834704475173583, "grad_norm": 1.0383339761552182, "learning_rate": 5.7527850778474795e-06, "loss": 0.143, "step": 16054 }, { "epoch": 0.4683762179823794, "grad_norm": 1.1578639748397883, "learning_rate": 5.752318025453525e-06, "loss": 0.1486, "step": 16055 }, { "epoch": 0.46840539121302294, "grad_norm": 0.7351432849730697, "learning_rate": 5.751850966343057e-06, "loss": 0.1488, "step": 16056 }, { "epoch": 0.4684345644436665, "grad_norm": 0.843477890384064, "learning_rate": 5.751383900520241e-06, "loss": 0.1113, "step": 16057 }, { "epoch": 0.46846373767431004, "grad_norm": 0.8823683745562853, "learning_rate": 5.75091682798925e-06, "loss": 0.1327, "step": 16058 }, { "epoch": 0.4684929109049536, "grad_norm": 0.8005412105676123, "learning_rate": 5.750449748754253e-06, "loss": 0.1338, "step": 16059 }, { "epoch": 0.46852208413559715, "grad_norm": 0.968143710825528, "learning_rate": 5.74998266281942e-06, "loss": 0.1686, "step": 16060 }, { "epoch": 0.46855125736624076, "grad_norm": 0.8627773213283976, "learning_rate": 5.7495155701889215e-06, "loss": 0.1174, "step": 16061 }, { "epoch": 0.4685804305968843, "grad_norm": 1.2187515398357416, "learning_rate": 5.749048470866925e-06, "loss": 0.1568, "step": 16062 }, { "epoch": 0.46860960382752787, "grad_norm": 1.0308792401904288, "learning_rate": 5.748581364857603e-06, "loss": 0.1558, "step": 16063 }, { "epoch": 0.4686387770581714, "grad_norm": 0.9357220981721602, "learning_rate": 5.748114252165127e-06, "loss": 0.1393, "step": 16064 }, { "epoch": 0.46866795028881497, "grad_norm": 0.8992948611466673, "learning_rate": 5.747647132793662e-06, "loss": 0.1467, "step": 16065 }, { "epoch": 0.4686971235194585, "grad_norm": 0.9082733345381605, "learning_rate": 5.747180006747386e-06, "loss": 0.1695, "step": 16066 }, { "epoch": 0.46872629675010213, "grad_norm": 0.8600373402962813, "learning_rate": 5.746712874030462e-06, "loss": 0.1391, "step": 16067 }, { "epoch": 0.4687554699807457, "grad_norm": 0.8737621041818534, "learning_rate": 5.746245734647066e-06, "loss": 0.1266, "step": 16068 }, { "epoch": 0.46878464321138924, "grad_norm": 1.2121580779759222, "learning_rate": 5.745778588601365e-06, "loss": 0.1628, "step": 16069 }, { "epoch": 0.4688138164420328, "grad_norm": 0.7783008039871504, "learning_rate": 5.745311435897531e-06, "loss": 0.1463, "step": 16070 }, { "epoch": 0.46884298967267635, "grad_norm": 0.8315272123788562, "learning_rate": 5.744844276539734e-06, "loss": 0.132, "step": 16071 }, { "epoch": 0.4688721629033199, "grad_norm": 0.8048650563939992, "learning_rate": 5.744377110532146e-06, "loss": 0.1237, "step": 16072 }, { "epoch": 0.46890133613396345, "grad_norm": 1.011428222566658, "learning_rate": 5.7439099378789366e-06, "loss": 0.1364, "step": 16073 }, { "epoch": 0.46893050936460706, "grad_norm": 0.8505536900192274, "learning_rate": 5.743442758584277e-06, "loss": 0.1274, "step": 16074 }, { "epoch": 0.4689596825952506, "grad_norm": 0.9729993148155971, "learning_rate": 5.742975572652337e-06, "loss": 0.123, "step": 16075 }, { "epoch": 0.46898885582589417, "grad_norm": 1.0459297712761293, "learning_rate": 5.74250838008729e-06, "loss": 0.1469, "step": 16076 }, { "epoch": 0.4690180290565377, "grad_norm": 0.8132158729836365, "learning_rate": 5.742041180893303e-06, "loss": 0.1392, "step": 16077 }, { "epoch": 0.4690472022871813, "grad_norm": 0.8584280553812293, "learning_rate": 5.741573975074551e-06, "loss": 0.1159, "step": 16078 }, { "epoch": 0.46907637551782483, "grad_norm": 0.6767765047043773, "learning_rate": 5.741106762635205e-06, "loss": 0.1219, "step": 16079 }, { "epoch": 0.4691055487484684, "grad_norm": 1.1232119840550205, "learning_rate": 5.740639543579433e-06, "loss": 0.1369, "step": 16080 }, { "epoch": 0.469134721979112, "grad_norm": 0.796951447742278, "learning_rate": 5.740172317911409e-06, "loss": 0.1359, "step": 16081 }, { "epoch": 0.46916389520975554, "grad_norm": 0.8761079062131024, "learning_rate": 5.739705085635302e-06, "loss": 0.1431, "step": 16082 }, { "epoch": 0.4691930684403991, "grad_norm": 0.9884386708917005, "learning_rate": 5.739237846755285e-06, "loss": 0.13, "step": 16083 }, { "epoch": 0.46922224167104265, "grad_norm": 1.1919464567864113, "learning_rate": 5.738770601275529e-06, "loss": 0.1246, "step": 16084 }, { "epoch": 0.4692514149016862, "grad_norm": 1.045991070029729, "learning_rate": 5.738303349200206e-06, "loss": 0.1257, "step": 16085 }, { "epoch": 0.46928058813232976, "grad_norm": 0.7959565312596193, "learning_rate": 5.7378360905334865e-06, "loss": 0.1396, "step": 16086 }, { "epoch": 0.4693097613629733, "grad_norm": 1.5820160593183699, "learning_rate": 5.737368825279542e-06, "loss": 0.1476, "step": 16087 }, { "epoch": 0.4693389345936169, "grad_norm": 1.1815826628493877, "learning_rate": 5.736901553442545e-06, "loss": 0.1402, "step": 16088 }, { "epoch": 0.46936810782426047, "grad_norm": 0.7513949019477715, "learning_rate": 5.736434275026667e-06, "loss": 0.1334, "step": 16089 }, { "epoch": 0.469397281054904, "grad_norm": 1.2522696477108841, "learning_rate": 5.735966990036079e-06, "loss": 0.1392, "step": 16090 }, { "epoch": 0.4694264542855476, "grad_norm": 1.0568337995409316, "learning_rate": 5.735499698474956e-06, "loss": 0.139, "step": 16091 }, { "epoch": 0.46945562751619113, "grad_norm": 1.0403577071745036, "learning_rate": 5.735032400347463e-06, "loss": 0.1388, "step": 16092 }, { "epoch": 0.4694848007468347, "grad_norm": 0.7900333409971576, "learning_rate": 5.734565095657779e-06, "loss": 0.1243, "step": 16093 }, { "epoch": 0.46951397397747824, "grad_norm": 0.9720347804398541, "learning_rate": 5.7340977844100735e-06, "loss": 0.1369, "step": 16094 }, { "epoch": 0.46954314720812185, "grad_norm": 0.9672105474943895, "learning_rate": 5.733630466608516e-06, "loss": 0.1358, "step": 16095 }, { "epoch": 0.4695723204387654, "grad_norm": 1.0027947901199838, "learning_rate": 5.733163142257283e-06, "loss": 0.1509, "step": 16096 }, { "epoch": 0.46960149366940895, "grad_norm": 0.6224211116672625, "learning_rate": 5.732695811360543e-06, "loss": 0.1257, "step": 16097 }, { "epoch": 0.4696306669000525, "grad_norm": 0.7699030096852254, "learning_rate": 5.732228473922471e-06, "loss": 0.176, "step": 16098 }, { "epoch": 0.46965984013069606, "grad_norm": 0.8856354430030329, "learning_rate": 5.731761129947238e-06, "loss": 0.1186, "step": 16099 }, { "epoch": 0.4696890133613396, "grad_norm": 0.8167555368557391, "learning_rate": 5.731293779439015e-06, "loss": 0.1285, "step": 16100 }, { "epoch": 0.4697181865919832, "grad_norm": 0.9193967476538044, "learning_rate": 5.730826422401976e-06, "loss": 0.1589, "step": 16101 }, { "epoch": 0.4697473598226268, "grad_norm": 0.6393872877340689, "learning_rate": 5.730359058840294e-06, "loss": 0.1417, "step": 16102 }, { "epoch": 0.46977653305327033, "grad_norm": 1.110856759356158, "learning_rate": 5.7298916887581405e-06, "loss": 0.1457, "step": 16103 }, { "epoch": 0.4698057062839139, "grad_norm": 0.9144741724024261, "learning_rate": 5.729424312159687e-06, "loss": 0.1497, "step": 16104 }, { "epoch": 0.46983487951455744, "grad_norm": 0.648704093586818, "learning_rate": 5.728956929049109e-06, "loss": 0.1298, "step": 16105 }, { "epoch": 0.469864052745201, "grad_norm": 0.8048886852999525, "learning_rate": 5.728489539430576e-06, "loss": 0.1331, "step": 16106 }, { "epoch": 0.46989322597584454, "grad_norm": 0.7897477806332242, "learning_rate": 5.728022143308264e-06, "loss": 0.1339, "step": 16107 }, { "epoch": 0.46992239920648815, "grad_norm": 0.8601492069847314, "learning_rate": 5.727554740686343e-06, "loss": 0.1367, "step": 16108 }, { "epoch": 0.4699515724371317, "grad_norm": 0.9207029387636473, "learning_rate": 5.727087331568986e-06, "loss": 0.1553, "step": 16109 }, { "epoch": 0.46998074566777526, "grad_norm": 0.7737759715592784, "learning_rate": 5.726619915960368e-06, "loss": 0.1069, "step": 16110 }, { "epoch": 0.4700099188984188, "grad_norm": 0.7515370629130431, "learning_rate": 5.726152493864663e-06, "loss": 0.1598, "step": 16111 }, { "epoch": 0.47003909212906236, "grad_norm": 0.8571022277462108, "learning_rate": 5.725685065286038e-06, "loss": 0.1471, "step": 16112 }, { "epoch": 0.4700682653597059, "grad_norm": 0.8753528201700052, "learning_rate": 5.725217630228673e-06, "loss": 0.1596, "step": 16113 }, { "epoch": 0.47009743859034947, "grad_norm": 4.859814509656574, "learning_rate": 5.724750188696737e-06, "loss": 0.1489, "step": 16114 }, { "epoch": 0.4701266118209931, "grad_norm": 1.4828493024288039, "learning_rate": 5.724282740694404e-06, "loss": 0.1154, "step": 16115 }, { "epoch": 0.47015578505163663, "grad_norm": 0.8638338229309651, "learning_rate": 5.723815286225848e-06, "loss": 0.1447, "step": 16116 }, { "epoch": 0.4701849582822802, "grad_norm": 0.7688649662024418, "learning_rate": 5.723347825295243e-06, "loss": 0.1237, "step": 16117 }, { "epoch": 0.47021413151292374, "grad_norm": 0.7522221028055127, "learning_rate": 5.7228803579067594e-06, "loss": 0.1152, "step": 16118 }, { "epoch": 0.4702433047435673, "grad_norm": 0.8944578795617836, "learning_rate": 5.722412884064572e-06, "loss": 0.1298, "step": 16119 }, { "epoch": 0.47027247797421085, "grad_norm": 1.084083705789618, "learning_rate": 5.7219454037728564e-06, "loss": 0.1508, "step": 16120 }, { "epoch": 0.4703016512048544, "grad_norm": 0.9507613558325935, "learning_rate": 5.721477917035785e-06, "loss": 0.1362, "step": 16121 }, { "epoch": 0.470330824435498, "grad_norm": 0.7930644797736451, "learning_rate": 5.7210104238575295e-06, "loss": 0.1201, "step": 16122 }, { "epoch": 0.47035999766614156, "grad_norm": 0.6314872642891975, "learning_rate": 5.720542924242265e-06, "loss": 0.1296, "step": 16123 }, { "epoch": 0.4703891708967851, "grad_norm": 0.8494723624941237, "learning_rate": 5.720075418194166e-06, "loss": 0.1368, "step": 16124 }, { "epoch": 0.47041834412742867, "grad_norm": 0.9204880438144835, "learning_rate": 5.719607905717406e-06, "loss": 0.1355, "step": 16125 }, { "epoch": 0.4704475173580722, "grad_norm": 0.7134989970552862, "learning_rate": 5.719140386816159e-06, "loss": 0.1282, "step": 16126 }, { "epoch": 0.4704766905887158, "grad_norm": 0.8339145112729058, "learning_rate": 5.718672861494597e-06, "loss": 0.1516, "step": 16127 }, { "epoch": 0.4705058638193594, "grad_norm": 0.9566071881084267, "learning_rate": 5.718205329756895e-06, "loss": 0.1565, "step": 16128 }, { "epoch": 0.47053503705000294, "grad_norm": 0.854543237254776, "learning_rate": 5.7177377916072285e-06, "loss": 0.1645, "step": 16129 }, { "epoch": 0.4705642102806465, "grad_norm": 0.9789235876882216, "learning_rate": 5.717270247049769e-06, "loss": 0.1407, "step": 16130 }, { "epoch": 0.47059338351129004, "grad_norm": 0.7974384768832588, "learning_rate": 5.7168026960886925e-06, "loss": 0.1357, "step": 16131 }, { "epoch": 0.4706225567419336, "grad_norm": 0.8275988235439948, "learning_rate": 5.716335138728173e-06, "loss": 0.1263, "step": 16132 }, { "epoch": 0.47065172997257715, "grad_norm": 0.9523676958991666, "learning_rate": 5.715867574972384e-06, "loss": 0.1621, "step": 16133 }, { "epoch": 0.4706809032032207, "grad_norm": 0.7635523007543593, "learning_rate": 5.7154000048255e-06, "loss": 0.1603, "step": 16134 }, { "epoch": 0.4707100764338643, "grad_norm": 0.8623497340330429, "learning_rate": 5.7149324282916966e-06, "loss": 0.1281, "step": 16135 }, { "epoch": 0.47073924966450786, "grad_norm": 0.8510871688185185, "learning_rate": 5.714464845375146e-06, "loss": 0.1225, "step": 16136 }, { "epoch": 0.4707684228951514, "grad_norm": 0.7639056998088636, "learning_rate": 5.7139972560800235e-06, "loss": 0.1646, "step": 16137 }, { "epoch": 0.47079759612579497, "grad_norm": 0.9565157521139924, "learning_rate": 5.713529660410505e-06, "loss": 0.1517, "step": 16138 }, { "epoch": 0.4708267693564385, "grad_norm": 0.7635071253582182, "learning_rate": 5.713062058370763e-06, "loss": 0.1245, "step": 16139 }, { "epoch": 0.4708559425870821, "grad_norm": 0.7291809115107964, "learning_rate": 5.7125944499649745e-06, "loss": 0.1233, "step": 16140 }, { "epoch": 0.47088511581772563, "grad_norm": 0.8774965193916223, "learning_rate": 5.712126835197313e-06, "loss": 0.1439, "step": 16141 }, { "epoch": 0.47091428904836924, "grad_norm": 0.8285968761142548, "learning_rate": 5.711659214071951e-06, "loss": 0.13, "step": 16142 }, { "epoch": 0.4709434622790128, "grad_norm": 0.6676578682951105, "learning_rate": 5.711191586593068e-06, "loss": 0.1202, "step": 16143 }, { "epoch": 0.47097263550965635, "grad_norm": 0.666544706641162, "learning_rate": 5.710723952764835e-06, "loss": 0.1268, "step": 16144 }, { "epoch": 0.4710018087402999, "grad_norm": 0.7635204443806743, "learning_rate": 5.7102563125914265e-06, "loss": 0.1416, "step": 16145 }, { "epoch": 0.47103098197094345, "grad_norm": 0.7802522015872385, "learning_rate": 5.709788666077022e-06, "loss": 0.1194, "step": 16146 }, { "epoch": 0.471060155201587, "grad_norm": 0.7881972123320081, "learning_rate": 5.709321013225792e-06, "loss": 0.1362, "step": 16147 }, { "epoch": 0.47108932843223056, "grad_norm": 0.9339135012352625, "learning_rate": 5.708853354041914e-06, "loss": 0.1373, "step": 16148 }, { "epoch": 0.47111850166287417, "grad_norm": 0.7428028039373517, "learning_rate": 5.708385688529563e-06, "loss": 0.1447, "step": 16149 }, { "epoch": 0.4711476748935177, "grad_norm": 0.7557091850447257, "learning_rate": 5.707918016692913e-06, "loss": 0.1136, "step": 16150 }, { "epoch": 0.4711768481241613, "grad_norm": 0.930051679838062, "learning_rate": 5.7074503385361406e-06, "loss": 0.1528, "step": 16151 }, { "epoch": 0.4712060213548048, "grad_norm": 0.914309014482323, "learning_rate": 5.70698265406342e-06, "loss": 0.1414, "step": 16152 }, { "epoch": 0.4712351945854484, "grad_norm": 1.0921308307739288, "learning_rate": 5.706514963278926e-06, "loss": 0.1458, "step": 16153 }, { "epoch": 0.47126436781609193, "grad_norm": 0.8584718839188415, "learning_rate": 5.706047266186836e-06, "loss": 0.1254, "step": 16154 }, { "epoch": 0.47129354104673554, "grad_norm": 0.7586975375020832, "learning_rate": 5.705579562791325e-06, "loss": 0.1506, "step": 16155 }, { "epoch": 0.4713227142773791, "grad_norm": 0.9601549045653952, "learning_rate": 5.705111853096569e-06, "loss": 0.1453, "step": 16156 }, { "epoch": 0.47135188750802265, "grad_norm": 0.9988405830758633, "learning_rate": 5.70464413710674e-06, "loss": 0.1028, "step": 16157 }, { "epoch": 0.4713810607386662, "grad_norm": 0.7686197023796981, "learning_rate": 5.704176414826018e-06, "loss": 0.1393, "step": 16158 }, { "epoch": 0.47141023396930976, "grad_norm": 0.9426717499516286, "learning_rate": 5.703708686258577e-06, "loss": 0.1318, "step": 16159 }, { "epoch": 0.4714394071999533, "grad_norm": 0.9483265331401411, "learning_rate": 5.703240951408592e-06, "loss": 0.1424, "step": 16160 }, { "epoch": 0.47146858043059686, "grad_norm": 0.9296762430677868, "learning_rate": 5.7027732102802416e-06, "loss": 0.118, "step": 16161 }, { "epoch": 0.47149775366124047, "grad_norm": 0.6927683269348481, "learning_rate": 5.702305462877697e-06, "loss": 0.1589, "step": 16162 }, { "epoch": 0.471526926891884, "grad_norm": 0.6899457308718389, "learning_rate": 5.701837709205139e-06, "loss": 0.1109, "step": 16163 }, { "epoch": 0.4715561001225276, "grad_norm": 0.9561555591782459, "learning_rate": 5.70136994926674e-06, "loss": 0.1595, "step": 16164 }, { "epoch": 0.47158527335317113, "grad_norm": 0.9913448171536647, "learning_rate": 5.700902183066679e-06, "loss": 0.1158, "step": 16165 }, { "epoch": 0.4716144465838147, "grad_norm": 0.7727073830246254, "learning_rate": 5.70043441060913e-06, "loss": 0.1354, "step": 16166 }, { "epoch": 0.47164361981445824, "grad_norm": 0.96473705052807, "learning_rate": 5.699966631898269e-06, "loss": 0.1127, "step": 16167 }, { "epoch": 0.4716727930451018, "grad_norm": 0.7495226406181917, "learning_rate": 5.699498846938274e-06, "loss": 0.1299, "step": 16168 }, { "epoch": 0.4717019662757454, "grad_norm": 0.8292004078232724, "learning_rate": 5.699031055733319e-06, "loss": 0.1492, "step": 16169 }, { "epoch": 0.47173113950638895, "grad_norm": 0.7441635103053541, "learning_rate": 5.698563258287584e-06, "loss": 0.165, "step": 16170 }, { "epoch": 0.4717603127370325, "grad_norm": 0.6816748903949795, "learning_rate": 5.698095454605243e-06, "loss": 0.1299, "step": 16171 }, { "epoch": 0.47178948596767606, "grad_norm": 0.7927442000531812, "learning_rate": 5.6976276446904684e-06, "loss": 0.1514, "step": 16172 }, { "epoch": 0.4718186591983196, "grad_norm": 0.8611246894664979, "learning_rate": 5.697159828547445e-06, "loss": 0.1612, "step": 16173 }, { "epoch": 0.47184783242896317, "grad_norm": 0.7813186770413587, "learning_rate": 5.6966920061803435e-06, "loss": 0.1287, "step": 16174 }, { "epoch": 0.4718770056596067, "grad_norm": 0.8555039145706819, "learning_rate": 5.696224177593341e-06, "loss": 0.1269, "step": 16175 }, { "epoch": 0.4719061788902503, "grad_norm": 0.8187749906027676, "learning_rate": 5.695756342790617e-06, "loss": 0.1456, "step": 16176 }, { "epoch": 0.4719353521208939, "grad_norm": 0.8189764933933033, "learning_rate": 5.6952885017763455e-06, "loss": 0.1397, "step": 16177 }, { "epoch": 0.47196452535153743, "grad_norm": 0.9037366018345423, "learning_rate": 5.694820654554705e-06, "loss": 0.1487, "step": 16178 }, { "epoch": 0.471993698582181, "grad_norm": 0.830932021339775, "learning_rate": 5.694352801129871e-06, "loss": 0.1202, "step": 16179 }, { "epoch": 0.47202287181282454, "grad_norm": 0.9435808114088704, "learning_rate": 5.69388494150602e-06, "loss": 0.1494, "step": 16180 }, { "epoch": 0.4720520450434681, "grad_norm": 0.7360990872898313, "learning_rate": 5.693417075687332e-06, "loss": 0.1384, "step": 16181 }, { "epoch": 0.4720812182741117, "grad_norm": 0.946534332693603, "learning_rate": 5.69294920367798e-06, "loss": 0.1291, "step": 16182 }, { "epoch": 0.47211039150475526, "grad_norm": 0.8751863756735684, "learning_rate": 5.692481325482144e-06, "loss": 0.1362, "step": 16183 }, { "epoch": 0.4721395647353988, "grad_norm": 0.8350416032285239, "learning_rate": 5.692013441103999e-06, "loss": 0.1364, "step": 16184 }, { "epoch": 0.47216873796604236, "grad_norm": 0.7840014775393542, "learning_rate": 5.6915455505477244e-06, "loss": 0.1396, "step": 16185 }, { "epoch": 0.4721979111966859, "grad_norm": 0.9949906733657168, "learning_rate": 5.691077653817496e-06, "loss": 0.143, "step": 16186 }, { "epoch": 0.47222708442732947, "grad_norm": 1.1018000811796227, "learning_rate": 5.690609750917491e-06, "loss": 0.1335, "step": 16187 }, { "epoch": 0.472256257657973, "grad_norm": 0.839745800730884, "learning_rate": 5.690141841851887e-06, "loss": 0.1289, "step": 16188 }, { "epoch": 0.47228543088861663, "grad_norm": 0.8672016742917711, "learning_rate": 5.689673926624862e-06, "loss": 0.1242, "step": 16189 }, { "epoch": 0.4723146041192602, "grad_norm": 0.9283706838298952, "learning_rate": 5.6892060052405906e-06, "loss": 0.1287, "step": 16190 }, { "epoch": 0.47234377734990374, "grad_norm": 0.8957466119878499, "learning_rate": 5.688738077703255e-06, "loss": 0.1532, "step": 16191 }, { "epoch": 0.4723729505805473, "grad_norm": 0.8065300531139225, "learning_rate": 5.68827014401703e-06, "loss": 0.1438, "step": 16192 }, { "epoch": 0.47240212381119084, "grad_norm": 0.7639327856073047, "learning_rate": 5.687802204186092e-06, "loss": 0.1209, "step": 16193 }, { "epoch": 0.4724312970418344, "grad_norm": 0.9828183122126705, "learning_rate": 5.687334258214622e-06, "loss": 0.1269, "step": 16194 }, { "epoch": 0.47246047027247795, "grad_norm": 0.8431529340952344, "learning_rate": 5.686866306106794e-06, "loss": 0.1412, "step": 16195 }, { "epoch": 0.47248964350312156, "grad_norm": 1.1173127652406816, "learning_rate": 5.686398347866789e-06, "loss": 0.1343, "step": 16196 }, { "epoch": 0.4725188167337651, "grad_norm": 0.7631756116195886, "learning_rate": 5.685930383498782e-06, "loss": 0.1304, "step": 16197 }, { "epoch": 0.47254798996440867, "grad_norm": 0.8798489410905271, "learning_rate": 5.685462413006953e-06, "loss": 0.1409, "step": 16198 }, { "epoch": 0.4725771631950522, "grad_norm": 0.9584388683992391, "learning_rate": 5.684994436395479e-06, "loss": 0.1207, "step": 16199 }, { "epoch": 0.47260633642569577, "grad_norm": 0.7511771133137782, "learning_rate": 5.684526453668538e-06, "loss": 0.1153, "step": 16200 }, { "epoch": 0.4726355096563393, "grad_norm": 1.0283700102265485, "learning_rate": 5.684058464830311e-06, "loss": 0.1309, "step": 16201 }, { "epoch": 0.4726646828869829, "grad_norm": 0.815990377068461, "learning_rate": 5.68359046988497e-06, "loss": 0.1206, "step": 16202 }, { "epoch": 0.4726938561176265, "grad_norm": 0.7414053418331662, "learning_rate": 5.683122468836698e-06, "loss": 0.1179, "step": 16203 }, { "epoch": 0.47272302934827004, "grad_norm": 0.7558004797743774, "learning_rate": 5.682654461689671e-06, "loss": 0.1435, "step": 16204 }, { "epoch": 0.4727522025789136, "grad_norm": 0.763002566802126, "learning_rate": 5.682186448448067e-06, "loss": 0.1437, "step": 16205 }, { "epoch": 0.47278137580955715, "grad_norm": 0.7492393525328191, "learning_rate": 5.681718429116067e-06, "loss": 0.1312, "step": 16206 }, { "epoch": 0.4728105490402007, "grad_norm": 0.7150913299742103, "learning_rate": 5.681250403697847e-06, "loss": 0.1036, "step": 16207 }, { "epoch": 0.47283972227084425, "grad_norm": 0.6375610874861878, "learning_rate": 5.680782372197586e-06, "loss": 0.1189, "step": 16208 }, { "epoch": 0.4728688955014878, "grad_norm": 0.9498688512170802, "learning_rate": 5.6803143346194625e-06, "loss": 0.1275, "step": 16209 }, { "epoch": 0.4728980687321314, "grad_norm": 0.782084929024932, "learning_rate": 5.679846290967654e-06, "loss": 0.1257, "step": 16210 }, { "epoch": 0.47292724196277497, "grad_norm": 0.8024384483136685, "learning_rate": 5.679378241246341e-06, "loss": 0.1326, "step": 16211 }, { "epoch": 0.4729564151934185, "grad_norm": 0.9090946722184887, "learning_rate": 5.678910185459702e-06, "loss": 0.1391, "step": 16212 }, { "epoch": 0.4729855884240621, "grad_norm": 0.8432305552418801, "learning_rate": 5.678442123611914e-06, "loss": 0.1356, "step": 16213 }, { "epoch": 0.47301476165470563, "grad_norm": 0.8891832226368575, "learning_rate": 5.6779740557071574e-06, "loss": 0.1394, "step": 16214 }, { "epoch": 0.4730439348853492, "grad_norm": 0.8660342683361745, "learning_rate": 5.67750598174961e-06, "loss": 0.1627, "step": 16215 }, { "epoch": 0.4730731081159928, "grad_norm": 0.9064999845036191, "learning_rate": 5.67703790174345e-06, "loss": 0.1432, "step": 16216 }, { "epoch": 0.47310228134663634, "grad_norm": 0.9992737392003487, "learning_rate": 5.676569815692858e-06, "loss": 0.1315, "step": 16217 }, { "epoch": 0.4731314545772799, "grad_norm": 0.841757401922245, "learning_rate": 5.676101723602014e-06, "loss": 0.1384, "step": 16218 }, { "epoch": 0.47316062780792345, "grad_norm": 0.677806080845052, "learning_rate": 5.675633625475092e-06, "loss": 0.1288, "step": 16219 }, { "epoch": 0.473189801038567, "grad_norm": 0.9660709917443289, "learning_rate": 5.6751655213162746e-06, "loss": 0.1473, "step": 16220 }, { "epoch": 0.47321897426921056, "grad_norm": 0.9806908705855765, "learning_rate": 5.674697411129743e-06, "loss": 0.1382, "step": 16221 }, { "epoch": 0.4732481474998541, "grad_norm": 0.8439538122796523, "learning_rate": 5.674229294919672e-06, "loss": 0.1435, "step": 16222 }, { "epoch": 0.4732773207304977, "grad_norm": 0.7437934665996728, "learning_rate": 5.6737611726902446e-06, "loss": 0.1531, "step": 16223 }, { "epoch": 0.4733064939611413, "grad_norm": 0.9858281049159205, "learning_rate": 5.673293044445636e-06, "loss": 0.1551, "step": 16224 }, { "epoch": 0.4733356671917848, "grad_norm": 0.7692244938518777, "learning_rate": 5.672824910190029e-06, "loss": 0.1487, "step": 16225 }, { "epoch": 0.4733648404224284, "grad_norm": 0.775186689285311, "learning_rate": 5.672356769927601e-06, "loss": 0.1394, "step": 16226 }, { "epoch": 0.47339401365307193, "grad_norm": 0.8328297749803291, "learning_rate": 5.671888623662534e-06, "loss": 0.16, "step": 16227 }, { "epoch": 0.4734231868837155, "grad_norm": 1.738440038387054, "learning_rate": 5.671420471399005e-06, "loss": 0.1497, "step": 16228 }, { "epoch": 0.47345236011435904, "grad_norm": 0.8687263397133216, "learning_rate": 5.670952313141193e-06, "loss": 0.1133, "step": 16229 }, { "epoch": 0.47348153334500265, "grad_norm": 0.9985943263605154, "learning_rate": 5.670484148893281e-06, "loss": 0.1306, "step": 16230 }, { "epoch": 0.4735107065756462, "grad_norm": 0.8357561037795771, "learning_rate": 5.6700159786594466e-06, "loss": 0.1267, "step": 16231 }, { "epoch": 0.47353987980628975, "grad_norm": 1.0401668740599634, "learning_rate": 5.6695478024438665e-06, "loss": 0.1395, "step": 16232 }, { "epoch": 0.4735690530369333, "grad_norm": 0.7872097597891584, "learning_rate": 5.669079620250727e-06, "loss": 0.1358, "step": 16233 }, { "epoch": 0.47359822626757686, "grad_norm": 0.6619763784436152, "learning_rate": 5.668611432084202e-06, "loss": 0.1398, "step": 16234 }, { "epoch": 0.4736273994982204, "grad_norm": 0.8538666418095292, "learning_rate": 5.668143237948474e-06, "loss": 0.1384, "step": 16235 }, { "epoch": 0.47365657272886397, "grad_norm": 0.7445876302715672, "learning_rate": 5.667675037847724e-06, "loss": 0.1286, "step": 16236 }, { "epoch": 0.4736857459595076, "grad_norm": 0.9563616285404325, "learning_rate": 5.667206831786131e-06, "loss": 0.1535, "step": 16237 }, { "epoch": 0.47371491919015113, "grad_norm": 0.7220433397475235, "learning_rate": 5.666738619767873e-06, "loss": 0.1305, "step": 16238 }, { "epoch": 0.4737440924207947, "grad_norm": 0.9418118334499467, "learning_rate": 5.666270401797132e-06, "loss": 0.1213, "step": 16239 }, { "epoch": 0.47377326565143824, "grad_norm": 0.7768616027863579, "learning_rate": 5.665802177878088e-06, "loss": 0.1428, "step": 16240 }, { "epoch": 0.4738024388820818, "grad_norm": 0.9101999476468361, "learning_rate": 5.665333948014922e-06, "loss": 0.1403, "step": 16241 }, { "epoch": 0.47383161211272534, "grad_norm": 0.8402877876782288, "learning_rate": 5.664865712211812e-06, "loss": 0.1304, "step": 16242 }, { "epoch": 0.47386078534336895, "grad_norm": 0.9650119347957364, "learning_rate": 5.66439747047294e-06, "loss": 0.1397, "step": 16243 }, { "epoch": 0.4738899585740125, "grad_norm": 0.7516018342228209, "learning_rate": 5.663929222802487e-06, "loss": 0.1174, "step": 16244 }, { "epoch": 0.47391913180465606, "grad_norm": 0.9969496985948444, "learning_rate": 5.663460969204631e-06, "loss": 0.1438, "step": 16245 }, { "epoch": 0.4739483050352996, "grad_norm": 0.9829370754519864, "learning_rate": 5.662992709683556e-06, "loss": 0.1189, "step": 16246 }, { "epoch": 0.47397747826594316, "grad_norm": 0.7936624946802271, "learning_rate": 5.662524444243437e-06, "loss": 0.1211, "step": 16247 }, { "epoch": 0.4740066514965867, "grad_norm": 1.014677559492022, "learning_rate": 5.6620561728884616e-06, "loss": 0.1323, "step": 16248 }, { "epoch": 0.47403582472723027, "grad_norm": 0.8610488994778456, "learning_rate": 5.661587895622805e-06, "loss": 0.1218, "step": 16249 }, { "epoch": 0.4740649979578739, "grad_norm": 0.8314360485172108, "learning_rate": 5.661119612450647e-06, "loss": 0.129, "step": 16250 }, { "epoch": 0.47409417118851743, "grad_norm": 0.9404321259942992, "learning_rate": 5.660651323376175e-06, "loss": 0.1336, "step": 16251 }, { "epoch": 0.474123344419161, "grad_norm": 0.7948735444190058, "learning_rate": 5.660183028403564e-06, "loss": 0.1381, "step": 16252 }, { "epoch": 0.47415251764980454, "grad_norm": 0.9186003866517694, "learning_rate": 5.659714727536997e-06, "loss": 0.1744, "step": 16253 }, { "epoch": 0.4741816908804481, "grad_norm": 0.6924003566379509, "learning_rate": 5.659246420780654e-06, "loss": 0.14, "step": 16254 }, { "epoch": 0.47421086411109165, "grad_norm": 0.8309259555606926, "learning_rate": 5.658778108138716e-06, "loss": 0.1474, "step": 16255 }, { "epoch": 0.4742400373417352, "grad_norm": 0.8182532797642579, "learning_rate": 5.658309789615365e-06, "loss": 0.1487, "step": 16256 }, { "epoch": 0.4742692105723788, "grad_norm": 0.8406096018410196, "learning_rate": 5.657841465214781e-06, "loss": 0.1357, "step": 16257 }, { "epoch": 0.47429838380302236, "grad_norm": 0.877345040652179, "learning_rate": 5.6573731349411455e-06, "loss": 0.1489, "step": 16258 }, { "epoch": 0.4743275570336659, "grad_norm": 1.108890956422376, "learning_rate": 5.656904798798639e-06, "loss": 0.1375, "step": 16259 }, { "epoch": 0.47435673026430947, "grad_norm": 1.018419444436362, "learning_rate": 5.6564364567914446e-06, "loss": 0.1337, "step": 16260 }, { "epoch": 0.474385903494953, "grad_norm": 0.8214578524218132, "learning_rate": 5.655968108923742e-06, "loss": 0.1333, "step": 16261 }, { "epoch": 0.4744150767255966, "grad_norm": 0.9240534830592845, "learning_rate": 5.655499755199711e-06, "loss": 0.1476, "step": 16262 }, { "epoch": 0.4744442499562401, "grad_norm": 0.8709571035293724, "learning_rate": 5.655031395623537e-06, "loss": 0.1314, "step": 16263 }, { "epoch": 0.47447342318688374, "grad_norm": 0.8551789118468752, "learning_rate": 5.654563030199398e-06, "loss": 0.1443, "step": 16264 }, { "epoch": 0.4745025964175273, "grad_norm": 0.9940196886603396, "learning_rate": 5.654094658931475e-06, "loss": 0.1162, "step": 16265 }, { "epoch": 0.47453176964817084, "grad_norm": 0.8371940155546695, "learning_rate": 5.653626281823954e-06, "loss": 0.1561, "step": 16266 }, { "epoch": 0.4745609428788144, "grad_norm": 0.8418990081313251, "learning_rate": 5.653157898881012e-06, "loss": 0.1439, "step": 16267 }, { "epoch": 0.47459011610945795, "grad_norm": 0.8449860784566638, "learning_rate": 5.652689510106832e-06, "loss": 0.149, "step": 16268 }, { "epoch": 0.4746192893401015, "grad_norm": 0.9916595714534104, "learning_rate": 5.652221115505596e-06, "loss": 0.1212, "step": 16269 }, { "epoch": 0.4746484625707451, "grad_norm": 1.1035250167472606, "learning_rate": 5.651752715081486e-06, "loss": 0.154, "step": 16270 }, { "epoch": 0.47467763580138866, "grad_norm": 0.9671174009877531, "learning_rate": 5.651284308838683e-06, "loss": 0.1522, "step": 16271 }, { "epoch": 0.4747068090320322, "grad_norm": 0.945547690334323, "learning_rate": 5.650815896781369e-06, "loss": 0.1532, "step": 16272 }, { "epoch": 0.47473598226267577, "grad_norm": 0.9820598763822408, "learning_rate": 5.650347478913726e-06, "loss": 0.1293, "step": 16273 }, { "epoch": 0.4747651554933193, "grad_norm": 0.9646350181918945, "learning_rate": 5.649879055239936e-06, "loss": 0.142, "step": 16274 }, { "epoch": 0.4747943287239629, "grad_norm": 1.0998332083928983, "learning_rate": 5.649410625764181e-06, "loss": 0.1274, "step": 16275 }, { "epoch": 0.47482350195460643, "grad_norm": 1.0358855667448654, "learning_rate": 5.648942190490645e-06, "loss": 0.1454, "step": 16276 }, { "epoch": 0.47485267518525004, "grad_norm": 0.73096872678173, "learning_rate": 5.648473749423504e-06, "loss": 0.1263, "step": 16277 }, { "epoch": 0.4748818484158936, "grad_norm": 0.7662822299619612, "learning_rate": 5.648005302566948e-06, "loss": 0.095, "step": 16278 }, { "epoch": 0.47491102164653715, "grad_norm": 0.8184027461602206, "learning_rate": 5.647536849925154e-06, "loss": 0.1174, "step": 16279 }, { "epoch": 0.4749401948771807, "grad_norm": 0.7924127382126381, "learning_rate": 5.647068391502304e-06, "loss": 0.1202, "step": 16280 }, { "epoch": 0.47496936810782425, "grad_norm": 1.10565237542086, "learning_rate": 5.646599927302584e-06, "loss": 0.1581, "step": 16281 }, { "epoch": 0.4749985413384678, "grad_norm": 0.7452630278413567, "learning_rate": 5.646131457330173e-06, "loss": 0.1151, "step": 16282 }, { "epoch": 0.47502771456911136, "grad_norm": 0.7729927488370573, "learning_rate": 5.645662981589255e-06, "loss": 0.1377, "step": 16283 }, { "epoch": 0.47505688779975497, "grad_norm": 0.7449926836030744, "learning_rate": 5.645194500084011e-06, "loss": 0.139, "step": 16284 }, { "epoch": 0.4750860610303985, "grad_norm": 0.7884663737594525, "learning_rate": 5.644726012818626e-06, "loss": 0.1303, "step": 16285 }, { "epoch": 0.4751152342610421, "grad_norm": 1.008820998919924, "learning_rate": 5.644257519797281e-06, "loss": 0.1309, "step": 16286 }, { "epoch": 0.4751444074916856, "grad_norm": 0.7658301592147836, "learning_rate": 5.643789021024157e-06, "loss": 0.1518, "step": 16287 }, { "epoch": 0.4751735807223292, "grad_norm": 0.880928281849524, "learning_rate": 5.64332051650344e-06, "loss": 0.1285, "step": 16288 }, { "epoch": 0.47520275395297273, "grad_norm": 0.7554296987147964, "learning_rate": 5.642852006239311e-06, "loss": 0.1259, "step": 16289 }, { "epoch": 0.4752319271836163, "grad_norm": 1.024612839630492, "learning_rate": 5.642383490235952e-06, "loss": 0.1228, "step": 16290 }, { "epoch": 0.4752611004142599, "grad_norm": 0.9496401102042054, "learning_rate": 5.641914968497547e-06, "loss": 0.1465, "step": 16291 }, { "epoch": 0.47529027364490345, "grad_norm": 0.7016204864134581, "learning_rate": 5.6414464410282775e-06, "loss": 0.1388, "step": 16292 }, { "epoch": 0.475319446875547, "grad_norm": 0.9001482427453192, "learning_rate": 5.640977907832329e-06, "loss": 0.127, "step": 16293 }, { "epoch": 0.47534862010619056, "grad_norm": 0.7739858159602111, "learning_rate": 5.640509368913881e-06, "loss": 0.1445, "step": 16294 }, { "epoch": 0.4753777933368341, "grad_norm": 0.8609336916889685, "learning_rate": 5.640040824277119e-06, "loss": 0.144, "step": 16295 }, { "epoch": 0.47540696656747766, "grad_norm": 0.7231928968906722, "learning_rate": 5.639572273926226e-06, "loss": 0.115, "step": 16296 }, { "epoch": 0.47543613979812127, "grad_norm": 0.8155024116357175, "learning_rate": 5.639103717865383e-06, "loss": 0.127, "step": 16297 }, { "epoch": 0.4754653130287648, "grad_norm": 0.8359432403720437, "learning_rate": 5.6386351560987765e-06, "loss": 0.1327, "step": 16298 }, { "epoch": 0.4754944862594084, "grad_norm": 0.9868203154406928, "learning_rate": 5.6381665886305855e-06, "loss": 0.1372, "step": 16299 }, { "epoch": 0.47552365949005193, "grad_norm": 0.7039813079919041, "learning_rate": 5.637698015464996e-06, "loss": 0.124, "step": 16300 }, { "epoch": 0.4755528327206955, "grad_norm": 0.9433427371998303, "learning_rate": 5.637229436606193e-06, "loss": 0.1366, "step": 16301 }, { "epoch": 0.47558200595133904, "grad_norm": 1.104607429078894, "learning_rate": 5.636760852058356e-06, "loss": 0.1304, "step": 16302 }, { "epoch": 0.4756111791819826, "grad_norm": 0.9894809272089414, "learning_rate": 5.63629226182567e-06, "loss": 0.1131, "step": 16303 }, { "epoch": 0.4756403524126262, "grad_norm": 0.8373168273815884, "learning_rate": 5.635823665912319e-06, "loss": 0.1276, "step": 16304 }, { "epoch": 0.47566952564326975, "grad_norm": 1.1356005883576916, "learning_rate": 5.635355064322485e-06, "loss": 0.1538, "step": 16305 }, { "epoch": 0.4756986988739133, "grad_norm": 0.9302560494816402, "learning_rate": 5.634886457060355e-06, "loss": 0.1354, "step": 16306 }, { "epoch": 0.47572787210455686, "grad_norm": 0.9249712503956626, "learning_rate": 5.634417844130108e-06, "loss": 0.1238, "step": 16307 }, { "epoch": 0.4757570453352004, "grad_norm": 0.9696940757980068, "learning_rate": 5.633949225535932e-06, "loss": 0.1312, "step": 16308 }, { "epoch": 0.47578621856584397, "grad_norm": 1.0416341297635099, "learning_rate": 5.633480601282007e-06, "loss": 0.1279, "step": 16309 }, { "epoch": 0.4758153917964875, "grad_norm": 1.023882818797613, "learning_rate": 5.633011971372519e-06, "loss": 0.1486, "step": 16310 }, { "epoch": 0.47584456502713113, "grad_norm": 0.8128353283753157, "learning_rate": 5.632543335811651e-06, "loss": 0.1116, "step": 16311 }, { "epoch": 0.4758737382577747, "grad_norm": 0.984506636394337, "learning_rate": 5.632074694603586e-06, "loss": 0.1379, "step": 16312 }, { "epoch": 0.47590291148841823, "grad_norm": 1.084500215608927, "learning_rate": 5.631606047752512e-06, "loss": 0.1328, "step": 16313 }, { "epoch": 0.4759320847190618, "grad_norm": 0.7600621811441662, "learning_rate": 5.631137395262608e-06, "loss": 0.1127, "step": 16314 }, { "epoch": 0.47596125794970534, "grad_norm": 0.9186156422568045, "learning_rate": 5.6306687371380585e-06, "loss": 0.1189, "step": 16315 }, { "epoch": 0.4759904311803489, "grad_norm": 0.7012573468766916, "learning_rate": 5.630200073383052e-06, "loss": 0.1336, "step": 16316 }, { "epoch": 0.47601960441099245, "grad_norm": 0.9912072267814284, "learning_rate": 5.629731404001769e-06, "loss": 0.1464, "step": 16317 }, { "epoch": 0.47604877764163606, "grad_norm": 0.766488550025548, "learning_rate": 5.6292627289983934e-06, "loss": 0.1154, "step": 16318 }, { "epoch": 0.4760779508722796, "grad_norm": 0.9167470478466365, "learning_rate": 5.628794048377111e-06, "loss": 0.123, "step": 16319 }, { "epoch": 0.47610712410292316, "grad_norm": 0.8191229535731257, "learning_rate": 5.628325362142105e-06, "loss": 0.1332, "step": 16320 }, { "epoch": 0.4761362973335667, "grad_norm": 0.7887257907050483, "learning_rate": 5.62785667029756e-06, "loss": 0.1343, "step": 16321 }, { "epoch": 0.47616547056421027, "grad_norm": 0.7854894873088004, "learning_rate": 5.627387972847661e-06, "loss": 0.1503, "step": 16322 }, { "epoch": 0.4761946437948538, "grad_norm": 0.9401852237844465, "learning_rate": 5.626919269796594e-06, "loss": 0.1497, "step": 16323 }, { "epoch": 0.47622381702549743, "grad_norm": 0.9153490341263396, "learning_rate": 5.626450561148537e-06, "loss": 0.1437, "step": 16324 }, { "epoch": 0.476252990256141, "grad_norm": 0.7760296164459083, "learning_rate": 5.625981846907682e-06, "loss": 0.1354, "step": 16325 }, { "epoch": 0.47628216348678454, "grad_norm": 0.9832838538956926, "learning_rate": 5.62551312707821e-06, "loss": 0.1492, "step": 16326 }, { "epoch": 0.4763113367174281, "grad_norm": 0.770392387891613, "learning_rate": 5.625044401664306e-06, "loss": 0.1447, "step": 16327 }, { "epoch": 0.47634050994807164, "grad_norm": 0.8861269753905067, "learning_rate": 5.624575670670155e-06, "loss": 0.1423, "step": 16328 }, { "epoch": 0.4763696831787152, "grad_norm": 0.772477885446521, "learning_rate": 5.624106934099941e-06, "loss": 0.1479, "step": 16329 }, { "epoch": 0.47639885640935875, "grad_norm": 0.8666486284004198, "learning_rate": 5.623638191957849e-06, "loss": 0.1366, "step": 16330 }, { "epoch": 0.47642802964000236, "grad_norm": 1.0405527675674633, "learning_rate": 5.623169444248064e-06, "loss": 0.1234, "step": 16331 }, { "epoch": 0.4764572028706459, "grad_norm": 0.9482855984339433, "learning_rate": 5.6227006909747724e-06, "loss": 0.1242, "step": 16332 }, { "epoch": 0.47648637610128947, "grad_norm": 0.8474810524686269, "learning_rate": 5.622231932142157e-06, "loss": 0.1278, "step": 16333 }, { "epoch": 0.476515549331933, "grad_norm": 0.9077890273505308, "learning_rate": 5.621763167754402e-06, "loss": 0.1239, "step": 16334 }, { "epoch": 0.4765447225625766, "grad_norm": 0.742815865182335, "learning_rate": 5.621294397815697e-06, "loss": 0.1233, "step": 16335 }, { "epoch": 0.4765738957932201, "grad_norm": 1.087039266341935, "learning_rate": 5.620825622330221e-06, "loss": 0.1294, "step": 16336 }, { "epoch": 0.4766030690238637, "grad_norm": 0.932618488392475, "learning_rate": 5.620356841302162e-06, "loss": 0.1392, "step": 16337 }, { "epoch": 0.4766322422545073, "grad_norm": 0.9678135850146654, "learning_rate": 5.6198880547357085e-06, "loss": 0.1507, "step": 16338 }, { "epoch": 0.47666141548515084, "grad_norm": 0.7887670456865438, "learning_rate": 5.619419262635039e-06, "loss": 0.1141, "step": 16339 }, { "epoch": 0.4766905887157944, "grad_norm": 0.9818752688943305, "learning_rate": 5.618950465004344e-06, "loss": 0.1223, "step": 16340 }, { "epoch": 0.47671976194643795, "grad_norm": 1.0586796820123847, "learning_rate": 5.618481661847806e-06, "loss": 0.1435, "step": 16341 }, { "epoch": 0.4767489351770815, "grad_norm": 1.1787417750038875, "learning_rate": 5.618012853169611e-06, "loss": 0.1323, "step": 16342 }, { "epoch": 0.47677810840772505, "grad_norm": 0.9320450187711383, "learning_rate": 5.617544038973946e-06, "loss": 0.129, "step": 16343 }, { "epoch": 0.4768072816383686, "grad_norm": 1.2869324861698728, "learning_rate": 5.617075219264996e-06, "loss": 0.1617, "step": 16344 }, { "epoch": 0.4768364548690122, "grad_norm": 1.0556070212776434, "learning_rate": 5.616606394046944e-06, "loss": 0.1311, "step": 16345 }, { "epoch": 0.47686562809965577, "grad_norm": 0.9284384602442743, "learning_rate": 5.616137563323978e-06, "loss": 0.1204, "step": 16346 }, { "epoch": 0.4768948013302993, "grad_norm": 0.8314594194575753, "learning_rate": 5.615668727100283e-06, "loss": 0.1267, "step": 16347 }, { "epoch": 0.4769239745609429, "grad_norm": 0.8195256567909605, "learning_rate": 5.615199885380044e-06, "loss": 0.1313, "step": 16348 }, { "epoch": 0.47695314779158643, "grad_norm": 0.9979888435768413, "learning_rate": 5.614731038167448e-06, "loss": 0.1578, "step": 16349 }, { "epoch": 0.47698232102223, "grad_norm": 0.9328922645018125, "learning_rate": 5.614262185466679e-06, "loss": 0.1347, "step": 16350 }, { "epoch": 0.47701149425287354, "grad_norm": 0.8920237393330316, "learning_rate": 5.613793327281924e-06, "loss": 0.136, "step": 16351 }, { "epoch": 0.47704066748351714, "grad_norm": 0.7439757715781523, "learning_rate": 5.61332446361737e-06, "loss": 0.1161, "step": 16352 }, { "epoch": 0.4770698407141607, "grad_norm": 0.9532320816988058, "learning_rate": 5.612855594477202e-06, "loss": 0.1422, "step": 16353 }, { "epoch": 0.47709901394480425, "grad_norm": 0.773370658411886, "learning_rate": 5.612386719865604e-06, "loss": 0.1233, "step": 16354 }, { "epoch": 0.4771281871754478, "grad_norm": 0.9073938318172595, "learning_rate": 5.611917839786763e-06, "loss": 0.1331, "step": 16355 }, { "epoch": 0.47715736040609136, "grad_norm": 0.8527106710249348, "learning_rate": 5.6114489542448684e-06, "loss": 0.1323, "step": 16356 }, { "epoch": 0.4771865336367349, "grad_norm": 0.787239349102488, "learning_rate": 5.610980063244099e-06, "loss": 0.1331, "step": 16357 }, { "epoch": 0.4772157068673785, "grad_norm": 0.750076972832993, "learning_rate": 5.61051116678865e-06, "loss": 0.1485, "step": 16358 }, { "epoch": 0.4772448800980221, "grad_norm": 0.9619912671955636, "learning_rate": 5.610042264882701e-06, "loss": 0.1588, "step": 16359 }, { "epoch": 0.4772740533286656, "grad_norm": 1.0436512459267488, "learning_rate": 5.60957335753044e-06, "loss": 0.1387, "step": 16360 }, { "epoch": 0.4773032265593092, "grad_norm": 0.9279762888706286, "learning_rate": 5.6091044447360545e-06, "loss": 0.1349, "step": 16361 }, { "epoch": 0.47733239978995273, "grad_norm": 0.840565962671998, "learning_rate": 5.60863552650373e-06, "loss": 0.1497, "step": 16362 }, { "epoch": 0.4773615730205963, "grad_norm": 1.2783894863802543, "learning_rate": 5.608166602837652e-06, "loss": 0.1352, "step": 16363 }, { "epoch": 0.47739074625123984, "grad_norm": 0.8850599410811496, "learning_rate": 5.607697673742008e-06, "loss": 0.1318, "step": 16364 }, { "epoch": 0.47741991948188345, "grad_norm": 0.8114331658584741, "learning_rate": 5.607228739220984e-06, "loss": 0.1255, "step": 16365 }, { "epoch": 0.477449092712527, "grad_norm": 1.5518561725449465, "learning_rate": 5.606759799278766e-06, "loss": 0.1662, "step": 16366 }, { "epoch": 0.47747826594317055, "grad_norm": 0.9667529991787196, "learning_rate": 5.606290853919543e-06, "loss": 0.1221, "step": 16367 }, { "epoch": 0.4775074391738141, "grad_norm": 0.8184996547673199, "learning_rate": 5.6058219031475e-06, "loss": 0.1235, "step": 16368 }, { "epoch": 0.47753661240445766, "grad_norm": 1.2228537771081796, "learning_rate": 5.605352946966822e-06, "loss": 0.1296, "step": 16369 }, { "epoch": 0.4775657856351012, "grad_norm": 1.0041666589820943, "learning_rate": 5.604883985381699e-06, "loss": 0.1564, "step": 16370 }, { "epoch": 0.47759495886574477, "grad_norm": 0.9556537717756693, "learning_rate": 5.604415018396315e-06, "loss": 0.1294, "step": 16371 }, { "epoch": 0.4776241320963884, "grad_norm": 0.9860609207112173, "learning_rate": 5.603946046014859e-06, "loss": 0.149, "step": 16372 }, { "epoch": 0.47765330532703193, "grad_norm": 0.9007862306763328, "learning_rate": 5.603477068241516e-06, "loss": 0.1252, "step": 16373 }, { "epoch": 0.4776824785576755, "grad_norm": 0.9248634586943631, "learning_rate": 5.603008085080475e-06, "loss": 0.1262, "step": 16374 }, { "epoch": 0.47771165178831904, "grad_norm": 0.8512401550436068, "learning_rate": 5.602539096535921e-06, "loss": 0.1188, "step": 16375 }, { "epoch": 0.4777408250189626, "grad_norm": 0.9262570479023378, "learning_rate": 5.602070102612042e-06, "loss": 0.1491, "step": 16376 }, { "epoch": 0.47776999824960614, "grad_norm": 0.8646970332991294, "learning_rate": 5.6016011033130246e-06, "loss": 0.1637, "step": 16377 }, { "epoch": 0.4777991714802497, "grad_norm": 1.1847361888051147, "learning_rate": 5.601132098643056e-06, "loss": 0.1435, "step": 16378 }, { "epoch": 0.4778283447108933, "grad_norm": 0.764824753947898, "learning_rate": 5.600663088606324e-06, "loss": 0.1164, "step": 16379 }, { "epoch": 0.47785751794153686, "grad_norm": 1.0289881956391431, "learning_rate": 5.600194073207015e-06, "loss": 0.1268, "step": 16380 }, { "epoch": 0.4778866911721804, "grad_norm": 1.0154543053608711, "learning_rate": 5.599725052449316e-06, "loss": 0.1469, "step": 16381 }, { "epoch": 0.47791586440282396, "grad_norm": 0.7685139442929392, "learning_rate": 5.599256026337417e-06, "loss": 0.1182, "step": 16382 }, { "epoch": 0.4779450376334675, "grad_norm": 0.8005495512040665, "learning_rate": 5.5987869948755014e-06, "loss": 0.1249, "step": 16383 }, { "epoch": 0.47797421086411107, "grad_norm": 0.7659027252764419, "learning_rate": 5.598317958067758e-06, "loss": 0.1413, "step": 16384 }, { "epoch": 0.4780033840947547, "grad_norm": 0.7497897555580734, "learning_rate": 5.597848915918376e-06, "loss": 0.1359, "step": 16385 }, { "epoch": 0.47803255732539823, "grad_norm": 0.9880357635674445, "learning_rate": 5.5973798684315415e-06, "loss": 0.135, "step": 16386 }, { "epoch": 0.4780617305560418, "grad_norm": 0.8929744030550361, "learning_rate": 5.5969108156114406e-06, "loss": 0.1312, "step": 16387 }, { "epoch": 0.47809090378668534, "grad_norm": 0.8720010906201271, "learning_rate": 5.596441757462266e-06, "loss": 0.1336, "step": 16388 }, { "epoch": 0.4781200770173289, "grad_norm": 0.7842769572080821, "learning_rate": 5.595972693988199e-06, "loss": 0.1095, "step": 16389 }, { "epoch": 0.47814925024797245, "grad_norm": 0.7877639372626694, "learning_rate": 5.595503625193429e-06, "loss": 0.1336, "step": 16390 }, { "epoch": 0.478178423478616, "grad_norm": 1.0346505626710718, "learning_rate": 5.595034551082147e-06, "loss": 0.1541, "step": 16391 }, { "epoch": 0.4782075967092596, "grad_norm": 1.1464245879999029, "learning_rate": 5.594565471658537e-06, "loss": 0.1532, "step": 16392 }, { "epoch": 0.47823676993990316, "grad_norm": 0.7323674847221384, "learning_rate": 5.594096386926789e-06, "loss": 0.1286, "step": 16393 }, { "epoch": 0.4782659431705467, "grad_norm": 0.7177221973266082, "learning_rate": 5.5936272968910905e-06, "loss": 0.149, "step": 16394 }, { "epoch": 0.47829511640119027, "grad_norm": 0.939104521542515, "learning_rate": 5.5931582015556294e-06, "loss": 0.1489, "step": 16395 }, { "epoch": 0.4783242896318338, "grad_norm": 0.7656754892557142, "learning_rate": 5.592689100924595e-06, "loss": 0.1266, "step": 16396 }, { "epoch": 0.4783534628624774, "grad_norm": 0.71370623999873, "learning_rate": 5.59221999500217e-06, "loss": 0.1538, "step": 16397 }, { "epoch": 0.4783826360931209, "grad_norm": 0.885135228127414, "learning_rate": 5.59175088379255e-06, "loss": 0.1246, "step": 16398 }, { "epoch": 0.47841180932376454, "grad_norm": 1.030477827237176, "learning_rate": 5.591281767299916e-06, "loss": 0.1483, "step": 16399 }, { "epoch": 0.4784409825544081, "grad_norm": 0.817095829340118, "learning_rate": 5.590812645528462e-06, "loss": 0.1169, "step": 16400 }, { "epoch": 0.47847015578505164, "grad_norm": 0.7114241373232695, "learning_rate": 5.590343518482374e-06, "loss": 0.1335, "step": 16401 }, { "epoch": 0.4784993290156952, "grad_norm": 0.8398076516534582, "learning_rate": 5.589874386165838e-06, "loss": 0.1445, "step": 16402 }, { "epoch": 0.47852850224633875, "grad_norm": 0.8405885559418892, "learning_rate": 5.5894052485830464e-06, "loss": 0.1207, "step": 16403 }, { "epoch": 0.4785576754769823, "grad_norm": 0.6021177127044054, "learning_rate": 5.588936105738184e-06, "loss": 0.1113, "step": 16404 }, { "epoch": 0.47858684870762586, "grad_norm": 1.0032348959711763, "learning_rate": 5.588466957635441e-06, "loss": 0.1423, "step": 16405 }, { "epoch": 0.47861602193826946, "grad_norm": 0.7924935714187326, "learning_rate": 5.587997804279005e-06, "loss": 0.1522, "step": 16406 }, { "epoch": 0.478645195168913, "grad_norm": 0.7680357104654308, "learning_rate": 5.587528645673066e-06, "loss": 0.1208, "step": 16407 }, { "epoch": 0.47867436839955657, "grad_norm": 0.8609661601631099, "learning_rate": 5.58705948182181e-06, "loss": 0.1406, "step": 16408 }, { "epoch": 0.4787035416302001, "grad_norm": 0.9511141456151238, "learning_rate": 5.586590312729429e-06, "loss": 0.1342, "step": 16409 }, { "epoch": 0.4787327148608437, "grad_norm": 0.7120753550924881, "learning_rate": 5.586121138400108e-06, "loss": 0.1095, "step": 16410 }, { "epoch": 0.47876188809148723, "grad_norm": 1.0006084562915092, "learning_rate": 5.5856519588380385e-06, "loss": 0.14, "step": 16411 }, { "epoch": 0.47879106132213084, "grad_norm": 0.9435542241082506, "learning_rate": 5.5851827740474075e-06, "loss": 0.1616, "step": 16412 }, { "epoch": 0.4788202345527744, "grad_norm": 0.930465655253894, "learning_rate": 5.584713584032406e-06, "loss": 0.121, "step": 16413 }, { "epoch": 0.47884940778341795, "grad_norm": 0.7637096447809273, "learning_rate": 5.5842443887972184e-06, "loss": 0.1268, "step": 16414 }, { "epoch": 0.4788785810140615, "grad_norm": 0.8205669053114473, "learning_rate": 5.5837751883460375e-06, "loss": 0.1387, "step": 16415 }, { "epoch": 0.47890775424470505, "grad_norm": 0.8744346633681963, "learning_rate": 5.583305982683053e-06, "loss": 0.1417, "step": 16416 }, { "epoch": 0.4789369274753486, "grad_norm": 0.7638189563637072, "learning_rate": 5.582836771812448e-06, "loss": 0.1191, "step": 16417 }, { "epoch": 0.47896610070599216, "grad_norm": 1.0753041099163074, "learning_rate": 5.582367555738419e-06, "loss": 0.1313, "step": 16418 }, { "epoch": 0.47899527393663577, "grad_norm": 0.8909782947913392, "learning_rate": 5.5818983344651515e-06, "loss": 0.1149, "step": 16419 }, { "epoch": 0.4790244471672793, "grad_norm": 0.8579148418636114, "learning_rate": 5.581429107996833e-06, "loss": 0.1323, "step": 16420 }, { "epoch": 0.4790536203979229, "grad_norm": 1.112360093645641, "learning_rate": 5.580959876337654e-06, "loss": 0.1384, "step": 16421 }, { "epoch": 0.47908279362856643, "grad_norm": 0.8913188629078489, "learning_rate": 5.580490639491805e-06, "loss": 0.1551, "step": 16422 }, { "epoch": 0.47911196685921, "grad_norm": 0.7864805654378544, "learning_rate": 5.580021397463473e-06, "loss": 0.1642, "step": 16423 }, { "epoch": 0.47914114008985353, "grad_norm": 1.0481762492684195, "learning_rate": 5.579552150256849e-06, "loss": 0.1035, "step": 16424 }, { "epoch": 0.4791703133204971, "grad_norm": 0.8525269377785037, "learning_rate": 5.5790828978761215e-06, "loss": 0.1503, "step": 16425 }, { "epoch": 0.4791994865511407, "grad_norm": 0.7712905240418593, "learning_rate": 5.578613640325481e-06, "loss": 0.1297, "step": 16426 }, { "epoch": 0.47922865978178425, "grad_norm": 1.1679634596940096, "learning_rate": 5.5781443776091145e-06, "loss": 0.1501, "step": 16427 }, { "epoch": 0.4792578330124278, "grad_norm": 1.0837233145526382, "learning_rate": 5.577675109731216e-06, "loss": 0.1392, "step": 16428 }, { "epoch": 0.47928700624307136, "grad_norm": 0.8112739922629897, "learning_rate": 5.577205836695968e-06, "loss": 0.1263, "step": 16429 }, { "epoch": 0.4793161794737149, "grad_norm": 1.4041024594045965, "learning_rate": 5.576736558507566e-06, "loss": 0.1345, "step": 16430 }, { "epoch": 0.47934535270435846, "grad_norm": 1.137613393891698, "learning_rate": 5.5762672751702e-06, "loss": 0.1325, "step": 16431 }, { "epoch": 0.479374525935002, "grad_norm": 0.8934552907259496, "learning_rate": 5.575797986688053e-06, "loss": 0.1214, "step": 16432 }, { "epoch": 0.4794036991656456, "grad_norm": 0.6215980394763082, "learning_rate": 5.575328693065322e-06, "loss": 0.1091, "step": 16433 }, { "epoch": 0.4794328723962892, "grad_norm": 1.1148917131769525, "learning_rate": 5.574859394306194e-06, "loss": 0.1246, "step": 16434 }, { "epoch": 0.47946204562693273, "grad_norm": 1.1038719635113177, "learning_rate": 5.574390090414856e-06, "loss": 0.1283, "step": 16435 }, { "epoch": 0.4794912188575763, "grad_norm": 0.8680988789897821, "learning_rate": 5.573920781395502e-06, "loss": 0.149, "step": 16436 }, { "epoch": 0.47952039208821984, "grad_norm": 1.0342378646567074, "learning_rate": 5.57345146725232e-06, "loss": 0.1458, "step": 16437 }, { "epoch": 0.4795495653188634, "grad_norm": 0.9369272256059544, "learning_rate": 5.572982147989501e-06, "loss": 0.1341, "step": 16438 }, { "epoch": 0.479578738549507, "grad_norm": 0.709060020956352, "learning_rate": 5.5725128236112326e-06, "loss": 0.1266, "step": 16439 }, { "epoch": 0.47960791178015055, "grad_norm": 0.7898214892689679, "learning_rate": 5.572043494121707e-06, "loss": 0.1579, "step": 16440 }, { "epoch": 0.4796370850107941, "grad_norm": 1.0129801073910862, "learning_rate": 5.571574159525114e-06, "loss": 0.1524, "step": 16441 }, { "epoch": 0.47966625824143766, "grad_norm": 1.0811600843143345, "learning_rate": 5.571104819825643e-06, "loss": 0.1195, "step": 16442 }, { "epoch": 0.4796954314720812, "grad_norm": 0.8784083018169584, "learning_rate": 5.570635475027486e-06, "loss": 0.1459, "step": 16443 }, { "epoch": 0.47972460470272477, "grad_norm": 0.8980149655159101, "learning_rate": 5.570166125134829e-06, "loss": 0.1357, "step": 16444 }, { "epoch": 0.4797537779333683, "grad_norm": 0.8902907706004111, "learning_rate": 5.569696770151866e-06, "loss": 0.1377, "step": 16445 }, { "epoch": 0.47978295116401193, "grad_norm": 0.8007189101995574, "learning_rate": 5.569227410082788e-06, "loss": 0.1116, "step": 16446 }, { "epoch": 0.4798121243946555, "grad_norm": 1.013187782512625, "learning_rate": 5.568758044931781e-06, "loss": 0.1442, "step": 16447 }, { "epoch": 0.47984129762529903, "grad_norm": 0.8005773321204657, "learning_rate": 5.568288674703041e-06, "loss": 0.1273, "step": 16448 }, { "epoch": 0.4798704708559426, "grad_norm": 0.7571208636228627, "learning_rate": 5.5678192994007526e-06, "loss": 0.1429, "step": 16449 }, { "epoch": 0.47989964408658614, "grad_norm": 0.6262548695044964, "learning_rate": 5.56734991902911e-06, "loss": 0.1236, "step": 16450 }, { "epoch": 0.4799288173172297, "grad_norm": 1.0030999671621827, "learning_rate": 5.566880533592303e-06, "loss": 0.1397, "step": 16451 }, { "epoch": 0.47995799054787325, "grad_norm": 0.825659076886547, "learning_rate": 5.566411143094521e-06, "loss": 0.1256, "step": 16452 }, { "epoch": 0.47998716377851686, "grad_norm": 0.7132176911079248, "learning_rate": 5.565941747539957e-06, "loss": 0.1444, "step": 16453 }, { "epoch": 0.4800163370091604, "grad_norm": 0.7043674861429906, "learning_rate": 5.565472346932799e-06, "loss": 0.1528, "step": 16454 }, { "epoch": 0.48004551023980396, "grad_norm": 0.9347895133101263, "learning_rate": 5.565002941277239e-06, "loss": 0.1267, "step": 16455 }, { "epoch": 0.4800746834704475, "grad_norm": 0.7726630271489435, "learning_rate": 5.564533530577467e-06, "loss": 0.1314, "step": 16456 }, { "epoch": 0.48010385670109107, "grad_norm": 0.6726280534384327, "learning_rate": 5.5640641148376765e-06, "loss": 0.1319, "step": 16457 }, { "epoch": 0.4801330299317346, "grad_norm": 0.8693280533083172, "learning_rate": 5.563594694062055e-06, "loss": 0.1297, "step": 16458 }, { "epoch": 0.4801622031623782, "grad_norm": 1.0234800984648962, "learning_rate": 5.563125268254794e-06, "loss": 0.1107, "step": 16459 }, { "epoch": 0.4801913763930218, "grad_norm": 0.9369029388200885, "learning_rate": 5.562655837420086e-06, "loss": 0.1648, "step": 16460 }, { "epoch": 0.48022054962366534, "grad_norm": 0.9688258507488033, "learning_rate": 5.562186401562121e-06, "loss": 0.1238, "step": 16461 }, { "epoch": 0.4802497228543089, "grad_norm": 0.9769512448249669, "learning_rate": 5.561716960685089e-06, "loss": 0.1447, "step": 16462 }, { "epoch": 0.48027889608495244, "grad_norm": 1.0054589122449409, "learning_rate": 5.561247514793183e-06, "loss": 0.1481, "step": 16463 }, { "epoch": 0.480308069315596, "grad_norm": 0.8938732851180062, "learning_rate": 5.560778063890593e-06, "loss": 0.1446, "step": 16464 }, { "epoch": 0.48033724254623955, "grad_norm": 0.9926766665942596, "learning_rate": 5.560308607981511e-06, "loss": 0.1388, "step": 16465 }, { "epoch": 0.4803664157768831, "grad_norm": 0.6755509937775684, "learning_rate": 5.559839147070125e-06, "loss": 0.1388, "step": 16466 }, { "epoch": 0.4803955890075267, "grad_norm": 1.1091210266959968, "learning_rate": 5.5593696811606314e-06, "loss": 0.126, "step": 16467 }, { "epoch": 0.48042476223817027, "grad_norm": 0.8662699026525464, "learning_rate": 5.558900210257218e-06, "loss": 0.1566, "step": 16468 }, { "epoch": 0.4804539354688138, "grad_norm": 0.7126832076605465, "learning_rate": 5.558430734364077e-06, "loss": 0.1342, "step": 16469 }, { "epoch": 0.4804831086994574, "grad_norm": 0.8529551201597447, "learning_rate": 5.557961253485399e-06, "loss": 0.1429, "step": 16470 }, { "epoch": 0.4805122819301009, "grad_norm": 0.7867620828402069, "learning_rate": 5.5574917676253755e-06, "loss": 0.1195, "step": 16471 }, { "epoch": 0.4805414551607445, "grad_norm": 0.7318710212935304, "learning_rate": 5.5570222767882e-06, "loss": 0.1182, "step": 16472 }, { "epoch": 0.4805706283913881, "grad_norm": 0.7632637272100438, "learning_rate": 5.5565527809780635e-06, "loss": 0.1299, "step": 16473 }, { "epoch": 0.48059980162203164, "grad_norm": 0.8563688559181248, "learning_rate": 5.556083280199154e-06, "loss": 0.1523, "step": 16474 }, { "epoch": 0.4806289748526752, "grad_norm": 0.8808119500229724, "learning_rate": 5.555613774455667e-06, "loss": 0.1288, "step": 16475 }, { "epoch": 0.48065814808331875, "grad_norm": 0.6808399437320495, "learning_rate": 5.555144263751795e-06, "loss": 0.1326, "step": 16476 }, { "epoch": 0.4806873213139623, "grad_norm": 0.808250933327027, "learning_rate": 5.554674748091724e-06, "loss": 0.1225, "step": 16477 }, { "epoch": 0.48071649454460585, "grad_norm": 0.9609093191262928, "learning_rate": 5.5542052274796524e-06, "loss": 0.1491, "step": 16478 }, { "epoch": 0.4807456677752494, "grad_norm": 0.8257760206838456, "learning_rate": 5.5537357019197665e-06, "loss": 0.1298, "step": 16479 }, { "epoch": 0.480774841005893, "grad_norm": 0.7090715152658524, "learning_rate": 5.553266171416261e-06, "loss": 0.1389, "step": 16480 }, { "epoch": 0.48080401423653657, "grad_norm": 1.0532035124340533, "learning_rate": 5.5527966359733274e-06, "loss": 0.1422, "step": 16481 }, { "epoch": 0.4808331874671801, "grad_norm": 1.0824765111729429, "learning_rate": 5.552327095595157e-06, "loss": 0.1246, "step": 16482 }, { "epoch": 0.4808623606978237, "grad_norm": 0.7181584806931197, "learning_rate": 5.551857550285943e-06, "loss": 0.1449, "step": 16483 }, { "epoch": 0.48089153392846723, "grad_norm": 0.7609579922837822, "learning_rate": 5.551388000049875e-06, "loss": 0.1285, "step": 16484 }, { "epoch": 0.4809207071591108, "grad_norm": 0.9222075582398839, "learning_rate": 5.550918444891148e-06, "loss": 0.1275, "step": 16485 }, { "epoch": 0.48094988038975434, "grad_norm": 0.9596579643201208, "learning_rate": 5.550448884813952e-06, "loss": 0.1431, "step": 16486 }, { "epoch": 0.48097905362039794, "grad_norm": 0.7015797071947066, "learning_rate": 5.54997931982248e-06, "loss": 0.1381, "step": 16487 }, { "epoch": 0.4810082268510415, "grad_norm": 0.925869743484793, "learning_rate": 5.5495097499209235e-06, "loss": 0.158, "step": 16488 }, { "epoch": 0.48103740008168505, "grad_norm": 0.8716894146482536, "learning_rate": 5.549040175113476e-06, "loss": 0.1067, "step": 16489 }, { "epoch": 0.4810665733123286, "grad_norm": 0.7537976244261037, "learning_rate": 5.548570595404328e-06, "loss": 0.1431, "step": 16490 }, { "epoch": 0.48109574654297216, "grad_norm": 0.9190646876419771, "learning_rate": 5.548101010797673e-06, "loss": 0.1516, "step": 16491 }, { "epoch": 0.4811249197736157, "grad_norm": 0.852624910553048, "learning_rate": 5.547631421297704e-06, "loss": 0.1157, "step": 16492 }, { "epoch": 0.48115409300425926, "grad_norm": 0.718147437544926, "learning_rate": 5.5471618269086125e-06, "loss": 0.1214, "step": 16493 }, { "epoch": 0.4811832662349029, "grad_norm": 0.8552282376593306, "learning_rate": 5.546692227634588e-06, "loss": 0.1487, "step": 16494 }, { "epoch": 0.4812124394655464, "grad_norm": 0.9300653222118747, "learning_rate": 5.546222623479829e-06, "loss": 0.144, "step": 16495 }, { "epoch": 0.48124161269619, "grad_norm": 0.856634662299573, "learning_rate": 5.545753014448523e-06, "loss": 0.1705, "step": 16496 }, { "epoch": 0.48127078592683353, "grad_norm": 0.653131023475819, "learning_rate": 5.545283400544864e-06, "loss": 0.1387, "step": 16497 }, { "epoch": 0.4812999591574771, "grad_norm": 0.7570686133259324, "learning_rate": 5.544813781773046e-06, "loss": 0.1388, "step": 16498 }, { "epoch": 0.48132913238812064, "grad_norm": 0.9521446827960226, "learning_rate": 5.544344158137262e-06, "loss": 0.1181, "step": 16499 }, { "epoch": 0.48135830561876425, "grad_norm": 1.8571987751703076, "learning_rate": 5.543874529641701e-06, "loss": 0.1173, "step": 16500 }, { "epoch": 0.4813874788494078, "grad_norm": 0.6692509290438796, "learning_rate": 5.543404896290559e-06, "loss": 0.1118, "step": 16501 }, { "epoch": 0.48141665208005135, "grad_norm": 0.8703717468671059, "learning_rate": 5.542935258088027e-06, "loss": 0.1481, "step": 16502 }, { "epoch": 0.4814458253106949, "grad_norm": 0.95446062042861, "learning_rate": 5.5424656150383e-06, "loss": 0.1519, "step": 16503 }, { "epoch": 0.48147499854133846, "grad_norm": 0.599736865610296, "learning_rate": 5.5419959671455685e-06, "loss": 0.1345, "step": 16504 }, { "epoch": 0.481504171771982, "grad_norm": 0.9137106562508852, "learning_rate": 5.541526314414025e-06, "loss": 0.1264, "step": 16505 }, { "epoch": 0.48153334500262557, "grad_norm": 0.8354033497039266, "learning_rate": 5.541056656847866e-06, "loss": 0.118, "step": 16506 }, { "epoch": 0.4815625182332692, "grad_norm": 0.9765969251457027, "learning_rate": 5.540586994451281e-06, "loss": 0.1303, "step": 16507 }, { "epoch": 0.48159169146391273, "grad_norm": 0.8333721286224998, "learning_rate": 5.540117327228467e-06, "loss": 0.1842, "step": 16508 }, { "epoch": 0.4816208646945563, "grad_norm": 0.8190719756353236, "learning_rate": 5.5396476551836105e-06, "loss": 0.1287, "step": 16509 }, { "epoch": 0.48165003792519984, "grad_norm": 0.8662930610716588, "learning_rate": 5.539177978320912e-06, "loss": 0.131, "step": 16510 }, { "epoch": 0.4816792111558434, "grad_norm": 0.778730400033978, "learning_rate": 5.53870829664456e-06, "loss": 0.122, "step": 16511 }, { "epoch": 0.48170838438648694, "grad_norm": 0.7945330640920626, "learning_rate": 5.538238610158747e-06, "loss": 0.1513, "step": 16512 }, { "epoch": 0.4817375576171305, "grad_norm": 0.8547319196600227, "learning_rate": 5.537768918867672e-06, "loss": 0.125, "step": 16513 }, { "epoch": 0.4817667308477741, "grad_norm": 0.9733174618029062, "learning_rate": 5.537299222775522e-06, "loss": 0.1372, "step": 16514 }, { "epoch": 0.48179590407841766, "grad_norm": 0.5822618893424835, "learning_rate": 5.536829521886493e-06, "loss": 0.1038, "step": 16515 }, { "epoch": 0.4818250773090612, "grad_norm": 0.8427107813694704, "learning_rate": 5.536359816204779e-06, "loss": 0.1315, "step": 16516 }, { "epoch": 0.48185425053970476, "grad_norm": 0.995674395919126, "learning_rate": 5.535890105734571e-06, "loss": 0.1547, "step": 16517 }, { "epoch": 0.4818834237703483, "grad_norm": 0.8359358884480976, "learning_rate": 5.535420390480065e-06, "loss": 0.1496, "step": 16518 }, { "epoch": 0.48191259700099187, "grad_norm": 0.794006514004653, "learning_rate": 5.534950670445453e-06, "loss": 0.14, "step": 16519 }, { "epoch": 0.4819417702316354, "grad_norm": 0.9871956366899922, "learning_rate": 5.53448094563493e-06, "loss": 0.13, "step": 16520 }, { "epoch": 0.48197094346227903, "grad_norm": 0.9647479521573002, "learning_rate": 5.534011216052688e-06, "loss": 0.1238, "step": 16521 }, { "epoch": 0.4820001166929226, "grad_norm": 0.7474789922402135, "learning_rate": 5.533541481702922e-06, "loss": 0.1419, "step": 16522 }, { "epoch": 0.48202928992356614, "grad_norm": 0.890849777723087, "learning_rate": 5.533071742589826e-06, "loss": 0.1217, "step": 16523 }, { "epoch": 0.4820584631542097, "grad_norm": 0.9112210921801089, "learning_rate": 5.53260199871759e-06, "loss": 0.1435, "step": 16524 }, { "epoch": 0.48208763638485325, "grad_norm": 0.8242540047625232, "learning_rate": 5.532132250090414e-06, "loss": 0.1357, "step": 16525 }, { "epoch": 0.4821168096154968, "grad_norm": 1.0408837781474771, "learning_rate": 5.531662496712485e-06, "loss": 0.148, "step": 16526 }, { "epoch": 0.4821459828461404, "grad_norm": 0.808382138381165, "learning_rate": 5.531192738588e-06, "loss": 0.142, "step": 16527 }, { "epoch": 0.48217515607678396, "grad_norm": 0.985210178693923, "learning_rate": 5.5307229757211565e-06, "loss": 0.1392, "step": 16528 }, { "epoch": 0.4822043293074275, "grad_norm": 0.733025222941078, "learning_rate": 5.530253208116143e-06, "loss": 0.125, "step": 16529 }, { "epoch": 0.48223350253807107, "grad_norm": 1.0001245540675354, "learning_rate": 5.529783435777155e-06, "loss": 0.1289, "step": 16530 }, { "epoch": 0.4822626757687146, "grad_norm": 0.7763369947500759, "learning_rate": 5.529313658708387e-06, "loss": 0.1572, "step": 16531 }, { "epoch": 0.4822918489993582, "grad_norm": 0.7732278672191945, "learning_rate": 5.528843876914034e-06, "loss": 0.1258, "step": 16532 }, { "epoch": 0.48232102223000173, "grad_norm": 0.7179032294173053, "learning_rate": 5.5283740903982886e-06, "loss": 0.121, "step": 16533 }, { "epoch": 0.48235019546064534, "grad_norm": 0.8505302722884381, "learning_rate": 5.5279042991653456e-06, "loss": 0.141, "step": 16534 }, { "epoch": 0.4823793686912889, "grad_norm": 0.8750178937222148, "learning_rate": 5.527434503219398e-06, "loss": 0.1379, "step": 16535 }, { "epoch": 0.48240854192193244, "grad_norm": 0.7382532797087719, "learning_rate": 5.526964702564642e-06, "loss": 0.1319, "step": 16536 }, { "epoch": 0.482437715152576, "grad_norm": 0.8259270765480796, "learning_rate": 5.52649489720527e-06, "loss": 0.1567, "step": 16537 }, { "epoch": 0.48246688838321955, "grad_norm": 0.7733304721672678, "learning_rate": 5.526025087145479e-06, "loss": 0.1617, "step": 16538 }, { "epoch": 0.4824960616138631, "grad_norm": 0.7787072372182898, "learning_rate": 5.52555527238946e-06, "loss": 0.1351, "step": 16539 }, { "epoch": 0.48252523484450666, "grad_norm": 0.7745084992915291, "learning_rate": 5.525085452941411e-06, "loss": 0.1699, "step": 16540 }, { "epoch": 0.48255440807515027, "grad_norm": 0.8384407509541995, "learning_rate": 5.524615628805523e-06, "loss": 0.1418, "step": 16541 }, { "epoch": 0.4825835813057938, "grad_norm": 0.8909645378113125, "learning_rate": 5.52414579998599e-06, "loss": 0.1348, "step": 16542 }, { "epoch": 0.48261275453643737, "grad_norm": 0.7095590406783882, "learning_rate": 5.523675966487012e-06, "loss": 0.1429, "step": 16543 }, { "epoch": 0.4826419277670809, "grad_norm": 0.9085360330182193, "learning_rate": 5.523206128312778e-06, "loss": 0.1515, "step": 16544 }, { "epoch": 0.4826711009977245, "grad_norm": 1.1421048007952443, "learning_rate": 5.522736285467485e-06, "loss": 0.1489, "step": 16545 }, { "epoch": 0.48270027422836803, "grad_norm": 1.1052497560282095, "learning_rate": 5.522266437955327e-06, "loss": 0.1619, "step": 16546 }, { "epoch": 0.4827294474590116, "grad_norm": 0.9155191091801037, "learning_rate": 5.5217965857804985e-06, "loss": 0.1381, "step": 16547 }, { "epoch": 0.4827586206896552, "grad_norm": 0.8365915686320591, "learning_rate": 5.521326728947195e-06, "loss": 0.1277, "step": 16548 }, { "epoch": 0.48278779392029875, "grad_norm": 0.7858846985914822, "learning_rate": 5.520856867459612e-06, "loss": 0.1422, "step": 16549 }, { "epoch": 0.4828169671509423, "grad_norm": 1.3055734495893565, "learning_rate": 5.520387001321941e-06, "loss": 0.1592, "step": 16550 }, { "epoch": 0.48284614038158585, "grad_norm": 1.008156375466698, "learning_rate": 5.519917130538381e-06, "loss": 0.1275, "step": 16551 }, { "epoch": 0.4828753136122294, "grad_norm": 0.77627103000323, "learning_rate": 5.519447255113124e-06, "loss": 0.1171, "step": 16552 }, { "epoch": 0.48290448684287296, "grad_norm": 1.060123575100618, "learning_rate": 5.518977375050369e-06, "loss": 0.143, "step": 16553 }, { "epoch": 0.48293366007351657, "grad_norm": 1.021153263148013, "learning_rate": 5.518507490354303e-06, "loss": 0.125, "step": 16554 }, { "epoch": 0.4829628333041601, "grad_norm": 0.882098373933168, "learning_rate": 5.518037601029129e-06, "loss": 0.1284, "step": 16555 }, { "epoch": 0.4829920065348037, "grad_norm": 0.7901226421111385, "learning_rate": 5.517567707079038e-06, "loss": 0.119, "step": 16556 }, { "epoch": 0.48302117976544723, "grad_norm": 0.920247587865245, "learning_rate": 5.517097808508225e-06, "loss": 0.1302, "step": 16557 }, { "epoch": 0.4830503529960908, "grad_norm": 1.1793749720336413, "learning_rate": 5.516627905320888e-06, "loss": 0.1553, "step": 16558 }, { "epoch": 0.48307952622673433, "grad_norm": 1.0796224648889627, "learning_rate": 5.51615799752122e-06, "loss": 0.1401, "step": 16559 }, { "epoch": 0.4831086994573779, "grad_norm": 1.5646483378237794, "learning_rate": 5.515688085113416e-06, "loss": 0.1337, "step": 16560 }, { "epoch": 0.4831378726880215, "grad_norm": 0.9313085361047224, "learning_rate": 5.515218168101673e-06, "loss": 0.1381, "step": 16561 }, { "epoch": 0.48316704591866505, "grad_norm": 0.9453004475932669, "learning_rate": 5.514748246490184e-06, "loss": 0.134, "step": 16562 }, { "epoch": 0.4831962191493086, "grad_norm": 0.9656647349989894, "learning_rate": 5.514278320283145e-06, "loss": 0.1097, "step": 16563 }, { "epoch": 0.48322539237995216, "grad_norm": 0.8275094301706116, "learning_rate": 5.513808389484754e-06, "loss": 0.1214, "step": 16564 }, { "epoch": 0.4832545656105957, "grad_norm": 0.9536694338675364, "learning_rate": 5.513338454099203e-06, "loss": 0.1673, "step": 16565 }, { "epoch": 0.48328373884123926, "grad_norm": 1.42692864189217, "learning_rate": 5.512868514130688e-06, "loss": 0.1681, "step": 16566 }, { "epoch": 0.4833129120718828, "grad_norm": 0.9921845063117846, "learning_rate": 5.512398569583407e-06, "loss": 0.1436, "step": 16567 }, { "epoch": 0.4833420853025264, "grad_norm": 1.0185852627053067, "learning_rate": 5.511928620461554e-06, "loss": 0.1104, "step": 16568 }, { "epoch": 0.48337125853317, "grad_norm": 0.5818103833703272, "learning_rate": 5.511458666769323e-06, "loss": 0.1224, "step": 16569 }, { "epoch": 0.48340043176381353, "grad_norm": 0.9644499244791919, "learning_rate": 5.510988708510913e-06, "loss": 0.1225, "step": 16570 }, { "epoch": 0.4834296049944571, "grad_norm": 1.0213132183185323, "learning_rate": 5.510518745690516e-06, "loss": 0.1303, "step": 16571 }, { "epoch": 0.48345877822510064, "grad_norm": 0.9379995454353172, "learning_rate": 5.510048778312329e-06, "loss": 0.1279, "step": 16572 }, { "epoch": 0.4834879514557442, "grad_norm": 0.790624273438235, "learning_rate": 5.509578806380551e-06, "loss": 0.1366, "step": 16573 }, { "epoch": 0.48351712468638774, "grad_norm": 1.0198715632730508, "learning_rate": 5.509108829899374e-06, "loss": 0.149, "step": 16574 }, { "epoch": 0.48354629791703135, "grad_norm": 0.8444673125265063, "learning_rate": 5.508638848872993e-06, "loss": 0.1399, "step": 16575 }, { "epoch": 0.4835754711476749, "grad_norm": 0.9265335583361651, "learning_rate": 5.508168863305607e-06, "loss": 0.157, "step": 16576 }, { "epoch": 0.48360464437831846, "grad_norm": 0.8774750551063015, "learning_rate": 5.507698873201411e-06, "loss": 0.143, "step": 16577 }, { "epoch": 0.483633817608962, "grad_norm": 0.756139460175104, "learning_rate": 5.507228878564601e-06, "loss": 0.1346, "step": 16578 }, { "epoch": 0.48366299083960557, "grad_norm": 0.7341509530257865, "learning_rate": 5.506758879399372e-06, "loss": 0.1398, "step": 16579 }, { "epoch": 0.4836921640702491, "grad_norm": 0.7510997357780191, "learning_rate": 5.506288875709921e-06, "loss": 0.1311, "step": 16580 }, { "epoch": 0.4837213373008927, "grad_norm": 0.888161077874037, "learning_rate": 5.505818867500443e-06, "loss": 0.1388, "step": 16581 }, { "epoch": 0.4837505105315363, "grad_norm": 0.8020689619674765, "learning_rate": 5.505348854775135e-06, "loss": 0.1351, "step": 16582 }, { "epoch": 0.48377968376217984, "grad_norm": 0.7977449131759667, "learning_rate": 5.504878837538195e-06, "loss": 0.1375, "step": 16583 }, { "epoch": 0.4838088569928234, "grad_norm": 0.9988269198867377, "learning_rate": 5.504408815793816e-06, "loss": 0.1439, "step": 16584 }, { "epoch": 0.48383803022346694, "grad_norm": 1.086336562718429, "learning_rate": 5.5039387895461956e-06, "loss": 0.1632, "step": 16585 }, { "epoch": 0.4838672034541105, "grad_norm": 0.8145764289106557, "learning_rate": 5.503468758799529e-06, "loss": 0.1324, "step": 16586 }, { "epoch": 0.48389637668475405, "grad_norm": 0.7863050471615618, "learning_rate": 5.502998723558014e-06, "loss": 0.1448, "step": 16587 }, { "epoch": 0.48392554991539766, "grad_norm": 0.7478906050284334, "learning_rate": 5.502528683825847e-06, "loss": 0.1493, "step": 16588 }, { "epoch": 0.4839547231460412, "grad_norm": 0.7873330482565611, "learning_rate": 5.502058639607224e-06, "loss": 0.1205, "step": 16589 }, { "epoch": 0.48398389637668476, "grad_norm": 0.8620669116617288, "learning_rate": 5.501588590906342e-06, "loss": 0.1182, "step": 16590 }, { "epoch": 0.4840130696073283, "grad_norm": 0.6864588399225512, "learning_rate": 5.501118537727394e-06, "loss": 0.1367, "step": 16591 }, { "epoch": 0.48404224283797187, "grad_norm": 0.8999941015068548, "learning_rate": 5.500648480074582e-06, "loss": 0.135, "step": 16592 }, { "epoch": 0.4840714160686154, "grad_norm": 0.7647912021712191, "learning_rate": 5.500178417952099e-06, "loss": 0.1207, "step": 16593 }, { "epoch": 0.484100589299259, "grad_norm": 0.7569057693438344, "learning_rate": 5.499708351364142e-06, "loss": 0.1476, "step": 16594 }, { "epoch": 0.4841297625299026, "grad_norm": 0.7492787375397334, "learning_rate": 5.499238280314909e-06, "loss": 0.1325, "step": 16595 }, { "epoch": 0.48415893576054614, "grad_norm": 0.8361052466336646, "learning_rate": 5.4987682048085955e-06, "loss": 0.1384, "step": 16596 }, { "epoch": 0.4841881089911897, "grad_norm": 0.8739257125006146, "learning_rate": 5.498298124849399e-06, "loss": 0.1612, "step": 16597 }, { "epoch": 0.48421728222183325, "grad_norm": 0.7515913562564204, "learning_rate": 5.497828040441515e-06, "loss": 0.1257, "step": 16598 }, { "epoch": 0.4842464554524768, "grad_norm": 0.7851826210252971, "learning_rate": 5.497357951589141e-06, "loss": 0.1335, "step": 16599 }, { "epoch": 0.48427562868312035, "grad_norm": 0.7789635299586463, "learning_rate": 5.496887858296475e-06, "loss": 0.1279, "step": 16600 }, { "epoch": 0.4843048019137639, "grad_norm": 0.9577817455507989, "learning_rate": 5.496417760567712e-06, "loss": 0.135, "step": 16601 }, { "epoch": 0.4843339751444075, "grad_norm": 0.8623397308921672, "learning_rate": 5.4959476584070485e-06, "loss": 0.1557, "step": 16602 }, { "epoch": 0.48436314837505107, "grad_norm": 0.7733125935298913, "learning_rate": 5.495477551818685e-06, "loss": 0.155, "step": 16603 }, { "epoch": 0.4843923216056946, "grad_norm": 0.8175778594508675, "learning_rate": 5.495007440806816e-06, "loss": 0.1325, "step": 16604 }, { "epoch": 0.4844214948363382, "grad_norm": 0.6646761772274842, "learning_rate": 5.494537325375637e-06, "loss": 0.1307, "step": 16605 }, { "epoch": 0.4844506680669817, "grad_norm": 1.02001488818844, "learning_rate": 5.494067205529347e-06, "loss": 0.1322, "step": 16606 }, { "epoch": 0.4844798412976253, "grad_norm": 0.7459854556711544, "learning_rate": 5.493597081272144e-06, "loss": 0.1298, "step": 16607 }, { "epoch": 0.48450901452826883, "grad_norm": 0.8182527768869449, "learning_rate": 5.493126952608224e-06, "loss": 0.1271, "step": 16608 }, { "epoch": 0.48453818775891244, "grad_norm": 0.8523103671767203, "learning_rate": 5.4926568195417836e-06, "loss": 0.1205, "step": 16609 }, { "epoch": 0.484567360989556, "grad_norm": 0.9129672467024895, "learning_rate": 5.492186682077021e-06, "loss": 0.1747, "step": 16610 }, { "epoch": 0.48459653422019955, "grad_norm": 1.0324779786710794, "learning_rate": 5.491716540218134e-06, "loss": 0.1465, "step": 16611 }, { "epoch": 0.4846257074508431, "grad_norm": 0.8173512900407819, "learning_rate": 5.491246393969318e-06, "loss": 0.1309, "step": 16612 }, { "epoch": 0.48465488068148666, "grad_norm": 0.9675424431363238, "learning_rate": 5.490776243334773e-06, "loss": 0.1291, "step": 16613 }, { "epoch": 0.4846840539121302, "grad_norm": 0.7566896229893554, "learning_rate": 5.4903060883186934e-06, "loss": 0.1301, "step": 16614 }, { "epoch": 0.4847132271427738, "grad_norm": 0.7624964269774012, "learning_rate": 5.489835928925279e-06, "loss": 0.1369, "step": 16615 }, { "epoch": 0.48474240037341737, "grad_norm": 0.7919410215877583, "learning_rate": 5.489365765158726e-06, "loss": 0.1303, "step": 16616 }, { "epoch": 0.4847715736040609, "grad_norm": 0.8996018718186514, "learning_rate": 5.488895597023231e-06, "loss": 0.142, "step": 16617 }, { "epoch": 0.4848007468347045, "grad_norm": 0.9470661118490609, "learning_rate": 5.488425424522995e-06, "loss": 0.1381, "step": 16618 }, { "epoch": 0.48482992006534803, "grad_norm": 0.8278474374546028, "learning_rate": 5.487955247662212e-06, "loss": 0.1495, "step": 16619 }, { "epoch": 0.4848590932959916, "grad_norm": 0.8078013253587139, "learning_rate": 5.487485066445082e-06, "loss": 0.1394, "step": 16620 }, { "epoch": 0.48488826652663514, "grad_norm": 0.8290149226693893, "learning_rate": 5.487014880875801e-06, "loss": 0.1268, "step": 16621 }, { "epoch": 0.48491743975727875, "grad_norm": 0.8751408919109652, "learning_rate": 5.486544690958566e-06, "loss": 0.1264, "step": 16622 }, { "epoch": 0.4849466129879223, "grad_norm": 0.6911520271026074, "learning_rate": 5.486074496697579e-06, "loss": 0.1343, "step": 16623 }, { "epoch": 0.48497578621856585, "grad_norm": 1.1207690834165749, "learning_rate": 5.4856042980970325e-06, "loss": 0.1537, "step": 16624 }, { "epoch": 0.4850049594492094, "grad_norm": 0.7882957005018483, "learning_rate": 5.485134095161128e-06, "loss": 0.1228, "step": 16625 }, { "epoch": 0.48503413267985296, "grad_norm": 0.7985098025453309, "learning_rate": 5.484663887894062e-06, "loss": 0.1512, "step": 16626 }, { "epoch": 0.4850633059104965, "grad_norm": 0.9279442523753277, "learning_rate": 5.484193676300033e-06, "loss": 0.1443, "step": 16627 }, { "epoch": 0.48509247914114006, "grad_norm": 0.9967141948785709, "learning_rate": 5.483723460383238e-06, "loss": 0.1182, "step": 16628 }, { "epoch": 0.4851216523717837, "grad_norm": 0.882580770098697, "learning_rate": 5.4832532401478745e-06, "loss": 0.1435, "step": 16629 }, { "epoch": 0.4851508256024272, "grad_norm": 1.3703099992895844, "learning_rate": 5.4827830155981435e-06, "loss": 0.1263, "step": 16630 }, { "epoch": 0.4851799988330708, "grad_norm": 0.8743530719152944, "learning_rate": 5.48231278673824e-06, "loss": 0.1361, "step": 16631 }, { "epoch": 0.48520917206371433, "grad_norm": 1.0134325947634437, "learning_rate": 5.481842553572361e-06, "loss": 0.1203, "step": 16632 }, { "epoch": 0.4852383452943579, "grad_norm": 1.3346784215743988, "learning_rate": 5.481372316104709e-06, "loss": 0.135, "step": 16633 }, { "epoch": 0.48526751852500144, "grad_norm": 0.8632620273812637, "learning_rate": 5.480902074339481e-06, "loss": 0.1536, "step": 16634 }, { "epoch": 0.485296691755645, "grad_norm": 0.7689613898443438, "learning_rate": 5.480431828280871e-06, "loss": 0.1513, "step": 16635 }, { "epoch": 0.4853258649862886, "grad_norm": 1.019997277179937, "learning_rate": 5.479961577933082e-06, "loss": 0.1447, "step": 16636 }, { "epoch": 0.48535503821693216, "grad_norm": 0.7371436515523148, "learning_rate": 5.47949132330031e-06, "loss": 0.1328, "step": 16637 }, { "epoch": 0.4853842114475757, "grad_norm": 0.7720598248913605, "learning_rate": 5.479021064386755e-06, "loss": 0.1351, "step": 16638 }, { "epoch": 0.48541338467821926, "grad_norm": 0.9798615052228705, "learning_rate": 5.4785508011966125e-06, "loss": 0.1451, "step": 16639 }, { "epoch": 0.4854425579088628, "grad_norm": 0.9712722685659123, "learning_rate": 5.478080533734085e-06, "loss": 0.1551, "step": 16640 }, { "epoch": 0.48547173113950637, "grad_norm": 0.8415761999773763, "learning_rate": 5.477610262003367e-06, "loss": 0.1502, "step": 16641 }, { "epoch": 0.48550090437015, "grad_norm": 1.0311450776940383, "learning_rate": 5.477139986008658e-06, "loss": 0.1296, "step": 16642 }, { "epoch": 0.48553007760079353, "grad_norm": 0.8779969680054039, "learning_rate": 5.476669705754159e-06, "loss": 0.1248, "step": 16643 }, { "epoch": 0.4855592508314371, "grad_norm": 0.7249655699170686, "learning_rate": 5.476199421244065e-06, "loss": 0.1212, "step": 16644 }, { "epoch": 0.48558842406208064, "grad_norm": 0.9359321465945644, "learning_rate": 5.475729132482578e-06, "loss": 0.1283, "step": 16645 }, { "epoch": 0.4856175972927242, "grad_norm": 0.8862819070652191, "learning_rate": 5.475258839473894e-06, "loss": 0.1279, "step": 16646 }, { "epoch": 0.48564677052336774, "grad_norm": 0.7651430301032168, "learning_rate": 5.474788542222211e-06, "loss": 0.1379, "step": 16647 }, { "epoch": 0.4856759437540113, "grad_norm": 0.7489622617004734, "learning_rate": 5.474318240731732e-06, "loss": 0.1548, "step": 16648 }, { "epoch": 0.4857051169846549, "grad_norm": 0.7813106362728675, "learning_rate": 5.473847935006652e-06, "loss": 0.1384, "step": 16649 }, { "epoch": 0.48573429021529846, "grad_norm": 0.6767682363045044, "learning_rate": 5.4733776250511706e-06, "loss": 0.1235, "step": 16650 }, { "epoch": 0.485763463445942, "grad_norm": 0.7700170536576478, "learning_rate": 5.472907310869486e-06, "loss": 0.1212, "step": 16651 }, { "epoch": 0.48579263667658557, "grad_norm": 0.9489483314365391, "learning_rate": 5.4724369924657985e-06, "loss": 0.1288, "step": 16652 }, { "epoch": 0.4858218099072291, "grad_norm": 0.7578751245786504, "learning_rate": 5.471966669844307e-06, "loss": 0.1196, "step": 16653 }, { "epoch": 0.48585098313787267, "grad_norm": 0.974441354637069, "learning_rate": 5.471496343009208e-06, "loss": 0.1391, "step": 16654 }, { "epoch": 0.4858801563685162, "grad_norm": 0.7060072836914836, "learning_rate": 5.471026011964703e-06, "loss": 0.1233, "step": 16655 }, { "epoch": 0.48590932959915983, "grad_norm": 0.8272008883339874, "learning_rate": 5.47055567671499e-06, "loss": 0.1241, "step": 16656 }, { "epoch": 0.4859385028298034, "grad_norm": 0.8842349354146034, "learning_rate": 5.470085337264268e-06, "loss": 0.145, "step": 16657 }, { "epoch": 0.48596767606044694, "grad_norm": 0.8066742307684761, "learning_rate": 5.469614993616739e-06, "loss": 0.1552, "step": 16658 }, { "epoch": 0.4859968492910905, "grad_norm": 0.886208217483384, "learning_rate": 5.469144645776596e-06, "loss": 0.1519, "step": 16659 }, { "epoch": 0.48602602252173405, "grad_norm": 0.8917265733195617, "learning_rate": 5.468674293748044e-06, "loss": 0.1398, "step": 16660 }, { "epoch": 0.4860551957523776, "grad_norm": 0.741515680860913, "learning_rate": 5.468203937535278e-06, "loss": 0.1188, "step": 16661 }, { "epoch": 0.48608436898302115, "grad_norm": 0.8224294073227981, "learning_rate": 5.467733577142499e-06, "loss": 0.1556, "step": 16662 }, { "epoch": 0.48611354221366476, "grad_norm": 0.7176844646669913, "learning_rate": 5.467263212573908e-06, "loss": 0.1336, "step": 16663 }, { "epoch": 0.4861427154443083, "grad_norm": 0.70235432623764, "learning_rate": 5.466792843833702e-06, "loss": 0.1443, "step": 16664 }, { "epoch": 0.48617188867495187, "grad_norm": 0.8076954041882298, "learning_rate": 5.46632247092608e-06, "loss": 0.1226, "step": 16665 }, { "epoch": 0.4862010619055954, "grad_norm": 1.0319426461567602, "learning_rate": 5.465852093855243e-06, "loss": 0.1519, "step": 16666 }, { "epoch": 0.486230235136239, "grad_norm": 0.8531808357671388, "learning_rate": 5.46538171262539e-06, "loss": 0.1281, "step": 16667 }, { "epoch": 0.48625940836688253, "grad_norm": 1.0352049857879229, "learning_rate": 5.464911327240719e-06, "loss": 0.173, "step": 16668 }, { "epoch": 0.48628858159752614, "grad_norm": 0.8800928140830611, "learning_rate": 5.4644409377054305e-06, "loss": 0.1315, "step": 16669 }, { "epoch": 0.4863177548281697, "grad_norm": 1.0911421983003886, "learning_rate": 5.463970544023726e-06, "loss": 0.1626, "step": 16670 }, { "epoch": 0.48634692805881324, "grad_norm": 0.8169729470278007, "learning_rate": 5.463500146199801e-06, "loss": 0.1445, "step": 16671 }, { "epoch": 0.4863761012894568, "grad_norm": 0.8859138574134487, "learning_rate": 5.46302974423786e-06, "loss": 0.1237, "step": 16672 }, { "epoch": 0.48640527452010035, "grad_norm": 1.0588198479976483, "learning_rate": 5.4625593381421e-06, "loss": 0.1349, "step": 16673 }, { "epoch": 0.4864344477507439, "grad_norm": 0.9419584188868422, "learning_rate": 5.4620889279167174e-06, "loss": 0.1444, "step": 16674 }, { "epoch": 0.48646362098138746, "grad_norm": 0.8217783306314604, "learning_rate": 5.461618513565918e-06, "loss": 0.116, "step": 16675 }, { "epoch": 0.48649279421203107, "grad_norm": 1.488590067859592, "learning_rate": 5.461148095093898e-06, "loss": 0.167, "step": 16676 }, { "epoch": 0.4865219674426746, "grad_norm": 0.9474291803655491, "learning_rate": 5.460677672504856e-06, "loss": 0.1475, "step": 16677 }, { "epoch": 0.48655114067331817, "grad_norm": 0.860261484254281, "learning_rate": 5.460207245802996e-06, "loss": 0.1379, "step": 16678 }, { "epoch": 0.4865803139039617, "grad_norm": 0.8841663562973775, "learning_rate": 5.4597368149925154e-06, "loss": 0.1154, "step": 16679 }, { "epoch": 0.4866094871346053, "grad_norm": 0.8119690881667214, "learning_rate": 5.459266380077614e-06, "loss": 0.1674, "step": 16680 }, { "epoch": 0.48663866036524883, "grad_norm": 0.8433755193533623, "learning_rate": 5.458795941062491e-06, "loss": 0.1271, "step": 16681 }, { "epoch": 0.4866678335958924, "grad_norm": 0.8857321157738206, "learning_rate": 5.458325497951348e-06, "loss": 0.111, "step": 16682 }, { "epoch": 0.486697006826536, "grad_norm": 0.7623113028603865, "learning_rate": 5.457855050748385e-06, "loss": 0.1291, "step": 16683 }, { "epoch": 0.48672618005717955, "grad_norm": 0.8593488336640536, "learning_rate": 5.457384599457801e-06, "loss": 0.1273, "step": 16684 }, { "epoch": 0.4867553532878231, "grad_norm": 0.7462675008781164, "learning_rate": 5.456914144083796e-06, "loss": 0.1312, "step": 16685 }, { "epoch": 0.48678452651846665, "grad_norm": 0.8076048988420254, "learning_rate": 5.456443684630572e-06, "loss": 0.1053, "step": 16686 }, { "epoch": 0.4868136997491102, "grad_norm": 0.7176795308410698, "learning_rate": 5.455973221102325e-06, "loss": 0.1236, "step": 16687 }, { "epoch": 0.48684287297975376, "grad_norm": 0.7454224420745489, "learning_rate": 5.45550275350326e-06, "loss": 0.1235, "step": 16688 }, { "epoch": 0.4868720462103973, "grad_norm": 0.8958860901901691, "learning_rate": 5.455032281837576e-06, "loss": 0.1262, "step": 16689 }, { "epoch": 0.4869012194410409, "grad_norm": 0.8392124770640628, "learning_rate": 5.454561806109472e-06, "loss": 0.1413, "step": 16690 }, { "epoch": 0.4869303926716845, "grad_norm": 0.9805932324169192, "learning_rate": 5.4540913263231466e-06, "loss": 0.1362, "step": 16691 }, { "epoch": 0.48695956590232803, "grad_norm": 0.8005952637989759, "learning_rate": 5.453620842482803e-06, "loss": 0.1059, "step": 16692 }, { "epoch": 0.4869887391329716, "grad_norm": 1.0117586096771631, "learning_rate": 5.4531503545926425e-06, "loss": 0.1655, "step": 16693 }, { "epoch": 0.48701791236361514, "grad_norm": 0.7899426762886181, "learning_rate": 5.452679862656861e-06, "loss": 0.1377, "step": 16694 }, { "epoch": 0.4870470855942587, "grad_norm": 0.8782222208997967, "learning_rate": 5.452209366679665e-06, "loss": 0.1287, "step": 16695 }, { "epoch": 0.4870762588249023, "grad_norm": 0.798744209565529, "learning_rate": 5.45173886666525e-06, "loss": 0.1163, "step": 16696 }, { "epoch": 0.48710543205554585, "grad_norm": 0.9060525066292064, "learning_rate": 5.451268362617819e-06, "loss": 0.1184, "step": 16697 }, { "epoch": 0.4871346052861894, "grad_norm": 1.039120813857294, "learning_rate": 5.4507978545415704e-06, "loss": 0.1369, "step": 16698 }, { "epoch": 0.48716377851683296, "grad_norm": 0.8210188290349758, "learning_rate": 5.450327342440707e-06, "loss": 0.1311, "step": 16699 }, { "epoch": 0.4871929517474765, "grad_norm": 0.7169642763400298, "learning_rate": 5.449856826319429e-06, "loss": 0.1185, "step": 16700 }, { "epoch": 0.48722212497812006, "grad_norm": 1.2238526355218988, "learning_rate": 5.449386306181935e-06, "loss": 0.1417, "step": 16701 }, { "epoch": 0.4872512982087636, "grad_norm": 1.0169156857525088, "learning_rate": 5.448915782032429e-06, "loss": 0.144, "step": 16702 }, { "epoch": 0.4872804714394072, "grad_norm": 0.8190136502687897, "learning_rate": 5.4484452538751095e-06, "loss": 0.1022, "step": 16703 }, { "epoch": 0.4873096446700508, "grad_norm": 0.9112090248792706, "learning_rate": 5.447974721714178e-06, "loss": 0.1177, "step": 16704 }, { "epoch": 0.48733881790069433, "grad_norm": 0.8967721578079546, "learning_rate": 5.447504185553836e-06, "loss": 0.1348, "step": 16705 }, { "epoch": 0.4873679911313379, "grad_norm": 0.6705471140235855, "learning_rate": 5.4470336453982805e-06, "loss": 0.1351, "step": 16706 }, { "epoch": 0.48739716436198144, "grad_norm": 0.7990269514479517, "learning_rate": 5.446563101251718e-06, "loss": 0.1338, "step": 16707 }, { "epoch": 0.487426337592625, "grad_norm": 0.8389563162533513, "learning_rate": 5.446092553118347e-06, "loss": 0.1428, "step": 16708 }, { "epoch": 0.48745551082326855, "grad_norm": 0.8288790745157593, "learning_rate": 5.445622001002366e-06, "loss": 0.1309, "step": 16709 }, { "epoch": 0.48748468405391215, "grad_norm": 0.8000472215523416, "learning_rate": 5.445151444907981e-06, "loss": 0.1758, "step": 16710 }, { "epoch": 0.4875138572845557, "grad_norm": 0.6687827671769992, "learning_rate": 5.444680884839389e-06, "loss": 0.1213, "step": 16711 }, { "epoch": 0.48754303051519926, "grad_norm": 0.8007923645474778, "learning_rate": 5.444210320800791e-06, "loss": 0.144, "step": 16712 }, { "epoch": 0.4875722037458428, "grad_norm": 1.0185180313992426, "learning_rate": 5.44373975279639e-06, "loss": 0.1232, "step": 16713 }, { "epoch": 0.48760137697648637, "grad_norm": 0.7897839234618698, "learning_rate": 5.443269180830386e-06, "loss": 0.1238, "step": 16714 }, { "epoch": 0.4876305502071299, "grad_norm": 0.7037191563042358, "learning_rate": 5.442798604906981e-06, "loss": 0.1367, "step": 16715 }, { "epoch": 0.4876597234377735, "grad_norm": 0.7153632602948264, "learning_rate": 5.442328025030375e-06, "loss": 0.1373, "step": 16716 }, { "epoch": 0.4876888966684171, "grad_norm": 0.7414456386811991, "learning_rate": 5.441857441204772e-06, "loss": 0.1297, "step": 16717 }, { "epoch": 0.48771806989906064, "grad_norm": 0.9137008098492148, "learning_rate": 5.441386853434369e-06, "loss": 0.1238, "step": 16718 }, { "epoch": 0.4877472431297042, "grad_norm": 0.8122164406109992, "learning_rate": 5.4409162617233715e-06, "loss": 0.1621, "step": 16719 }, { "epoch": 0.48777641636034774, "grad_norm": 0.8667435227430735, "learning_rate": 5.440445666075979e-06, "loss": 0.1396, "step": 16720 }, { "epoch": 0.4878055895909913, "grad_norm": 0.9612628334759031, "learning_rate": 5.4399750664963905e-06, "loss": 0.143, "step": 16721 }, { "epoch": 0.48783476282163485, "grad_norm": 0.7942635529285477, "learning_rate": 5.439504462988811e-06, "loss": 0.1193, "step": 16722 }, { "epoch": 0.4878639360522784, "grad_norm": 0.7533546031416218, "learning_rate": 5.4390338555574405e-06, "loss": 0.148, "step": 16723 }, { "epoch": 0.487893109282922, "grad_norm": 0.8896298213154132, "learning_rate": 5.4385632442064795e-06, "loss": 0.1386, "step": 16724 }, { "epoch": 0.48792228251356556, "grad_norm": 1.0280146935703676, "learning_rate": 5.4380926289401325e-06, "loss": 0.1152, "step": 16725 }, { "epoch": 0.4879514557442091, "grad_norm": 0.7729068892464108, "learning_rate": 5.437622009762599e-06, "loss": 0.1202, "step": 16726 }, { "epoch": 0.48798062897485267, "grad_norm": 0.7668135338946674, "learning_rate": 5.437151386678079e-06, "loss": 0.1416, "step": 16727 }, { "epoch": 0.4880098022054962, "grad_norm": 0.8540119289562168, "learning_rate": 5.436680759690777e-06, "loss": 0.1401, "step": 16728 }, { "epoch": 0.4880389754361398, "grad_norm": 0.7166846145096721, "learning_rate": 5.436210128804893e-06, "loss": 0.1412, "step": 16729 }, { "epoch": 0.4880681486667834, "grad_norm": 1.276487160810915, "learning_rate": 5.435739494024629e-06, "loss": 0.1233, "step": 16730 }, { "epoch": 0.48809732189742694, "grad_norm": 0.9385052299502991, "learning_rate": 5.4352688553541865e-06, "loss": 0.129, "step": 16731 }, { "epoch": 0.4881264951280705, "grad_norm": 0.763211669110271, "learning_rate": 5.434798212797767e-06, "loss": 0.1092, "step": 16732 }, { "epoch": 0.48815566835871405, "grad_norm": 0.799938314499092, "learning_rate": 5.434327566359574e-06, "loss": 0.1212, "step": 16733 }, { "epoch": 0.4881848415893576, "grad_norm": 0.7336706928372488, "learning_rate": 5.433856916043808e-06, "loss": 0.1716, "step": 16734 }, { "epoch": 0.48821401482000115, "grad_norm": 1.068392530537651, "learning_rate": 5.433386261854672e-06, "loss": 0.1375, "step": 16735 }, { "epoch": 0.4882431880506447, "grad_norm": 0.8966242026079092, "learning_rate": 5.432915603796365e-06, "loss": 0.1372, "step": 16736 }, { "epoch": 0.4882723612812883, "grad_norm": 0.7255530041341725, "learning_rate": 5.432444941873092e-06, "loss": 0.136, "step": 16737 }, { "epoch": 0.48830153451193187, "grad_norm": 0.9633534175498903, "learning_rate": 5.431974276089054e-06, "loss": 0.16, "step": 16738 }, { "epoch": 0.4883307077425754, "grad_norm": 0.7816706975664015, "learning_rate": 5.431503606448452e-06, "loss": 0.1302, "step": 16739 }, { "epoch": 0.488359880973219, "grad_norm": 0.7555487024819179, "learning_rate": 5.4310329329554885e-06, "loss": 0.1185, "step": 16740 }, { "epoch": 0.4883890542038625, "grad_norm": 1.1433135594249912, "learning_rate": 5.4305622556143675e-06, "loss": 0.1382, "step": 16741 }, { "epoch": 0.4884182274345061, "grad_norm": 0.7882269824023626, "learning_rate": 5.430091574429288e-06, "loss": 0.1307, "step": 16742 }, { "epoch": 0.48844740066514963, "grad_norm": 0.7279517395647666, "learning_rate": 5.429620889404454e-06, "loss": 0.1144, "step": 16743 }, { "epoch": 0.48847657389579324, "grad_norm": 0.7461429640054047, "learning_rate": 5.429150200544068e-06, "loss": 0.141, "step": 16744 }, { "epoch": 0.4885057471264368, "grad_norm": 0.8471060884395589, "learning_rate": 5.42867950785233e-06, "loss": 0.1369, "step": 16745 }, { "epoch": 0.48853492035708035, "grad_norm": 0.7419462183963568, "learning_rate": 5.4282088113334445e-06, "loss": 0.1459, "step": 16746 }, { "epoch": 0.4885640935877239, "grad_norm": 0.7109712546871579, "learning_rate": 5.427738110991613e-06, "loss": 0.1427, "step": 16747 }, { "epoch": 0.48859326681836746, "grad_norm": 0.7921504567432183, "learning_rate": 5.427267406831037e-06, "loss": 0.1205, "step": 16748 }, { "epoch": 0.488622440049011, "grad_norm": 0.6369093230324466, "learning_rate": 5.426796698855921e-06, "loss": 0.1268, "step": 16749 }, { "epoch": 0.48865161327965456, "grad_norm": 0.7619024885991046, "learning_rate": 5.426325987070465e-06, "loss": 0.1249, "step": 16750 }, { "epoch": 0.48868078651029817, "grad_norm": 1.1428791818286819, "learning_rate": 5.425855271478873e-06, "loss": 0.1642, "step": 16751 }, { "epoch": 0.4887099597409417, "grad_norm": 0.7777704283770847, "learning_rate": 5.425384552085346e-06, "loss": 0.1353, "step": 16752 }, { "epoch": 0.4887391329715853, "grad_norm": 1.350268335225953, "learning_rate": 5.424913828894088e-06, "loss": 0.1344, "step": 16753 }, { "epoch": 0.48876830620222883, "grad_norm": 0.7177870442582034, "learning_rate": 5.424443101909299e-06, "loss": 0.1344, "step": 16754 }, { "epoch": 0.4887974794328724, "grad_norm": 0.902093696664178, "learning_rate": 5.423972371135186e-06, "loss": 0.1412, "step": 16755 }, { "epoch": 0.48882665266351594, "grad_norm": 0.8169954794117326, "learning_rate": 5.423501636575947e-06, "loss": 0.1297, "step": 16756 }, { "epoch": 0.48885582589415955, "grad_norm": 1.1969661614115779, "learning_rate": 5.423030898235788e-06, "loss": 0.1183, "step": 16757 }, { "epoch": 0.4888849991248031, "grad_norm": 0.9097145304324763, "learning_rate": 5.422560156118909e-06, "loss": 0.1331, "step": 16758 }, { "epoch": 0.48891417235544665, "grad_norm": 0.8576091171382718, "learning_rate": 5.422089410229514e-06, "loss": 0.1325, "step": 16759 }, { "epoch": 0.4889433455860902, "grad_norm": 1.542136337139382, "learning_rate": 5.421618660571804e-06, "loss": 0.132, "step": 16760 }, { "epoch": 0.48897251881673376, "grad_norm": 0.9677542293305892, "learning_rate": 5.4211479071499866e-06, "loss": 0.1554, "step": 16761 }, { "epoch": 0.4890016920473773, "grad_norm": 0.802046556672027, "learning_rate": 5.420677149968259e-06, "loss": 0.1335, "step": 16762 }, { "epoch": 0.48903086527802087, "grad_norm": 1.016011147504178, "learning_rate": 5.4202063890308265e-06, "loss": 0.1268, "step": 16763 }, { "epoch": 0.4890600385086645, "grad_norm": 0.6628011873979082, "learning_rate": 5.419735624341891e-06, "loss": 0.1018, "step": 16764 }, { "epoch": 0.489089211739308, "grad_norm": 0.8839723900097081, "learning_rate": 5.419264855905658e-06, "loss": 0.1494, "step": 16765 }, { "epoch": 0.4891183849699516, "grad_norm": 0.7841528464484294, "learning_rate": 5.418794083726326e-06, "loss": 0.1227, "step": 16766 }, { "epoch": 0.48914755820059513, "grad_norm": 0.8447786992580212, "learning_rate": 5.418323307808102e-06, "loss": 0.1175, "step": 16767 }, { "epoch": 0.4891767314312387, "grad_norm": 0.8742525640783552, "learning_rate": 5.4178525281551874e-06, "loss": 0.1429, "step": 16768 }, { "epoch": 0.48920590466188224, "grad_norm": 0.7398236346392426, "learning_rate": 5.417381744771783e-06, "loss": 0.1237, "step": 16769 }, { "epoch": 0.4892350778925258, "grad_norm": 0.8276263046738359, "learning_rate": 5.416910957662098e-06, "loss": 0.1156, "step": 16770 }, { "epoch": 0.4892642511231694, "grad_norm": 0.7211954023857353, "learning_rate": 5.416440166830329e-06, "loss": 0.1169, "step": 16771 }, { "epoch": 0.48929342435381296, "grad_norm": 0.7849124847552079, "learning_rate": 5.415969372280682e-06, "loss": 0.1609, "step": 16772 }, { "epoch": 0.4893225975844565, "grad_norm": 0.8787052180196727, "learning_rate": 5.415498574017359e-06, "loss": 0.1323, "step": 16773 }, { "epoch": 0.48935177081510006, "grad_norm": 0.907574241717095, "learning_rate": 5.415027772044565e-06, "loss": 0.1255, "step": 16774 }, { "epoch": 0.4893809440457436, "grad_norm": 0.7477803076224181, "learning_rate": 5.4145569663665024e-06, "loss": 0.1382, "step": 16775 }, { "epoch": 0.48941011727638717, "grad_norm": 1.0226656916821553, "learning_rate": 5.4140861569873725e-06, "loss": 0.1402, "step": 16776 }, { "epoch": 0.4894392905070307, "grad_norm": 0.7094494368558831, "learning_rate": 5.413615343911382e-06, "loss": 0.1155, "step": 16777 }, { "epoch": 0.48946846373767433, "grad_norm": 0.8783132299893559, "learning_rate": 5.413144527142731e-06, "loss": 0.1295, "step": 16778 }, { "epoch": 0.4894976369683179, "grad_norm": 0.8450164614100708, "learning_rate": 5.412673706685625e-06, "loss": 0.1384, "step": 16779 }, { "epoch": 0.48952681019896144, "grad_norm": 0.6510655045156076, "learning_rate": 5.4122028825442675e-06, "loss": 0.1248, "step": 16780 }, { "epoch": 0.489555983429605, "grad_norm": 0.8934417540642914, "learning_rate": 5.411732054722859e-06, "loss": 0.1369, "step": 16781 }, { "epoch": 0.48958515666024854, "grad_norm": 0.7863068068754623, "learning_rate": 5.411261223225605e-06, "loss": 0.1254, "step": 16782 }, { "epoch": 0.4896143298908921, "grad_norm": 0.8649214694677202, "learning_rate": 5.4107903880567125e-06, "loss": 0.1287, "step": 16783 }, { "epoch": 0.4896435031215357, "grad_norm": 0.8852695558120606, "learning_rate": 5.410319549220378e-06, "loss": 0.137, "step": 16784 }, { "epoch": 0.48967267635217926, "grad_norm": 0.9302665968237415, "learning_rate": 5.40984870672081e-06, "loss": 0.1249, "step": 16785 }, { "epoch": 0.4897018495828228, "grad_norm": 0.8130711936782737, "learning_rate": 5.4093778605622105e-06, "loss": 0.1267, "step": 16786 }, { "epoch": 0.48973102281346637, "grad_norm": 0.8807032602975655, "learning_rate": 5.408907010748783e-06, "loss": 0.1199, "step": 16787 }, { "epoch": 0.4897601960441099, "grad_norm": 1.013407550964662, "learning_rate": 5.408436157284731e-06, "loss": 0.15, "step": 16788 }, { "epoch": 0.48978936927475347, "grad_norm": 0.6923897675745911, "learning_rate": 5.40796530017426e-06, "loss": 0.1332, "step": 16789 }, { "epoch": 0.489818542505397, "grad_norm": 0.858974260629212, "learning_rate": 5.40749443942157e-06, "loss": 0.1068, "step": 16790 }, { "epoch": 0.48984771573604063, "grad_norm": 0.9930197684295528, "learning_rate": 5.407023575030867e-06, "loss": 0.1548, "step": 16791 }, { "epoch": 0.4898768889666842, "grad_norm": 0.8859854839653981, "learning_rate": 5.406552707006356e-06, "loss": 0.1384, "step": 16792 }, { "epoch": 0.48990606219732774, "grad_norm": 0.793575497542596, "learning_rate": 5.4060818353522396e-06, "loss": 0.1491, "step": 16793 }, { "epoch": 0.4899352354279713, "grad_norm": 0.888016965330607, "learning_rate": 5.405610960072721e-06, "loss": 0.1375, "step": 16794 }, { "epoch": 0.48996440865861485, "grad_norm": 0.8028485926525644, "learning_rate": 5.405140081172005e-06, "loss": 0.132, "step": 16795 }, { "epoch": 0.4899935818892584, "grad_norm": 1.093747280561161, "learning_rate": 5.4046691986542935e-06, "loss": 0.1294, "step": 16796 }, { "epoch": 0.49002275511990195, "grad_norm": 0.6918535963370159, "learning_rate": 5.404198312523793e-06, "loss": 0.1267, "step": 16797 }, { "epoch": 0.49005192835054556, "grad_norm": 0.855602394232518, "learning_rate": 5.403727422784707e-06, "loss": 0.1267, "step": 16798 }, { "epoch": 0.4900811015811891, "grad_norm": 0.9963409012648484, "learning_rate": 5.403256529441238e-06, "loss": 0.1568, "step": 16799 }, { "epoch": 0.49011027481183267, "grad_norm": 0.7977064881755879, "learning_rate": 5.402785632497593e-06, "loss": 0.1421, "step": 16800 }, { "epoch": 0.4901394480424762, "grad_norm": 0.829545169257994, "learning_rate": 5.4023147319579715e-06, "loss": 0.1345, "step": 16801 }, { "epoch": 0.4901686212731198, "grad_norm": 0.74539276370791, "learning_rate": 5.401843827826581e-06, "loss": 0.1177, "step": 16802 }, { "epoch": 0.49019779450376333, "grad_norm": 0.8563638907714458, "learning_rate": 5.4013729201076245e-06, "loss": 0.1329, "step": 16803 }, { "epoch": 0.4902269677344069, "grad_norm": 0.7020997028155187, "learning_rate": 5.400902008805306e-06, "loss": 0.1201, "step": 16804 }, { "epoch": 0.4902561409650505, "grad_norm": 0.891919959854982, "learning_rate": 5.400431093923832e-06, "loss": 0.1277, "step": 16805 }, { "epoch": 0.49028531419569404, "grad_norm": 0.9311534081126035, "learning_rate": 5.399960175467404e-06, "loss": 0.14, "step": 16806 }, { "epoch": 0.4903144874263376, "grad_norm": 0.708145217147487, "learning_rate": 5.3994892534402255e-06, "loss": 0.1306, "step": 16807 }, { "epoch": 0.49034366065698115, "grad_norm": 0.6924660526146618, "learning_rate": 5.399018327846504e-06, "loss": 0.1312, "step": 16808 }, { "epoch": 0.4903728338876247, "grad_norm": 0.7908199978659743, "learning_rate": 5.398547398690441e-06, "loss": 0.1447, "step": 16809 }, { "epoch": 0.49040200711826826, "grad_norm": 0.835693832828301, "learning_rate": 5.398076465976243e-06, "loss": 0.1392, "step": 16810 }, { "epoch": 0.49043118034891187, "grad_norm": 1.3199714941969403, "learning_rate": 5.397605529708112e-06, "loss": 0.1497, "step": 16811 }, { "epoch": 0.4904603535795554, "grad_norm": 0.7411579942711041, "learning_rate": 5.397134589890255e-06, "loss": 0.1098, "step": 16812 }, { "epoch": 0.490489526810199, "grad_norm": 0.7547862470630657, "learning_rate": 5.396663646526875e-06, "loss": 0.1402, "step": 16813 }, { "epoch": 0.4905187000408425, "grad_norm": 1.0065925346132234, "learning_rate": 5.396192699622176e-06, "loss": 0.1241, "step": 16814 }, { "epoch": 0.4905478732714861, "grad_norm": 0.9593389319689464, "learning_rate": 5.3957217491803645e-06, "loss": 0.1293, "step": 16815 }, { "epoch": 0.49057704650212963, "grad_norm": 0.7320499446501436, "learning_rate": 5.395250795205642e-06, "loss": 0.1391, "step": 16816 }, { "epoch": 0.4906062197327732, "grad_norm": 0.7895198341381162, "learning_rate": 5.394779837702216e-06, "loss": 0.1267, "step": 16817 }, { "epoch": 0.4906353929634168, "grad_norm": 0.974056337435937, "learning_rate": 5.394308876674289e-06, "loss": 0.147, "step": 16818 }, { "epoch": 0.49066456619406035, "grad_norm": 1.1127042826556375, "learning_rate": 5.3938379121260675e-06, "loss": 0.1582, "step": 16819 }, { "epoch": 0.4906937394247039, "grad_norm": 1.1167645802770252, "learning_rate": 5.393366944061754e-06, "loss": 0.1413, "step": 16820 }, { "epoch": 0.49072291265534745, "grad_norm": 0.7937400312126629, "learning_rate": 5.392895972485555e-06, "loss": 0.1402, "step": 16821 }, { "epoch": 0.490752085885991, "grad_norm": 0.9375515907959356, "learning_rate": 5.392424997401674e-06, "loss": 0.1468, "step": 16822 }, { "epoch": 0.49078125911663456, "grad_norm": 1.277586920616488, "learning_rate": 5.391954018814316e-06, "loss": 0.1282, "step": 16823 }, { "epoch": 0.4908104323472781, "grad_norm": 1.0791530362056487, "learning_rate": 5.3914830367276875e-06, "loss": 0.1292, "step": 16824 }, { "epoch": 0.4908396055779217, "grad_norm": 0.7994404509745789, "learning_rate": 5.3910120511459915e-06, "loss": 0.1317, "step": 16825 }, { "epoch": 0.4908687788085653, "grad_norm": 1.107509311312047, "learning_rate": 5.390541062073432e-06, "loss": 0.1355, "step": 16826 }, { "epoch": 0.49089795203920883, "grad_norm": 0.8922484935540654, "learning_rate": 5.390070069514216e-06, "loss": 0.1478, "step": 16827 }, { "epoch": 0.4909271252698524, "grad_norm": 1.2708040797349531, "learning_rate": 5.389599073472549e-06, "loss": 0.1216, "step": 16828 }, { "epoch": 0.49095629850049594, "grad_norm": 1.0097909657546378, "learning_rate": 5.389128073952632e-06, "loss": 0.1372, "step": 16829 }, { "epoch": 0.4909854717311395, "grad_norm": 0.7479600103341646, "learning_rate": 5.388657070958674e-06, "loss": 0.1562, "step": 16830 }, { "epoch": 0.49101464496178304, "grad_norm": 0.7886261775704383, "learning_rate": 5.388186064494878e-06, "loss": 0.1152, "step": 16831 }, { "epoch": 0.49104381819242665, "grad_norm": 1.0528727344581106, "learning_rate": 5.3877150545654486e-06, "loss": 0.1627, "step": 16832 }, { "epoch": 0.4910729914230702, "grad_norm": 0.7441770880074093, "learning_rate": 5.387244041174593e-06, "loss": 0.1424, "step": 16833 }, { "epoch": 0.49110216465371376, "grad_norm": 0.8021533562511127, "learning_rate": 5.3867730243265145e-06, "loss": 0.1319, "step": 16834 }, { "epoch": 0.4911313378843573, "grad_norm": 0.8206389697842621, "learning_rate": 5.386302004025419e-06, "loss": 0.1234, "step": 16835 }, { "epoch": 0.49116051111500086, "grad_norm": 0.9698046102401209, "learning_rate": 5.385830980275511e-06, "loss": 0.1413, "step": 16836 }, { "epoch": 0.4911896843456444, "grad_norm": 0.7334987243422961, "learning_rate": 5.385359953080997e-06, "loss": 0.1467, "step": 16837 }, { "epoch": 0.49121885757628797, "grad_norm": 0.7536300243009635, "learning_rate": 5.384888922446081e-06, "loss": 0.1278, "step": 16838 }, { "epoch": 0.4912480308069316, "grad_norm": 0.7082229165762484, "learning_rate": 5.384417888374967e-06, "loss": 0.1054, "step": 16839 }, { "epoch": 0.49127720403757513, "grad_norm": 0.756425998044131, "learning_rate": 5.383946850871865e-06, "loss": 0.1342, "step": 16840 }, { "epoch": 0.4913063772682187, "grad_norm": 0.8357922972489411, "learning_rate": 5.383475809940975e-06, "loss": 0.1389, "step": 16841 }, { "epoch": 0.49133555049886224, "grad_norm": 0.7504759075158891, "learning_rate": 5.383004765586504e-06, "loss": 0.1217, "step": 16842 }, { "epoch": 0.4913647237295058, "grad_norm": 0.6340031231950363, "learning_rate": 5.38253371781266e-06, "loss": 0.1251, "step": 16843 }, { "epoch": 0.49139389696014935, "grad_norm": 0.8961351187184075, "learning_rate": 5.3820626666236445e-06, "loss": 0.1392, "step": 16844 }, { "epoch": 0.49142307019079295, "grad_norm": 1.1382162980969783, "learning_rate": 5.381591612023665e-06, "loss": 0.1491, "step": 16845 }, { "epoch": 0.4914522434214365, "grad_norm": 1.0942466131232769, "learning_rate": 5.381120554016928e-06, "loss": 0.1151, "step": 16846 }, { "epoch": 0.49148141665208006, "grad_norm": 1.127863183941797, "learning_rate": 5.380649492607636e-06, "loss": 0.1285, "step": 16847 }, { "epoch": 0.4915105898827236, "grad_norm": 0.9259252089644728, "learning_rate": 5.380178427799997e-06, "loss": 0.1481, "step": 16848 }, { "epoch": 0.49153976311336717, "grad_norm": 0.8838430115790725, "learning_rate": 5.379707359598215e-06, "loss": 0.1654, "step": 16849 }, { "epoch": 0.4915689363440107, "grad_norm": 0.9487736874910573, "learning_rate": 5.379236288006497e-06, "loss": 0.1268, "step": 16850 }, { "epoch": 0.4915981095746543, "grad_norm": 0.983283064243009, "learning_rate": 5.378765213029048e-06, "loss": 0.1246, "step": 16851 }, { "epoch": 0.4916272828052979, "grad_norm": 0.7664008709616098, "learning_rate": 5.378294134670073e-06, "loss": 0.1375, "step": 16852 }, { "epoch": 0.49165645603594144, "grad_norm": 0.716134218745774, "learning_rate": 5.377823052933779e-06, "loss": 0.1057, "step": 16853 }, { "epoch": 0.491685629266585, "grad_norm": 0.7962058399740612, "learning_rate": 5.37735196782437e-06, "loss": 0.1333, "step": 16854 }, { "epoch": 0.49171480249722854, "grad_norm": 0.6437605306220976, "learning_rate": 5.376880879346054e-06, "loss": 0.1196, "step": 16855 }, { "epoch": 0.4917439757278721, "grad_norm": 1.111434136573803, "learning_rate": 5.376409787503034e-06, "loss": 0.1416, "step": 16856 }, { "epoch": 0.49177314895851565, "grad_norm": 0.7347356291363897, "learning_rate": 5.375938692299518e-06, "loss": 0.1116, "step": 16857 }, { "epoch": 0.4918023221891592, "grad_norm": 0.7997691791500734, "learning_rate": 5.375467593739713e-06, "loss": 0.1431, "step": 16858 }, { "epoch": 0.4918314954198028, "grad_norm": 0.6407723668348148, "learning_rate": 5.37499649182782e-06, "loss": 0.1247, "step": 16859 }, { "epoch": 0.49186066865044636, "grad_norm": 0.9153042964347438, "learning_rate": 5.37452538656805e-06, "loss": 0.1565, "step": 16860 }, { "epoch": 0.4918898418810899, "grad_norm": 0.7070742348713276, "learning_rate": 5.374054277964605e-06, "loss": 0.1345, "step": 16861 }, { "epoch": 0.49191901511173347, "grad_norm": 0.7474405370120811, "learning_rate": 5.373583166021694e-06, "loss": 0.1345, "step": 16862 }, { "epoch": 0.491948188342377, "grad_norm": 0.975596061924209, "learning_rate": 5.373112050743522e-06, "loss": 0.1201, "step": 16863 }, { "epoch": 0.4919773615730206, "grad_norm": 0.8097861084213097, "learning_rate": 5.3726409321342935e-06, "loss": 0.1245, "step": 16864 }, { "epoch": 0.49200653480366413, "grad_norm": 0.792038897417954, "learning_rate": 5.372169810198215e-06, "loss": 0.1503, "step": 16865 }, { "epoch": 0.49203570803430774, "grad_norm": 0.9405911575915272, "learning_rate": 5.371698684939495e-06, "loss": 0.1412, "step": 16866 }, { "epoch": 0.4920648812649513, "grad_norm": 0.7992514729447099, "learning_rate": 5.371227556362337e-06, "loss": 0.129, "step": 16867 }, { "epoch": 0.49209405449559485, "grad_norm": 0.6542417261430244, "learning_rate": 5.370756424470948e-06, "loss": 0.114, "step": 16868 }, { "epoch": 0.4921232277262384, "grad_norm": 1.163943728663297, "learning_rate": 5.370285289269535e-06, "loss": 0.1287, "step": 16869 }, { "epoch": 0.49215240095688195, "grad_norm": 0.9083702708069268, "learning_rate": 5.369814150762304e-06, "loss": 0.1268, "step": 16870 }, { "epoch": 0.4921815741875255, "grad_norm": 0.6994081948116999, "learning_rate": 5.369343008953458e-06, "loss": 0.1042, "step": 16871 }, { "epoch": 0.4922107474181691, "grad_norm": 0.8741433725163471, "learning_rate": 5.368871863847207e-06, "loss": 0.1171, "step": 16872 }, { "epoch": 0.49223992064881267, "grad_norm": 1.2603996912826851, "learning_rate": 5.368400715447757e-06, "loss": 0.127, "step": 16873 }, { "epoch": 0.4922690938794562, "grad_norm": 0.7652508192028512, "learning_rate": 5.367929563759311e-06, "loss": 0.1147, "step": 16874 }, { "epoch": 0.4922982671100998, "grad_norm": 1.1648667864675992, "learning_rate": 5.36745840878608e-06, "loss": 0.131, "step": 16875 }, { "epoch": 0.4923274403407433, "grad_norm": 1.0416485002240208, "learning_rate": 5.366987250532266e-06, "loss": 0.1326, "step": 16876 }, { "epoch": 0.4923566135713869, "grad_norm": 0.8748062798784032, "learning_rate": 5.36651608900208e-06, "loss": 0.1522, "step": 16877 }, { "epoch": 0.49238578680203043, "grad_norm": 0.8007945767833401, "learning_rate": 5.366044924199725e-06, "loss": 0.1286, "step": 16878 }, { "epoch": 0.49241496003267404, "grad_norm": 0.8482147388626303, "learning_rate": 5.365573756129406e-06, "loss": 0.1408, "step": 16879 }, { "epoch": 0.4924441332633176, "grad_norm": 0.967644841804688, "learning_rate": 5.365102584795334e-06, "loss": 0.1285, "step": 16880 }, { "epoch": 0.49247330649396115, "grad_norm": 0.8995162241486194, "learning_rate": 5.364631410201713e-06, "loss": 0.1251, "step": 16881 }, { "epoch": 0.4925024797246047, "grad_norm": 0.8415421441677097, "learning_rate": 5.364160232352749e-06, "loss": 0.1156, "step": 16882 }, { "epoch": 0.49253165295524826, "grad_norm": 1.323591753648415, "learning_rate": 5.363689051252651e-06, "loss": 0.1447, "step": 16883 }, { "epoch": 0.4925608261858918, "grad_norm": 0.9582912145627567, "learning_rate": 5.363217866905622e-06, "loss": 0.15, "step": 16884 }, { "epoch": 0.49258999941653536, "grad_norm": 1.0225435535678218, "learning_rate": 5.362746679315872e-06, "loss": 0.1415, "step": 16885 }, { "epoch": 0.49261917264717897, "grad_norm": 0.9494187747788593, "learning_rate": 5.362275488487606e-06, "loss": 0.1691, "step": 16886 }, { "epoch": 0.4926483458778225, "grad_norm": 0.8646619701138887, "learning_rate": 5.361804294425031e-06, "loss": 0.1336, "step": 16887 }, { "epoch": 0.4926775191084661, "grad_norm": 1.014770621302196, "learning_rate": 5.361333097132353e-06, "loss": 0.1152, "step": 16888 }, { "epoch": 0.49270669233910963, "grad_norm": 0.8472959034349283, "learning_rate": 5.360861896613779e-06, "loss": 0.1257, "step": 16889 }, { "epoch": 0.4927358655697532, "grad_norm": 0.9647487125299989, "learning_rate": 5.360390692873518e-06, "loss": 0.1325, "step": 16890 }, { "epoch": 0.49276503880039674, "grad_norm": 0.9419671715760097, "learning_rate": 5.3599194859157735e-06, "loss": 0.1395, "step": 16891 }, { "epoch": 0.4927942120310403, "grad_norm": 0.7140778222154549, "learning_rate": 5.359448275744755e-06, "loss": 0.1413, "step": 16892 }, { "epoch": 0.4928233852616839, "grad_norm": 0.7519770736931981, "learning_rate": 5.358977062364666e-06, "loss": 0.1369, "step": 16893 }, { "epoch": 0.49285255849232745, "grad_norm": 0.8270270530223526, "learning_rate": 5.358505845779717e-06, "loss": 0.1402, "step": 16894 }, { "epoch": 0.492881731722971, "grad_norm": 0.7663369407857998, "learning_rate": 5.358034625994113e-06, "loss": 0.1514, "step": 16895 }, { "epoch": 0.49291090495361456, "grad_norm": 0.5966561564754324, "learning_rate": 5.357563403012061e-06, "loss": 0.1077, "step": 16896 }, { "epoch": 0.4929400781842581, "grad_norm": 0.7748324464328683, "learning_rate": 5.357092176837769e-06, "loss": 0.1258, "step": 16897 }, { "epoch": 0.49296925141490167, "grad_norm": 0.8450404382189073, "learning_rate": 5.3566209474754425e-06, "loss": 0.1422, "step": 16898 }, { "epoch": 0.4929984246455453, "grad_norm": 0.8087937305077653, "learning_rate": 5.356149714929291e-06, "loss": 0.1176, "step": 16899 }, { "epoch": 0.49302759787618883, "grad_norm": 0.7299956387668687, "learning_rate": 5.355678479203518e-06, "loss": 0.1003, "step": 16900 }, { "epoch": 0.4930567711068324, "grad_norm": 0.817078900020243, "learning_rate": 5.355207240302332e-06, "loss": 0.1476, "step": 16901 }, { "epoch": 0.49308594433747593, "grad_norm": 0.7672277827493014, "learning_rate": 5.354735998229943e-06, "loss": 0.1209, "step": 16902 }, { "epoch": 0.4931151175681195, "grad_norm": 0.7747143025480175, "learning_rate": 5.354264752990553e-06, "loss": 0.1388, "step": 16903 }, { "epoch": 0.49314429079876304, "grad_norm": 0.8244094445952639, "learning_rate": 5.353793504588374e-06, "loss": 0.1287, "step": 16904 }, { "epoch": 0.4931734640294066, "grad_norm": 0.8267489229302534, "learning_rate": 5.353322253027611e-06, "loss": 0.1216, "step": 16905 }, { "epoch": 0.4932026372600502, "grad_norm": 1.0800355112043782, "learning_rate": 5.352850998312469e-06, "loss": 0.1241, "step": 16906 }, { "epoch": 0.49323181049069376, "grad_norm": 0.8004539660085748, "learning_rate": 5.35237974044716e-06, "loss": 0.1577, "step": 16907 }, { "epoch": 0.4932609837213373, "grad_norm": 0.9291822275348421, "learning_rate": 5.351908479435888e-06, "loss": 0.1247, "step": 16908 }, { "epoch": 0.49329015695198086, "grad_norm": 0.5909709718039137, "learning_rate": 5.35143721528286e-06, "loss": 0.1318, "step": 16909 }, { "epoch": 0.4933193301826244, "grad_norm": 0.8182605410853571, "learning_rate": 5.350965947992286e-06, "loss": 0.1248, "step": 16910 }, { "epoch": 0.49334850341326797, "grad_norm": 1.060945591380358, "learning_rate": 5.350494677568371e-06, "loss": 0.1479, "step": 16911 }, { "epoch": 0.4933776766439115, "grad_norm": 0.7781852485598358, "learning_rate": 5.350023404015323e-06, "loss": 0.1208, "step": 16912 }, { "epoch": 0.49340684987455513, "grad_norm": 0.897173862130397, "learning_rate": 5.3495521273373504e-06, "loss": 0.1429, "step": 16913 }, { "epoch": 0.4934360231051987, "grad_norm": 0.902893401242971, "learning_rate": 5.349080847538659e-06, "loss": 0.1225, "step": 16914 }, { "epoch": 0.49346519633584224, "grad_norm": 1.0301174153869221, "learning_rate": 5.348609564623458e-06, "loss": 0.136, "step": 16915 }, { "epoch": 0.4934943695664858, "grad_norm": 0.8247224773588024, "learning_rate": 5.3481382785959536e-06, "loss": 0.1451, "step": 16916 }, { "epoch": 0.49352354279712934, "grad_norm": 0.7293546646236825, "learning_rate": 5.347666989460353e-06, "loss": 0.1282, "step": 16917 }, { "epoch": 0.4935527160277729, "grad_norm": 0.8207541196798567, "learning_rate": 5.347195697220865e-06, "loss": 0.13, "step": 16918 }, { "epoch": 0.49358188925841645, "grad_norm": 0.8288687812887132, "learning_rate": 5.346724401881697e-06, "loss": 0.1341, "step": 16919 }, { "epoch": 0.49361106248906006, "grad_norm": 0.8174263177651826, "learning_rate": 5.346253103447058e-06, "loss": 0.124, "step": 16920 }, { "epoch": 0.4936402357197036, "grad_norm": 0.7612552679145954, "learning_rate": 5.34578180192115e-06, "loss": 0.1357, "step": 16921 }, { "epoch": 0.49366940895034717, "grad_norm": 0.7953009668694182, "learning_rate": 5.3453104973081884e-06, "loss": 0.1326, "step": 16922 }, { "epoch": 0.4936985821809907, "grad_norm": 0.6520145509816958, "learning_rate": 5.344839189612375e-06, "loss": 0.1283, "step": 16923 }, { "epoch": 0.4937277554116343, "grad_norm": 0.7295993419099278, "learning_rate": 5.3443678788379195e-06, "loss": 0.1388, "step": 16924 }, { "epoch": 0.4937569286422778, "grad_norm": 0.7165709618403117, "learning_rate": 5.343896564989031e-06, "loss": 0.1231, "step": 16925 }, { "epoch": 0.49378610187292143, "grad_norm": 0.7751106265930863, "learning_rate": 5.3434252480699154e-06, "loss": 0.1229, "step": 16926 }, { "epoch": 0.493815275103565, "grad_norm": 0.872227451919471, "learning_rate": 5.3429539280847805e-06, "loss": 0.1312, "step": 16927 }, { "epoch": 0.49384444833420854, "grad_norm": 0.8025408400035391, "learning_rate": 5.3424826050378365e-06, "loss": 0.1433, "step": 16928 }, { "epoch": 0.4938736215648521, "grad_norm": 0.7289180481653799, "learning_rate": 5.3420112789332875e-06, "loss": 0.1432, "step": 16929 }, { "epoch": 0.49390279479549565, "grad_norm": 0.9134284659715837, "learning_rate": 5.341539949775345e-06, "loss": 0.1296, "step": 16930 }, { "epoch": 0.4939319680261392, "grad_norm": 0.8583498510757264, "learning_rate": 5.341068617568215e-06, "loss": 0.1486, "step": 16931 }, { "epoch": 0.49396114125678275, "grad_norm": 0.8446957679303193, "learning_rate": 5.340597282316105e-06, "loss": 0.1292, "step": 16932 }, { "epoch": 0.49399031448742636, "grad_norm": 0.9423209681327167, "learning_rate": 5.340125944023226e-06, "loss": 0.1138, "step": 16933 }, { "epoch": 0.4940194877180699, "grad_norm": 1.06153794678825, "learning_rate": 5.339654602693781e-06, "loss": 0.1379, "step": 16934 }, { "epoch": 0.49404866094871347, "grad_norm": 1.039660574620419, "learning_rate": 5.339183258331983e-06, "loss": 0.118, "step": 16935 }, { "epoch": 0.494077834179357, "grad_norm": 0.9709315235538598, "learning_rate": 5.338711910942036e-06, "loss": 0.1238, "step": 16936 }, { "epoch": 0.4941070074100006, "grad_norm": 1.068794208306307, "learning_rate": 5.338240560528152e-06, "loss": 0.1545, "step": 16937 }, { "epoch": 0.49413618064064413, "grad_norm": 0.9457659930370551, "learning_rate": 5.337769207094535e-06, "loss": 0.1354, "step": 16938 }, { "epoch": 0.4941653538712877, "grad_norm": 1.0797531203555015, "learning_rate": 5.337297850645395e-06, "loss": 0.1494, "step": 16939 }, { "epoch": 0.4941945271019313, "grad_norm": 0.8296869908372474, "learning_rate": 5.336826491184943e-06, "loss": 0.1304, "step": 16940 }, { "epoch": 0.49422370033257484, "grad_norm": 0.9105037236438693, "learning_rate": 5.336355128717382e-06, "loss": 0.1407, "step": 16941 }, { "epoch": 0.4942528735632184, "grad_norm": 1.0522028001869357, "learning_rate": 5.335883763246924e-06, "loss": 0.1433, "step": 16942 }, { "epoch": 0.49428204679386195, "grad_norm": 0.9423373371445346, "learning_rate": 5.335412394777775e-06, "loss": 0.158, "step": 16943 }, { "epoch": 0.4943112200245055, "grad_norm": 0.9413246687873199, "learning_rate": 5.334941023314145e-06, "loss": 0.1376, "step": 16944 }, { "epoch": 0.49434039325514906, "grad_norm": 0.8574886498954905, "learning_rate": 5.334469648860241e-06, "loss": 0.1611, "step": 16945 }, { "epoch": 0.4943695664857926, "grad_norm": 1.4212075923177745, "learning_rate": 5.333998271420272e-06, "loss": 0.1194, "step": 16946 }, { "epoch": 0.4943987397164362, "grad_norm": 0.9849675768205657, "learning_rate": 5.333526890998446e-06, "loss": 0.1482, "step": 16947 }, { "epoch": 0.4944279129470798, "grad_norm": 1.124274564036261, "learning_rate": 5.333055507598971e-06, "loss": 0.1518, "step": 16948 }, { "epoch": 0.4944570861777233, "grad_norm": 0.8075400744977255, "learning_rate": 5.332584121226057e-06, "loss": 0.1295, "step": 16949 }, { "epoch": 0.4944862594083669, "grad_norm": 1.019394901701943, "learning_rate": 5.332112731883912e-06, "loss": 0.148, "step": 16950 }, { "epoch": 0.49451543263901043, "grad_norm": 0.9657542465799582, "learning_rate": 5.3316413395767405e-06, "loss": 0.135, "step": 16951 }, { "epoch": 0.494544605869654, "grad_norm": 0.7827734704996198, "learning_rate": 5.331169944308758e-06, "loss": 0.1252, "step": 16952 }, { "epoch": 0.49457377910029754, "grad_norm": 0.8204546421277574, "learning_rate": 5.330698546084167e-06, "loss": 0.1169, "step": 16953 }, { "epoch": 0.49460295233094115, "grad_norm": 0.7606126873012299, "learning_rate": 5.330227144907179e-06, "loss": 0.1158, "step": 16954 }, { "epoch": 0.4946321255615847, "grad_norm": 0.8045375654519857, "learning_rate": 5.329755740782003e-06, "loss": 0.1444, "step": 16955 }, { "epoch": 0.49466129879222825, "grad_norm": 1.0550458230531574, "learning_rate": 5.329284333712845e-06, "loss": 0.1402, "step": 16956 }, { "epoch": 0.4946904720228718, "grad_norm": 1.0603512795656371, "learning_rate": 5.328812923703917e-06, "loss": 0.1318, "step": 16957 }, { "epoch": 0.49471964525351536, "grad_norm": 0.5955430122008735, "learning_rate": 5.328341510759423e-06, "loss": 0.1174, "step": 16958 }, { "epoch": 0.4947488184841589, "grad_norm": 0.8398390339422245, "learning_rate": 5.327870094883576e-06, "loss": 0.1311, "step": 16959 }, { "epoch": 0.4947779917148025, "grad_norm": 1.1291956634852869, "learning_rate": 5.327398676080583e-06, "loss": 0.1309, "step": 16960 }, { "epoch": 0.4948071649454461, "grad_norm": 1.0516302051600936, "learning_rate": 5.3269272543546524e-06, "loss": 0.162, "step": 16961 }, { "epoch": 0.49483633817608963, "grad_norm": 0.7976172681207964, "learning_rate": 5.3264558297099935e-06, "loss": 0.1362, "step": 16962 }, { "epoch": 0.4948655114067332, "grad_norm": 0.8877281464621941, "learning_rate": 5.3259844021508145e-06, "loss": 0.1407, "step": 16963 }, { "epoch": 0.49489468463737674, "grad_norm": 0.7885792964523457, "learning_rate": 5.325512971681325e-06, "loss": 0.1184, "step": 16964 }, { "epoch": 0.4949238578680203, "grad_norm": 0.8746196809306633, "learning_rate": 5.325041538305734e-06, "loss": 0.1411, "step": 16965 }, { "epoch": 0.49495303109866384, "grad_norm": 0.8485752718426207, "learning_rate": 5.324570102028248e-06, "loss": 0.161, "step": 16966 }, { "epoch": 0.49498220432930745, "grad_norm": 0.9043789611546547, "learning_rate": 5.324098662853079e-06, "loss": 0.1362, "step": 16967 }, { "epoch": 0.495011377559951, "grad_norm": 0.8806146882536067, "learning_rate": 5.323627220784434e-06, "loss": 0.1237, "step": 16968 }, { "epoch": 0.49504055079059456, "grad_norm": 0.8910099585858573, "learning_rate": 5.3231557758265215e-06, "loss": 0.1273, "step": 16969 }, { "epoch": 0.4950697240212381, "grad_norm": 0.691561728123018, "learning_rate": 5.322684327983554e-06, "loss": 0.1314, "step": 16970 }, { "epoch": 0.49509889725188166, "grad_norm": 0.7960444633372479, "learning_rate": 5.3222128772597355e-06, "loss": 0.1522, "step": 16971 }, { "epoch": 0.4951280704825252, "grad_norm": 0.9629556327673812, "learning_rate": 5.321741423659279e-06, "loss": 0.1416, "step": 16972 }, { "epoch": 0.49515724371316877, "grad_norm": 0.7045362984911328, "learning_rate": 5.321269967186391e-06, "loss": 0.1217, "step": 16973 }, { "epoch": 0.4951864169438124, "grad_norm": 0.742526438025149, "learning_rate": 5.320798507845281e-06, "loss": 0.1239, "step": 16974 }, { "epoch": 0.49521559017445593, "grad_norm": 0.7586203050264122, "learning_rate": 5.320327045640159e-06, "loss": 0.1191, "step": 16975 }, { "epoch": 0.4952447634050995, "grad_norm": 1.1978140322051605, "learning_rate": 5.319855580575233e-06, "loss": 0.1335, "step": 16976 }, { "epoch": 0.49527393663574304, "grad_norm": 0.8545638705041876, "learning_rate": 5.319384112654713e-06, "loss": 0.1341, "step": 16977 }, { "epoch": 0.4953031098663866, "grad_norm": 0.8353652565520191, "learning_rate": 5.318912641882809e-06, "loss": 0.1414, "step": 16978 }, { "epoch": 0.49533228309703015, "grad_norm": 0.7460690829875166, "learning_rate": 5.318441168263727e-06, "loss": 0.1399, "step": 16979 }, { "epoch": 0.4953614563276737, "grad_norm": 0.8707347033873954, "learning_rate": 5.317969691801681e-06, "loss": 0.1239, "step": 16980 }, { "epoch": 0.4953906295583173, "grad_norm": 0.8130012177841824, "learning_rate": 5.3174982125008745e-06, "loss": 0.1288, "step": 16981 }, { "epoch": 0.49541980278896086, "grad_norm": 0.8792166690507628, "learning_rate": 5.317026730365523e-06, "loss": 0.1311, "step": 16982 }, { "epoch": 0.4954489760196044, "grad_norm": 0.8265517968595797, "learning_rate": 5.31655524539983e-06, "loss": 0.1121, "step": 16983 }, { "epoch": 0.49547814925024797, "grad_norm": 0.6604723081097706, "learning_rate": 5.316083757608007e-06, "loss": 0.1495, "step": 16984 }, { "epoch": 0.4955073224808915, "grad_norm": 0.947929565131271, "learning_rate": 5.3156122669942665e-06, "loss": 0.1392, "step": 16985 }, { "epoch": 0.4955364957115351, "grad_norm": 0.8239737182678122, "learning_rate": 5.3151407735628125e-06, "loss": 0.1251, "step": 16986 }, { "epoch": 0.4955656689421787, "grad_norm": 1.0205966496438035, "learning_rate": 5.314669277317858e-06, "loss": 0.1379, "step": 16987 }, { "epoch": 0.49559484217282224, "grad_norm": 0.7194479213053977, "learning_rate": 5.314197778263611e-06, "loss": 0.1248, "step": 16988 }, { "epoch": 0.4956240154034658, "grad_norm": 1.1724484690623163, "learning_rate": 5.313726276404281e-06, "loss": 0.1354, "step": 16989 }, { "epoch": 0.49565318863410934, "grad_norm": 1.2049310165373772, "learning_rate": 5.313254771744079e-06, "loss": 0.1389, "step": 16990 }, { "epoch": 0.4956823618647529, "grad_norm": 0.700208096745436, "learning_rate": 5.3127832642872116e-06, "loss": 0.1218, "step": 16991 }, { "epoch": 0.49571153509539645, "grad_norm": 0.8755550331867326, "learning_rate": 5.3123117540378895e-06, "loss": 0.1475, "step": 16992 }, { "epoch": 0.49574070832604, "grad_norm": 0.9273784916004748, "learning_rate": 5.311840241000323e-06, "loss": 0.1163, "step": 16993 }, { "epoch": 0.4957698815566836, "grad_norm": 0.9852128356665886, "learning_rate": 5.311368725178723e-06, "loss": 0.1165, "step": 16994 }, { "epoch": 0.49579905478732716, "grad_norm": 0.8172719672545941, "learning_rate": 5.310897206577297e-06, "loss": 0.1326, "step": 16995 }, { "epoch": 0.4958282280179707, "grad_norm": 0.8514774838543385, "learning_rate": 5.310425685200252e-06, "loss": 0.1381, "step": 16996 }, { "epoch": 0.49585740124861427, "grad_norm": 0.8554652494690688, "learning_rate": 5.3099541610518046e-06, "loss": 0.1313, "step": 16997 }, { "epoch": 0.4958865744792578, "grad_norm": 0.9276791529462564, "learning_rate": 5.309482634136158e-06, "loss": 0.1306, "step": 16998 }, { "epoch": 0.4959157477099014, "grad_norm": 1.096328000628962, "learning_rate": 5.309011104457524e-06, "loss": 0.1463, "step": 16999 }, { "epoch": 0.49594492094054493, "grad_norm": 0.8096521912923862, "learning_rate": 5.3085395720201145e-06, "loss": 0.1124, "step": 17000 }, { "epoch": 0.49597409417118854, "grad_norm": 0.8665191497573511, "learning_rate": 5.308068036828137e-06, "loss": 0.1177, "step": 17001 }, { "epoch": 0.4960032674018321, "grad_norm": 1.0612210196688785, "learning_rate": 5.3075964988857995e-06, "loss": 0.1255, "step": 17002 }, { "epoch": 0.49603244063247565, "grad_norm": 1.0490245799527906, "learning_rate": 5.307124958197316e-06, "loss": 0.1372, "step": 17003 }, { "epoch": 0.4960616138631192, "grad_norm": 0.7495009661428474, "learning_rate": 5.306653414766894e-06, "loss": 0.138, "step": 17004 }, { "epoch": 0.49609078709376275, "grad_norm": 0.7803730397205744, "learning_rate": 5.306181868598742e-06, "loss": 0.1332, "step": 17005 }, { "epoch": 0.4961199603244063, "grad_norm": 1.0449426882750648, "learning_rate": 5.305710319697073e-06, "loss": 0.1249, "step": 17006 }, { "epoch": 0.49614913355504986, "grad_norm": 0.8090543934474392, "learning_rate": 5.3052387680660945e-06, "loss": 0.1164, "step": 17007 }, { "epoch": 0.49617830678569347, "grad_norm": 0.8323158832759161, "learning_rate": 5.304767213710017e-06, "loss": 0.1237, "step": 17008 }, { "epoch": 0.496207480016337, "grad_norm": 1.0577348037359635, "learning_rate": 5.304295656633051e-06, "loss": 0.1461, "step": 17009 }, { "epoch": 0.4962366532469806, "grad_norm": 0.9646279194872153, "learning_rate": 5.303824096839407e-06, "loss": 0.1394, "step": 17010 }, { "epoch": 0.49626582647762413, "grad_norm": 1.0613858884412792, "learning_rate": 5.303352534333291e-06, "loss": 0.1401, "step": 17011 }, { "epoch": 0.4962949997082677, "grad_norm": 1.0609249588291445, "learning_rate": 5.30288096911892e-06, "loss": 0.1209, "step": 17012 }, { "epoch": 0.49632417293891123, "grad_norm": 6.964887388903303, "learning_rate": 5.302409401200497e-06, "loss": 0.1429, "step": 17013 }, { "epoch": 0.49635334616955484, "grad_norm": 0.9194167504080002, "learning_rate": 5.301937830582235e-06, "loss": 0.1268, "step": 17014 }, { "epoch": 0.4963825194001984, "grad_norm": 0.8043827704686469, "learning_rate": 5.301466257268346e-06, "loss": 0.1389, "step": 17015 }, { "epoch": 0.49641169263084195, "grad_norm": 0.9819150658530034, "learning_rate": 5.300994681263038e-06, "loss": 0.1403, "step": 17016 }, { "epoch": 0.4964408658614855, "grad_norm": 0.7903323502090318, "learning_rate": 5.3005231025705195e-06, "loss": 0.1651, "step": 17017 }, { "epoch": 0.49647003909212906, "grad_norm": 0.8828242039006373, "learning_rate": 5.300051521195004e-06, "loss": 0.1167, "step": 17018 }, { "epoch": 0.4964992123227726, "grad_norm": 0.9808924137735425, "learning_rate": 5.299579937140699e-06, "loss": 0.1254, "step": 17019 }, { "epoch": 0.49652838555341616, "grad_norm": 0.8858867618245164, "learning_rate": 5.299108350411817e-06, "loss": 0.1466, "step": 17020 }, { "epoch": 0.49655755878405977, "grad_norm": 0.7861078381162786, "learning_rate": 5.298636761012567e-06, "loss": 0.1251, "step": 17021 }, { "epoch": 0.4965867320147033, "grad_norm": 0.9829584641559423, "learning_rate": 5.298165168947158e-06, "loss": 0.1346, "step": 17022 }, { "epoch": 0.4966159052453469, "grad_norm": 1.0145337046355396, "learning_rate": 5.297693574219803e-06, "loss": 0.1492, "step": 17023 }, { "epoch": 0.49664507847599043, "grad_norm": 0.9425629232566466, "learning_rate": 5.29722197683471e-06, "loss": 0.1304, "step": 17024 }, { "epoch": 0.496674251706634, "grad_norm": 0.9778512536435043, "learning_rate": 5.296750376796092e-06, "loss": 0.1197, "step": 17025 }, { "epoch": 0.49670342493727754, "grad_norm": 0.8531332874595413, "learning_rate": 5.296278774108154e-06, "loss": 0.142, "step": 17026 }, { "epoch": 0.4967325981679211, "grad_norm": 0.9996692545825431, "learning_rate": 5.295807168775113e-06, "loss": 0.1341, "step": 17027 }, { "epoch": 0.4967617713985647, "grad_norm": 0.8120564563782053, "learning_rate": 5.295335560801175e-06, "loss": 0.1363, "step": 17028 }, { "epoch": 0.49679094462920825, "grad_norm": 0.7709973211616562, "learning_rate": 5.294863950190551e-06, "loss": 0.1373, "step": 17029 }, { "epoch": 0.4968201178598518, "grad_norm": 0.9389352398385432, "learning_rate": 5.294392336947454e-06, "loss": 0.1236, "step": 17030 }, { "epoch": 0.49684929109049536, "grad_norm": 0.8049604165370342, "learning_rate": 5.29392072107609e-06, "loss": 0.1339, "step": 17031 }, { "epoch": 0.4968784643211389, "grad_norm": 0.885934958894433, "learning_rate": 5.293449102580674e-06, "loss": 0.1257, "step": 17032 }, { "epoch": 0.49690763755178247, "grad_norm": 0.7998677185861385, "learning_rate": 5.292977481465413e-06, "loss": 0.1383, "step": 17033 }, { "epoch": 0.496936810782426, "grad_norm": 0.9177388784485104, "learning_rate": 5.292505857734519e-06, "loss": 0.1202, "step": 17034 }, { "epoch": 0.49696598401306963, "grad_norm": 0.900434529486035, "learning_rate": 5.292034231392204e-06, "loss": 0.1467, "step": 17035 }, { "epoch": 0.4969951572437132, "grad_norm": 0.9666660790595324, "learning_rate": 5.2915626024426755e-06, "loss": 0.1487, "step": 17036 }, { "epoch": 0.49702433047435673, "grad_norm": 0.9791511838331073, "learning_rate": 5.291090970890146e-06, "loss": 0.1446, "step": 17037 }, { "epoch": 0.4970535037050003, "grad_norm": 0.852126728562316, "learning_rate": 5.290619336738826e-06, "loss": 0.1504, "step": 17038 }, { "epoch": 0.49708267693564384, "grad_norm": 0.9246946420062684, "learning_rate": 5.290147699992926e-06, "loss": 0.1421, "step": 17039 }, { "epoch": 0.4971118501662874, "grad_norm": 0.8825312853080106, "learning_rate": 5.2896760606566576e-06, "loss": 0.1361, "step": 17040 }, { "epoch": 0.497141023396931, "grad_norm": 0.7683017994188074, "learning_rate": 5.289204418734228e-06, "loss": 0.1192, "step": 17041 }, { "epoch": 0.49717019662757456, "grad_norm": 0.745517946160222, "learning_rate": 5.288732774229853e-06, "loss": 0.1235, "step": 17042 }, { "epoch": 0.4971993698582181, "grad_norm": 0.7803483486077735, "learning_rate": 5.28826112714774e-06, "loss": 0.1141, "step": 17043 }, { "epoch": 0.49722854308886166, "grad_norm": 0.9824857045201814, "learning_rate": 5.287789477492099e-06, "loss": 0.1528, "step": 17044 }, { "epoch": 0.4972577163195052, "grad_norm": 0.7936125583236037, "learning_rate": 5.287317825267146e-06, "loss": 0.1353, "step": 17045 }, { "epoch": 0.49728688955014877, "grad_norm": 0.8966151860414091, "learning_rate": 5.286846170477085e-06, "loss": 0.115, "step": 17046 }, { "epoch": 0.4973160627807923, "grad_norm": 0.8581127729591428, "learning_rate": 5.286374513126129e-06, "loss": 0.1411, "step": 17047 }, { "epoch": 0.49734523601143593, "grad_norm": 0.6876296940895243, "learning_rate": 5.285902853218492e-06, "loss": 0.1385, "step": 17048 }, { "epoch": 0.4973744092420795, "grad_norm": 0.9251394566503488, "learning_rate": 5.285431190758381e-06, "loss": 0.1414, "step": 17049 }, { "epoch": 0.49740358247272304, "grad_norm": 0.7486646186866653, "learning_rate": 5.2849595257500085e-06, "loss": 0.12, "step": 17050 }, { "epoch": 0.4974327557033666, "grad_norm": 0.7712107945516644, "learning_rate": 5.284487858197586e-06, "loss": 0.1267, "step": 17051 }, { "epoch": 0.49746192893401014, "grad_norm": 0.8771844825510239, "learning_rate": 5.284016188105324e-06, "loss": 0.1144, "step": 17052 }, { "epoch": 0.4974911021646537, "grad_norm": 0.8092455419223078, "learning_rate": 5.283544515477434e-06, "loss": 0.1428, "step": 17053 }, { "epoch": 0.49752027539529725, "grad_norm": 0.8932150374580071, "learning_rate": 5.283072840318124e-06, "loss": 0.155, "step": 17054 }, { "epoch": 0.49754944862594086, "grad_norm": 0.6802444985614569, "learning_rate": 5.282601162631609e-06, "loss": 0.1164, "step": 17055 }, { "epoch": 0.4975786218565844, "grad_norm": 0.6382936620776493, "learning_rate": 5.282129482422097e-06, "loss": 0.129, "step": 17056 }, { "epoch": 0.49760779508722797, "grad_norm": 0.9247116257299203, "learning_rate": 5.281657799693803e-06, "loss": 0.1318, "step": 17057 }, { "epoch": 0.4976369683178715, "grad_norm": 0.728335394824729, "learning_rate": 5.281186114450934e-06, "loss": 0.1292, "step": 17058 }, { "epoch": 0.4976661415485151, "grad_norm": 0.713713454618646, "learning_rate": 5.2807144266977e-06, "loss": 0.1404, "step": 17059 }, { "epoch": 0.4976953147791586, "grad_norm": 0.8888424569177863, "learning_rate": 5.280242736438318e-06, "loss": 0.1315, "step": 17060 }, { "epoch": 0.4977244880098022, "grad_norm": 0.7007810331234429, "learning_rate": 5.279771043676994e-06, "loss": 0.1312, "step": 17061 }, { "epoch": 0.4977536612404458, "grad_norm": 0.7150189101992875, "learning_rate": 5.2792993484179415e-06, "loss": 0.1473, "step": 17062 }, { "epoch": 0.49778283447108934, "grad_norm": 0.8462435442992592, "learning_rate": 5.27882765066537e-06, "loss": 0.1469, "step": 17063 }, { "epoch": 0.4978120077017329, "grad_norm": 0.6706241051842962, "learning_rate": 5.2783559504234926e-06, "loss": 0.1433, "step": 17064 }, { "epoch": 0.49784118093237645, "grad_norm": 0.8418848267602387, "learning_rate": 5.277884247696521e-06, "loss": 0.1388, "step": 17065 }, { "epoch": 0.49787035416302, "grad_norm": 0.9550010976560557, "learning_rate": 5.277412542488664e-06, "loss": 0.1389, "step": 17066 }, { "epoch": 0.49789952739366355, "grad_norm": 0.8098212287485531, "learning_rate": 5.276940834804133e-06, "loss": 0.1343, "step": 17067 }, { "epoch": 0.49792870062430716, "grad_norm": 0.9511043289148973, "learning_rate": 5.276469124647141e-06, "loss": 0.1363, "step": 17068 }, { "epoch": 0.4979578738549507, "grad_norm": 0.9632947494313432, "learning_rate": 5.2759974120218995e-06, "loss": 0.1337, "step": 17069 }, { "epoch": 0.49798704708559427, "grad_norm": 0.7655909900395728, "learning_rate": 5.2755256969326195e-06, "loss": 0.1213, "step": 17070 }, { "epoch": 0.4980162203162378, "grad_norm": 0.6625999224547507, "learning_rate": 5.27505397938351e-06, "loss": 0.1251, "step": 17071 }, { "epoch": 0.4980453935468814, "grad_norm": 0.9600395652719803, "learning_rate": 5.274582259378785e-06, "loss": 0.1443, "step": 17072 }, { "epoch": 0.49807456677752493, "grad_norm": 0.7291946285816626, "learning_rate": 5.274110536922655e-06, "loss": 0.1217, "step": 17073 }, { "epoch": 0.4981037400081685, "grad_norm": 0.7580019253967736, "learning_rate": 5.273638812019331e-06, "loss": 0.1324, "step": 17074 }, { "epoch": 0.4981329132388121, "grad_norm": 1.0467376295916913, "learning_rate": 5.273167084673028e-06, "loss": 0.1233, "step": 17075 }, { "epoch": 0.49816208646945564, "grad_norm": 0.7543399720664358, "learning_rate": 5.272695354887951e-06, "loss": 0.1601, "step": 17076 }, { "epoch": 0.4981912597000992, "grad_norm": 0.8197716089735835, "learning_rate": 5.272223622668316e-06, "loss": 0.1113, "step": 17077 }, { "epoch": 0.49822043293074275, "grad_norm": 0.7635338222701459, "learning_rate": 5.271751888018335e-06, "loss": 0.1312, "step": 17078 }, { "epoch": 0.4982496061613863, "grad_norm": 0.8843088300732025, "learning_rate": 5.271280150942217e-06, "loss": 0.1398, "step": 17079 }, { "epoch": 0.49827877939202986, "grad_norm": 0.8341103549713862, "learning_rate": 5.270808411444174e-06, "loss": 0.1568, "step": 17080 }, { "epoch": 0.4983079526226734, "grad_norm": 0.8561214965639796, "learning_rate": 5.270336669528417e-06, "loss": 0.1426, "step": 17081 }, { "epoch": 0.498337125853317, "grad_norm": 0.8894287839903012, "learning_rate": 5.269864925199161e-06, "loss": 0.1328, "step": 17082 }, { "epoch": 0.4983662990839606, "grad_norm": 0.8532956943212627, "learning_rate": 5.269393178460614e-06, "loss": 0.1541, "step": 17083 }, { "epoch": 0.4983954723146041, "grad_norm": 0.6599364125191604, "learning_rate": 5.2689214293169896e-06, "loss": 0.1393, "step": 17084 }, { "epoch": 0.4984246455452477, "grad_norm": 0.8397851924690483, "learning_rate": 5.268449677772499e-06, "loss": 0.1133, "step": 17085 }, { "epoch": 0.49845381877589123, "grad_norm": 0.9352705543099153, "learning_rate": 5.267977923831354e-06, "loss": 0.1353, "step": 17086 }, { "epoch": 0.4984829920065348, "grad_norm": 0.8282603629619283, "learning_rate": 5.2675061674977665e-06, "loss": 0.1202, "step": 17087 }, { "epoch": 0.49851216523717834, "grad_norm": 0.9538341436559521, "learning_rate": 5.2670344087759466e-06, "loss": 0.1569, "step": 17088 }, { "epoch": 0.49854133846782195, "grad_norm": 0.9729144870371288, "learning_rate": 5.266562647670107e-06, "loss": 0.1534, "step": 17089 }, { "epoch": 0.4985705116984655, "grad_norm": 0.798530296346233, "learning_rate": 5.266090884184462e-06, "loss": 0.137, "step": 17090 }, { "epoch": 0.49859968492910905, "grad_norm": 0.6972318621149772, "learning_rate": 5.265619118323218e-06, "loss": 0.1275, "step": 17091 }, { "epoch": 0.4986288581597526, "grad_norm": 0.9491328908344574, "learning_rate": 5.2651473500905925e-06, "loss": 0.1149, "step": 17092 }, { "epoch": 0.49865803139039616, "grad_norm": 0.9503833956624851, "learning_rate": 5.264675579490793e-06, "loss": 0.1386, "step": 17093 }, { "epoch": 0.4986872046210397, "grad_norm": 0.5601770519055654, "learning_rate": 5.264203806528034e-06, "loss": 0.1171, "step": 17094 }, { "epoch": 0.49871637785168327, "grad_norm": 1.0063804905034068, "learning_rate": 5.263732031206527e-06, "loss": 0.1451, "step": 17095 }, { "epoch": 0.4987455510823269, "grad_norm": 0.9061470755468986, "learning_rate": 5.263260253530482e-06, "loss": 0.1304, "step": 17096 }, { "epoch": 0.49877472431297043, "grad_norm": 0.7608954569541991, "learning_rate": 5.262788473504112e-06, "loss": 0.1301, "step": 17097 }, { "epoch": 0.498803897543614, "grad_norm": 0.6842391012230838, "learning_rate": 5.262316691131631e-06, "loss": 0.1284, "step": 17098 }, { "epoch": 0.49883307077425754, "grad_norm": 0.8127145270545167, "learning_rate": 5.261844906417249e-06, "loss": 0.124, "step": 17099 }, { "epoch": 0.4988622440049011, "grad_norm": 0.9134184916856534, "learning_rate": 5.261373119365176e-06, "loss": 0.1242, "step": 17100 }, { "epoch": 0.49889141723554464, "grad_norm": 0.809696198233572, "learning_rate": 5.260901329979628e-06, "loss": 0.1367, "step": 17101 }, { "epoch": 0.49892059046618825, "grad_norm": 0.7523160407219962, "learning_rate": 5.260429538264816e-06, "loss": 0.1291, "step": 17102 }, { "epoch": 0.4989497636968318, "grad_norm": 0.7269267040884179, "learning_rate": 5.2599577442249496e-06, "loss": 0.1219, "step": 17103 }, { "epoch": 0.49897893692747536, "grad_norm": 0.7285468407026217, "learning_rate": 5.259485947864242e-06, "loss": 0.1026, "step": 17104 }, { "epoch": 0.4990081101581189, "grad_norm": 0.9600386351774044, "learning_rate": 5.259014149186908e-06, "loss": 0.1585, "step": 17105 }, { "epoch": 0.49903728338876246, "grad_norm": 0.7941039515535414, "learning_rate": 5.258542348197157e-06, "loss": 0.1416, "step": 17106 }, { "epoch": 0.499066456619406, "grad_norm": 0.8670383025657609, "learning_rate": 5.258070544899201e-06, "loss": 0.1206, "step": 17107 }, { "epoch": 0.49909562985004957, "grad_norm": 0.8872511624243985, "learning_rate": 5.257598739297253e-06, "loss": 0.1621, "step": 17108 }, { "epoch": 0.4991248030806932, "grad_norm": 1.6453368524010874, "learning_rate": 5.257126931395524e-06, "loss": 0.1462, "step": 17109 }, { "epoch": 0.49915397631133673, "grad_norm": 0.9544847583473068, "learning_rate": 5.256655121198229e-06, "loss": 0.1182, "step": 17110 }, { "epoch": 0.4991831495419803, "grad_norm": 0.9484962867566957, "learning_rate": 5.256183308709577e-06, "loss": 0.1488, "step": 17111 }, { "epoch": 0.49921232277262384, "grad_norm": 0.8161254533832683, "learning_rate": 5.255711493933781e-06, "loss": 0.1282, "step": 17112 }, { "epoch": 0.4992414960032674, "grad_norm": 1.0173781251841534, "learning_rate": 5.255239676875055e-06, "loss": 0.1635, "step": 17113 }, { "epoch": 0.49927066923391095, "grad_norm": 0.9580842003699206, "learning_rate": 5.254767857537611e-06, "loss": 0.1376, "step": 17114 }, { "epoch": 0.4992998424645545, "grad_norm": 0.7094881150961431, "learning_rate": 5.254296035925658e-06, "loss": 0.1143, "step": 17115 }, { "epoch": 0.4993290156951981, "grad_norm": 1.0444373873103063, "learning_rate": 5.253824212043411e-06, "loss": 0.1381, "step": 17116 }, { "epoch": 0.49935818892584166, "grad_norm": 1.245752921310101, "learning_rate": 5.253352385895085e-06, "loss": 0.1466, "step": 17117 }, { "epoch": 0.4993873621564852, "grad_norm": 0.8277694308556317, "learning_rate": 5.252880557484886e-06, "loss": 0.1134, "step": 17118 }, { "epoch": 0.49941653538712877, "grad_norm": 0.6658102339743692, "learning_rate": 5.252408726817031e-06, "loss": 0.1406, "step": 17119 }, { "epoch": 0.4994457086177723, "grad_norm": 2.0027386387552615, "learning_rate": 5.251936893895732e-06, "loss": 0.1202, "step": 17120 }, { "epoch": 0.4994748818484159, "grad_norm": 1.0980822142946722, "learning_rate": 5.251465058725198e-06, "loss": 0.1448, "step": 17121 }, { "epoch": 0.49950405507905943, "grad_norm": 1.2828284325790056, "learning_rate": 5.250993221309647e-06, "loss": 0.1389, "step": 17122 }, { "epoch": 0.49953322830970304, "grad_norm": 0.7465328188197644, "learning_rate": 5.250521381653287e-06, "loss": 0.1369, "step": 17123 }, { "epoch": 0.4995624015403466, "grad_norm": 0.6626564926755844, "learning_rate": 5.250049539760332e-06, "loss": 0.1179, "step": 17124 }, { "epoch": 0.49959157477099014, "grad_norm": 0.7576224810787092, "learning_rate": 5.249577695634994e-06, "loss": 0.143, "step": 17125 }, { "epoch": 0.4996207480016337, "grad_norm": 0.7086279516859726, "learning_rate": 5.2491058492814875e-06, "loss": 0.119, "step": 17126 }, { "epoch": 0.49964992123227725, "grad_norm": 0.9251884333562526, "learning_rate": 5.248634000704021e-06, "loss": 0.1602, "step": 17127 }, { "epoch": 0.4996790944629208, "grad_norm": 0.7102153188699789, "learning_rate": 5.248162149906811e-06, "loss": 0.1381, "step": 17128 }, { "epoch": 0.4997082676935644, "grad_norm": 0.6747754949169598, "learning_rate": 5.247690296894069e-06, "loss": 0.1293, "step": 17129 }, { "epoch": 0.49973744092420797, "grad_norm": 0.8786216402254168, "learning_rate": 5.247218441670005e-06, "loss": 0.1292, "step": 17130 }, { "epoch": 0.4997666141548515, "grad_norm": 1.1623128843874437, "learning_rate": 5.246746584238837e-06, "loss": 0.1353, "step": 17131 }, { "epoch": 0.49979578738549507, "grad_norm": 0.7744472521828143, "learning_rate": 5.246274724604773e-06, "loss": 0.1321, "step": 17132 }, { "epoch": 0.4998249606161386, "grad_norm": 0.7848661400042279, "learning_rate": 5.245802862772026e-06, "loss": 0.1452, "step": 17133 }, { "epoch": 0.4998541338467822, "grad_norm": 1.0082561941495158, "learning_rate": 5.24533099874481e-06, "loss": 0.1612, "step": 17134 }, { "epoch": 0.49988330707742573, "grad_norm": 0.9243029079990752, "learning_rate": 5.244859132527339e-06, "loss": 0.1464, "step": 17135 }, { "epoch": 0.49991248030806934, "grad_norm": 0.8644881692321109, "learning_rate": 5.2443872641238215e-06, "loss": 0.1561, "step": 17136 }, { "epoch": 0.4999416535387129, "grad_norm": 1.1887181921751124, "learning_rate": 5.243915393538476e-06, "loss": 0.1301, "step": 17137 }, { "epoch": 0.49997082676935645, "grad_norm": 1.118070939766289, "learning_rate": 5.2434435207755094e-06, "loss": 0.1285, "step": 17138 }, { "epoch": 0.5, "grad_norm": 0.9731561634333157, "learning_rate": 5.242971645839139e-06, "loss": 0.1201, "step": 17139 }, { "epoch": 0.5000291732306436, "grad_norm": 1.3684619331189036, "learning_rate": 5.242499768733574e-06, "loss": 0.1311, "step": 17140 }, { "epoch": 0.5000583464612871, "grad_norm": 1.0099767079129824, "learning_rate": 5.24202788946303e-06, "loss": 0.1416, "step": 17141 }, { "epoch": 0.5000875196919307, "grad_norm": 1.035754821415772, "learning_rate": 5.2415560080317184e-06, "loss": 0.1365, "step": 17142 }, { "epoch": 0.5001166929225742, "grad_norm": 0.7112097320370867, "learning_rate": 5.241084124443854e-06, "loss": 0.1017, "step": 17143 }, { "epoch": 0.5001458661532178, "grad_norm": 0.8004353611466325, "learning_rate": 5.240612238703646e-06, "loss": 0.1241, "step": 17144 }, { "epoch": 0.5001750393838613, "grad_norm": 1.120319803724126, "learning_rate": 5.24014035081531e-06, "loss": 0.1161, "step": 17145 }, { "epoch": 0.500204212614505, "grad_norm": 0.8980134409796721, "learning_rate": 5.239668460783059e-06, "loss": 0.1249, "step": 17146 }, { "epoch": 0.5002333858451485, "grad_norm": 0.7903064197778377, "learning_rate": 5.239196568611105e-06, "loss": 0.1249, "step": 17147 }, { "epoch": 0.5002625590757921, "grad_norm": 0.8356499527549497, "learning_rate": 5.2387246743036595e-06, "loss": 0.1364, "step": 17148 }, { "epoch": 0.5002917323064356, "grad_norm": 0.886421625560439, "learning_rate": 5.238252777864938e-06, "loss": 0.1397, "step": 17149 }, { "epoch": 0.5003209055370792, "grad_norm": 0.6963085457001312, "learning_rate": 5.237780879299155e-06, "loss": 0.1148, "step": 17150 }, { "epoch": 0.5003500787677228, "grad_norm": 1.025842471472968, "learning_rate": 5.237308978610517e-06, "loss": 0.139, "step": 17151 }, { "epoch": 0.5003792519983663, "grad_norm": 0.794770099589364, "learning_rate": 5.236837075803244e-06, "loss": 0.1493, "step": 17152 }, { "epoch": 0.5004084252290099, "grad_norm": 0.780344725681366, "learning_rate": 5.236365170881545e-06, "loss": 0.1188, "step": 17153 }, { "epoch": 0.5004375984596534, "grad_norm": 0.8627918229814081, "learning_rate": 5.235893263849635e-06, "loss": 0.127, "step": 17154 }, { "epoch": 0.500466771690297, "grad_norm": 0.8956561634544297, "learning_rate": 5.2354213547117246e-06, "loss": 0.1147, "step": 17155 }, { "epoch": 0.5004959449209405, "grad_norm": 0.8951160083460781, "learning_rate": 5.234949443472031e-06, "loss": 0.1633, "step": 17156 }, { "epoch": 0.5005251181515841, "grad_norm": 0.6589209539871892, "learning_rate": 5.234477530134763e-06, "loss": 0.1147, "step": 17157 }, { "epoch": 0.5005542913822276, "grad_norm": 0.8054137191463541, "learning_rate": 5.2340056147041356e-06, "loss": 0.1418, "step": 17158 }, { "epoch": 0.5005834646128712, "grad_norm": 0.9100534464272424, "learning_rate": 5.233533697184362e-06, "loss": 0.113, "step": 17159 }, { "epoch": 0.5006126378435148, "grad_norm": 0.8610173686244597, "learning_rate": 5.233061777579656e-06, "loss": 0.1306, "step": 17160 }, { "epoch": 0.5006418110741584, "grad_norm": 0.6738213603824865, "learning_rate": 5.23258985589423e-06, "loss": 0.1144, "step": 17161 }, { "epoch": 0.500670984304802, "grad_norm": 0.81525623459888, "learning_rate": 5.232117932132298e-06, "loss": 0.1318, "step": 17162 }, { "epoch": 0.5007001575354455, "grad_norm": 0.7610579385189001, "learning_rate": 5.23164600629807e-06, "loss": 0.1357, "step": 17163 }, { "epoch": 0.500729330766089, "grad_norm": 0.7452739226198208, "learning_rate": 5.231174078395763e-06, "loss": 0.1253, "step": 17164 }, { "epoch": 0.5007585039967326, "grad_norm": 0.7060718897728941, "learning_rate": 5.230702148429591e-06, "loss": 0.1226, "step": 17165 }, { "epoch": 0.5007876772273762, "grad_norm": 0.622535066895069, "learning_rate": 5.230230216403762e-06, "loss": 0.1507, "step": 17166 }, { "epoch": 0.5008168504580197, "grad_norm": 0.8489303336404533, "learning_rate": 5.2297582823224955e-06, "loss": 0.1125, "step": 17167 }, { "epoch": 0.5008460236886633, "grad_norm": 0.8558647579546741, "learning_rate": 5.22928634619e-06, "loss": 0.1231, "step": 17168 }, { "epoch": 0.5008751969193068, "grad_norm": 0.6552461441578921, "learning_rate": 5.228814408010492e-06, "loss": 0.1494, "step": 17169 }, { "epoch": 0.5009043701499504, "grad_norm": 0.7765841525541597, "learning_rate": 5.228342467788182e-06, "loss": 0.1229, "step": 17170 }, { "epoch": 0.5009335433805939, "grad_norm": 0.9703386582263274, "learning_rate": 5.2278705255272866e-06, "loss": 0.1409, "step": 17171 }, { "epoch": 0.5009627166112375, "grad_norm": 0.782969804611631, "learning_rate": 5.227398581232016e-06, "loss": 0.1373, "step": 17172 }, { "epoch": 0.5009918898418811, "grad_norm": 0.7574228508574635, "learning_rate": 5.226926634906586e-06, "loss": 0.1392, "step": 17173 }, { "epoch": 0.5010210630725247, "grad_norm": 0.7353146706328247, "learning_rate": 5.226454686555209e-06, "loss": 0.1391, "step": 17174 }, { "epoch": 0.5010502363031683, "grad_norm": 0.8493344005529188, "learning_rate": 5.225982736182099e-06, "loss": 0.1193, "step": 17175 }, { "epoch": 0.5010794095338118, "grad_norm": 0.7026209480147556, "learning_rate": 5.2255107837914685e-06, "loss": 0.1357, "step": 17176 }, { "epoch": 0.5011085827644554, "grad_norm": 2.137751931106153, "learning_rate": 5.225038829387533e-06, "loss": 0.1389, "step": 17177 }, { "epoch": 0.5011377559950989, "grad_norm": 0.7991677006618545, "learning_rate": 5.224566872974502e-06, "loss": 0.1473, "step": 17178 }, { "epoch": 0.5011669292257425, "grad_norm": 0.674882459748988, "learning_rate": 5.2240949145565935e-06, "loss": 0.1323, "step": 17179 }, { "epoch": 0.501196102456386, "grad_norm": 0.8200767887070802, "learning_rate": 5.22362295413802e-06, "loss": 0.1507, "step": 17180 }, { "epoch": 0.5012252756870296, "grad_norm": 0.9659816451627661, "learning_rate": 5.223150991722992e-06, "loss": 0.1223, "step": 17181 }, { "epoch": 0.5012544489176731, "grad_norm": 0.9508972043286074, "learning_rate": 5.222679027315727e-06, "loss": 0.156, "step": 17182 }, { "epoch": 0.5012836221483167, "grad_norm": 1.0595972555282738, "learning_rate": 5.2222070609204355e-06, "loss": 0.1317, "step": 17183 }, { "epoch": 0.5013127953789602, "grad_norm": 0.8614280346225054, "learning_rate": 5.221735092541332e-06, "loss": 0.1532, "step": 17184 }, { "epoch": 0.5013419686096038, "grad_norm": 0.884064970889249, "learning_rate": 5.2212631221826315e-06, "loss": 0.117, "step": 17185 }, { "epoch": 0.5013711418402473, "grad_norm": 0.7387140232644068, "learning_rate": 5.220791149848547e-06, "loss": 0.128, "step": 17186 }, { "epoch": 0.501400315070891, "grad_norm": 0.883379906728551, "learning_rate": 5.22031917554329e-06, "loss": 0.1153, "step": 17187 }, { "epoch": 0.5014294883015346, "grad_norm": 0.8607105100624723, "learning_rate": 5.219847199271078e-06, "loss": 0.1654, "step": 17188 }, { "epoch": 0.5014586615321781, "grad_norm": 0.8471029065213588, "learning_rate": 5.219375221036122e-06, "loss": 0.1217, "step": 17189 }, { "epoch": 0.5014878347628217, "grad_norm": 0.9251528919789282, "learning_rate": 5.218903240842635e-06, "loss": 0.1559, "step": 17190 }, { "epoch": 0.5015170079934652, "grad_norm": 0.7348551840984193, "learning_rate": 5.218431258694833e-06, "loss": 0.1461, "step": 17191 }, { "epoch": 0.5015461812241088, "grad_norm": 0.8236715980687773, "learning_rate": 5.217959274596931e-06, "loss": 0.1364, "step": 17192 }, { "epoch": 0.5015753544547523, "grad_norm": 1.0239782261428787, "learning_rate": 5.217487288553138e-06, "loss": 0.1323, "step": 17193 }, { "epoch": 0.5016045276853959, "grad_norm": 1.0428771227201818, "learning_rate": 5.2170153005676715e-06, "loss": 0.1445, "step": 17194 }, { "epoch": 0.5016337009160394, "grad_norm": 0.8244842854108386, "learning_rate": 5.216543310644745e-06, "loss": 0.1448, "step": 17195 }, { "epoch": 0.501662874146683, "grad_norm": 1.141564332369589, "learning_rate": 5.216071318788569e-06, "loss": 0.1586, "step": 17196 }, { "epoch": 0.5016920473773265, "grad_norm": 1.0340795327004673, "learning_rate": 5.215599325003362e-06, "loss": 0.1437, "step": 17197 }, { "epoch": 0.5017212206079701, "grad_norm": 0.7727040580340079, "learning_rate": 5.215127329293336e-06, "loss": 0.1219, "step": 17198 }, { "epoch": 0.5017503938386136, "grad_norm": 0.8878697654869425, "learning_rate": 5.214655331662703e-06, "loss": 0.1405, "step": 17199 }, { "epoch": 0.5017795670692573, "grad_norm": 0.8413827455853727, "learning_rate": 5.2141833321156785e-06, "loss": 0.1363, "step": 17200 }, { "epoch": 0.5018087402999009, "grad_norm": 0.8404351792994844, "learning_rate": 5.213711330656478e-06, "loss": 0.133, "step": 17201 }, { "epoch": 0.5018379135305444, "grad_norm": 1.5971743469107753, "learning_rate": 5.213239327289312e-06, "loss": 0.1466, "step": 17202 }, { "epoch": 0.501867086761188, "grad_norm": 0.8603876780371584, "learning_rate": 5.212767322018397e-06, "loss": 0.151, "step": 17203 }, { "epoch": 0.5018962599918315, "grad_norm": 0.9672789957744976, "learning_rate": 5.212295314847946e-06, "loss": 0.1923, "step": 17204 }, { "epoch": 0.5019254332224751, "grad_norm": 0.7749333811009574, "learning_rate": 5.211823305782173e-06, "loss": 0.1469, "step": 17205 }, { "epoch": 0.5019546064531186, "grad_norm": 0.7871876121019177, "learning_rate": 5.211351294825292e-06, "loss": 0.1061, "step": 17206 }, { "epoch": 0.5019837796837622, "grad_norm": 0.7937711212665466, "learning_rate": 5.210879281981518e-06, "loss": 0.1079, "step": 17207 }, { "epoch": 0.5020129529144057, "grad_norm": 0.7134331140405762, "learning_rate": 5.210407267255062e-06, "loss": 0.1367, "step": 17208 }, { "epoch": 0.5020421261450493, "grad_norm": 0.7591675522412211, "learning_rate": 5.209935250650142e-06, "loss": 0.1272, "step": 17209 }, { "epoch": 0.5020712993756928, "grad_norm": 0.8322558863772237, "learning_rate": 5.2094632321709705e-06, "loss": 0.1512, "step": 17210 }, { "epoch": 0.5021004726063364, "grad_norm": 0.754612848043372, "learning_rate": 5.20899121182176e-06, "loss": 0.1352, "step": 17211 }, { "epoch": 0.5021296458369799, "grad_norm": 0.8021606134855779, "learning_rate": 5.2085191896067265e-06, "loss": 0.1252, "step": 17212 }, { "epoch": 0.5021588190676235, "grad_norm": 0.950624928019119, "learning_rate": 5.208047165530083e-06, "loss": 0.1273, "step": 17213 }, { "epoch": 0.5021879922982672, "grad_norm": 0.768628499878225, "learning_rate": 5.207575139596045e-06, "loss": 0.1417, "step": 17214 }, { "epoch": 0.5022171655289107, "grad_norm": 0.8059146562851579, "learning_rate": 5.2071031118088255e-06, "loss": 0.1371, "step": 17215 }, { "epoch": 0.5022463387595543, "grad_norm": 0.8437210934009688, "learning_rate": 5.206631082172638e-06, "loss": 0.143, "step": 17216 }, { "epoch": 0.5022755119901978, "grad_norm": 0.7986468902053767, "learning_rate": 5.206159050691698e-06, "loss": 0.1444, "step": 17217 }, { "epoch": 0.5023046852208414, "grad_norm": 0.8032047586295259, "learning_rate": 5.205687017370219e-06, "loss": 0.1155, "step": 17218 }, { "epoch": 0.5023338584514849, "grad_norm": 0.7235984459577574, "learning_rate": 5.205214982212416e-06, "loss": 0.1363, "step": 17219 }, { "epoch": 0.5023630316821285, "grad_norm": 0.82779791602683, "learning_rate": 5.204742945222502e-06, "loss": 0.1315, "step": 17220 }, { "epoch": 0.502392204912772, "grad_norm": 0.9646787226862298, "learning_rate": 5.204270906404692e-06, "loss": 0.1261, "step": 17221 }, { "epoch": 0.5024213781434156, "grad_norm": 0.7659089965180165, "learning_rate": 5.203798865763201e-06, "loss": 0.1122, "step": 17222 }, { "epoch": 0.5024505513740591, "grad_norm": 0.9048215158293285, "learning_rate": 5.20332682330224e-06, "loss": 0.1357, "step": 17223 }, { "epoch": 0.5024797246047027, "grad_norm": 1.0139816087897788, "learning_rate": 5.202854779026028e-06, "loss": 0.128, "step": 17224 }, { "epoch": 0.5025088978353462, "grad_norm": 0.8499543411608957, "learning_rate": 5.202382732938777e-06, "loss": 0.1157, "step": 17225 }, { "epoch": 0.5025380710659898, "grad_norm": 0.8338528415467421, "learning_rate": 5.201910685044699e-06, "loss": 0.1214, "step": 17226 }, { "epoch": 0.5025672442966335, "grad_norm": 0.8279495772859653, "learning_rate": 5.201438635348013e-06, "loss": 0.1572, "step": 17227 }, { "epoch": 0.502596417527277, "grad_norm": 1.0496619765438127, "learning_rate": 5.20096658385293e-06, "loss": 0.1218, "step": 17228 }, { "epoch": 0.5026255907579206, "grad_norm": 0.7130666128350417, "learning_rate": 5.2004945305636656e-06, "loss": 0.1158, "step": 17229 }, { "epoch": 0.5026547639885641, "grad_norm": 0.7951027918329903, "learning_rate": 5.200022475484433e-06, "loss": 0.1284, "step": 17230 }, { "epoch": 0.5026839372192077, "grad_norm": 1.0339598951541478, "learning_rate": 5.1995504186194476e-06, "loss": 0.1598, "step": 17231 }, { "epoch": 0.5027131104498512, "grad_norm": 0.8752915020425636, "learning_rate": 5.199078359972925e-06, "loss": 0.1205, "step": 17232 }, { "epoch": 0.5027422836804948, "grad_norm": 1.0506697388721178, "learning_rate": 5.198606299549077e-06, "loss": 0.1383, "step": 17233 }, { "epoch": 0.5027714569111383, "grad_norm": 1.0142581175250243, "learning_rate": 5.198134237352121e-06, "loss": 0.148, "step": 17234 }, { "epoch": 0.5028006301417819, "grad_norm": 0.8689653063950162, "learning_rate": 5.1976621733862675e-06, "loss": 0.1228, "step": 17235 }, { "epoch": 0.5028298033724254, "grad_norm": 0.9116054998973984, "learning_rate": 5.197190107655735e-06, "loss": 0.1437, "step": 17236 }, { "epoch": 0.502858976603069, "grad_norm": 1.1149824829170243, "learning_rate": 5.196718040164737e-06, "loss": 0.134, "step": 17237 }, { "epoch": 0.5028881498337125, "grad_norm": 1.1332287799316638, "learning_rate": 5.196245970917485e-06, "loss": 0.1562, "step": 17238 }, { "epoch": 0.5029173230643561, "grad_norm": 0.8639495800020006, "learning_rate": 5.195773899918196e-06, "loss": 0.1352, "step": 17239 }, { "epoch": 0.5029464962949997, "grad_norm": 0.8801817825528243, "learning_rate": 5.195301827171086e-06, "loss": 0.1466, "step": 17240 }, { "epoch": 0.5029756695256433, "grad_norm": 0.9851435143146585, "learning_rate": 5.194829752680367e-06, "loss": 0.1381, "step": 17241 }, { "epoch": 0.5030048427562869, "grad_norm": 0.9734840545233086, "learning_rate": 5.194357676450256e-06, "loss": 0.1477, "step": 17242 }, { "epoch": 0.5030340159869304, "grad_norm": 0.7232070671923675, "learning_rate": 5.1938855984849645e-06, "loss": 0.1174, "step": 17243 }, { "epoch": 0.503063189217574, "grad_norm": 0.9599050197452415, "learning_rate": 5.193413518788709e-06, "loss": 0.1448, "step": 17244 }, { "epoch": 0.5030923624482175, "grad_norm": 0.8497767215530986, "learning_rate": 5.192941437365704e-06, "loss": 0.1379, "step": 17245 }, { "epoch": 0.5031215356788611, "grad_norm": 0.9261743928033254, "learning_rate": 5.192469354220163e-06, "loss": 0.1252, "step": 17246 }, { "epoch": 0.5031507089095046, "grad_norm": 0.7747379280891233, "learning_rate": 5.191997269356302e-06, "loss": 0.1194, "step": 17247 }, { "epoch": 0.5031798821401482, "grad_norm": 0.8353798486093982, "learning_rate": 5.1915251827783355e-06, "loss": 0.1383, "step": 17248 }, { "epoch": 0.5032090553707917, "grad_norm": 1.078824034442684, "learning_rate": 5.191053094490477e-06, "loss": 0.1344, "step": 17249 }, { "epoch": 0.5032382286014353, "grad_norm": 0.7618045597560886, "learning_rate": 5.190581004496943e-06, "loss": 0.1263, "step": 17250 }, { "epoch": 0.5032674018320789, "grad_norm": 0.6335554826591071, "learning_rate": 5.190108912801948e-06, "loss": 0.1226, "step": 17251 }, { "epoch": 0.5032965750627224, "grad_norm": 0.8226690093523454, "learning_rate": 5.189636819409706e-06, "loss": 0.1391, "step": 17252 }, { "epoch": 0.503325748293366, "grad_norm": 0.8102035247872031, "learning_rate": 5.1891647243244295e-06, "loss": 0.1238, "step": 17253 }, { "epoch": 0.5033549215240096, "grad_norm": 0.8244272403590951, "learning_rate": 5.188692627550337e-06, "loss": 0.1511, "step": 17254 }, { "epoch": 0.5033840947546532, "grad_norm": 1.0458971247754452, "learning_rate": 5.188220529091642e-06, "loss": 0.1224, "step": 17255 }, { "epoch": 0.5034132679852967, "grad_norm": 0.753577592633354, "learning_rate": 5.187748428952557e-06, "loss": 0.1357, "step": 17256 }, { "epoch": 0.5034424412159403, "grad_norm": 0.853675461914812, "learning_rate": 5.187276327137302e-06, "loss": 0.1338, "step": 17257 }, { "epoch": 0.5034716144465838, "grad_norm": 1.1959383984510523, "learning_rate": 5.186804223650087e-06, "loss": 0.1496, "step": 17258 }, { "epoch": 0.5035007876772274, "grad_norm": 0.708652813153985, "learning_rate": 5.1863321184951285e-06, "loss": 0.1222, "step": 17259 }, { "epoch": 0.5035299609078709, "grad_norm": 1.0634244139453963, "learning_rate": 5.185860011676643e-06, "loss": 0.1452, "step": 17260 }, { "epoch": 0.5035591341385145, "grad_norm": 0.8887788334081251, "learning_rate": 5.185387903198841e-06, "loss": 0.1542, "step": 17261 }, { "epoch": 0.503588307369158, "grad_norm": 0.8628642665490859, "learning_rate": 5.184915793065941e-06, "loss": 0.1375, "step": 17262 }, { "epoch": 0.5036174805998016, "grad_norm": 0.8509809838676219, "learning_rate": 5.184443681282157e-06, "loss": 0.1186, "step": 17263 }, { "epoch": 0.5036466538304452, "grad_norm": 1.1490226197470912, "learning_rate": 5.183971567851704e-06, "loss": 0.1483, "step": 17264 }, { "epoch": 0.5036758270610887, "grad_norm": 0.8110213708126766, "learning_rate": 5.183499452778797e-06, "loss": 0.1401, "step": 17265 }, { "epoch": 0.5037050002917323, "grad_norm": 1.1587106322731249, "learning_rate": 5.183027336067649e-06, "loss": 0.1303, "step": 17266 }, { "epoch": 0.5037341735223758, "grad_norm": 0.8795036241011459, "learning_rate": 5.182555217722479e-06, "loss": 0.1396, "step": 17267 }, { "epoch": 0.5037633467530195, "grad_norm": 0.777523482896419, "learning_rate": 5.182083097747499e-06, "loss": 0.158, "step": 17268 }, { "epoch": 0.503792519983663, "grad_norm": 1.1357611384784294, "learning_rate": 5.181610976146924e-06, "loss": 0.1344, "step": 17269 }, { "epoch": 0.5038216932143066, "grad_norm": 1.185696125110025, "learning_rate": 5.1811388529249695e-06, "loss": 0.126, "step": 17270 }, { "epoch": 0.5038508664449501, "grad_norm": 0.9539166554928546, "learning_rate": 5.180666728085852e-06, "loss": 0.1149, "step": 17271 }, { "epoch": 0.5038800396755937, "grad_norm": 0.9670610747291339, "learning_rate": 5.180194601633784e-06, "loss": 0.129, "step": 17272 }, { "epoch": 0.5039092129062372, "grad_norm": 0.7969008634462226, "learning_rate": 5.179722473572982e-06, "loss": 0.1302, "step": 17273 }, { "epoch": 0.5039383861368808, "grad_norm": 0.7885981007998627, "learning_rate": 5.17925034390766e-06, "loss": 0.1476, "step": 17274 }, { "epoch": 0.5039675593675244, "grad_norm": 1.2347809487692973, "learning_rate": 5.178778212642034e-06, "loss": 0.1478, "step": 17275 }, { "epoch": 0.5039967325981679, "grad_norm": 0.7202792160277384, "learning_rate": 5.178306079780318e-06, "loss": 0.1397, "step": 17276 }, { "epoch": 0.5040259058288115, "grad_norm": 0.7922208300396085, "learning_rate": 5.177833945326729e-06, "loss": 0.1454, "step": 17277 }, { "epoch": 0.504055079059455, "grad_norm": 1.0010676963995142, "learning_rate": 5.17736180928548e-06, "loss": 0.1166, "step": 17278 }, { "epoch": 0.5040842522900986, "grad_norm": 0.7995820822926964, "learning_rate": 5.176889671660789e-06, "loss": 0.1111, "step": 17279 }, { "epoch": 0.5041134255207421, "grad_norm": 0.6889976012705192, "learning_rate": 5.176417532456868e-06, "loss": 0.1149, "step": 17280 }, { "epoch": 0.5041425987513857, "grad_norm": 1.2054995080238184, "learning_rate": 5.175945391677932e-06, "loss": 0.1591, "step": 17281 }, { "epoch": 0.5041717719820293, "grad_norm": 0.7467556408446708, "learning_rate": 5.175473249328199e-06, "loss": 0.0966, "step": 17282 }, { "epoch": 0.5042009452126729, "grad_norm": 0.7376648464058364, "learning_rate": 5.175001105411883e-06, "loss": 0.1376, "step": 17283 }, { "epoch": 0.5042301184433164, "grad_norm": 0.7796029587698763, "learning_rate": 5.174528959933198e-06, "loss": 0.1686, "step": 17284 }, { "epoch": 0.50425929167396, "grad_norm": 0.937693107754778, "learning_rate": 5.1740568128963605e-06, "loss": 0.1515, "step": 17285 }, { "epoch": 0.5042884649046036, "grad_norm": 0.761824456179311, "learning_rate": 5.173584664305587e-06, "loss": 0.1281, "step": 17286 }, { "epoch": 0.5043176381352471, "grad_norm": 0.7372483380958332, "learning_rate": 5.173112514165089e-06, "loss": 0.1194, "step": 17287 }, { "epoch": 0.5043468113658907, "grad_norm": 0.8701508919391344, "learning_rate": 5.1726403624790834e-06, "loss": 0.1265, "step": 17288 }, { "epoch": 0.5043759845965342, "grad_norm": 0.6910382203783673, "learning_rate": 5.172168209251788e-06, "loss": 0.124, "step": 17289 }, { "epoch": 0.5044051578271778, "grad_norm": 1.2675084606568687, "learning_rate": 5.171696054487415e-06, "loss": 0.1611, "step": 17290 }, { "epoch": 0.5044343310578213, "grad_norm": 0.8944759267988633, "learning_rate": 5.171223898190178e-06, "loss": 0.1395, "step": 17291 }, { "epoch": 0.5044635042884649, "grad_norm": 0.9528528080657033, "learning_rate": 5.170751740364299e-06, "loss": 0.1302, "step": 17292 }, { "epoch": 0.5044926775191084, "grad_norm": 0.9272631413814407, "learning_rate": 5.170279581013987e-06, "loss": 0.1146, "step": 17293 }, { "epoch": 0.504521850749752, "grad_norm": 0.6632904208785081, "learning_rate": 5.16980742014346e-06, "loss": 0.1361, "step": 17294 }, { "epoch": 0.5045510239803956, "grad_norm": 0.7432749761201312, "learning_rate": 5.169335257756933e-06, "loss": 0.1313, "step": 17295 }, { "epoch": 0.5045801972110392, "grad_norm": 0.8658986817112828, "learning_rate": 5.168863093858622e-06, "loss": 0.1258, "step": 17296 }, { "epoch": 0.5046093704416827, "grad_norm": 0.9471182737787873, "learning_rate": 5.1683909284527404e-06, "loss": 0.1364, "step": 17297 }, { "epoch": 0.5046385436723263, "grad_norm": 0.7554185249892994, "learning_rate": 5.1679187615435045e-06, "loss": 0.1465, "step": 17298 }, { "epoch": 0.5046677169029699, "grad_norm": 0.8796981762674053, "learning_rate": 5.16744659313513e-06, "loss": 0.143, "step": 17299 }, { "epoch": 0.5046968901336134, "grad_norm": 0.778147412229105, "learning_rate": 5.1669744232318345e-06, "loss": 0.1454, "step": 17300 }, { "epoch": 0.504726063364257, "grad_norm": 0.6812293522365617, "learning_rate": 5.1665022518378285e-06, "loss": 0.131, "step": 17301 }, { "epoch": 0.5047552365949005, "grad_norm": 1.0503963227237636, "learning_rate": 5.166030078957333e-06, "loss": 0.1555, "step": 17302 }, { "epoch": 0.5047844098255441, "grad_norm": 0.7414429945332138, "learning_rate": 5.165557904594557e-06, "loss": 0.1221, "step": 17303 }, { "epoch": 0.5048135830561876, "grad_norm": 0.9916526761838073, "learning_rate": 5.165085728753723e-06, "loss": 0.1478, "step": 17304 }, { "epoch": 0.5048427562868312, "grad_norm": 0.9172534816491521, "learning_rate": 5.16461355143904e-06, "loss": 0.1365, "step": 17305 }, { "epoch": 0.5048719295174747, "grad_norm": 0.8760311130636964, "learning_rate": 5.164141372654728e-06, "loss": 0.163, "step": 17306 }, { "epoch": 0.5049011027481183, "grad_norm": 0.7033881953878998, "learning_rate": 5.163669192405002e-06, "loss": 0.1272, "step": 17307 }, { "epoch": 0.5049302759787618, "grad_norm": 0.7781406001882464, "learning_rate": 5.163197010694076e-06, "loss": 0.1351, "step": 17308 }, { "epoch": 0.5049594492094055, "grad_norm": 1.0139965614663418, "learning_rate": 5.162724827526164e-06, "loss": 0.1586, "step": 17309 }, { "epoch": 0.504988622440049, "grad_norm": 0.770099026281592, "learning_rate": 5.1622526429054855e-06, "loss": 0.1156, "step": 17310 }, { "epoch": 0.5050177956706926, "grad_norm": 0.7775863334155969, "learning_rate": 5.161780456836254e-06, "loss": 0.1316, "step": 17311 }, { "epoch": 0.5050469689013362, "grad_norm": 0.9193186397371704, "learning_rate": 5.161308269322684e-06, "loss": 0.1335, "step": 17312 }, { "epoch": 0.5050761421319797, "grad_norm": 0.961528245068585, "learning_rate": 5.160836080368994e-06, "loss": 0.1624, "step": 17313 }, { "epoch": 0.5051053153626233, "grad_norm": 0.8024931120018204, "learning_rate": 5.160363889979396e-06, "loss": 0.1126, "step": 17314 }, { "epoch": 0.5051344885932668, "grad_norm": 0.792742673458629, "learning_rate": 5.159891698158109e-06, "loss": 0.1508, "step": 17315 }, { "epoch": 0.5051636618239104, "grad_norm": 0.8145078846492846, "learning_rate": 5.159419504909346e-06, "loss": 0.1252, "step": 17316 }, { "epoch": 0.5051928350545539, "grad_norm": 1.1263377609224792, "learning_rate": 5.1589473102373265e-06, "loss": 0.1498, "step": 17317 }, { "epoch": 0.5052220082851975, "grad_norm": 0.7968997022564035, "learning_rate": 5.15847511414626e-06, "loss": 0.118, "step": 17318 }, { "epoch": 0.505251181515841, "grad_norm": 0.8437469819237442, "learning_rate": 5.1580029166403675e-06, "loss": 0.1354, "step": 17319 }, { "epoch": 0.5052803547464846, "grad_norm": 0.8186802097733634, "learning_rate": 5.157530717723862e-06, "loss": 0.1367, "step": 17320 }, { "epoch": 0.5053095279771281, "grad_norm": 0.7429109514425973, "learning_rate": 5.157058517400958e-06, "loss": 0.1568, "step": 17321 }, { "epoch": 0.5053387012077718, "grad_norm": 0.7847091485413135, "learning_rate": 5.156586315675877e-06, "loss": 0.1374, "step": 17322 }, { "epoch": 0.5053678744384154, "grad_norm": 1.0191237270795168, "learning_rate": 5.156114112552828e-06, "loss": 0.1348, "step": 17323 }, { "epoch": 0.5053970476690589, "grad_norm": 0.739147094279075, "learning_rate": 5.15564190803603e-06, "loss": 0.1297, "step": 17324 }, { "epoch": 0.5054262208997025, "grad_norm": 0.8209983422623021, "learning_rate": 5.1551697021296975e-06, "loss": 0.1397, "step": 17325 }, { "epoch": 0.505455394130346, "grad_norm": 0.9436519694130765, "learning_rate": 5.154697494838048e-06, "loss": 0.1406, "step": 17326 }, { "epoch": 0.5054845673609896, "grad_norm": 0.7957110386933195, "learning_rate": 5.154225286165296e-06, "loss": 0.1353, "step": 17327 }, { "epoch": 0.5055137405916331, "grad_norm": 1.012930471614181, "learning_rate": 5.153753076115657e-06, "loss": 0.1512, "step": 17328 }, { "epoch": 0.5055429138222767, "grad_norm": 0.5983101306414064, "learning_rate": 5.153280864693348e-06, "loss": 0.1356, "step": 17329 }, { "epoch": 0.5055720870529202, "grad_norm": 0.8152696892143733, "learning_rate": 5.152808651902583e-06, "loss": 0.1427, "step": 17330 }, { "epoch": 0.5056012602835638, "grad_norm": 1.0830767427558556, "learning_rate": 5.152336437747579e-06, "loss": 0.1408, "step": 17331 }, { "epoch": 0.5056304335142073, "grad_norm": 0.7452343454132013, "learning_rate": 5.1518642222325535e-06, "loss": 0.1188, "step": 17332 }, { "epoch": 0.5056596067448509, "grad_norm": 0.7369772632010159, "learning_rate": 5.151392005361719e-06, "loss": 0.1191, "step": 17333 }, { "epoch": 0.5056887799754944, "grad_norm": 0.9904101752353428, "learning_rate": 5.150919787139294e-06, "loss": 0.1746, "step": 17334 }, { "epoch": 0.505717953206138, "grad_norm": 1.0406323486541111, "learning_rate": 5.150447567569491e-06, "loss": 0.1254, "step": 17335 }, { "epoch": 0.5057471264367817, "grad_norm": 0.7075305093172395, "learning_rate": 5.149975346656528e-06, "loss": 0.1096, "step": 17336 }, { "epoch": 0.5057762996674252, "grad_norm": 0.702184811195534, "learning_rate": 5.149503124404624e-06, "loss": 0.1179, "step": 17337 }, { "epoch": 0.5058054728980688, "grad_norm": 1.0631778209550506, "learning_rate": 5.149030900817988e-06, "loss": 0.1211, "step": 17338 }, { "epoch": 0.5058346461287123, "grad_norm": 0.7921009141751055, "learning_rate": 5.148558675900842e-06, "loss": 0.1131, "step": 17339 }, { "epoch": 0.5058638193593559, "grad_norm": 0.8507068470545315, "learning_rate": 5.148086449657399e-06, "loss": 0.1414, "step": 17340 }, { "epoch": 0.5058929925899994, "grad_norm": 0.910500527934626, "learning_rate": 5.147614222091876e-06, "loss": 0.1339, "step": 17341 }, { "epoch": 0.505922165820643, "grad_norm": 1.0105572210231657, "learning_rate": 5.147141993208487e-06, "loss": 0.1495, "step": 17342 }, { "epoch": 0.5059513390512865, "grad_norm": 0.8055268268118744, "learning_rate": 5.146669763011452e-06, "loss": 0.1345, "step": 17343 }, { "epoch": 0.5059805122819301, "grad_norm": 0.8120024890477584, "learning_rate": 5.146197531504982e-06, "loss": 0.1315, "step": 17344 }, { "epoch": 0.5060096855125736, "grad_norm": 0.83127325265799, "learning_rate": 5.145725298693296e-06, "loss": 0.1173, "step": 17345 }, { "epoch": 0.5060388587432172, "grad_norm": 0.8872147149249974, "learning_rate": 5.145253064580609e-06, "loss": 0.1066, "step": 17346 }, { "epoch": 0.5060680319738607, "grad_norm": 0.8112256411173704, "learning_rate": 5.144780829171139e-06, "loss": 0.1231, "step": 17347 }, { "epoch": 0.5060972052045043, "grad_norm": 0.9873245820093021, "learning_rate": 5.1443085924690986e-06, "loss": 0.1734, "step": 17348 }, { "epoch": 0.506126378435148, "grad_norm": 1.1000236816155275, "learning_rate": 5.143836354478706e-06, "loss": 0.1439, "step": 17349 }, { "epoch": 0.5061555516657915, "grad_norm": 0.8240164434531867, "learning_rate": 5.143364115204178e-06, "loss": 0.1482, "step": 17350 }, { "epoch": 0.5061847248964351, "grad_norm": 0.80167303383792, "learning_rate": 5.142891874649727e-06, "loss": 0.1475, "step": 17351 }, { "epoch": 0.5062138981270786, "grad_norm": 0.7506224208283753, "learning_rate": 5.142419632819573e-06, "loss": 0.1247, "step": 17352 }, { "epoch": 0.5062430713577222, "grad_norm": 0.8844447321758908, "learning_rate": 5.14194738971793e-06, "loss": 0.1449, "step": 17353 }, { "epoch": 0.5062722445883657, "grad_norm": 0.7550763334019078, "learning_rate": 5.1414751453490154e-06, "loss": 0.1478, "step": 17354 }, { "epoch": 0.5063014178190093, "grad_norm": 0.7393954077232628, "learning_rate": 5.141002899717044e-06, "loss": 0.1353, "step": 17355 }, { "epoch": 0.5063305910496528, "grad_norm": 0.6858918738623919, "learning_rate": 5.140530652826232e-06, "loss": 0.1338, "step": 17356 }, { "epoch": 0.5063597642802964, "grad_norm": 0.7929363077110662, "learning_rate": 5.1400584046807955e-06, "loss": 0.1545, "step": 17357 }, { "epoch": 0.5063889375109399, "grad_norm": 0.8007111275954959, "learning_rate": 5.139586155284953e-06, "loss": 0.1383, "step": 17358 }, { "epoch": 0.5064181107415835, "grad_norm": 0.6879773730704335, "learning_rate": 5.139113904642916e-06, "loss": 0.1092, "step": 17359 }, { "epoch": 0.506447283972227, "grad_norm": 0.7878276610794269, "learning_rate": 5.138641652758904e-06, "loss": 0.1178, "step": 17360 }, { "epoch": 0.5064764572028706, "grad_norm": 0.756600483158757, "learning_rate": 5.138169399637134e-06, "loss": 0.1171, "step": 17361 }, { "epoch": 0.5065056304335142, "grad_norm": 0.6037140498171352, "learning_rate": 5.137697145281821e-06, "loss": 0.1156, "step": 17362 }, { "epoch": 0.5065348036641578, "grad_norm": 0.8472983398237784, "learning_rate": 5.137224889697178e-06, "loss": 0.1358, "step": 17363 }, { "epoch": 0.5065639768948014, "grad_norm": 0.7878403651131111, "learning_rate": 5.136752632887425e-06, "loss": 0.1225, "step": 17364 }, { "epoch": 0.5065931501254449, "grad_norm": 0.8571396047247545, "learning_rate": 5.136280374856778e-06, "loss": 0.1395, "step": 17365 }, { "epoch": 0.5066223233560885, "grad_norm": 0.8080667401260934, "learning_rate": 5.135808115609451e-06, "loss": 0.1165, "step": 17366 }, { "epoch": 0.506651496586732, "grad_norm": 0.8838439608089907, "learning_rate": 5.135335855149662e-06, "loss": 0.1404, "step": 17367 }, { "epoch": 0.5066806698173756, "grad_norm": 0.7789429548029285, "learning_rate": 5.134863593481628e-06, "loss": 0.1242, "step": 17368 }, { "epoch": 0.5067098430480191, "grad_norm": 0.7882215555047755, "learning_rate": 5.134391330609563e-06, "loss": 0.155, "step": 17369 }, { "epoch": 0.5067390162786627, "grad_norm": 0.8961196088251444, "learning_rate": 5.133919066537683e-06, "loss": 0.1711, "step": 17370 }, { "epoch": 0.5067681895093062, "grad_norm": 0.9771907644859715, "learning_rate": 5.133446801270207e-06, "loss": 0.1462, "step": 17371 }, { "epoch": 0.5067973627399498, "grad_norm": 0.7900261333947414, "learning_rate": 5.13297453481135e-06, "loss": 0.1387, "step": 17372 }, { "epoch": 0.5068265359705934, "grad_norm": 0.9798097602595854, "learning_rate": 5.1325022671653275e-06, "loss": 0.1571, "step": 17373 }, { "epoch": 0.5068557092012369, "grad_norm": 0.9566408462411358, "learning_rate": 5.1320299983363576e-06, "loss": 0.1191, "step": 17374 }, { "epoch": 0.5068848824318805, "grad_norm": 0.8072634113664551, "learning_rate": 5.131557728328655e-06, "loss": 0.1379, "step": 17375 }, { "epoch": 0.5069140556625241, "grad_norm": 0.773098212033721, "learning_rate": 5.131085457146435e-06, "loss": 0.1303, "step": 17376 }, { "epoch": 0.5069432288931677, "grad_norm": 0.8632777823305222, "learning_rate": 5.130613184793918e-06, "loss": 0.1422, "step": 17377 }, { "epoch": 0.5069724021238112, "grad_norm": 0.7959921007310494, "learning_rate": 5.130140911275315e-06, "loss": 0.1416, "step": 17378 }, { "epoch": 0.5070015753544548, "grad_norm": 0.6881613802772266, "learning_rate": 5.129668636594847e-06, "loss": 0.1276, "step": 17379 }, { "epoch": 0.5070307485850983, "grad_norm": 0.7637479052797145, "learning_rate": 5.129196360756726e-06, "loss": 0.118, "step": 17380 }, { "epoch": 0.5070599218157419, "grad_norm": 1.0347095831555904, "learning_rate": 5.128724083765172e-06, "loss": 0.1183, "step": 17381 }, { "epoch": 0.5070890950463854, "grad_norm": 0.8213310815684511, "learning_rate": 5.1282518056244006e-06, "loss": 0.1245, "step": 17382 }, { "epoch": 0.507118268277029, "grad_norm": 0.9759906612344345, "learning_rate": 5.127779526338628e-06, "loss": 0.1478, "step": 17383 }, { "epoch": 0.5071474415076725, "grad_norm": 1.2924699679050022, "learning_rate": 5.127307245912069e-06, "loss": 0.1506, "step": 17384 }, { "epoch": 0.5071766147383161, "grad_norm": 0.9077479603573876, "learning_rate": 5.126834964348941e-06, "loss": 0.1405, "step": 17385 }, { "epoch": 0.5072057879689597, "grad_norm": 0.882082982669461, "learning_rate": 5.1263626816534616e-06, "loss": 0.1445, "step": 17386 }, { "epoch": 0.5072349611996032, "grad_norm": 0.939918307589359, "learning_rate": 5.125890397829847e-06, "loss": 0.1472, "step": 17387 }, { "epoch": 0.5072641344302468, "grad_norm": 0.8187095096245093, "learning_rate": 5.1254181128823124e-06, "loss": 0.1252, "step": 17388 }, { "epoch": 0.5072933076608903, "grad_norm": 0.8300657431033446, "learning_rate": 5.124945826815074e-06, "loss": 0.1153, "step": 17389 }, { "epoch": 0.507322480891534, "grad_norm": 0.8988592144257003, "learning_rate": 5.1244735396323495e-06, "loss": 0.1483, "step": 17390 }, { "epoch": 0.5073516541221775, "grad_norm": 0.9213824058900065, "learning_rate": 5.124001251338355e-06, "loss": 0.1379, "step": 17391 }, { "epoch": 0.5073808273528211, "grad_norm": 0.7782039516202991, "learning_rate": 5.1235289619373085e-06, "loss": 0.1461, "step": 17392 }, { "epoch": 0.5074100005834646, "grad_norm": 0.7872022005111858, "learning_rate": 5.123056671433423e-06, "loss": 0.1508, "step": 17393 }, { "epoch": 0.5074391738141082, "grad_norm": 0.8579017110269386, "learning_rate": 5.122584379830918e-06, "loss": 0.1369, "step": 17394 }, { "epoch": 0.5074683470447517, "grad_norm": 0.8714802581837974, "learning_rate": 5.122112087134008e-06, "loss": 0.1285, "step": 17395 }, { "epoch": 0.5074975202753953, "grad_norm": 0.7290086984449238, "learning_rate": 5.12163979334691e-06, "loss": 0.1221, "step": 17396 }, { "epoch": 0.5075266935060389, "grad_norm": 0.8952570803272655, "learning_rate": 5.121167498473844e-06, "loss": 0.1322, "step": 17397 }, { "epoch": 0.5075558667366824, "grad_norm": 0.7462410291138011, "learning_rate": 5.12069520251902e-06, "loss": 0.1215, "step": 17398 }, { "epoch": 0.507585039967326, "grad_norm": 0.7841741056002378, "learning_rate": 5.1202229054866595e-06, "loss": 0.1241, "step": 17399 }, { "epoch": 0.5076142131979695, "grad_norm": 0.9448811646218165, "learning_rate": 5.119750607380977e-06, "loss": 0.1421, "step": 17400 }, { "epoch": 0.5076433864286131, "grad_norm": 0.7560075039981516, "learning_rate": 5.119278308206191e-06, "loss": 0.113, "step": 17401 }, { "epoch": 0.5076725596592566, "grad_norm": 1.1668001301099393, "learning_rate": 5.118806007966516e-06, "loss": 0.1544, "step": 17402 }, { "epoch": 0.5077017328899003, "grad_norm": 1.2219707925183994, "learning_rate": 5.118333706666168e-06, "loss": 0.1326, "step": 17403 }, { "epoch": 0.5077309061205438, "grad_norm": 0.9900389618322261, "learning_rate": 5.117861404309367e-06, "loss": 0.1083, "step": 17404 }, { "epoch": 0.5077600793511874, "grad_norm": 0.7042060620325492, "learning_rate": 5.117389100900326e-06, "loss": 0.1499, "step": 17405 }, { "epoch": 0.5077892525818309, "grad_norm": 1.0610797808611254, "learning_rate": 5.116916796443264e-06, "loss": 0.1354, "step": 17406 }, { "epoch": 0.5078184258124745, "grad_norm": 1.2112837320831007, "learning_rate": 5.116444490942397e-06, "loss": 0.1188, "step": 17407 }, { "epoch": 0.507847599043118, "grad_norm": 0.757975343444948, "learning_rate": 5.1159721844019406e-06, "loss": 0.1446, "step": 17408 }, { "epoch": 0.5078767722737616, "grad_norm": 0.8757713595024581, "learning_rate": 5.115499876826113e-06, "loss": 0.1363, "step": 17409 }, { "epoch": 0.5079059455044052, "grad_norm": 1.0200454868819515, "learning_rate": 5.115027568219129e-06, "loss": 0.1445, "step": 17410 }, { "epoch": 0.5079351187350487, "grad_norm": 0.7946393618360597, "learning_rate": 5.114555258585207e-06, "loss": 0.1259, "step": 17411 }, { "epoch": 0.5079642919656923, "grad_norm": 0.755484192261936, "learning_rate": 5.114082947928563e-06, "loss": 0.1579, "step": 17412 }, { "epoch": 0.5079934651963358, "grad_norm": 0.805338398118202, "learning_rate": 5.113610636253413e-06, "loss": 0.1415, "step": 17413 }, { "epoch": 0.5080226384269794, "grad_norm": 0.7999682988143448, "learning_rate": 5.113138323563975e-06, "loss": 0.1414, "step": 17414 }, { "epoch": 0.5080518116576229, "grad_norm": 0.798125551415898, "learning_rate": 5.112666009864466e-06, "loss": 0.126, "step": 17415 }, { "epoch": 0.5080809848882665, "grad_norm": 0.6759290380349545, "learning_rate": 5.1121936951591e-06, "loss": 0.1105, "step": 17416 }, { "epoch": 0.5081101581189101, "grad_norm": 0.6684177179268687, "learning_rate": 5.111721379452096e-06, "loss": 0.1297, "step": 17417 }, { "epoch": 0.5081393313495537, "grad_norm": 0.8306387513930461, "learning_rate": 5.111249062747671e-06, "loss": 0.1288, "step": 17418 }, { "epoch": 0.5081685045801972, "grad_norm": 1.0230107081557667, "learning_rate": 5.11077674505004e-06, "loss": 0.1212, "step": 17419 }, { "epoch": 0.5081976778108408, "grad_norm": 0.9203159726182012, "learning_rate": 5.11030442636342e-06, "loss": 0.1463, "step": 17420 }, { "epoch": 0.5082268510414844, "grad_norm": 0.8596779820305135, "learning_rate": 5.10983210669203e-06, "loss": 0.1225, "step": 17421 }, { "epoch": 0.5082560242721279, "grad_norm": 0.8261003187516668, "learning_rate": 5.109359786040086e-06, "loss": 0.1305, "step": 17422 }, { "epoch": 0.5082851975027715, "grad_norm": 1.0041683647406259, "learning_rate": 5.108887464411802e-06, "loss": 0.1547, "step": 17423 }, { "epoch": 0.508314370733415, "grad_norm": 1.0042134265473222, "learning_rate": 5.108415141811398e-06, "loss": 0.117, "step": 17424 }, { "epoch": 0.5083435439640586, "grad_norm": 0.6871673440834162, "learning_rate": 5.107942818243088e-06, "loss": 0.1151, "step": 17425 }, { "epoch": 0.5083727171947021, "grad_norm": 0.8310470443975275, "learning_rate": 5.1074704937110895e-06, "loss": 0.1337, "step": 17426 }, { "epoch": 0.5084018904253457, "grad_norm": 0.9398306613967943, "learning_rate": 5.1069981682196235e-06, "loss": 0.123, "step": 17427 }, { "epoch": 0.5084310636559892, "grad_norm": 0.8089940837278848, "learning_rate": 5.106525841772902e-06, "loss": 0.1067, "step": 17428 }, { "epoch": 0.5084602368866328, "grad_norm": 0.6566858907538361, "learning_rate": 5.106053514375142e-06, "loss": 0.1653, "step": 17429 }, { "epoch": 0.5084894101172764, "grad_norm": 0.8595110387011408, "learning_rate": 5.105581186030563e-06, "loss": 0.1445, "step": 17430 }, { "epoch": 0.50851858334792, "grad_norm": 0.7523261269920449, "learning_rate": 5.1051088567433785e-06, "loss": 0.1308, "step": 17431 }, { "epoch": 0.5085477565785635, "grad_norm": 0.829148605483477, "learning_rate": 5.104636526517809e-06, "loss": 0.1104, "step": 17432 }, { "epoch": 0.5085769298092071, "grad_norm": 0.7246840243450478, "learning_rate": 5.104164195358068e-06, "loss": 0.1569, "step": 17433 }, { "epoch": 0.5086061030398507, "grad_norm": 0.9536137671145061, "learning_rate": 5.103691863268375e-06, "loss": 0.137, "step": 17434 }, { "epoch": 0.5086352762704942, "grad_norm": 1.0648240016753865, "learning_rate": 5.103219530252945e-06, "loss": 0.1312, "step": 17435 }, { "epoch": 0.5086644495011378, "grad_norm": 0.7450880858814997, "learning_rate": 5.102747196315997e-06, "loss": 0.1155, "step": 17436 }, { "epoch": 0.5086936227317813, "grad_norm": 0.8871366210252537, "learning_rate": 5.102274861461747e-06, "loss": 0.1493, "step": 17437 }, { "epoch": 0.5087227959624249, "grad_norm": 0.8154720472206844, "learning_rate": 5.101802525694409e-06, "loss": 0.1353, "step": 17438 }, { "epoch": 0.5087519691930684, "grad_norm": 0.9148185585487233, "learning_rate": 5.101330189018205e-06, "loss": 0.1093, "step": 17439 }, { "epoch": 0.508781142423712, "grad_norm": 0.8445701927656427, "learning_rate": 5.100857851437347e-06, "loss": 0.1264, "step": 17440 }, { "epoch": 0.5088103156543555, "grad_norm": 0.7670467790226362, "learning_rate": 5.100385512956054e-06, "loss": 0.1374, "step": 17441 }, { "epoch": 0.5088394888849991, "grad_norm": 0.8350280756894248, "learning_rate": 5.099913173578546e-06, "loss": 0.1626, "step": 17442 }, { "epoch": 0.5088686621156426, "grad_norm": 1.124872795905888, "learning_rate": 5.099440833309035e-06, "loss": 0.1231, "step": 17443 }, { "epoch": 0.5088978353462863, "grad_norm": 1.0196721305331584, "learning_rate": 5.09896849215174e-06, "loss": 0.1748, "step": 17444 }, { "epoch": 0.5089270085769299, "grad_norm": 0.7511860367937689, "learning_rate": 5.0984961501108785e-06, "loss": 0.1374, "step": 17445 }, { "epoch": 0.5089561818075734, "grad_norm": 0.829116893736274, "learning_rate": 5.098023807190666e-06, "loss": 0.1036, "step": 17446 }, { "epoch": 0.508985355038217, "grad_norm": 0.8072556863341455, "learning_rate": 5.097551463395321e-06, "loss": 0.1075, "step": 17447 }, { "epoch": 0.5090145282688605, "grad_norm": 0.9118758133732958, "learning_rate": 5.0970791187290605e-06, "loss": 0.1241, "step": 17448 }, { "epoch": 0.5090437014995041, "grad_norm": 0.699691390826038, "learning_rate": 5.0966067731961e-06, "loss": 0.1224, "step": 17449 }, { "epoch": 0.5090728747301476, "grad_norm": 0.8384194162642024, "learning_rate": 5.096134426800657e-06, "loss": 0.1512, "step": 17450 }, { "epoch": 0.5091020479607912, "grad_norm": 0.786729089874759, "learning_rate": 5.095662079546949e-06, "loss": 0.1318, "step": 17451 }, { "epoch": 0.5091312211914347, "grad_norm": 0.8723570546988967, "learning_rate": 5.095189731439194e-06, "loss": 0.1526, "step": 17452 }, { "epoch": 0.5091603944220783, "grad_norm": 0.8062033254186469, "learning_rate": 5.094717382481605e-06, "loss": 0.1367, "step": 17453 }, { "epoch": 0.5091895676527218, "grad_norm": 0.7979303462656121, "learning_rate": 5.094245032678406e-06, "loss": 0.1245, "step": 17454 }, { "epoch": 0.5092187408833654, "grad_norm": 0.7828414313310849, "learning_rate": 5.093772682033806e-06, "loss": 0.1407, "step": 17455 }, { "epoch": 0.5092479141140089, "grad_norm": 0.7756363833803713, "learning_rate": 5.093300330552027e-06, "loss": 0.1241, "step": 17456 }, { "epoch": 0.5092770873446526, "grad_norm": 0.793736937840087, "learning_rate": 5.0928279782372855e-06, "loss": 0.1347, "step": 17457 }, { "epoch": 0.5093062605752962, "grad_norm": 0.7788558158774311, "learning_rate": 5.092355625093798e-06, "loss": 0.1446, "step": 17458 }, { "epoch": 0.5093354338059397, "grad_norm": 0.673358783316202, "learning_rate": 5.0918832711257805e-06, "loss": 0.1237, "step": 17459 }, { "epoch": 0.5093646070365833, "grad_norm": 0.8420016865602312, "learning_rate": 5.091410916337452e-06, "loss": 0.1274, "step": 17460 }, { "epoch": 0.5093937802672268, "grad_norm": 0.7502259822245504, "learning_rate": 5.090938560733029e-06, "loss": 0.1307, "step": 17461 }, { "epoch": 0.5094229534978704, "grad_norm": 0.7980311105265848, "learning_rate": 5.090466204316727e-06, "loss": 0.1256, "step": 17462 }, { "epoch": 0.5094521267285139, "grad_norm": 0.7784150212040712, "learning_rate": 5.089993847092764e-06, "loss": 0.1449, "step": 17463 }, { "epoch": 0.5094812999591575, "grad_norm": 0.7988999313518343, "learning_rate": 5.089521489065358e-06, "loss": 0.1404, "step": 17464 }, { "epoch": 0.509510473189801, "grad_norm": 0.8574086188145461, "learning_rate": 5.089049130238727e-06, "loss": 0.1422, "step": 17465 }, { "epoch": 0.5095396464204446, "grad_norm": 0.713583071387903, "learning_rate": 5.088576770617086e-06, "loss": 0.134, "step": 17466 }, { "epoch": 0.5095688196510881, "grad_norm": 0.7387580018763268, "learning_rate": 5.088104410204652e-06, "loss": 0.1192, "step": 17467 }, { "epoch": 0.5095979928817317, "grad_norm": 0.8620868978061649, "learning_rate": 5.087632049005643e-06, "loss": 0.122, "step": 17468 }, { "epoch": 0.5096271661123752, "grad_norm": 0.7908676300118478, "learning_rate": 5.087159687024277e-06, "loss": 0.1358, "step": 17469 }, { "epoch": 0.5096563393430188, "grad_norm": 0.8784872923271049, "learning_rate": 5.086687324264768e-06, "loss": 0.1409, "step": 17470 }, { "epoch": 0.5096855125736625, "grad_norm": 1.0483879927732656, "learning_rate": 5.086214960731337e-06, "loss": 0.1493, "step": 17471 }, { "epoch": 0.509714685804306, "grad_norm": 0.7200407098885422, "learning_rate": 5.085742596428199e-06, "loss": 0.1311, "step": 17472 }, { "epoch": 0.5097438590349496, "grad_norm": 0.6268472174358896, "learning_rate": 5.085270231359572e-06, "loss": 0.1261, "step": 17473 }, { "epoch": 0.5097730322655931, "grad_norm": 0.9703086724801832, "learning_rate": 5.084797865529673e-06, "loss": 0.1338, "step": 17474 }, { "epoch": 0.5098022054962367, "grad_norm": 0.8499811925044137, "learning_rate": 5.084325498942717e-06, "loss": 0.1165, "step": 17475 }, { "epoch": 0.5098313787268802, "grad_norm": 0.8616719357043495, "learning_rate": 5.083853131602924e-06, "loss": 0.1412, "step": 17476 }, { "epoch": 0.5098605519575238, "grad_norm": 0.8336430000759242, "learning_rate": 5.083380763514511e-06, "loss": 0.1298, "step": 17477 }, { "epoch": 0.5098897251881673, "grad_norm": 0.7461456777905648, "learning_rate": 5.082908394681694e-06, "loss": 0.1189, "step": 17478 }, { "epoch": 0.5099188984188109, "grad_norm": 0.9183240458443164, "learning_rate": 5.08243602510869e-06, "loss": 0.1506, "step": 17479 }, { "epoch": 0.5099480716494544, "grad_norm": 0.8203233671850735, "learning_rate": 5.081963654799717e-06, "loss": 0.1306, "step": 17480 }, { "epoch": 0.509977244880098, "grad_norm": 1.0360600413094372, "learning_rate": 5.0814912837589926e-06, "loss": 0.141, "step": 17481 }, { "epoch": 0.5100064181107415, "grad_norm": 1.1149290780890595, "learning_rate": 5.081018911990734e-06, "loss": 0.1279, "step": 17482 }, { "epoch": 0.5100355913413851, "grad_norm": 1.0399195394392293, "learning_rate": 5.080546539499156e-06, "loss": 0.1554, "step": 17483 }, { "epoch": 0.5100647645720288, "grad_norm": 0.7729377457812602, "learning_rate": 5.08007416628848e-06, "loss": 0.1164, "step": 17484 }, { "epoch": 0.5100939378026723, "grad_norm": 0.6820680789318545, "learning_rate": 5.079601792362919e-06, "loss": 0.1182, "step": 17485 }, { "epoch": 0.5101231110333159, "grad_norm": 0.9411728310634727, "learning_rate": 5.079129417726694e-06, "loss": 0.1135, "step": 17486 }, { "epoch": 0.5101522842639594, "grad_norm": 0.954943445540097, "learning_rate": 5.07865704238402e-06, "loss": 0.12, "step": 17487 }, { "epoch": 0.510181457494603, "grad_norm": 0.7679639014582577, "learning_rate": 5.078184666339113e-06, "loss": 0.1333, "step": 17488 }, { "epoch": 0.5102106307252465, "grad_norm": 0.8363182413744034, "learning_rate": 5.077712289596194e-06, "loss": 0.1335, "step": 17489 }, { "epoch": 0.5102398039558901, "grad_norm": 0.7267867294204925, "learning_rate": 5.077239912159477e-06, "loss": 0.1288, "step": 17490 }, { "epoch": 0.5102689771865336, "grad_norm": 1.1942479269315884, "learning_rate": 5.076767534033181e-06, "loss": 0.1359, "step": 17491 }, { "epoch": 0.5102981504171772, "grad_norm": 0.9139303107840787, "learning_rate": 5.076295155221523e-06, "loss": 0.1269, "step": 17492 }, { "epoch": 0.5103273236478207, "grad_norm": 0.811733228885649, "learning_rate": 5.07582277572872e-06, "loss": 0.1416, "step": 17493 }, { "epoch": 0.5103564968784643, "grad_norm": 0.7993082769531046, "learning_rate": 5.075350395558989e-06, "loss": 0.1239, "step": 17494 }, { "epoch": 0.5103856701091078, "grad_norm": 1.0356845721725232, "learning_rate": 5.074878014716548e-06, "loss": 0.1319, "step": 17495 }, { "epoch": 0.5104148433397514, "grad_norm": 0.9208334162372714, "learning_rate": 5.0744056332056135e-06, "loss": 0.1312, "step": 17496 }, { "epoch": 0.510444016570395, "grad_norm": 1.1003553933247492, "learning_rate": 5.073933251030403e-06, "loss": 0.1519, "step": 17497 }, { "epoch": 0.5104731898010386, "grad_norm": 0.7936843075235583, "learning_rate": 5.073460868195135e-06, "loss": 0.1394, "step": 17498 }, { "epoch": 0.5105023630316822, "grad_norm": 1.0421305551238624, "learning_rate": 5.072988484704026e-06, "loss": 0.1207, "step": 17499 }, { "epoch": 0.5105315362623257, "grad_norm": 1.2136703774253128, "learning_rate": 5.072516100561292e-06, "loss": 0.1125, "step": 17500 }, { "epoch": 0.5105607094929693, "grad_norm": 1.0813212636620535, "learning_rate": 5.0720437157711525e-06, "loss": 0.148, "step": 17501 }, { "epoch": 0.5105898827236128, "grad_norm": 1.0195924965369898, "learning_rate": 5.0715713303378245e-06, "loss": 0.1378, "step": 17502 }, { "epoch": 0.5106190559542564, "grad_norm": 0.9360398833281904, "learning_rate": 5.071098944265524e-06, "loss": 0.135, "step": 17503 }, { "epoch": 0.5106482291848999, "grad_norm": 1.0452041218987642, "learning_rate": 5.070626557558469e-06, "loss": 0.1481, "step": 17504 }, { "epoch": 0.5106774024155435, "grad_norm": 1.0237875216021775, "learning_rate": 5.070154170220877e-06, "loss": 0.1709, "step": 17505 }, { "epoch": 0.510706575646187, "grad_norm": 0.856728350090205, "learning_rate": 5.069681782256965e-06, "loss": 0.1202, "step": 17506 }, { "epoch": 0.5107357488768306, "grad_norm": 0.9616693050458184, "learning_rate": 5.069209393670951e-06, "loss": 0.1446, "step": 17507 }, { "epoch": 0.5107649221074742, "grad_norm": 0.8628997542542997, "learning_rate": 5.0687370044670525e-06, "loss": 0.1379, "step": 17508 }, { "epoch": 0.5107940953381177, "grad_norm": 0.8683347621934591, "learning_rate": 5.068264614649485e-06, "loss": 0.1336, "step": 17509 }, { "epoch": 0.5108232685687613, "grad_norm": 0.7882985045782255, "learning_rate": 5.067792224222469e-06, "loss": 0.1357, "step": 17510 }, { "epoch": 0.5108524417994049, "grad_norm": 0.802689756041416, "learning_rate": 5.06731983319022e-06, "loss": 0.1137, "step": 17511 }, { "epoch": 0.5108816150300485, "grad_norm": 0.8961327508057595, "learning_rate": 5.066847441556955e-06, "loss": 0.1501, "step": 17512 }, { "epoch": 0.510910788260692, "grad_norm": 1.072911160374994, "learning_rate": 5.066375049326891e-06, "loss": 0.1558, "step": 17513 }, { "epoch": 0.5109399614913356, "grad_norm": 0.7563739992307312, "learning_rate": 5.065902656504249e-06, "loss": 0.1367, "step": 17514 }, { "epoch": 0.5109691347219791, "grad_norm": 0.9185384509595925, "learning_rate": 5.065430263093241e-06, "loss": 0.1302, "step": 17515 }, { "epoch": 0.5109983079526227, "grad_norm": 0.7980854257951039, "learning_rate": 5.064957869098089e-06, "loss": 0.1258, "step": 17516 }, { "epoch": 0.5110274811832662, "grad_norm": 0.8508350465342617, "learning_rate": 5.064485474523009e-06, "loss": 0.1474, "step": 17517 }, { "epoch": 0.5110566544139098, "grad_norm": 0.8594741215635663, "learning_rate": 5.064013079372217e-06, "loss": 0.1448, "step": 17518 }, { "epoch": 0.5110858276445533, "grad_norm": 0.9353567233323511, "learning_rate": 5.063540683649932e-06, "loss": 0.1599, "step": 17519 }, { "epoch": 0.5111150008751969, "grad_norm": 0.7570092298781683, "learning_rate": 5.063068287360371e-06, "loss": 0.1218, "step": 17520 }, { "epoch": 0.5111441741058405, "grad_norm": 0.9619765117042295, "learning_rate": 5.062595890507751e-06, "loss": 0.1459, "step": 17521 }, { "epoch": 0.511173347336484, "grad_norm": 0.9031932490312972, "learning_rate": 5.0621234930962905e-06, "loss": 0.1289, "step": 17522 }, { "epoch": 0.5112025205671276, "grad_norm": 0.913318391035932, "learning_rate": 5.061651095130205e-06, "loss": 0.1411, "step": 17523 }, { "epoch": 0.5112316937977711, "grad_norm": 0.7774639919480909, "learning_rate": 5.061178696613714e-06, "loss": 0.131, "step": 17524 }, { "epoch": 0.5112608670284148, "grad_norm": 0.9805357211513239, "learning_rate": 5.060706297551035e-06, "loss": 0.1281, "step": 17525 }, { "epoch": 0.5112900402590583, "grad_norm": 0.7394969456489723, "learning_rate": 5.060233897946383e-06, "loss": 0.1324, "step": 17526 }, { "epoch": 0.5113192134897019, "grad_norm": 0.9975029201407476, "learning_rate": 5.059761497803978e-06, "loss": 0.1324, "step": 17527 }, { "epoch": 0.5113483867203454, "grad_norm": 0.7696731450396544, "learning_rate": 5.059289097128036e-06, "loss": 0.1304, "step": 17528 }, { "epoch": 0.511377559950989, "grad_norm": 0.8056673509710328, "learning_rate": 5.058816695922777e-06, "loss": 0.1347, "step": 17529 }, { "epoch": 0.5114067331816325, "grad_norm": 0.9691332752521078, "learning_rate": 5.058344294192414e-06, "loss": 0.1321, "step": 17530 }, { "epoch": 0.5114359064122761, "grad_norm": 0.7686200758317691, "learning_rate": 5.057871891941168e-06, "loss": 0.1207, "step": 17531 }, { "epoch": 0.5114650796429197, "grad_norm": 0.8759365359133702, "learning_rate": 5.057399489173258e-06, "loss": 0.1299, "step": 17532 }, { "epoch": 0.5114942528735632, "grad_norm": 0.8709034331203749, "learning_rate": 5.056927085892895e-06, "loss": 0.1235, "step": 17533 }, { "epoch": 0.5115234261042068, "grad_norm": 0.9124801522464158, "learning_rate": 5.056454682104304e-06, "loss": 0.1473, "step": 17534 }, { "epoch": 0.5115525993348503, "grad_norm": 1.1281914280370087, "learning_rate": 5.055982277811698e-06, "loss": 0.1331, "step": 17535 }, { "epoch": 0.5115817725654939, "grad_norm": 0.797872254490622, "learning_rate": 5.055509873019295e-06, "loss": 0.1517, "step": 17536 }, { "epoch": 0.5116109457961374, "grad_norm": 0.7042960811354347, "learning_rate": 5.055037467731313e-06, "loss": 0.1542, "step": 17537 }, { "epoch": 0.511640119026781, "grad_norm": 0.8982350288571852, "learning_rate": 5.05456506195197e-06, "loss": 0.1301, "step": 17538 }, { "epoch": 0.5116692922574246, "grad_norm": 0.9645314326560633, "learning_rate": 5.054092655685483e-06, "loss": 0.1189, "step": 17539 }, { "epoch": 0.5116984654880682, "grad_norm": 0.6831060075712297, "learning_rate": 5.05362024893607e-06, "loss": 0.1207, "step": 17540 }, { "epoch": 0.5117276387187117, "grad_norm": 0.8136685565395272, "learning_rate": 5.053147841707949e-06, "loss": 0.1289, "step": 17541 }, { "epoch": 0.5117568119493553, "grad_norm": 0.9101201701656183, "learning_rate": 5.052675434005334e-06, "loss": 0.148, "step": 17542 }, { "epoch": 0.5117859851799988, "grad_norm": 0.9390603386217481, "learning_rate": 5.052203025832447e-06, "loss": 0.1241, "step": 17543 }, { "epoch": 0.5118151584106424, "grad_norm": 0.7610631210478856, "learning_rate": 5.051730617193505e-06, "loss": 0.118, "step": 17544 }, { "epoch": 0.511844331641286, "grad_norm": 0.7814541424739787, "learning_rate": 5.051258208092723e-06, "loss": 0.1219, "step": 17545 }, { "epoch": 0.5118735048719295, "grad_norm": 0.7967455639705543, "learning_rate": 5.05078579853432e-06, "loss": 0.1385, "step": 17546 }, { "epoch": 0.5119026781025731, "grad_norm": 0.6755519418654807, "learning_rate": 5.050313388522514e-06, "loss": 0.1315, "step": 17547 }, { "epoch": 0.5119318513332166, "grad_norm": 0.8998580896578239, "learning_rate": 5.0498409780615205e-06, "loss": 0.1306, "step": 17548 }, { "epoch": 0.5119610245638602, "grad_norm": 0.92405078889, "learning_rate": 5.049368567155561e-06, "loss": 0.1323, "step": 17549 }, { "epoch": 0.5119901977945037, "grad_norm": 0.785876774642788, "learning_rate": 5.04889615580885e-06, "loss": 0.1383, "step": 17550 }, { "epoch": 0.5120193710251473, "grad_norm": 0.7165323386571989, "learning_rate": 5.048423744025605e-06, "loss": 0.1329, "step": 17551 }, { "epoch": 0.5120485442557909, "grad_norm": 0.7620758413797071, "learning_rate": 5.047951331810046e-06, "loss": 0.1339, "step": 17552 }, { "epoch": 0.5120777174864345, "grad_norm": 0.8656110648443649, "learning_rate": 5.047478919166388e-06, "loss": 0.1296, "step": 17553 }, { "epoch": 0.512106890717078, "grad_norm": 0.9855420724478027, "learning_rate": 5.047006506098849e-06, "loss": 0.1347, "step": 17554 }, { "epoch": 0.5121360639477216, "grad_norm": 0.8203545053568296, "learning_rate": 5.046534092611648e-06, "loss": 0.1383, "step": 17555 }, { "epoch": 0.5121652371783652, "grad_norm": 0.8336024513130412, "learning_rate": 5.046061678709001e-06, "loss": 0.1253, "step": 17556 }, { "epoch": 0.5121944104090087, "grad_norm": 0.8251741447645392, "learning_rate": 5.045589264395127e-06, "loss": 0.1279, "step": 17557 }, { "epoch": 0.5122235836396523, "grad_norm": 0.7249019702836738, "learning_rate": 5.045116849674242e-06, "loss": 0.1158, "step": 17558 }, { "epoch": 0.5122527568702958, "grad_norm": 0.8212257179288065, "learning_rate": 5.0446444345505655e-06, "loss": 0.1363, "step": 17559 }, { "epoch": 0.5122819301009394, "grad_norm": 0.7925447040494781, "learning_rate": 5.044172019028313e-06, "loss": 0.127, "step": 17560 }, { "epoch": 0.5123111033315829, "grad_norm": 0.8123743961452655, "learning_rate": 5.043699603111703e-06, "loss": 0.12, "step": 17561 }, { "epoch": 0.5123402765622265, "grad_norm": 0.8630978133301008, "learning_rate": 5.043227186804956e-06, "loss": 0.1229, "step": 17562 }, { "epoch": 0.51236944979287, "grad_norm": 0.8541533524559927, "learning_rate": 5.042754770112284e-06, "loss": 0.1292, "step": 17563 }, { "epoch": 0.5123986230235136, "grad_norm": 0.842949923027598, "learning_rate": 5.0422823530379105e-06, "loss": 0.1472, "step": 17564 }, { "epoch": 0.5124277962541571, "grad_norm": 0.8409518885481113, "learning_rate": 5.0418099355860484e-06, "loss": 0.1346, "step": 17565 }, { "epoch": 0.5124569694848008, "grad_norm": 0.7374106530409561, "learning_rate": 5.041337517760917e-06, "loss": 0.1303, "step": 17566 }, { "epoch": 0.5124861427154443, "grad_norm": 0.8742229032087201, "learning_rate": 5.040865099566735e-06, "loss": 0.1367, "step": 17567 }, { "epoch": 0.5125153159460879, "grad_norm": 1.016779321728823, "learning_rate": 5.040392681007718e-06, "loss": 0.1245, "step": 17568 }, { "epoch": 0.5125444891767315, "grad_norm": 0.7807388059490147, "learning_rate": 5.039920262088086e-06, "loss": 0.1505, "step": 17569 }, { "epoch": 0.512573662407375, "grad_norm": 0.7747413133406592, "learning_rate": 5.039447842812055e-06, "loss": 0.1195, "step": 17570 }, { "epoch": 0.5126028356380186, "grad_norm": 0.8271880281283326, "learning_rate": 5.038975423183842e-06, "loss": 0.1298, "step": 17571 }, { "epoch": 0.5126320088686621, "grad_norm": 0.7830936799746516, "learning_rate": 5.038503003207668e-06, "loss": 0.1273, "step": 17572 }, { "epoch": 0.5126611820993057, "grad_norm": 0.8771580466338059, "learning_rate": 5.0380305828877465e-06, "loss": 0.1221, "step": 17573 }, { "epoch": 0.5126903553299492, "grad_norm": 0.8718309850456868, "learning_rate": 5.037558162228299e-06, "loss": 0.1324, "step": 17574 }, { "epoch": 0.5127195285605928, "grad_norm": 0.9590825845942171, "learning_rate": 5.037085741233538e-06, "loss": 0.1425, "step": 17575 }, { "epoch": 0.5127487017912363, "grad_norm": 0.8733374508286359, "learning_rate": 5.036613319907686e-06, "loss": 0.1509, "step": 17576 }, { "epoch": 0.5127778750218799, "grad_norm": 0.8010297410646896, "learning_rate": 5.036140898254961e-06, "loss": 0.1281, "step": 17577 }, { "epoch": 0.5128070482525234, "grad_norm": 0.818046871337447, "learning_rate": 5.035668476279576e-06, "loss": 0.1186, "step": 17578 }, { "epoch": 0.5128362214831671, "grad_norm": 1.2083012101166253, "learning_rate": 5.035196053985753e-06, "loss": 0.1373, "step": 17579 }, { "epoch": 0.5128653947138107, "grad_norm": 0.9142451664612667, "learning_rate": 5.034723631377707e-06, "loss": 0.1431, "step": 17580 }, { "epoch": 0.5128945679444542, "grad_norm": 0.8908800856985881, "learning_rate": 5.034251208459657e-06, "loss": 0.1472, "step": 17581 }, { "epoch": 0.5129237411750978, "grad_norm": 1.0317160445217477, "learning_rate": 5.03377878523582e-06, "loss": 0.1572, "step": 17582 }, { "epoch": 0.5129529144057413, "grad_norm": 0.8756008051929964, "learning_rate": 5.033306361710415e-06, "loss": 0.1299, "step": 17583 }, { "epoch": 0.5129820876363849, "grad_norm": 0.7728398782043059, "learning_rate": 5.032833937887658e-06, "loss": 0.1269, "step": 17584 }, { "epoch": 0.5130112608670284, "grad_norm": 0.7538051765640672, "learning_rate": 5.032361513771767e-06, "loss": 0.1318, "step": 17585 }, { "epoch": 0.513040434097672, "grad_norm": 0.8545320646510951, "learning_rate": 5.0318890893669615e-06, "loss": 0.1359, "step": 17586 }, { "epoch": 0.5130696073283155, "grad_norm": 0.9585410008967832, "learning_rate": 5.031416664677456e-06, "loss": 0.1292, "step": 17587 }, { "epoch": 0.5130987805589591, "grad_norm": 1.0299142078365062, "learning_rate": 5.030944239707471e-06, "loss": 0.1143, "step": 17588 }, { "epoch": 0.5131279537896026, "grad_norm": 0.9619283975036986, "learning_rate": 5.0304718144612255e-06, "loss": 0.1362, "step": 17589 }, { "epoch": 0.5131571270202462, "grad_norm": 0.7429583931989896, "learning_rate": 5.029999388942931e-06, "loss": 0.1368, "step": 17590 }, { "epoch": 0.5131863002508897, "grad_norm": 0.8229938502388782, "learning_rate": 5.029526963156811e-06, "loss": 0.1452, "step": 17591 }, { "epoch": 0.5132154734815333, "grad_norm": 1.0138785203681089, "learning_rate": 5.029054537107082e-06, "loss": 0.1206, "step": 17592 }, { "epoch": 0.513244646712177, "grad_norm": 0.8362051566001064, "learning_rate": 5.028582110797959e-06, "loss": 0.1138, "step": 17593 }, { "epoch": 0.5132738199428205, "grad_norm": 1.0142553667413128, "learning_rate": 5.028109684233664e-06, "loss": 0.1448, "step": 17594 }, { "epoch": 0.5133029931734641, "grad_norm": 0.9349747283301558, "learning_rate": 5.027637257418412e-06, "loss": 0.1438, "step": 17595 }, { "epoch": 0.5133321664041076, "grad_norm": 0.8878888874083781, "learning_rate": 5.02716483035642e-06, "loss": 0.1513, "step": 17596 }, { "epoch": 0.5133613396347512, "grad_norm": 0.7865816982391449, "learning_rate": 5.026692403051908e-06, "loss": 0.1119, "step": 17597 }, { "epoch": 0.5133905128653947, "grad_norm": 0.7100548874746431, "learning_rate": 5.026219975509091e-06, "loss": 0.1223, "step": 17598 }, { "epoch": 0.5134196860960383, "grad_norm": 0.9445340145825436, "learning_rate": 5.02574754773219e-06, "loss": 0.1398, "step": 17599 }, { "epoch": 0.5134488593266818, "grad_norm": 1.002108731341329, "learning_rate": 5.02527511972542e-06, "loss": 0.1184, "step": 17600 }, { "epoch": 0.5134780325573254, "grad_norm": 0.6824222856757094, "learning_rate": 5.0248026914930006e-06, "loss": 0.1557, "step": 17601 }, { "epoch": 0.5135072057879689, "grad_norm": 0.9177669362364731, "learning_rate": 5.024330263039148e-06, "loss": 0.1191, "step": 17602 }, { "epoch": 0.5135363790186125, "grad_norm": 0.9441676301186944, "learning_rate": 5.023857834368081e-06, "loss": 0.1572, "step": 17603 }, { "epoch": 0.513565552249256, "grad_norm": 0.7064923808484347, "learning_rate": 5.023385405484018e-06, "loss": 0.1245, "step": 17604 }, { "epoch": 0.5135947254798996, "grad_norm": 0.7447612330377841, "learning_rate": 5.022912976391174e-06, "loss": 0.1275, "step": 17605 }, { "epoch": 0.5136238987105433, "grad_norm": 0.9186222233356722, "learning_rate": 5.022440547093768e-06, "loss": 0.1269, "step": 17606 }, { "epoch": 0.5136530719411868, "grad_norm": 0.674380939403684, "learning_rate": 5.02196811759602e-06, "loss": 0.1119, "step": 17607 }, { "epoch": 0.5136822451718304, "grad_norm": 0.6018218128908944, "learning_rate": 5.021495687902144e-06, "loss": 0.122, "step": 17608 }, { "epoch": 0.5137114184024739, "grad_norm": 0.8038044934493266, "learning_rate": 5.021023258016362e-06, "loss": 0.137, "step": 17609 }, { "epoch": 0.5137405916331175, "grad_norm": 0.7717034856489727, "learning_rate": 5.020550827942887e-06, "loss": 0.1238, "step": 17610 }, { "epoch": 0.513769764863761, "grad_norm": 0.7571141400189079, "learning_rate": 5.02007839768594e-06, "loss": 0.1161, "step": 17611 }, { "epoch": 0.5137989380944046, "grad_norm": 0.7604141439047912, "learning_rate": 5.019605967249739e-06, "loss": 0.1792, "step": 17612 }, { "epoch": 0.5138281113250481, "grad_norm": 0.9124473784167703, "learning_rate": 5.019133536638499e-06, "loss": 0.1323, "step": 17613 }, { "epoch": 0.5138572845556917, "grad_norm": 0.9377508153489198, "learning_rate": 5.018661105856439e-06, "loss": 0.1159, "step": 17614 }, { "epoch": 0.5138864577863352, "grad_norm": 0.9459563426893842, "learning_rate": 5.0181886749077795e-06, "loss": 0.1254, "step": 17615 }, { "epoch": 0.5139156310169788, "grad_norm": 0.8730453821464376, "learning_rate": 5.017716243796733e-06, "loss": 0.1417, "step": 17616 }, { "epoch": 0.5139448042476223, "grad_norm": 0.9597291800094656, "learning_rate": 5.017243812527522e-06, "loss": 0.1297, "step": 17617 }, { "epoch": 0.5139739774782659, "grad_norm": 1.116313804130204, "learning_rate": 5.0167713811043615e-06, "loss": 0.1524, "step": 17618 }, { "epoch": 0.5140031507089095, "grad_norm": 0.6809247080188088, "learning_rate": 5.016298949531472e-06, "loss": 0.1129, "step": 17619 }, { "epoch": 0.5140323239395531, "grad_norm": 0.9446785784678333, "learning_rate": 5.015826517813066e-06, "loss": 0.1142, "step": 17620 }, { "epoch": 0.5140614971701967, "grad_norm": 0.8700839566767229, "learning_rate": 5.0153540859533666e-06, "loss": 0.1257, "step": 17621 }, { "epoch": 0.5140906704008402, "grad_norm": 0.6687352248983223, "learning_rate": 5.01488165395659e-06, "loss": 0.1361, "step": 17622 }, { "epoch": 0.5141198436314838, "grad_norm": 0.8536488013155896, "learning_rate": 5.0144092218269524e-06, "loss": 0.1298, "step": 17623 }, { "epoch": 0.5141490168621273, "grad_norm": 0.8045445463211859, "learning_rate": 5.013936789568674e-06, "loss": 0.1075, "step": 17624 }, { "epoch": 0.5141781900927709, "grad_norm": 0.886544578939302, "learning_rate": 5.013464357185971e-06, "loss": 0.1283, "step": 17625 }, { "epoch": 0.5142073633234144, "grad_norm": 0.8834498824643948, "learning_rate": 5.01299192468306e-06, "loss": 0.1465, "step": 17626 }, { "epoch": 0.514236536554058, "grad_norm": 0.8235137054250127, "learning_rate": 5.012519492064162e-06, "loss": 0.1357, "step": 17627 }, { "epoch": 0.5142657097847015, "grad_norm": 0.7779755421483731, "learning_rate": 5.012047059333492e-06, "loss": 0.1136, "step": 17628 }, { "epoch": 0.5142948830153451, "grad_norm": 0.7521641273170295, "learning_rate": 5.011574626495269e-06, "loss": 0.1601, "step": 17629 }, { "epoch": 0.5143240562459886, "grad_norm": 0.7202731880658887, "learning_rate": 5.01110219355371e-06, "loss": 0.133, "step": 17630 }, { "epoch": 0.5143532294766322, "grad_norm": 0.771261148817068, "learning_rate": 5.010629760513034e-06, "loss": 0.1204, "step": 17631 }, { "epoch": 0.5143824027072758, "grad_norm": 0.7949068248008649, "learning_rate": 5.010157327377457e-06, "loss": 0.1112, "step": 17632 }, { "epoch": 0.5144115759379194, "grad_norm": 0.7917686820114463, "learning_rate": 5.009684894151199e-06, "loss": 0.1575, "step": 17633 }, { "epoch": 0.514440749168563, "grad_norm": 0.7125942396034246, "learning_rate": 5.009212460838477e-06, "loss": 0.123, "step": 17634 }, { "epoch": 0.5144699223992065, "grad_norm": 0.8952945642164067, "learning_rate": 5.008740027443506e-06, "loss": 0.1482, "step": 17635 }, { "epoch": 0.5144990956298501, "grad_norm": 0.8271549553230554, "learning_rate": 5.008267593970507e-06, "loss": 0.1398, "step": 17636 }, { "epoch": 0.5145282688604936, "grad_norm": 0.7547581475037284, "learning_rate": 5.0077951604236985e-06, "loss": 0.144, "step": 17637 }, { "epoch": 0.5145574420911372, "grad_norm": 0.8611805200564947, "learning_rate": 5.007322726807294e-06, "loss": 0.1212, "step": 17638 }, { "epoch": 0.5145866153217807, "grad_norm": 0.8804799122613016, "learning_rate": 5.006850293125517e-06, "loss": 0.1372, "step": 17639 }, { "epoch": 0.5146157885524243, "grad_norm": 0.7698369940580078, "learning_rate": 5.0063778593825805e-06, "loss": 0.123, "step": 17640 }, { "epoch": 0.5146449617830678, "grad_norm": 0.916682018196249, "learning_rate": 5.005905425582705e-06, "loss": 0.1588, "step": 17641 }, { "epoch": 0.5146741350137114, "grad_norm": 0.7255126727410842, "learning_rate": 5.005432991730106e-06, "loss": 0.1291, "step": 17642 }, { "epoch": 0.514703308244355, "grad_norm": 0.8011659620481545, "learning_rate": 5.0049605578290025e-06, "loss": 0.1393, "step": 17643 }, { "epoch": 0.5147324814749985, "grad_norm": 0.770103932765385, "learning_rate": 5.004488123883614e-06, "loss": 0.1165, "step": 17644 }, { "epoch": 0.5147616547056421, "grad_norm": 0.6142928258533513, "learning_rate": 5.004015689898155e-06, "loss": 0.1191, "step": 17645 }, { "epoch": 0.5147908279362856, "grad_norm": 1.0560506694150225, "learning_rate": 5.003543255876845e-06, "loss": 0.157, "step": 17646 }, { "epoch": 0.5148200011669293, "grad_norm": 0.8755699419388113, "learning_rate": 5.0030708218239025e-06, "loss": 0.1436, "step": 17647 }, { "epoch": 0.5148491743975728, "grad_norm": 0.794533891524701, "learning_rate": 5.002598387743544e-06, "loss": 0.1349, "step": 17648 }, { "epoch": 0.5148783476282164, "grad_norm": 0.8392470518062469, "learning_rate": 5.002125953639988e-06, "loss": 0.1253, "step": 17649 }, { "epoch": 0.5149075208588599, "grad_norm": 0.9559536658700135, "learning_rate": 5.001653519517451e-06, "loss": 0.1304, "step": 17650 }, { "epoch": 0.5149366940895035, "grad_norm": 0.7364703795814111, "learning_rate": 5.001181085380152e-06, "loss": 0.1282, "step": 17651 }, { "epoch": 0.514965867320147, "grad_norm": 0.8225490988635491, "learning_rate": 5.00070865123231e-06, "loss": 0.1213, "step": 17652 }, { "epoch": 0.5149950405507906, "grad_norm": 1.0874898380750317, "learning_rate": 5.000236217078139e-06, "loss": 0.1303, "step": 17653 }, { "epoch": 0.5150242137814341, "grad_norm": 0.6688242311374385, "learning_rate": 4.999763782921862e-06, "loss": 0.1224, "step": 17654 }, { "epoch": 0.5150533870120777, "grad_norm": 0.8607544504149954, "learning_rate": 4.999291348767692e-06, "loss": 0.1468, "step": 17655 }, { "epoch": 0.5150825602427213, "grad_norm": 1.0217087991651117, "learning_rate": 4.998818914619849e-06, "loss": 0.1319, "step": 17656 }, { "epoch": 0.5151117334733648, "grad_norm": 0.8437567190693104, "learning_rate": 4.99834648048255e-06, "loss": 0.1451, "step": 17657 }, { "epoch": 0.5151409067040084, "grad_norm": 0.7201711567972063, "learning_rate": 4.997874046360013e-06, "loss": 0.1269, "step": 17658 }, { "epoch": 0.5151700799346519, "grad_norm": 0.8352761030653358, "learning_rate": 4.997401612256458e-06, "loss": 0.1332, "step": 17659 }, { "epoch": 0.5151992531652956, "grad_norm": 1.004129729343521, "learning_rate": 4.996929178176099e-06, "loss": 0.1416, "step": 17660 }, { "epoch": 0.5152284263959391, "grad_norm": 1.2824937203244915, "learning_rate": 4.996456744123156e-06, "loss": 0.117, "step": 17661 }, { "epoch": 0.5152575996265827, "grad_norm": 1.1857359085225379, "learning_rate": 4.995984310101847e-06, "loss": 0.1129, "step": 17662 }, { "epoch": 0.5152867728572262, "grad_norm": 1.1371941299943034, "learning_rate": 4.995511876116387e-06, "loss": 0.121, "step": 17663 }, { "epoch": 0.5153159460878698, "grad_norm": 1.196261158483275, "learning_rate": 4.995039442170998e-06, "loss": 0.1331, "step": 17664 }, { "epoch": 0.5153451193185133, "grad_norm": 1.1620631468864877, "learning_rate": 4.9945670082698945e-06, "loss": 0.1541, "step": 17665 }, { "epoch": 0.5153742925491569, "grad_norm": 1.0097065937778051, "learning_rate": 4.994094574417296e-06, "loss": 0.1258, "step": 17666 }, { "epoch": 0.5154034657798005, "grad_norm": 0.821252905649037, "learning_rate": 4.993622140617421e-06, "loss": 0.144, "step": 17667 }, { "epoch": 0.515432639010444, "grad_norm": 0.9205304381811602, "learning_rate": 4.993149706874485e-06, "loss": 0.1354, "step": 17668 }, { "epoch": 0.5154618122410876, "grad_norm": 1.2005603618970164, "learning_rate": 4.992677273192706e-06, "loss": 0.1268, "step": 17669 }, { "epoch": 0.5154909854717311, "grad_norm": 0.9359330023349163, "learning_rate": 4.992204839576302e-06, "loss": 0.138, "step": 17670 }, { "epoch": 0.5155201587023747, "grad_norm": 0.8701714764215726, "learning_rate": 4.9917324060294946e-06, "loss": 0.1119, "step": 17671 }, { "epoch": 0.5155493319330182, "grad_norm": 0.8238086979703602, "learning_rate": 4.991259972556496e-06, "loss": 0.1162, "step": 17672 }, { "epoch": 0.5155785051636618, "grad_norm": 0.770604622471963, "learning_rate": 4.990787539161525e-06, "loss": 0.1222, "step": 17673 }, { "epoch": 0.5156076783943054, "grad_norm": 0.6771629181307338, "learning_rate": 4.990315105848804e-06, "loss": 0.1263, "step": 17674 }, { "epoch": 0.515636851624949, "grad_norm": 0.8926126858578739, "learning_rate": 4.989842672622543e-06, "loss": 0.1166, "step": 17675 }, { "epoch": 0.5156660248555925, "grad_norm": 0.8297622269993699, "learning_rate": 4.989370239486968e-06, "loss": 0.1322, "step": 17676 }, { "epoch": 0.5156951980862361, "grad_norm": 0.7446893830893277, "learning_rate": 4.988897806446291e-06, "loss": 0.1313, "step": 17677 }, { "epoch": 0.5157243713168796, "grad_norm": 0.9243250876529159, "learning_rate": 4.9884253735047325e-06, "loss": 0.1541, "step": 17678 }, { "epoch": 0.5157535445475232, "grad_norm": 0.8860627931972566, "learning_rate": 4.98795294066651e-06, "loss": 0.1197, "step": 17679 }, { "epoch": 0.5157827177781668, "grad_norm": 1.0050192417510047, "learning_rate": 4.987480507935841e-06, "loss": 0.1245, "step": 17680 }, { "epoch": 0.5158118910088103, "grad_norm": 0.7079725561051315, "learning_rate": 4.987008075316941e-06, "loss": 0.1387, "step": 17681 }, { "epoch": 0.5158410642394539, "grad_norm": 0.7984669677892621, "learning_rate": 4.986535642814031e-06, "loss": 0.1236, "step": 17682 }, { "epoch": 0.5158702374700974, "grad_norm": 0.9139776999032819, "learning_rate": 4.9860632104313276e-06, "loss": 0.1452, "step": 17683 }, { "epoch": 0.515899410700741, "grad_norm": 0.7610917318336733, "learning_rate": 4.985590778173049e-06, "loss": 0.1086, "step": 17684 }, { "epoch": 0.5159285839313845, "grad_norm": 0.7632751121205954, "learning_rate": 4.9851183460434115e-06, "loss": 0.1461, "step": 17685 }, { "epoch": 0.5159577571620281, "grad_norm": 0.7501627366479159, "learning_rate": 4.984645914046635e-06, "loss": 0.1279, "step": 17686 }, { "epoch": 0.5159869303926717, "grad_norm": 1.019683836730631, "learning_rate": 4.984173482186934e-06, "loss": 0.1554, "step": 17687 }, { "epoch": 0.5160161036233153, "grad_norm": 0.8740614422971671, "learning_rate": 4.98370105046853e-06, "loss": 0.168, "step": 17688 }, { "epoch": 0.5160452768539588, "grad_norm": 0.7718973254598634, "learning_rate": 4.983228618895639e-06, "loss": 0.1221, "step": 17689 }, { "epoch": 0.5160744500846024, "grad_norm": 0.7935483578242476, "learning_rate": 4.98275618747248e-06, "loss": 0.1459, "step": 17690 }, { "epoch": 0.516103623315246, "grad_norm": 1.0596746784375382, "learning_rate": 4.982283756203268e-06, "loss": 0.1521, "step": 17691 }, { "epoch": 0.5161327965458895, "grad_norm": 0.7472360856697575, "learning_rate": 4.981811325092224e-06, "loss": 0.1212, "step": 17692 }, { "epoch": 0.5161619697765331, "grad_norm": 0.7540684429918678, "learning_rate": 4.98133889414356e-06, "loss": 0.1408, "step": 17693 }, { "epoch": 0.5161911430071766, "grad_norm": 0.8577320979432324, "learning_rate": 4.980866463361502e-06, "loss": 0.1413, "step": 17694 }, { "epoch": 0.5162203162378202, "grad_norm": 0.9033068190112308, "learning_rate": 4.980394032750263e-06, "loss": 0.1542, "step": 17695 }, { "epoch": 0.5162494894684637, "grad_norm": 0.7692487755193975, "learning_rate": 4.979921602314061e-06, "loss": 0.1314, "step": 17696 }, { "epoch": 0.5162786626991073, "grad_norm": 0.9400376900876605, "learning_rate": 4.979449172057115e-06, "loss": 0.1336, "step": 17697 }, { "epoch": 0.5163078359297508, "grad_norm": 0.7299198992071241, "learning_rate": 4.978976741983641e-06, "loss": 0.1241, "step": 17698 }, { "epoch": 0.5163370091603944, "grad_norm": 0.7482717083200953, "learning_rate": 4.978504312097856e-06, "loss": 0.1343, "step": 17699 }, { "epoch": 0.5163661823910379, "grad_norm": 0.8087436392121131, "learning_rate": 4.978031882403981e-06, "loss": 0.1507, "step": 17700 }, { "epoch": 0.5163953556216816, "grad_norm": 0.868521306445695, "learning_rate": 4.977559452906233e-06, "loss": 0.1319, "step": 17701 }, { "epoch": 0.5164245288523251, "grad_norm": 0.936283371969216, "learning_rate": 4.977087023608828e-06, "loss": 0.1178, "step": 17702 }, { "epoch": 0.5164537020829687, "grad_norm": 1.175195682655177, "learning_rate": 4.976614594515985e-06, "loss": 0.1491, "step": 17703 }, { "epoch": 0.5164828753136123, "grad_norm": 0.9890558252635985, "learning_rate": 4.976142165631921e-06, "loss": 0.1439, "step": 17704 }, { "epoch": 0.5165120485442558, "grad_norm": 0.9898106675366942, "learning_rate": 4.975669736960852e-06, "loss": 0.1493, "step": 17705 }, { "epoch": 0.5165412217748994, "grad_norm": 1.0099130958080584, "learning_rate": 4.975197308507001e-06, "loss": 0.1283, "step": 17706 }, { "epoch": 0.5165703950055429, "grad_norm": 0.6482686432632625, "learning_rate": 4.9747248802745814e-06, "loss": 0.114, "step": 17707 }, { "epoch": 0.5165995682361865, "grad_norm": 0.9074951647529433, "learning_rate": 4.974252452267811e-06, "loss": 0.1499, "step": 17708 }, { "epoch": 0.51662874146683, "grad_norm": 0.8029354281442845, "learning_rate": 4.973780024490911e-06, "loss": 0.1286, "step": 17709 }, { "epoch": 0.5166579146974736, "grad_norm": 0.9127033778546586, "learning_rate": 4.9733075969480945e-06, "loss": 0.1491, "step": 17710 }, { "epoch": 0.5166870879281171, "grad_norm": 0.7613957159556328, "learning_rate": 4.972835169643581e-06, "loss": 0.125, "step": 17711 }, { "epoch": 0.5167162611587607, "grad_norm": 0.6994314010900585, "learning_rate": 4.9723627425815895e-06, "loss": 0.1548, "step": 17712 }, { "epoch": 0.5167454343894042, "grad_norm": 0.7249464277301007, "learning_rate": 4.9718903157663364e-06, "loss": 0.1374, "step": 17713 }, { "epoch": 0.5167746076200479, "grad_norm": 0.8740554154516622, "learning_rate": 4.971417889202042e-06, "loss": 0.1414, "step": 17714 }, { "epoch": 0.5168037808506915, "grad_norm": 0.7367444142907535, "learning_rate": 4.97094546289292e-06, "loss": 0.1279, "step": 17715 }, { "epoch": 0.516832954081335, "grad_norm": 0.8791510432754738, "learning_rate": 4.97047303684319e-06, "loss": 0.1249, "step": 17716 }, { "epoch": 0.5168621273119786, "grad_norm": 0.8161126947784589, "learning_rate": 4.970000611057069e-06, "loss": 0.1386, "step": 17717 }, { "epoch": 0.5168913005426221, "grad_norm": 0.8634750377528411, "learning_rate": 4.969528185538776e-06, "loss": 0.1372, "step": 17718 }, { "epoch": 0.5169204737732657, "grad_norm": 0.8407895590347889, "learning_rate": 4.96905576029253e-06, "loss": 0.1436, "step": 17719 }, { "epoch": 0.5169496470039092, "grad_norm": 1.0012309494361669, "learning_rate": 4.968583335322545e-06, "loss": 0.1389, "step": 17720 }, { "epoch": 0.5169788202345528, "grad_norm": 1.0487690076129972, "learning_rate": 4.96811091063304e-06, "loss": 0.1409, "step": 17721 }, { "epoch": 0.5170079934651963, "grad_norm": 0.6886249733579866, "learning_rate": 4.967638486228235e-06, "loss": 0.1461, "step": 17722 }, { "epoch": 0.5170371666958399, "grad_norm": 1.7070397978566778, "learning_rate": 4.967166062112342e-06, "loss": 0.1489, "step": 17723 }, { "epoch": 0.5170663399264834, "grad_norm": 0.8597711687918821, "learning_rate": 4.966693638289587e-06, "loss": 0.1172, "step": 17724 }, { "epoch": 0.517095513157127, "grad_norm": 0.853591338688919, "learning_rate": 4.9662212147641805e-06, "loss": 0.1234, "step": 17725 }, { "epoch": 0.5171246863877705, "grad_norm": 0.719466915487513, "learning_rate": 4.9657487915403446e-06, "loss": 0.1324, "step": 17726 }, { "epoch": 0.5171538596184141, "grad_norm": 0.8899182333294233, "learning_rate": 4.965276368622295e-06, "loss": 0.1291, "step": 17727 }, { "epoch": 0.5171830328490578, "grad_norm": 0.9061730129709415, "learning_rate": 4.96480394601425e-06, "loss": 0.1344, "step": 17728 }, { "epoch": 0.5172122060797013, "grad_norm": 0.9292343509728695, "learning_rate": 4.9643315237204246e-06, "loss": 0.155, "step": 17729 }, { "epoch": 0.5172413793103449, "grad_norm": 0.9195084573904982, "learning_rate": 4.963859101745041e-06, "loss": 0.1327, "step": 17730 }, { "epoch": 0.5172705525409884, "grad_norm": 1.9916532290169138, "learning_rate": 4.9633866800923145e-06, "loss": 0.1512, "step": 17731 }, { "epoch": 0.517299725771632, "grad_norm": 0.8279354201987689, "learning_rate": 4.962914258766463e-06, "loss": 0.1268, "step": 17732 }, { "epoch": 0.5173288990022755, "grad_norm": 0.7465001089712736, "learning_rate": 4.962441837771704e-06, "loss": 0.126, "step": 17733 }, { "epoch": 0.5173580722329191, "grad_norm": 1.2480594259412, "learning_rate": 4.961969417112256e-06, "loss": 0.1317, "step": 17734 }, { "epoch": 0.5173872454635626, "grad_norm": 1.1180437220550352, "learning_rate": 4.961496996792333e-06, "loss": 0.1374, "step": 17735 }, { "epoch": 0.5174164186942062, "grad_norm": 0.7644397191168941, "learning_rate": 4.961024576816158e-06, "loss": 0.1264, "step": 17736 }, { "epoch": 0.5174455919248497, "grad_norm": 1.125579767318, "learning_rate": 4.960552157187947e-06, "loss": 0.1287, "step": 17737 }, { "epoch": 0.5174747651554933, "grad_norm": 1.7882388009657824, "learning_rate": 4.9600797379119155e-06, "loss": 0.154, "step": 17738 }, { "epoch": 0.5175039383861368, "grad_norm": 0.800660711242164, "learning_rate": 4.959607318992284e-06, "loss": 0.1181, "step": 17739 }, { "epoch": 0.5175331116167804, "grad_norm": 1.1732319497813264, "learning_rate": 4.959134900433268e-06, "loss": 0.1502, "step": 17740 }, { "epoch": 0.5175622848474241, "grad_norm": 1.1002167995407082, "learning_rate": 4.958662482239084e-06, "loss": 0.1526, "step": 17741 }, { "epoch": 0.5175914580780676, "grad_norm": 0.839185950225196, "learning_rate": 4.958190064413953e-06, "loss": 0.1357, "step": 17742 }, { "epoch": 0.5176206313087112, "grad_norm": 0.8207595591182254, "learning_rate": 4.957717646962091e-06, "loss": 0.147, "step": 17743 }, { "epoch": 0.5176498045393547, "grad_norm": 1.0114933892310523, "learning_rate": 4.957245229887717e-06, "loss": 0.1193, "step": 17744 }, { "epoch": 0.5176789777699983, "grad_norm": 1.0847668761992957, "learning_rate": 4.956772813195046e-06, "loss": 0.135, "step": 17745 }, { "epoch": 0.5177081510006418, "grad_norm": 0.7678586858077632, "learning_rate": 4.9563003968882975e-06, "loss": 0.1245, "step": 17746 }, { "epoch": 0.5177373242312854, "grad_norm": 1.1090211114566713, "learning_rate": 4.955827980971688e-06, "loss": 0.147, "step": 17747 }, { "epoch": 0.5177664974619289, "grad_norm": 0.8378878431401968, "learning_rate": 4.955355565449435e-06, "loss": 0.1256, "step": 17748 }, { "epoch": 0.5177956706925725, "grad_norm": 0.748962072543815, "learning_rate": 4.95488315032576e-06, "loss": 0.1365, "step": 17749 }, { "epoch": 0.517824843923216, "grad_norm": 0.8700736053153962, "learning_rate": 4.9544107356048756e-06, "loss": 0.1205, "step": 17750 }, { "epoch": 0.5178540171538596, "grad_norm": 0.9766888630828362, "learning_rate": 4.953938321291001e-06, "loss": 0.1291, "step": 17751 }, { "epoch": 0.5178831903845031, "grad_norm": 0.8786663876319774, "learning_rate": 4.953465907388353e-06, "loss": 0.1241, "step": 17752 }, { "epoch": 0.5179123636151467, "grad_norm": 0.7814683983961322, "learning_rate": 4.9529934939011514e-06, "loss": 0.1288, "step": 17753 }, { "epoch": 0.5179415368457903, "grad_norm": 0.7162963342767883, "learning_rate": 4.952521080833614e-06, "loss": 0.1172, "step": 17754 }, { "epoch": 0.5179707100764339, "grad_norm": 0.7422130006335729, "learning_rate": 4.952048668189956e-06, "loss": 0.1167, "step": 17755 }, { "epoch": 0.5179998833070775, "grad_norm": 0.9636738077209109, "learning_rate": 4.9515762559743955e-06, "loss": 0.1192, "step": 17756 }, { "epoch": 0.518029056537721, "grad_norm": 0.8079579914363468, "learning_rate": 4.9511038441911515e-06, "loss": 0.1201, "step": 17757 }, { "epoch": 0.5180582297683646, "grad_norm": 0.7498702957223226, "learning_rate": 4.9506314328444395e-06, "loss": 0.1288, "step": 17758 }, { "epoch": 0.5180874029990081, "grad_norm": 0.754107044012069, "learning_rate": 4.950159021938479e-06, "loss": 0.137, "step": 17759 }, { "epoch": 0.5181165762296517, "grad_norm": 0.7427974198618872, "learning_rate": 4.949686611477487e-06, "loss": 0.1127, "step": 17760 }, { "epoch": 0.5181457494602952, "grad_norm": 0.7629154342469427, "learning_rate": 4.949214201465682e-06, "loss": 0.1163, "step": 17761 }, { "epoch": 0.5181749226909388, "grad_norm": 0.7295565415995703, "learning_rate": 4.948741791907279e-06, "loss": 0.111, "step": 17762 }, { "epoch": 0.5182040959215823, "grad_norm": 0.9266575335231287, "learning_rate": 4.948269382806497e-06, "loss": 0.1416, "step": 17763 }, { "epoch": 0.5182332691522259, "grad_norm": 0.8798685542925329, "learning_rate": 4.947796974167553e-06, "loss": 0.1222, "step": 17764 }, { "epoch": 0.5182624423828694, "grad_norm": 1.2083189150380842, "learning_rate": 4.947324565994666e-06, "loss": 0.1371, "step": 17765 }, { "epoch": 0.518291615613513, "grad_norm": 1.0981785801723138, "learning_rate": 4.946852158292054e-06, "loss": 0.1448, "step": 17766 }, { "epoch": 0.5183207888441566, "grad_norm": 0.8165686750111091, "learning_rate": 4.946379751063932e-06, "loss": 0.1293, "step": 17767 }, { "epoch": 0.5183499620748001, "grad_norm": 1.1957180571188588, "learning_rate": 4.9459073443145185e-06, "loss": 0.1216, "step": 17768 }, { "epoch": 0.5183791353054438, "grad_norm": 0.7525766178725002, "learning_rate": 4.945434938048032e-06, "loss": 0.139, "step": 17769 }, { "epoch": 0.5184083085360873, "grad_norm": 0.8103711165343084, "learning_rate": 4.9449625322686874e-06, "loss": 0.1322, "step": 17770 }, { "epoch": 0.5184374817667309, "grad_norm": 1.1193703453006678, "learning_rate": 4.944490126980706e-06, "loss": 0.1148, "step": 17771 }, { "epoch": 0.5184666549973744, "grad_norm": 0.8859469023702427, "learning_rate": 4.944017722188303e-06, "loss": 0.1308, "step": 17772 }, { "epoch": 0.518495828228018, "grad_norm": 0.7090720779509818, "learning_rate": 4.943545317895697e-06, "loss": 0.1268, "step": 17773 }, { "epoch": 0.5185250014586615, "grad_norm": 0.9905126126943034, "learning_rate": 4.9430729141071056e-06, "loss": 0.1225, "step": 17774 }, { "epoch": 0.5185541746893051, "grad_norm": 0.9087945970173723, "learning_rate": 4.942600510826745e-06, "loss": 0.1123, "step": 17775 }, { "epoch": 0.5185833479199486, "grad_norm": 0.753379908493967, "learning_rate": 4.942128108058832e-06, "loss": 0.1122, "step": 17776 }, { "epoch": 0.5186125211505922, "grad_norm": 0.9766768732241771, "learning_rate": 4.941655705807586e-06, "loss": 0.1476, "step": 17777 }, { "epoch": 0.5186416943812358, "grad_norm": 0.8597888733321941, "learning_rate": 4.941183304077224e-06, "loss": 0.1202, "step": 17778 }, { "epoch": 0.5186708676118793, "grad_norm": 0.7241541105926349, "learning_rate": 4.9407109028719644e-06, "loss": 0.1273, "step": 17779 }, { "epoch": 0.5187000408425229, "grad_norm": 0.9018762007631137, "learning_rate": 4.940238502196024e-06, "loss": 0.1425, "step": 17780 }, { "epoch": 0.5187292140731664, "grad_norm": 1.113150947027481, "learning_rate": 4.939766102053619e-06, "loss": 0.1359, "step": 17781 }, { "epoch": 0.5187583873038101, "grad_norm": 0.9067067536712301, "learning_rate": 4.939293702448966e-06, "loss": 0.1366, "step": 17782 }, { "epoch": 0.5187875605344536, "grad_norm": 0.869308613776223, "learning_rate": 4.938821303386287e-06, "loss": 0.1348, "step": 17783 }, { "epoch": 0.5188167337650972, "grad_norm": 1.030779845618955, "learning_rate": 4.938348904869796e-06, "loss": 0.1516, "step": 17784 }, { "epoch": 0.5188459069957407, "grad_norm": 0.7756506386233125, "learning_rate": 4.937876506903711e-06, "loss": 0.1155, "step": 17785 }, { "epoch": 0.5188750802263843, "grad_norm": 0.8730181617833321, "learning_rate": 4.9374041094922506e-06, "loss": 0.1316, "step": 17786 }, { "epoch": 0.5189042534570278, "grad_norm": 1.0480714878501245, "learning_rate": 4.936931712639632e-06, "loss": 0.1389, "step": 17787 }, { "epoch": 0.5189334266876714, "grad_norm": 0.918854444863424, "learning_rate": 4.936459316350069e-06, "loss": 0.1568, "step": 17788 }, { "epoch": 0.518962599918315, "grad_norm": 0.7993345800592149, "learning_rate": 4.935986920627784e-06, "loss": 0.121, "step": 17789 }, { "epoch": 0.5189917731489585, "grad_norm": 1.1084824142156962, "learning_rate": 4.935514525476992e-06, "loss": 0.1463, "step": 17790 }, { "epoch": 0.519020946379602, "grad_norm": 0.8425773712132383, "learning_rate": 4.9350421309019125e-06, "loss": 0.1402, "step": 17791 }, { "epoch": 0.5190501196102456, "grad_norm": 0.8271009729135738, "learning_rate": 4.93456973690676e-06, "loss": 0.1229, "step": 17792 }, { "epoch": 0.5190792928408892, "grad_norm": 0.7775744912527479, "learning_rate": 4.934097343495753e-06, "loss": 0.112, "step": 17793 }, { "epoch": 0.5191084660715327, "grad_norm": 0.8043375251488273, "learning_rate": 4.933624950673109e-06, "loss": 0.1205, "step": 17794 }, { "epoch": 0.5191376393021763, "grad_norm": 0.7250849815657042, "learning_rate": 4.933152558443045e-06, "loss": 0.1102, "step": 17795 }, { "epoch": 0.5191668125328199, "grad_norm": 0.6975336060490384, "learning_rate": 4.932680166809782e-06, "loss": 0.1209, "step": 17796 }, { "epoch": 0.5191959857634635, "grad_norm": 0.744123554043262, "learning_rate": 4.932207775777532e-06, "loss": 0.1128, "step": 17797 }, { "epoch": 0.519225158994107, "grad_norm": 0.7732292829867687, "learning_rate": 4.9317353853505154e-06, "loss": 0.1554, "step": 17798 }, { "epoch": 0.5192543322247506, "grad_norm": 1.0536207532554802, "learning_rate": 4.931262995532951e-06, "loss": 0.1365, "step": 17799 }, { "epoch": 0.5192835054553941, "grad_norm": 0.7685445534392636, "learning_rate": 4.930790606329049e-06, "loss": 0.1342, "step": 17800 }, { "epoch": 0.5193126786860377, "grad_norm": 1.2868047870908867, "learning_rate": 4.9303182177430355e-06, "loss": 0.1524, "step": 17801 }, { "epoch": 0.5193418519166813, "grad_norm": 0.9592999503782187, "learning_rate": 4.9298458297791245e-06, "loss": 0.1396, "step": 17802 }, { "epoch": 0.5193710251473248, "grad_norm": 0.8482988127290358, "learning_rate": 4.929373442441533e-06, "loss": 0.1252, "step": 17803 }, { "epoch": 0.5194001983779684, "grad_norm": 0.8686835971258066, "learning_rate": 4.928901055734479e-06, "loss": 0.1088, "step": 17804 }, { "epoch": 0.5194293716086119, "grad_norm": 1.549980148624875, "learning_rate": 4.928428669662178e-06, "loss": 0.1414, "step": 17805 }, { "epoch": 0.5194585448392555, "grad_norm": 0.954973356348734, "learning_rate": 4.927956284228848e-06, "loss": 0.1187, "step": 17806 }, { "epoch": 0.519487718069899, "grad_norm": 1.031618422626665, "learning_rate": 4.927483899438708e-06, "loss": 0.1476, "step": 17807 }, { "epoch": 0.5195168913005426, "grad_norm": 1.2986421010957385, "learning_rate": 4.9270115152959744e-06, "loss": 0.1192, "step": 17808 }, { "epoch": 0.5195460645311862, "grad_norm": 1.1509890595581167, "learning_rate": 4.926539131804867e-06, "loss": 0.1427, "step": 17809 }, { "epoch": 0.5195752377618298, "grad_norm": 0.9717509027750101, "learning_rate": 4.926066748969598e-06, "loss": 0.142, "step": 17810 }, { "epoch": 0.5196044109924733, "grad_norm": 0.720133769036651, "learning_rate": 4.925594366794388e-06, "loss": 0.1319, "step": 17811 }, { "epoch": 0.5196335842231169, "grad_norm": 0.7808693343070151, "learning_rate": 4.925121985283453e-06, "loss": 0.1183, "step": 17812 }, { "epoch": 0.5196627574537604, "grad_norm": 0.7395202818105009, "learning_rate": 4.924649604441012e-06, "loss": 0.1172, "step": 17813 }, { "epoch": 0.519691930684404, "grad_norm": 0.7640524827593631, "learning_rate": 4.9241772242712815e-06, "loss": 0.1289, "step": 17814 }, { "epoch": 0.5197211039150476, "grad_norm": 0.7583174019718506, "learning_rate": 4.9237048447784785e-06, "loss": 0.137, "step": 17815 }, { "epoch": 0.5197502771456911, "grad_norm": 0.7979366143303395, "learning_rate": 4.92323246596682e-06, "loss": 0.1201, "step": 17816 }, { "epoch": 0.5197794503763347, "grad_norm": 0.6877777126262582, "learning_rate": 4.9227600878405255e-06, "loss": 0.1341, "step": 17817 }, { "epoch": 0.5198086236069782, "grad_norm": 0.8602122677761864, "learning_rate": 4.922287710403807e-06, "loss": 0.1194, "step": 17818 }, { "epoch": 0.5198377968376218, "grad_norm": 0.7375428944619327, "learning_rate": 4.921815333660888e-06, "loss": 0.137, "step": 17819 }, { "epoch": 0.5198669700682653, "grad_norm": 0.9867675926808651, "learning_rate": 4.9213429576159815e-06, "loss": 0.1333, "step": 17820 }, { "epoch": 0.5198961432989089, "grad_norm": 0.7379810370770487, "learning_rate": 4.920870582273308e-06, "loss": 0.1384, "step": 17821 }, { "epoch": 0.5199253165295524, "grad_norm": 0.848173272023187, "learning_rate": 4.920398207637082e-06, "loss": 0.1087, "step": 17822 }, { "epoch": 0.5199544897601961, "grad_norm": 0.9426890226773692, "learning_rate": 4.919925833711522e-06, "loss": 0.1423, "step": 17823 }, { "epoch": 0.5199836629908396, "grad_norm": 0.7548892339920918, "learning_rate": 4.919453460500844e-06, "loss": 0.1172, "step": 17824 }, { "epoch": 0.5200128362214832, "grad_norm": 0.771375472373932, "learning_rate": 4.918981088009267e-06, "loss": 0.1259, "step": 17825 }, { "epoch": 0.5200420094521268, "grad_norm": 0.806062664754491, "learning_rate": 4.918508716241009e-06, "loss": 0.1294, "step": 17826 }, { "epoch": 0.5200711826827703, "grad_norm": 0.918711591193355, "learning_rate": 4.918036345200284e-06, "loss": 0.1431, "step": 17827 }, { "epoch": 0.5201003559134139, "grad_norm": 0.8793300357363661, "learning_rate": 4.917563974891311e-06, "loss": 0.1093, "step": 17828 }, { "epoch": 0.5201295291440574, "grad_norm": 0.6963931385633928, "learning_rate": 4.917091605318309e-06, "loss": 0.1057, "step": 17829 }, { "epoch": 0.520158702374701, "grad_norm": 0.7431056834038489, "learning_rate": 4.91661923648549e-06, "loss": 0.1095, "step": 17830 }, { "epoch": 0.5201878756053445, "grad_norm": 0.8950737012664087, "learning_rate": 4.916146868397077e-06, "loss": 0.1289, "step": 17831 }, { "epoch": 0.5202170488359881, "grad_norm": 0.8267027874287781, "learning_rate": 4.915674501057284e-06, "loss": 0.1234, "step": 17832 }, { "epoch": 0.5202462220666316, "grad_norm": 0.9156664218792455, "learning_rate": 4.91520213447033e-06, "loss": 0.118, "step": 17833 }, { "epoch": 0.5202753952972752, "grad_norm": 0.8229915052361602, "learning_rate": 4.914729768640431e-06, "loss": 0.1257, "step": 17834 }, { "epoch": 0.5203045685279187, "grad_norm": 0.8387745144811057, "learning_rate": 4.914257403571803e-06, "loss": 0.1486, "step": 17835 }, { "epoch": 0.5203337417585624, "grad_norm": 0.8746183836734868, "learning_rate": 4.9137850392686635e-06, "loss": 0.1214, "step": 17836 }, { "epoch": 0.520362914989206, "grad_norm": 0.8686617024439045, "learning_rate": 4.913312675735233e-06, "loss": 0.1357, "step": 17837 }, { "epoch": 0.5203920882198495, "grad_norm": 0.804809133997618, "learning_rate": 4.912840312975725e-06, "loss": 0.1516, "step": 17838 }, { "epoch": 0.5204212614504931, "grad_norm": 0.8651921029484754, "learning_rate": 4.912367950994358e-06, "loss": 0.1175, "step": 17839 }, { "epoch": 0.5204504346811366, "grad_norm": 0.858855350432069, "learning_rate": 4.91189558979535e-06, "loss": 0.1501, "step": 17840 }, { "epoch": 0.5204796079117802, "grad_norm": 0.7022536918286209, "learning_rate": 4.911423229382915e-06, "loss": 0.1339, "step": 17841 }, { "epoch": 0.5205087811424237, "grad_norm": 0.929108569504628, "learning_rate": 4.910950869761273e-06, "loss": 0.1502, "step": 17842 }, { "epoch": 0.5205379543730673, "grad_norm": 0.810850832292827, "learning_rate": 4.910478510934642e-06, "loss": 0.109, "step": 17843 }, { "epoch": 0.5205671276037108, "grad_norm": 0.7207408769937799, "learning_rate": 4.9100061529072365e-06, "loss": 0.1498, "step": 17844 }, { "epoch": 0.5205963008343544, "grad_norm": 0.875596935326705, "learning_rate": 4.9095337956832744e-06, "loss": 0.1288, "step": 17845 }, { "epoch": 0.5206254740649979, "grad_norm": 0.7938756474073557, "learning_rate": 4.9090614392669735e-06, "loss": 0.1186, "step": 17846 }, { "epoch": 0.5206546472956415, "grad_norm": 1.020857028179819, "learning_rate": 4.90858908366255e-06, "loss": 0.1051, "step": 17847 }, { "epoch": 0.520683820526285, "grad_norm": 1.0545725879355818, "learning_rate": 4.90811672887422e-06, "loss": 0.1255, "step": 17848 }, { "epoch": 0.5207129937569286, "grad_norm": 0.7909117978497822, "learning_rate": 4.907644374906204e-06, "loss": 0.1341, "step": 17849 }, { "epoch": 0.5207421669875723, "grad_norm": 0.7248427416592674, "learning_rate": 4.907172021762715e-06, "loss": 0.1305, "step": 17850 }, { "epoch": 0.5207713402182158, "grad_norm": 0.7969163509737802, "learning_rate": 4.906699669447975e-06, "loss": 0.1369, "step": 17851 }, { "epoch": 0.5208005134488594, "grad_norm": 0.7503094517373456, "learning_rate": 4.9062273179661965e-06, "loss": 0.1368, "step": 17852 }, { "epoch": 0.5208296866795029, "grad_norm": 0.764510925958115, "learning_rate": 4.9057549673215976e-06, "loss": 0.139, "step": 17853 }, { "epoch": 0.5208588599101465, "grad_norm": 0.9238158315036814, "learning_rate": 4.9052826175183946e-06, "loss": 0.1379, "step": 17854 }, { "epoch": 0.52088803314079, "grad_norm": 0.8985348784809547, "learning_rate": 4.904810268560807e-06, "loss": 0.1513, "step": 17855 }, { "epoch": 0.5209172063714336, "grad_norm": 0.9198487150224192, "learning_rate": 4.904337920453053e-06, "loss": 0.1471, "step": 17856 }, { "epoch": 0.5209463796020771, "grad_norm": 1.3279149130796433, "learning_rate": 4.903865573199344e-06, "loss": 0.1306, "step": 17857 }, { "epoch": 0.5209755528327207, "grad_norm": 0.9732633830223836, "learning_rate": 4.903393226803902e-06, "loss": 0.1389, "step": 17858 }, { "epoch": 0.5210047260633642, "grad_norm": 0.9224297104087886, "learning_rate": 4.902920881270942e-06, "loss": 0.146, "step": 17859 }, { "epoch": 0.5210338992940078, "grad_norm": 0.9539881656581738, "learning_rate": 4.902448536604679e-06, "loss": 0.1304, "step": 17860 }, { "epoch": 0.5210630725246513, "grad_norm": 0.7559097551927099, "learning_rate": 4.901976192809335e-06, "loss": 0.123, "step": 17861 }, { "epoch": 0.5210922457552949, "grad_norm": 0.9404253632952876, "learning_rate": 4.901503849889122e-06, "loss": 0.1328, "step": 17862 }, { "epoch": 0.5211214189859386, "grad_norm": 0.8255707159001553, "learning_rate": 4.901031507848261e-06, "loss": 0.1389, "step": 17863 }, { "epoch": 0.5211505922165821, "grad_norm": 0.8598240984880731, "learning_rate": 4.900559166690968e-06, "loss": 0.1184, "step": 17864 }, { "epoch": 0.5211797654472257, "grad_norm": 0.7687898219097739, "learning_rate": 4.900086826421457e-06, "loss": 0.1361, "step": 17865 }, { "epoch": 0.5212089386778692, "grad_norm": 1.2607156104616033, "learning_rate": 4.899614487043945e-06, "loss": 0.136, "step": 17866 }, { "epoch": 0.5212381119085128, "grad_norm": 1.1577965033863988, "learning_rate": 4.899142148562654e-06, "loss": 0.1024, "step": 17867 }, { "epoch": 0.5212672851391563, "grad_norm": 0.8993489940128342, "learning_rate": 4.8986698109817965e-06, "loss": 0.1366, "step": 17868 }, { "epoch": 0.5212964583697999, "grad_norm": 0.8256751835174652, "learning_rate": 4.8981974743055924e-06, "loss": 0.1407, "step": 17869 }, { "epoch": 0.5213256316004434, "grad_norm": 1.8909717940180342, "learning_rate": 4.897725138538256e-06, "loss": 0.1468, "step": 17870 }, { "epoch": 0.521354804831087, "grad_norm": 0.7629539101206839, "learning_rate": 4.897252803684004e-06, "loss": 0.1455, "step": 17871 }, { "epoch": 0.5213839780617305, "grad_norm": 0.844166739001318, "learning_rate": 4.896780469747055e-06, "loss": 0.1389, "step": 17872 }, { "epoch": 0.5214131512923741, "grad_norm": 0.8643156654137838, "learning_rate": 4.896308136731626e-06, "loss": 0.1184, "step": 17873 }, { "epoch": 0.5214423245230176, "grad_norm": 0.6863796990266603, "learning_rate": 4.895835804641933e-06, "loss": 0.1166, "step": 17874 }, { "epoch": 0.5214714977536612, "grad_norm": 0.6952077020349884, "learning_rate": 4.895363473482193e-06, "loss": 0.1326, "step": 17875 }, { "epoch": 0.5215006709843047, "grad_norm": 0.8471047800515966, "learning_rate": 4.894891143256622e-06, "loss": 0.125, "step": 17876 }, { "epoch": 0.5215298442149484, "grad_norm": 0.7736863298443879, "learning_rate": 4.894418813969441e-06, "loss": 0.1225, "step": 17877 }, { "epoch": 0.521559017445592, "grad_norm": 0.7933700083617907, "learning_rate": 4.893946485624859e-06, "loss": 0.1219, "step": 17878 }, { "epoch": 0.5215881906762355, "grad_norm": 0.6815658508332655, "learning_rate": 4.8934741582271e-06, "loss": 0.1132, "step": 17879 }, { "epoch": 0.5216173639068791, "grad_norm": 0.7561530162098145, "learning_rate": 4.893001831780378e-06, "loss": 0.1395, "step": 17880 }, { "epoch": 0.5216465371375226, "grad_norm": 0.8749908551044697, "learning_rate": 4.892529506288911e-06, "loss": 0.1068, "step": 17881 }, { "epoch": 0.5216757103681662, "grad_norm": 1.3182560129097096, "learning_rate": 4.892057181756914e-06, "loss": 0.1061, "step": 17882 }, { "epoch": 0.5217048835988097, "grad_norm": 0.6291798547307672, "learning_rate": 4.891584858188605e-06, "loss": 0.1268, "step": 17883 }, { "epoch": 0.5217340568294533, "grad_norm": 0.7589851009998014, "learning_rate": 4.891112535588199e-06, "loss": 0.1286, "step": 17884 }, { "epoch": 0.5217632300600968, "grad_norm": 0.9022577316131594, "learning_rate": 4.890640213959915e-06, "loss": 0.1268, "step": 17885 }, { "epoch": 0.5217924032907404, "grad_norm": 0.9731030876978904, "learning_rate": 4.890167893307971e-06, "loss": 0.1518, "step": 17886 }, { "epoch": 0.521821576521384, "grad_norm": 0.7098412431093563, "learning_rate": 4.889695573636581e-06, "loss": 0.1295, "step": 17887 }, { "epoch": 0.5218507497520275, "grad_norm": 0.9869573351912039, "learning_rate": 4.889223254949961e-06, "loss": 0.1299, "step": 17888 }, { "epoch": 0.521879922982671, "grad_norm": 0.9072045110410829, "learning_rate": 4.888750937252332e-06, "loss": 0.1259, "step": 17889 }, { "epoch": 0.5219090962133147, "grad_norm": 0.8978386946958383, "learning_rate": 4.8882786205479035e-06, "loss": 0.1377, "step": 17890 }, { "epoch": 0.5219382694439583, "grad_norm": 0.8882988365300463, "learning_rate": 4.887806304840901e-06, "loss": 0.1401, "step": 17891 }, { "epoch": 0.5219674426746018, "grad_norm": 0.9094948331594452, "learning_rate": 4.887333990135536e-06, "loss": 0.1407, "step": 17892 }, { "epoch": 0.5219966159052454, "grad_norm": 0.6853262552371552, "learning_rate": 4.886861676436026e-06, "loss": 0.1167, "step": 17893 }, { "epoch": 0.5220257891358889, "grad_norm": 0.7485944270381532, "learning_rate": 4.886389363746588e-06, "loss": 0.1317, "step": 17894 }, { "epoch": 0.5220549623665325, "grad_norm": 0.8338742387687241, "learning_rate": 4.885917052071439e-06, "loss": 0.1138, "step": 17895 }, { "epoch": 0.522084135597176, "grad_norm": 0.7409709048756515, "learning_rate": 4.885444741414794e-06, "loss": 0.1173, "step": 17896 }, { "epoch": 0.5221133088278196, "grad_norm": 0.7580988775971637, "learning_rate": 4.884972431780872e-06, "loss": 0.1255, "step": 17897 }, { "epoch": 0.5221424820584631, "grad_norm": 0.8213209898400915, "learning_rate": 4.884500123173888e-06, "loss": 0.158, "step": 17898 }, { "epoch": 0.5221716552891067, "grad_norm": 0.8303623412264455, "learning_rate": 4.884027815598061e-06, "loss": 0.1214, "step": 17899 }, { "epoch": 0.5222008285197502, "grad_norm": 0.7614129240235559, "learning_rate": 4.8835555090576054e-06, "loss": 0.108, "step": 17900 }, { "epoch": 0.5222300017503938, "grad_norm": 0.860717154105392, "learning_rate": 4.883083203556738e-06, "loss": 0.1396, "step": 17901 }, { "epoch": 0.5222591749810374, "grad_norm": 0.757320014920277, "learning_rate": 4.882610899099674e-06, "loss": 0.1281, "step": 17902 }, { "epoch": 0.5222883482116809, "grad_norm": 0.8023616902442609, "learning_rate": 4.882138595690635e-06, "loss": 0.1353, "step": 17903 }, { "epoch": 0.5223175214423246, "grad_norm": 0.7335446218700364, "learning_rate": 4.881666293333832e-06, "loss": 0.1366, "step": 17904 }, { "epoch": 0.5223466946729681, "grad_norm": 0.6739565189851321, "learning_rate": 4.881193992033486e-06, "loss": 0.1359, "step": 17905 }, { "epoch": 0.5223758679036117, "grad_norm": 0.911529365925771, "learning_rate": 4.880721691793812e-06, "loss": 0.1478, "step": 17906 }, { "epoch": 0.5224050411342552, "grad_norm": 0.8483509395103137, "learning_rate": 4.880249392619025e-06, "loss": 0.1425, "step": 17907 }, { "epoch": 0.5224342143648988, "grad_norm": 0.8178655393409022, "learning_rate": 4.879777094513341e-06, "loss": 0.1368, "step": 17908 }, { "epoch": 0.5224633875955423, "grad_norm": 0.8577307170717238, "learning_rate": 4.879304797480981e-06, "loss": 0.1404, "step": 17909 }, { "epoch": 0.5224925608261859, "grad_norm": 0.9602509403066533, "learning_rate": 4.878832501526158e-06, "loss": 0.1429, "step": 17910 }, { "epoch": 0.5225217340568294, "grad_norm": 0.8733446378812333, "learning_rate": 4.878360206653091e-06, "loss": 0.15, "step": 17911 }, { "epoch": 0.522550907287473, "grad_norm": 0.9655338759400075, "learning_rate": 4.877887912865994e-06, "loss": 0.1472, "step": 17912 }, { "epoch": 0.5225800805181166, "grad_norm": 0.8931499260447278, "learning_rate": 4.877415620169084e-06, "loss": 0.1356, "step": 17913 }, { "epoch": 0.5226092537487601, "grad_norm": 0.9231256882193069, "learning_rate": 4.876943328566578e-06, "loss": 0.1235, "step": 17914 }, { "epoch": 0.5226384269794037, "grad_norm": 1.1325598380436084, "learning_rate": 4.876471038062693e-06, "loss": 0.1531, "step": 17915 }, { "epoch": 0.5226676002100472, "grad_norm": 1.186704811758057, "learning_rate": 4.875998748661646e-06, "loss": 0.1905, "step": 17916 }, { "epoch": 0.5226967734406909, "grad_norm": 0.8180383593609283, "learning_rate": 4.875526460367651e-06, "loss": 0.1346, "step": 17917 }, { "epoch": 0.5227259466713344, "grad_norm": 0.9811199477158028, "learning_rate": 4.8750541731849274e-06, "loss": 0.1134, "step": 17918 }, { "epoch": 0.522755119901978, "grad_norm": 0.9616250190910933, "learning_rate": 4.874581887117691e-06, "loss": 0.1319, "step": 17919 }, { "epoch": 0.5227842931326215, "grad_norm": 0.8495857545743318, "learning_rate": 4.874109602170154e-06, "loss": 0.1222, "step": 17920 }, { "epoch": 0.5228134663632651, "grad_norm": 0.9981163796100289, "learning_rate": 4.873637318346539e-06, "loss": 0.1475, "step": 17921 }, { "epoch": 0.5228426395939086, "grad_norm": 0.9542983875370893, "learning_rate": 4.8731650356510605e-06, "loss": 0.1239, "step": 17922 }, { "epoch": 0.5228718128245522, "grad_norm": 0.8052732719910499, "learning_rate": 4.872692754087933e-06, "loss": 0.1433, "step": 17923 }, { "epoch": 0.5229009860551957, "grad_norm": 0.7535801357383433, "learning_rate": 4.872220473661376e-06, "loss": 0.1296, "step": 17924 }, { "epoch": 0.5229301592858393, "grad_norm": 0.7714976778803393, "learning_rate": 4.871748194375602e-06, "loss": 0.1479, "step": 17925 }, { "epoch": 0.5229593325164829, "grad_norm": 0.8562842980877414, "learning_rate": 4.871275916234829e-06, "loss": 0.1138, "step": 17926 }, { "epoch": 0.5229885057471264, "grad_norm": 0.7413114455314824, "learning_rate": 4.870803639243275e-06, "loss": 0.1324, "step": 17927 }, { "epoch": 0.52301767897777, "grad_norm": 0.7978088224484438, "learning_rate": 4.8703313634051555e-06, "loss": 0.1557, "step": 17928 }, { "epoch": 0.5230468522084135, "grad_norm": 0.7027258225234448, "learning_rate": 4.869859088724687e-06, "loss": 0.1324, "step": 17929 }, { "epoch": 0.5230760254390571, "grad_norm": 0.7510819893563819, "learning_rate": 4.8693868152060844e-06, "loss": 0.1338, "step": 17930 }, { "epoch": 0.5231051986697007, "grad_norm": 0.8155236962523871, "learning_rate": 4.868914542853566e-06, "loss": 0.1259, "step": 17931 }, { "epoch": 0.5231343719003443, "grad_norm": 0.7732616948535456, "learning_rate": 4.868442271671346e-06, "loss": 0.1276, "step": 17932 }, { "epoch": 0.5231635451309878, "grad_norm": 0.833652499312342, "learning_rate": 4.867970001663644e-06, "loss": 0.1335, "step": 17933 }, { "epoch": 0.5231927183616314, "grad_norm": 0.8366874000975631, "learning_rate": 4.867497732834673e-06, "loss": 0.1387, "step": 17934 }, { "epoch": 0.523221891592275, "grad_norm": 0.7508403598543987, "learning_rate": 4.867025465188651e-06, "loss": 0.1398, "step": 17935 }, { "epoch": 0.5232510648229185, "grad_norm": 0.8776091395129162, "learning_rate": 4.866553198729795e-06, "loss": 0.1144, "step": 17936 }, { "epoch": 0.523280238053562, "grad_norm": 0.7561549927851885, "learning_rate": 4.866080933462318e-06, "loss": 0.1259, "step": 17937 }, { "epoch": 0.5233094112842056, "grad_norm": 0.8702700009110349, "learning_rate": 4.865608669390439e-06, "loss": 0.1182, "step": 17938 }, { "epoch": 0.5233385845148492, "grad_norm": 0.8289225573785339, "learning_rate": 4.8651364065183735e-06, "loss": 0.123, "step": 17939 }, { "epoch": 0.5233677577454927, "grad_norm": 2.2467421701393797, "learning_rate": 4.864664144850339e-06, "loss": 0.1335, "step": 17940 }, { "epoch": 0.5233969309761363, "grad_norm": 0.9348183107750612, "learning_rate": 4.864191884390551e-06, "loss": 0.1268, "step": 17941 }, { "epoch": 0.5234261042067798, "grad_norm": 0.8360624824019696, "learning_rate": 4.863719625143225e-06, "loss": 0.1202, "step": 17942 }, { "epoch": 0.5234552774374234, "grad_norm": 0.8640078876565908, "learning_rate": 4.8632473671125765e-06, "loss": 0.1066, "step": 17943 }, { "epoch": 0.523484450668067, "grad_norm": 0.9376836752967381, "learning_rate": 4.862775110302823e-06, "loss": 0.1333, "step": 17944 }, { "epoch": 0.5235136238987106, "grad_norm": 0.819108078445002, "learning_rate": 4.862302854718181e-06, "loss": 0.1325, "step": 17945 }, { "epoch": 0.5235427971293541, "grad_norm": 0.8755362805588103, "learning_rate": 4.861830600362868e-06, "loss": 0.142, "step": 17946 }, { "epoch": 0.5235719703599977, "grad_norm": 0.7584084125841402, "learning_rate": 4.861358347241097e-06, "loss": 0.1347, "step": 17947 }, { "epoch": 0.5236011435906412, "grad_norm": 0.9762185605530063, "learning_rate": 4.860886095357085e-06, "loss": 0.1266, "step": 17948 }, { "epoch": 0.5236303168212848, "grad_norm": 0.850976016020747, "learning_rate": 4.860413844715048e-06, "loss": 0.1518, "step": 17949 }, { "epoch": 0.5236594900519284, "grad_norm": 1.0727609637353392, "learning_rate": 4.859941595319204e-06, "loss": 0.138, "step": 17950 }, { "epoch": 0.5236886632825719, "grad_norm": 0.7412763744903017, "learning_rate": 4.859469347173769e-06, "loss": 0.1341, "step": 17951 }, { "epoch": 0.5237178365132155, "grad_norm": 0.6602935382793448, "learning_rate": 4.858997100282958e-06, "loss": 0.1229, "step": 17952 }, { "epoch": 0.523747009743859, "grad_norm": 0.8813426744458847, "learning_rate": 4.858524854650986e-06, "loss": 0.0988, "step": 17953 }, { "epoch": 0.5237761829745026, "grad_norm": 0.7512036713866219, "learning_rate": 4.858052610282072e-06, "loss": 0.1381, "step": 17954 }, { "epoch": 0.5238053562051461, "grad_norm": 0.9621052485718656, "learning_rate": 4.857580367180427e-06, "loss": 0.1346, "step": 17955 }, { "epoch": 0.5238345294357897, "grad_norm": 0.7738331930747737, "learning_rate": 4.857108125350274e-06, "loss": 0.1134, "step": 17956 }, { "epoch": 0.5238637026664332, "grad_norm": 0.8259746644171821, "learning_rate": 4.856635884795824e-06, "loss": 0.1252, "step": 17957 }, { "epoch": 0.5238928758970769, "grad_norm": 0.731470398207601, "learning_rate": 4.856163645521295e-06, "loss": 0.1423, "step": 17958 }, { "epoch": 0.5239220491277204, "grad_norm": 0.8188305469047616, "learning_rate": 4.855691407530903e-06, "loss": 0.1247, "step": 17959 }, { "epoch": 0.523951222358364, "grad_norm": 0.8393247378678323, "learning_rate": 4.855219170828863e-06, "loss": 0.136, "step": 17960 }, { "epoch": 0.5239803955890076, "grad_norm": 0.7694881940520427, "learning_rate": 4.854746935419391e-06, "loss": 0.1373, "step": 17961 }, { "epoch": 0.5240095688196511, "grad_norm": 0.6646887651826038, "learning_rate": 4.8542747013067046e-06, "loss": 0.1216, "step": 17962 }, { "epoch": 0.5240387420502947, "grad_norm": 0.7698194675114775, "learning_rate": 4.85380246849502e-06, "loss": 0.1143, "step": 17963 }, { "epoch": 0.5240679152809382, "grad_norm": 0.9663948066989679, "learning_rate": 4.853330236988551e-06, "loss": 0.1528, "step": 17964 }, { "epoch": 0.5240970885115818, "grad_norm": 0.8500157732306555, "learning_rate": 4.852858006791513e-06, "loss": 0.1301, "step": 17965 }, { "epoch": 0.5241262617422253, "grad_norm": 0.6561816000060233, "learning_rate": 4.852385777908127e-06, "loss": 0.1154, "step": 17966 }, { "epoch": 0.5241554349728689, "grad_norm": 1.2660261846960206, "learning_rate": 4.8519135503426014e-06, "loss": 0.1201, "step": 17967 }, { "epoch": 0.5241846082035124, "grad_norm": 0.9443999314956311, "learning_rate": 4.851441324099159e-06, "loss": 0.1088, "step": 17968 }, { "epoch": 0.524213781434156, "grad_norm": 0.8122989850323468, "learning_rate": 4.850969099182013e-06, "loss": 0.1508, "step": 17969 }, { "epoch": 0.5242429546647995, "grad_norm": 0.9833396853502512, "learning_rate": 4.850496875595379e-06, "loss": 0.1233, "step": 17970 }, { "epoch": 0.5242721278954432, "grad_norm": 0.986616223271879, "learning_rate": 4.850024653343473e-06, "loss": 0.1344, "step": 17971 }, { "epoch": 0.5243013011260867, "grad_norm": 0.8007862832192146, "learning_rate": 4.849552432430512e-06, "loss": 0.1372, "step": 17972 }, { "epoch": 0.5243304743567303, "grad_norm": 1.0666085204198423, "learning_rate": 4.849080212860709e-06, "loss": 0.1324, "step": 17973 }, { "epoch": 0.5243596475873739, "grad_norm": 0.9058754526315972, "learning_rate": 4.848607994638282e-06, "loss": 0.1593, "step": 17974 }, { "epoch": 0.5243888208180174, "grad_norm": 0.8441704512336876, "learning_rate": 4.848135777767447e-06, "loss": 0.1104, "step": 17975 }, { "epoch": 0.524417994048661, "grad_norm": 0.8940368108007474, "learning_rate": 4.847663562252422e-06, "loss": 0.1223, "step": 17976 }, { "epoch": 0.5244471672793045, "grad_norm": 0.800211833876078, "learning_rate": 4.8471913480974184e-06, "loss": 0.1291, "step": 17977 }, { "epoch": 0.5244763405099481, "grad_norm": 0.7713669762026707, "learning_rate": 4.846719135306654e-06, "loss": 0.1247, "step": 17978 }, { "epoch": 0.5245055137405916, "grad_norm": 0.9492886397041358, "learning_rate": 4.846246923884343e-06, "loss": 0.136, "step": 17979 }, { "epoch": 0.5245346869712352, "grad_norm": 1.0445991721570165, "learning_rate": 4.845774713834705e-06, "loss": 0.1535, "step": 17980 }, { "epoch": 0.5245638602018787, "grad_norm": 0.8926232799278784, "learning_rate": 4.845302505161954e-06, "loss": 0.1349, "step": 17981 }, { "epoch": 0.5245930334325223, "grad_norm": 0.869689043109833, "learning_rate": 4.844830297870303e-06, "loss": 0.1393, "step": 17982 }, { "epoch": 0.5246222066631658, "grad_norm": 0.9578446700207232, "learning_rate": 4.844358091963971e-06, "loss": 0.1361, "step": 17983 }, { "epoch": 0.5246513798938094, "grad_norm": 1.1267656458186959, "learning_rate": 4.8438858874471754e-06, "loss": 0.1239, "step": 17984 }, { "epoch": 0.524680553124453, "grad_norm": 0.9362500665418243, "learning_rate": 4.843413684324124e-06, "loss": 0.1585, "step": 17985 }, { "epoch": 0.5247097263550966, "grad_norm": 0.8570036739285297, "learning_rate": 4.842941482599041e-06, "loss": 0.1336, "step": 17986 }, { "epoch": 0.5247388995857402, "grad_norm": 1.1604667286988664, "learning_rate": 4.8424692822761395e-06, "loss": 0.1415, "step": 17987 }, { "epoch": 0.5247680728163837, "grad_norm": 1.0697962974820818, "learning_rate": 4.841997083359634e-06, "loss": 0.1177, "step": 17988 }, { "epoch": 0.5247972460470273, "grad_norm": 0.7622841840977652, "learning_rate": 4.841524885853742e-06, "loss": 0.1328, "step": 17989 }, { "epoch": 0.5248264192776708, "grad_norm": 0.9267699854793973, "learning_rate": 4.841052689762676e-06, "loss": 0.1262, "step": 17990 }, { "epoch": 0.5248555925083144, "grad_norm": 1.0435908215995608, "learning_rate": 4.840580495090654e-06, "loss": 0.1209, "step": 17991 }, { "epoch": 0.5248847657389579, "grad_norm": 0.9682931262440511, "learning_rate": 4.840108301841891e-06, "loss": 0.1447, "step": 17992 }, { "epoch": 0.5249139389696015, "grad_norm": 0.5997827566998676, "learning_rate": 4.839636110020605e-06, "loss": 0.1083, "step": 17993 }, { "epoch": 0.524943112200245, "grad_norm": 0.8357244523095261, "learning_rate": 4.839163919631008e-06, "loss": 0.1394, "step": 17994 }, { "epoch": 0.5249722854308886, "grad_norm": 0.8528270064538509, "learning_rate": 4.8386917306773166e-06, "loss": 0.1316, "step": 17995 }, { "epoch": 0.5250014586615321, "grad_norm": 0.7857553200242777, "learning_rate": 4.838219543163749e-06, "loss": 0.1245, "step": 17996 }, { "epoch": 0.5250306318921757, "grad_norm": 0.760674877072806, "learning_rate": 4.837747357094515e-06, "loss": 0.1301, "step": 17997 }, { "epoch": 0.5250598051228194, "grad_norm": 0.8523709086749641, "learning_rate": 4.837275172473837e-06, "loss": 0.177, "step": 17998 }, { "epoch": 0.5250889783534629, "grad_norm": 1.2116779135550535, "learning_rate": 4.836802989305927e-06, "loss": 0.1324, "step": 17999 }, { "epoch": 0.5251181515841065, "grad_norm": 0.8755427782452537, "learning_rate": 4.836330807595e-06, "loss": 0.1252, "step": 18000 }, { "epoch": 0.52514732481475, "grad_norm": 0.8016962301962923, "learning_rate": 4.835858627345273e-06, "loss": 0.1389, "step": 18001 }, { "epoch": 0.5251764980453936, "grad_norm": 0.8869979893036224, "learning_rate": 4.835386448560961e-06, "loss": 0.1498, "step": 18002 }, { "epoch": 0.5252056712760371, "grad_norm": 0.9275440747445214, "learning_rate": 4.834914271246279e-06, "loss": 0.1411, "step": 18003 }, { "epoch": 0.5252348445066807, "grad_norm": 0.8299382513115088, "learning_rate": 4.834442095405443e-06, "loss": 0.1371, "step": 18004 }, { "epoch": 0.5252640177373242, "grad_norm": 0.7344429097988446, "learning_rate": 4.833969921042669e-06, "loss": 0.1354, "step": 18005 }, { "epoch": 0.5252931909679678, "grad_norm": 0.8020292187680751, "learning_rate": 4.833497748162172e-06, "loss": 0.1174, "step": 18006 }, { "epoch": 0.5253223641986113, "grad_norm": 0.7589677544688201, "learning_rate": 4.833025576768168e-06, "loss": 0.1168, "step": 18007 }, { "epoch": 0.5253515374292549, "grad_norm": 1.0809993126779969, "learning_rate": 4.8325534068648705e-06, "loss": 0.134, "step": 18008 }, { "epoch": 0.5253807106598984, "grad_norm": 0.8306161921408216, "learning_rate": 4.8320812384564955e-06, "loss": 0.1252, "step": 18009 }, { "epoch": 0.525409883890542, "grad_norm": 0.7180312168582362, "learning_rate": 4.83160907154726e-06, "loss": 0.1113, "step": 18010 }, { "epoch": 0.5254390571211855, "grad_norm": 0.7818417928655756, "learning_rate": 4.83113690614138e-06, "loss": 0.1431, "step": 18011 }, { "epoch": 0.5254682303518292, "grad_norm": 0.9556346596527349, "learning_rate": 4.830664742243068e-06, "loss": 0.1287, "step": 18012 }, { "epoch": 0.5254974035824728, "grad_norm": 1.2172309169139588, "learning_rate": 4.830192579856541e-06, "loss": 0.135, "step": 18013 }, { "epoch": 0.5255265768131163, "grad_norm": 0.9589657292617719, "learning_rate": 4.829720418986015e-06, "loss": 0.1319, "step": 18014 }, { "epoch": 0.5255557500437599, "grad_norm": 1.05892071662572, "learning_rate": 4.829248259635701e-06, "loss": 0.1404, "step": 18015 }, { "epoch": 0.5255849232744034, "grad_norm": 0.9285604008415789, "learning_rate": 4.828776101809821e-06, "loss": 0.1275, "step": 18016 }, { "epoch": 0.525614096505047, "grad_norm": 0.975494494513817, "learning_rate": 4.8283039455125865e-06, "loss": 0.1325, "step": 18017 }, { "epoch": 0.5256432697356905, "grad_norm": 0.7933315168665201, "learning_rate": 4.827831790748213e-06, "loss": 0.1181, "step": 18018 }, { "epoch": 0.5256724429663341, "grad_norm": 1.0561985706953883, "learning_rate": 4.827359637520917e-06, "loss": 0.1438, "step": 18019 }, { "epoch": 0.5257016161969776, "grad_norm": 0.8767416437329977, "learning_rate": 4.826887485834913e-06, "loss": 0.1425, "step": 18020 }, { "epoch": 0.5257307894276212, "grad_norm": 1.030326838227654, "learning_rate": 4.826415335694414e-06, "loss": 0.1291, "step": 18021 }, { "epoch": 0.5257599626582647, "grad_norm": 0.6721898444369182, "learning_rate": 4.8259431871036395e-06, "loss": 0.1278, "step": 18022 }, { "epoch": 0.5257891358889083, "grad_norm": 0.709761200362223, "learning_rate": 4.825471040066803e-06, "loss": 0.1208, "step": 18023 }, { "epoch": 0.5258183091195519, "grad_norm": 0.8058778338832103, "learning_rate": 4.824998894588118e-06, "loss": 0.1473, "step": 18024 }, { "epoch": 0.5258474823501954, "grad_norm": 0.7934397989321833, "learning_rate": 4.824526750671802e-06, "loss": 0.1096, "step": 18025 }, { "epoch": 0.5258766555808391, "grad_norm": 0.7762464528155616, "learning_rate": 4.8240546083220705e-06, "loss": 0.1274, "step": 18026 }, { "epoch": 0.5259058288114826, "grad_norm": 0.8353728222100885, "learning_rate": 4.823582467543133e-06, "loss": 0.1287, "step": 18027 }, { "epoch": 0.5259350020421262, "grad_norm": 1.001478652873019, "learning_rate": 4.823110328339213e-06, "loss": 0.1248, "step": 18028 }, { "epoch": 0.5259641752727697, "grad_norm": 0.7875069717632527, "learning_rate": 4.822638190714521e-06, "loss": 0.1423, "step": 18029 }, { "epoch": 0.5259933485034133, "grad_norm": 1.136917047056234, "learning_rate": 4.822166054673273e-06, "loss": 0.1286, "step": 18030 }, { "epoch": 0.5260225217340568, "grad_norm": 0.8338412718231685, "learning_rate": 4.821693920219684e-06, "loss": 0.1506, "step": 18031 }, { "epoch": 0.5260516949647004, "grad_norm": 0.8810387903852749, "learning_rate": 4.821221787357969e-06, "loss": 0.1283, "step": 18032 }, { "epoch": 0.5260808681953439, "grad_norm": 0.8387559980864229, "learning_rate": 4.820749656092342e-06, "loss": 0.1445, "step": 18033 }, { "epoch": 0.5261100414259875, "grad_norm": 0.6970389163980308, "learning_rate": 4.820277526427019e-06, "loss": 0.1138, "step": 18034 }, { "epoch": 0.526139214656631, "grad_norm": 1.1730141652813515, "learning_rate": 4.8198053983662175e-06, "loss": 0.1412, "step": 18035 }, { "epoch": 0.5261683878872746, "grad_norm": 0.8330160665392379, "learning_rate": 4.81933327191415e-06, "loss": 0.1441, "step": 18036 }, { "epoch": 0.5261975611179182, "grad_norm": 1.1819803911284248, "learning_rate": 4.818861147075031e-06, "loss": 0.1326, "step": 18037 }, { "epoch": 0.5262267343485617, "grad_norm": 0.7637892124219897, "learning_rate": 4.818389023853077e-06, "loss": 0.1344, "step": 18038 }, { "epoch": 0.5262559075792054, "grad_norm": 0.8886704083543827, "learning_rate": 4.817916902252501e-06, "loss": 0.1468, "step": 18039 }, { "epoch": 0.5262850808098489, "grad_norm": 0.738187964168913, "learning_rate": 4.817444782277521e-06, "loss": 0.129, "step": 18040 }, { "epoch": 0.5263142540404925, "grad_norm": 0.739917219648246, "learning_rate": 4.8169726639323514e-06, "loss": 0.1182, "step": 18041 }, { "epoch": 0.526343427271136, "grad_norm": 0.703088669527814, "learning_rate": 4.816500547221204e-06, "loss": 0.1242, "step": 18042 }, { "epoch": 0.5263726005017796, "grad_norm": 0.9440728665811604, "learning_rate": 4.816028432148298e-06, "loss": 0.1125, "step": 18043 }, { "epoch": 0.5264017737324231, "grad_norm": 0.6979161404789853, "learning_rate": 4.8155563187178454e-06, "loss": 0.1252, "step": 18044 }, { "epoch": 0.5264309469630667, "grad_norm": 0.9156033733058908, "learning_rate": 4.815084206934059e-06, "loss": 0.1681, "step": 18045 }, { "epoch": 0.5264601201937102, "grad_norm": 0.8652140379149614, "learning_rate": 4.8146120968011605e-06, "loss": 0.137, "step": 18046 }, { "epoch": 0.5264892934243538, "grad_norm": 1.2206878237891405, "learning_rate": 4.81413998832336e-06, "loss": 0.1134, "step": 18047 }, { "epoch": 0.5265184666549974, "grad_norm": 0.8484945222426621, "learning_rate": 4.813667881504872e-06, "loss": 0.1251, "step": 18048 }, { "epoch": 0.5265476398856409, "grad_norm": 0.8924153261661256, "learning_rate": 4.813195776349915e-06, "loss": 0.1281, "step": 18049 }, { "epoch": 0.5265768131162845, "grad_norm": 0.8985907140185593, "learning_rate": 4.8127236728627005e-06, "loss": 0.1218, "step": 18050 }, { "epoch": 0.526605986346928, "grad_norm": 0.7852840943978696, "learning_rate": 4.8122515710474426e-06, "loss": 0.1316, "step": 18051 }, { "epoch": 0.5266351595775716, "grad_norm": 0.850122060780698, "learning_rate": 4.8117794709083595e-06, "loss": 0.1353, "step": 18052 }, { "epoch": 0.5266643328082152, "grad_norm": 0.7281238097267843, "learning_rate": 4.811307372449665e-06, "loss": 0.1529, "step": 18053 }, { "epoch": 0.5266935060388588, "grad_norm": 0.8030551206498359, "learning_rate": 4.810835275675572e-06, "loss": 0.1123, "step": 18054 }, { "epoch": 0.5267226792695023, "grad_norm": 0.8632120116245481, "learning_rate": 4.810363180590298e-06, "loss": 0.1318, "step": 18055 }, { "epoch": 0.5267518525001459, "grad_norm": 0.7320787543014441, "learning_rate": 4.809891087198056e-06, "loss": 0.1319, "step": 18056 }, { "epoch": 0.5267810257307894, "grad_norm": 0.8081987981181039, "learning_rate": 4.8094189955030576e-06, "loss": 0.1503, "step": 18057 }, { "epoch": 0.526810198961433, "grad_norm": 0.9697511601745913, "learning_rate": 4.808946905509524e-06, "loss": 0.1378, "step": 18058 }, { "epoch": 0.5268393721920765, "grad_norm": 0.7832968130363371, "learning_rate": 4.808474817221666e-06, "loss": 0.1357, "step": 18059 }, { "epoch": 0.5268685454227201, "grad_norm": 0.743714129006087, "learning_rate": 4.808002730643699e-06, "loss": 0.1348, "step": 18060 }, { "epoch": 0.5268977186533637, "grad_norm": 0.8473482931279858, "learning_rate": 4.80753064577984e-06, "loss": 0.1377, "step": 18061 }, { "epoch": 0.5269268918840072, "grad_norm": 0.9650357162729263, "learning_rate": 4.807058562634299e-06, "loss": 0.1463, "step": 18062 }, { "epoch": 0.5269560651146508, "grad_norm": 0.9278197670917236, "learning_rate": 4.806586481211293e-06, "loss": 0.1289, "step": 18063 }, { "epoch": 0.5269852383452943, "grad_norm": 1.0563501491813705, "learning_rate": 4.806114401515037e-06, "loss": 0.1137, "step": 18064 }, { "epoch": 0.5270144115759379, "grad_norm": 0.8898702218224827, "learning_rate": 4.805642323549746e-06, "loss": 0.1269, "step": 18065 }, { "epoch": 0.5270435848065815, "grad_norm": 1.1734522270005805, "learning_rate": 4.805170247319634e-06, "loss": 0.1188, "step": 18066 }, { "epoch": 0.5270727580372251, "grad_norm": 0.9957179615002449, "learning_rate": 4.804698172828915e-06, "loss": 0.1203, "step": 18067 }, { "epoch": 0.5271019312678686, "grad_norm": 0.9750430119631452, "learning_rate": 4.804226100081805e-06, "loss": 0.1241, "step": 18068 }, { "epoch": 0.5271311044985122, "grad_norm": 0.8965316300644635, "learning_rate": 4.803754029082516e-06, "loss": 0.1403, "step": 18069 }, { "epoch": 0.5271602777291557, "grad_norm": 0.9609371465783336, "learning_rate": 4.803281959835265e-06, "loss": 0.1589, "step": 18070 }, { "epoch": 0.5271894509597993, "grad_norm": 1.0046840178298868, "learning_rate": 4.802809892344267e-06, "loss": 0.1542, "step": 18071 }, { "epoch": 0.5272186241904429, "grad_norm": 0.8883119399217058, "learning_rate": 4.802337826613733e-06, "loss": 0.1275, "step": 18072 }, { "epoch": 0.5272477974210864, "grad_norm": 1.129326124691106, "learning_rate": 4.801865762647881e-06, "loss": 0.1218, "step": 18073 }, { "epoch": 0.52727697065173, "grad_norm": 0.8033499427478014, "learning_rate": 4.8013937004509255e-06, "loss": 0.1454, "step": 18074 }, { "epoch": 0.5273061438823735, "grad_norm": 0.838794416406774, "learning_rate": 4.800921640027075e-06, "loss": 0.1143, "step": 18075 }, { "epoch": 0.5273353171130171, "grad_norm": 0.7796150481679842, "learning_rate": 4.800449581380553e-06, "loss": 0.1467, "step": 18076 }, { "epoch": 0.5273644903436606, "grad_norm": 0.7990660761113042, "learning_rate": 4.799977524515569e-06, "loss": 0.1448, "step": 18077 }, { "epoch": 0.5273936635743042, "grad_norm": 0.7281474478754107, "learning_rate": 4.799505469436336e-06, "loss": 0.1143, "step": 18078 }, { "epoch": 0.5274228368049477, "grad_norm": 0.6220072426216634, "learning_rate": 4.799033416147072e-06, "loss": 0.1214, "step": 18079 }, { "epoch": 0.5274520100355914, "grad_norm": 0.8078280407580145, "learning_rate": 4.798561364651989e-06, "loss": 0.129, "step": 18080 }, { "epoch": 0.5274811832662349, "grad_norm": 0.9613274756810768, "learning_rate": 4.798089314955301e-06, "loss": 0.1186, "step": 18081 }, { "epoch": 0.5275103564968785, "grad_norm": 0.9082737903113405, "learning_rate": 4.797617267061225e-06, "loss": 0.1264, "step": 18082 }, { "epoch": 0.527539529727522, "grad_norm": 0.8825640198193014, "learning_rate": 4.797145220973974e-06, "loss": 0.1178, "step": 18083 }, { "epoch": 0.5275687029581656, "grad_norm": 1.0322207175883822, "learning_rate": 4.796673176697761e-06, "loss": 0.1431, "step": 18084 }, { "epoch": 0.5275978761888092, "grad_norm": 0.8572898625618728, "learning_rate": 4.796201134236802e-06, "loss": 0.1313, "step": 18085 }, { "epoch": 0.5276270494194527, "grad_norm": 0.6383544910445332, "learning_rate": 4.795729093595311e-06, "loss": 0.1033, "step": 18086 }, { "epoch": 0.5276562226500963, "grad_norm": 1.1487167821455868, "learning_rate": 4.795257054777498e-06, "loss": 0.1352, "step": 18087 }, { "epoch": 0.5276853958807398, "grad_norm": 1.227802510553886, "learning_rate": 4.794785017787586e-06, "loss": 0.1281, "step": 18088 }, { "epoch": 0.5277145691113834, "grad_norm": 1.0471796945750702, "learning_rate": 4.794312982629782e-06, "loss": 0.1271, "step": 18089 }, { "epoch": 0.5277437423420269, "grad_norm": 1.015561482373048, "learning_rate": 4.793840949308303e-06, "loss": 0.1337, "step": 18090 }, { "epoch": 0.5277729155726705, "grad_norm": 1.2683844195254879, "learning_rate": 4.793368917827364e-06, "loss": 0.1393, "step": 18091 }, { "epoch": 0.527802088803314, "grad_norm": 1.0412588226822705, "learning_rate": 4.792896888191178e-06, "loss": 0.1199, "step": 18092 }, { "epoch": 0.5278312620339577, "grad_norm": 0.8197454736929621, "learning_rate": 4.792424860403956e-06, "loss": 0.1458, "step": 18093 }, { "epoch": 0.5278604352646012, "grad_norm": 0.937736034001576, "learning_rate": 4.791952834469918e-06, "loss": 0.1415, "step": 18094 }, { "epoch": 0.5278896084952448, "grad_norm": 0.7921275622064367, "learning_rate": 4.791480810393274e-06, "loss": 0.1434, "step": 18095 }, { "epoch": 0.5279187817258884, "grad_norm": 0.981332889317743, "learning_rate": 4.791008788178242e-06, "loss": 0.1139, "step": 18096 }, { "epoch": 0.5279479549565319, "grad_norm": 0.7827055333623226, "learning_rate": 4.790536767829031e-06, "loss": 0.1229, "step": 18097 }, { "epoch": 0.5279771281871755, "grad_norm": 0.7327383006507882, "learning_rate": 4.790064749349859e-06, "loss": 0.1229, "step": 18098 }, { "epoch": 0.528006301417819, "grad_norm": 0.9917459690118268, "learning_rate": 4.789592732744938e-06, "loss": 0.145, "step": 18099 }, { "epoch": 0.5280354746484626, "grad_norm": 0.8834634369964915, "learning_rate": 4.789120718018483e-06, "loss": 0.1365, "step": 18100 }, { "epoch": 0.5280646478791061, "grad_norm": 0.8989532761807614, "learning_rate": 4.788648705174709e-06, "loss": 0.1509, "step": 18101 }, { "epoch": 0.5280938211097497, "grad_norm": 0.9894850757227963, "learning_rate": 4.788176694217829e-06, "loss": 0.1635, "step": 18102 }, { "epoch": 0.5281229943403932, "grad_norm": 0.7255062625172629, "learning_rate": 4.787704685152056e-06, "loss": 0.1191, "step": 18103 }, { "epoch": 0.5281521675710368, "grad_norm": 0.9273245366090763, "learning_rate": 4.787232677981606e-06, "loss": 0.1336, "step": 18104 }, { "epoch": 0.5281813408016803, "grad_norm": 1.0659834874039709, "learning_rate": 4.786760672710688e-06, "loss": 0.1449, "step": 18105 }, { "epoch": 0.5282105140323239, "grad_norm": 0.9509628085125836, "learning_rate": 4.786288669343524e-06, "loss": 0.1389, "step": 18106 }, { "epoch": 0.5282396872629675, "grad_norm": 0.8294774142889741, "learning_rate": 4.785816667884322e-06, "loss": 0.1167, "step": 18107 }, { "epoch": 0.5282688604936111, "grad_norm": 0.7845219601055182, "learning_rate": 4.785344668337298e-06, "loss": 0.1248, "step": 18108 }, { "epoch": 0.5282980337242547, "grad_norm": 1.0530402775112828, "learning_rate": 4.784872670706667e-06, "loss": 0.1316, "step": 18109 }, { "epoch": 0.5283272069548982, "grad_norm": 0.7586720307979221, "learning_rate": 4.78440067499664e-06, "loss": 0.109, "step": 18110 }, { "epoch": 0.5283563801855418, "grad_norm": 0.7611260963669129, "learning_rate": 4.783928681211431e-06, "loss": 0.1457, "step": 18111 }, { "epoch": 0.5283855534161853, "grad_norm": 0.7210615560156368, "learning_rate": 4.7834566893552566e-06, "loss": 0.1175, "step": 18112 }, { "epoch": 0.5284147266468289, "grad_norm": 0.7575481673555559, "learning_rate": 4.78298469943233e-06, "loss": 0.1223, "step": 18113 }, { "epoch": 0.5284438998774724, "grad_norm": 0.9555994621082579, "learning_rate": 4.782512711446864e-06, "loss": 0.1534, "step": 18114 }, { "epoch": 0.528473073108116, "grad_norm": 0.604928116169317, "learning_rate": 4.782040725403071e-06, "loss": 0.1299, "step": 18115 }, { "epoch": 0.5285022463387595, "grad_norm": 0.7659277118805252, "learning_rate": 4.781568741305168e-06, "loss": 0.1377, "step": 18116 }, { "epoch": 0.5285314195694031, "grad_norm": 0.8654886039910025, "learning_rate": 4.781096759157365e-06, "loss": 0.1313, "step": 18117 }, { "epoch": 0.5285605928000466, "grad_norm": 0.6664524862480887, "learning_rate": 4.78062477896388e-06, "loss": 0.1355, "step": 18118 }, { "epoch": 0.5285897660306902, "grad_norm": 0.5936013882066392, "learning_rate": 4.780152800728924e-06, "loss": 0.113, "step": 18119 }, { "epoch": 0.5286189392613339, "grad_norm": 0.7703810986902896, "learning_rate": 4.779680824456711e-06, "loss": 0.1262, "step": 18120 }, { "epoch": 0.5286481124919774, "grad_norm": 0.7784612891200107, "learning_rate": 4.779208850151456e-06, "loss": 0.1179, "step": 18121 }, { "epoch": 0.528677285722621, "grad_norm": 0.8464230582609625, "learning_rate": 4.778736877817371e-06, "loss": 0.1367, "step": 18122 }, { "epoch": 0.5287064589532645, "grad_norm": 0.7059243571332273, "learning_rate": 4.778264907458669e-06, "loss": 0.1244, "step": 18123 }, { "epoch": 0.5287356321839081, "grad_norm": 1.0484842853668985, "learning_rate": 4.777792939079566e-06, "loss": 0.1199, "step": 18124 }, { "epoch": 0.5287648054145516, "grad_norm": 0.87445289181572, "learning_rate": 4.777320972684275e-06, "loss": 0.1375, "step": 18125 }, { "epoch": 0.5287939786451952, "grad_norm": 0.955840944166885, "learning_rate": 4.77684900827701e-06, "loss": 0.1306, "step": 18126 }, { "epoch": 0.5288231518758387, "grad_norm": 1.0386475112368847, "learning_rate": 4.776377045861983e-06, "loss": 0.1288, "step": 18127 }, { "epoch": 0.5288523251064823, "grad_norm": 0.7025667394062385, "learning_rate": 4.775905085443407e-06, "loss": 0.1256, "step": 18128 }, { "epoch": 0.5288814983371258, "grad_norm": 1.1036444458007375, "learning_rate": 4.775433127025498e-06, "loss": 0.1225, "step": 18129 }, { "epoch": 0.5289106715677694, "grad_norm": 0.6748829544189402, "learning_rate": 4.774961170612468e-06, "loss": 0.1239, "step": 18130 }, { "epoch": 0.5289398447984129, "grad_norm": 0.7721274296734262, "learning_rate": 4.774489216208532e-06, "loss": 0.1278, "step": 18131 }, { "epoch": 0.5289690180290565, "grad_norm": 0.7973691056935861, "learning_rate": 4.774017263817902e-06, "loss": 0.1169, "step": 18132 }, { "epoch": 0.5289981912597, "grad_norm": 0.8711552955112548, "learning_rate": 4.773545313444792e-06, "loss": 0.1538, "step": 18133 }, { "epoch": 0.5290273644903437, "grad_norm": 0.9545024171331314, "learning_rate": 4.773073365093417e-06, "loss": 0.1515, "step": 18134 }, { "epoch": 0.5290565377209873, "grad_norm": 0.6827698208518546, "learning_rate": 4.772601418767983e-06, "loss": 0.1116, "step": 18135 }, { "epoch": 0.5290857109516308, "grad_norm": 0.927146834457475, "learning_rate": 4.772129474472715e-06, "loss": 0.1261, "step": 18136 }, { "epoch": 0.5291148841822744, "grad_norm": 0.9836776608461734, "learning_rate": 4.771657532211819e-06, "loss": 0.138, "step": 18137 }, { "epoch": 0.5291440574129179, "grad_norm": 0.7881476004287299, "learning_rate": 4.77118559198951e-06, "loss": 0.1208, "step": 18138 }, { "epoch": 0.5291732306435615, "grad_norm": 0.8841536043278565, "learning_rate": 4.7707136538100026e-06, "loss": 0.1395, "step": 18139 }, { "epoch": 0.529202403874205, "grad_norm": 1.2884414593715057, "learning_rate": 4.770241717677506e-06, "loss": 0.1543, "step": 18140 }, { "epoch": 0.5292315771048486, "grad_norm": 0.8431313028877536, "learning_rate": 4.769769783596238e-06, "loss": 0.1508, "step": 18141 }, { "epoch": 0.5292607503354921, "grad_norm": 0.8680004736037638, "learning_rate": 4.769297851570411e-06, "loss": 0.1198, "step": 18142 }, { "epoch": 0.5292899235661357, "grad_norm": 0.9459812532286426, "learning_rate": 4.768825921604238e-06, "loss": 0.1185, "step": 18143 }, { "epoch": 0.5293190967967792, "grad_norm": 1.0297871352150496, "learning_rate": 4.768353993701931e-06, "loss": 0.1218, "step": 18144 }, { "epoch": 0.5293482700274228, "grad_norm": 0.8701244527498139, "learning_rate": 4.767882067867705e-06, "loss": 0.1291, "step": 18145 }, { "epoch": 0.5293774432580663, "grad_norm": 1.066987590461949, "learning_rate": 4.7674101441057705e-06, "loss": 0.1386, "step": 18146 }, { "epoch": 0.52940661648871, "grad_norm": 0.9628167726103253, "learning_rate": 4.766938222420344e-06, "loss": 0.1346, "step": 18147 }, { "epoch": 0.5294357897193536, "grad_norm": 0.9860449775612433, "learning_rate": 4.766466302815639e-06, "loss": 0.1339, "step": 18148 }, { "epoch": 0.5294649629499971, "grad_norm": 0.787033332965655, "learning_rate": 4.765994385295865e-06, "loss": 0.1201, "step": 18149 }, { "epoch": 0.5294941361806407, "grad_norm": 0.7549511063452695, "learning_rate": 4.765522469865239e-06, "loss": 0.1245, "step": 18150 }, { "epoch": 0.5295233094112842, "grad_norm": 0.9348648305971067, "learning_rate": 4.765050556527973e-06, "loss": 0.1428, "step": 18151 }, { "epoch": 0.5295524826419278, "grad_norm": 0.8012189678711377, "learning_rate": 4.7645786452882746e-06, "loss": 0.1439, "step": 18152 }, { "epoch": 0.5295816558725713, "grad_norm": 0.7953172030733313, "learning_rate": 4.764106736150367e-06, "loss": 0.131, "step": 18153 }, { "epoch": 0.5296108291032149, "grad_norm": 0.7893499175576796, "learning_rate": 4.7636348291184555e-06, "loss": 0.1301, "step": 18154 }, { "epoch": 0.5296400023338584, "grad_norm": 0.7676449840213458, "learning_rate": 4.763162924196757e-06, "loss": 0.1129, "step": 18155 }, { "epoch": 0.529669175564502, "grad_norm": 0.8807712011856247, "learning_rate": 4.762691021389484e-06, "loss": 0.1423, "step": 18156 }, { "epoch": 0.5296983487951455, "grad_norm": 0.8807696551720602, "learning_rate": 4.762219120700848e-06, "loss": 0.1317, "step": 18157 }, { "epoch": 0.5297275220257891, "grad_norm": 1.0291963794905532, "learning_rate": 4.761747222135062e-06, "loss": 0.1134, "step": 18158 }, { "epoch": 0.5297566952564327, "grad_norm": 0.8358811957714001, "learning_rate": 4.7612753256963405e-06, "loss": 0.1367, "step": 18159 }, { "epoch": 0.5297858684870762, "grad_norm": 0.932982533203107, "learning_rate": 4.760803431388896e-06, "loss": 0.1354, "step": 18160 }, { "epoch": 0.5298150417177199, "grad_norm": 1.0072010753475942, "learning_rate": 4.760331539216943e-06, "loss": 0.1431, "step": 18161 }, { "epoch": 0.5298442149483634, "grad_norm": 0.9612798982168413, "learning_rate": 4.759859649184692e-06, "loss": 0.1273, "step": 18162 }, { "epoch": 0.529873388179007, "grad_norm": 0.7939814182068036, "learning_rate": 4.759387761296355e-06, "loss": 0.1472, "step": 18163 }, { "epoch": 0.5299025614096505, "grad_norm": 0.6750643649613849, "learning_rate": 4.758915875556147e-06, "loss": 0.1361, "step": 18164 }, { "epoch": 0.5299317346402941, "grad_norm": 1.0139754207777394, "learning_rate": 4.758443991968282e-06, "loss": 0.1373, "step": 18165 }, { "epoch": 0.5299609078709376, "grad_norm": 0.9152048475594219, "learning_rate": 4.7579721105369705e-06, "loss": 0.1263, "step": 18166 }, { "epoch": 0.5299900811015812, "grad_norm": 1.0364722244867663, "learning_rate": 4.757500231266427e-06, "loss": 0.1678, "step": 18167 }, { "epoch": 0.5300192543322247, "grad_norm": 0.8473708298773877, "learning_rate": 4.757028354160862e-06, "loss": 0.1267, "step": 18168 }, { "epoch": 0.5300484275628683, "grad_norm": 0.879045223881066, "learning_rate": 4.756556479224493e-06, "loss": 0.1385, "step": 18169 }, { "epoch": 0.5300776007935118, "grad_norm": 0.9393381422047005, "learning_rate": 4.756084606461526e-06, "loss": 0.1461, "step": 18170 }, { "epoch": 0.5301067740241554, "grad_norm": 1.0444034765539267, "learning_rate": 4.7556127358761785e-06, "loss": 0.1426, "step": 18171 }, { "epoch": 0.530135947254799, "grad_norm": 0.8727968068672698, "learning_rate": 4.755140867472663e-06, "loss": 0.1317, "step": 18172 }, { "epoch": 0.5301651204854425, "grad_norm": 0.9287140610081616, "learning_rate": 4.754669001255192e-06, "loss": 0.1292, "step": 18173 }, { "epoch": 0.5301942937160862, "grad_norm": 1.0607407479073734, "learning_rate": 4.754197137227976e-06, "loss": 0.1476, "step": 18174 }, { "epoch": 0.5302234669467297, "grad_norm": 0.8928006746861766, "learning_rate": 4.753725275395229e-06, "loss": 0.1408, "step": 18175 }, { "epoch": 0.5302526401773733, "grad_norm": 0.8564182428234062, "learning_rate": 4.753253415761164e-06, "loss": 0.1365, "step": 18176 }, { "epoch": 0.5302818134080168, "grad_norm": 0.8327865366363296, "learning_rate": 4.752781558329994e-06, "loss": 0.1476, "step": 18177 }, { "epoch": 0.5303109866386604, "grad_norm": 0.8220980041107818, "learning_rate": 4.752309703105933e-06, "loss": 0.1617, "step": 18178 }, { "epoch": 0.5303401598693039, "grad_norm": 0.8700150925041665, "learning_rate": 4.75183785009319e-06, "loss": 0.1533, "step": 18179 }, { "epoch": 0.5303693330999475, "grad_norm": 0.8551945400335113, "learning_rate": 4.7513659992959795e-06, "loss": 0.1315, "step": 18180 }, { "epoch": 0.530398506330591, "grad_norm": 0.8874755265774167, "learning_rate": 4.750894150718516e-06, "loss": 0.1223, "step": 18181 }, { "epoch": 0.5304276795612346, "grad_norm": 0.9902142340042741, "learning_rate": 4.750422304365006e-06, "loss": 0.133, "step": 18182 }, { "epoch": 0.5304568527918782, "grad_norm": 0.826339138436057, "learning_rate": 4.749950460239669e-06, "loss": 0.1382, "step": 18183 }, { "epoch": 0.5304860260225217, "grad_norm": 0.9591896728975197, "learning_rate": 4.749478618346714e-06, "loss": 0.1482, "step": 18184 }, { "epoch": 0.5305151992531653, "grad_norm": 1.0308127724807137, "learning_rate": 4.749006778690354e-06, "loss": 0.1308, "step": 18185 }, { "epoch": 0.5305443724838088, "grad_norm": 0.8470755796075768, "learning_rate": 4.748534941274803e-06, "loss": 0.1498, "step": 18186 }, { "epoch": 0.5305735457144524, "grad_norm": 0.730272608511541, "learning_rate": 4.748063106104271e-06, "loss": 0.1125, "step": 18187 }, { "epoch": 0.530602718945096, "grad_norm": 0.7683053062257869, "learning_rate": 4.74759127318297e-06, "loss": 0.1319, "step": 18188 }, { "epoch": 0.5306318921757396, "grad_norm": 0.7876833514650872, "learning_rate": 4.7471194425151145e-06, "loss": 0.1086, "step": 18189 }, { "epoch": 0.5306610654063831, "grad_norm": 0.6449356624882449, "learning_rate": 4.746647614104917e-06, "loss": 0.1102, "step": 18190 }, { "epoch": 0.5306902386370267, "grad_norm": 0.7419098809986556, "learning_rate": 4.74617578795659e-06, "loss": 0.137, "step": 18191 }, { "epoch": 0.5307194118676702, "grad_norm": 0.8498207794600134, "learning_rate": 4.745703964074344e-06, "loss": 0.1456, "step": 18192 }, { "epoch": 0.5307485850983138, "grad_norm": 0.8679690788819033, "learning_rate": 4.745232142462392e-06, "loss": 0.1223, "step": 18193 }, { "epoch": 0.5307777583289573, "grad_norm": 0.6259903203854398, "learning_rate": 4.744760323124945e-06, "loss": 0.1455, "step": 18194 }, { "epoch": 0.5308069315596009, "grad_norm": 0.7957118791469273, "learning_rate": 4.744288506066219e-06, "loss": 0.1343, "step": 18195 }, { "epoch": 0.5308361047902445, "grad_norm": 0.8133105330846443, "learning_rate": 4.743816691290425e-06, "loss": 0.1217, "step": 18196 }, { "epoch": 0.530865278020888, "grad_norm": 0.7151682397135266, "learning_rate": 4.743344878801773e-06, "loss": 0.1258, "step": 18197 }, { "epoch": 0.5308944512515316, "grad_norm": 0.761165780384021, "learning_rate": 4.742873068604477e-06, "loss": 0.1513, "step": 18198 }, { "epoch": 0.5309236244821751, "grad_norm": 0.8718161032869127, "learning_rate": 4.74240126070275e-06, "loss": 0.1281, "step": 18199 }, { "epoch": 0.5309527977128187, "grad_norm": 0.8866589715428614, "learning_rate": 4.7419294551008e-06, "loss": 0.1262, "step": 18200 }, { "epoch": 0.5309819709434623, "grad_norm": 0.8178419401134385, "learning_rate": 4.741457651802844e-06, "loss": 0.1365, "step": 18201 }, { "epoch": 0.5310111441741059, "grad_norm": 0.8823398417940378, "learning_rate": 4.7409858508130925e-06, "loss": 0.1512, "step": 18202 }, { "epoch": 0.5310403174047494, "grad_norm": 0.9261725894905353, "learning_rate": 4.7405140521357585e-06, "loss": 0.145, "step": 18203 }, { "epoch": 0.531069490635393, "grad_norm": 0.927896716636194, "learning_rate": 4.740042255775052e-06, "loss": 0.1288, "step": 18204 }, { "epoch": 0.5310986638660365, "grad_norm": 0.7190119095763443, "learning_rate": 4.739570461735186e-06, "loss": 0.1238, "step": 18205 }, { "epoch": 0.5311278370966801, "grad_norm": 0.9254830175686871, "learning_rate": 4.739098670020372e-06, "loss": 0.1329, "step": 18206 }, { "epoch": 0.5311570103273237, "grad_norm": 0.7772275685820007, "learning_rate": 4.738626880634823e-06, "loss": 0.1603, "step": 18207 }, { "epoch": 0.5311861835579672, "grad_norm": 0.9083866916896929, "learning_rate": 4.738155093582753e-06, "loss": 0.1805, "step": 18208 }, { "epoch": 0.5312153567886108, "grad_norm": 0.9578283106622949, "learning_rate": 4.73768330886837e-06, "loss": 0.1221, "step": 18209 }, { "epoch": 0.5312445300192543, "grad_norm": 0.9589037101494035, "learning_rate": 4.7372115264958885e-06, "loss": 0.1472, "step": 18210 }, { "epoch": 0.5312737032498979, "grad_norm": 0.8128078720110864, "learning_rate": 4.736739746469521e-06, "loss": 0.1292, "step": 18211 }, { "epoch": 0.5313028764805414, "grad_norm": 1.1196850412460473, "learning_rate": 4.736267968793474e-06, "loss": 0.1276, "step": 18212 }, { "epoch": 0.531332049711185, "grad_norm": 0.9749254399697772, "learning_rate": 4.735796193471967e-06, "loss": 0.1386, "step": 18213 }, { "epoch": 0.5313612229418285, "grad_norm": 0.8128488652324242, "learning_rate": 4.735324420509208e-06, "loss": 0.1253, "step": 18214 }, { "epoch": 0.5313903961724722, "grad_norm": 0.7719495539185899, "learning_rate": 4.734852649909409e-06, "loss": 0.1437, "step": 18215 }, { "epoch": 0.5314195694031157, "grad_norm": 1.3777910466487588, "learning_rate": 4.734380881676783e-06, "loss": 0.1249, "step": 18216 }, { "epoch": 0.5314487426337593, "grad_norm": 0.9424403854254995, "learning_rate": 4.733909115815541e-06, "loss": 0.1421, "step": 18217 }, { "epoch": 0.5314779158644029, "grad_norm": 0.7700984218747459, "learning_rate": 4.733437352329893e-06, "loss": 0.128, "step": 18218 }, { "epoch": 0.5315070890950464, "grad_norm": 0.7503305025733185, "learning_rate": 4.732965591224054e-06, "loss": 0.127, "step": 18219 }, { "epoch": 0.53153626232569, "grad_norm": 1.0114362003752448, "learning_rate": 4.732493832502234e-06, "loss": 0.1402, "step": 18220 }, { "epoch": 0.5315654355563335, "grad_norm": 0.8115849228481044, "learning_rate": 4.7320220761686474e-06, "loss": 0.1194, "step": 18221 }, { "epoch": 0.5315946087869771, "grad_norm": 0.7141742109317262, "learning_rate": 4.731550322227502e-06, "loss": 0.1222, "step": 18222 }, { "epoch": 0.5316237820176206, "grad_norm": 0.7910402233028568, "learning_rate": 4.731078570683011e-06, "loss": 0.1375, "step": 18223 }, { "epoch": 0.5316529552482642, "grad_norm": 0.866716925506884, "learning_rate": 4.730606821539386e-06, "loss": 0.1341, "step": 18224 }, { "epoch": 0.5316821284789077, "grad_norm": 0.7100187237823343, "learning_rate": 4.73013507480084e-06, "loss": 0.1265, "step": 18225 }, { "epoch": 0.5317113017095513, "grad_norm": 0.7937530420766605, "learning_rate": 4.7296633304715834e-06, "loss": 0.1542, "step": 18226 }, { "epoch": 0.5317404749401948, "grad_norm": 0.8790082634737036, "learning_rate": 4.729191588555827e-06, "loss": 0.1292, "step": 18227 }, { "epoch": 0.5317696481708385, "grad_norm": 0.7977555259832114, "learning_rate": 4.728719849057785e-06, "loss": 0.11, "step": 18228 }, { "epoch": 0.531798821401482, "grad_norm": 0.8802254598764814, "learning_rate": 4.7282481119816684e-06, "loss": 0.129, "step": 18229 }, { "epoch": 0.5318279946321256, "grad_norm": 0.8308378588462866, "learning_rate": 4.727776377331685e-06, "loss": 0.1318, "step": 18230 }, { "epoch": 0.5318571678627692, "grad_norm": 1.0403894789855197, "learning_rate": 4.72730464511205e-06, "loss": 0.1639, "step": 18231 }, { "epoch": 0.5318863410934127, "grad_norm": 0.777906028070433, "learning_rate": 4.726832915326974e-06, "loss": 0.1096, "step": 18232 }, { "epoch": 0.5319155143240563, "grad_norm": 0.6856424421621916, "learning_rate": 4.7263611879806694e-06, "loss": 0.1511, "step": 18233 }, { "epoch": 0.5319446875546998, "grad_norm": 1.054616312461978, "learning_rate": 4.725889463077346e-06, "loss": 0.1324, "step": 18234 }, { "epoch": 0.5319738607853434, "grad_norm": 1.0271182679398891, "learning_rate": 4.725417740621217e-06, "loss": 0.1371, "step": 18235 }, { "epoch": 0.5320030340159869, "grad_norm": 1.0183451635925291, "learning_rate": 4.724946020616491e-06, "loss": 0.1444, "step": 18236 }, { "epoch": 0.5320322072466305, "grad_norm": 1.1563805190256737, "learning_rate": 4.724474303067381e-06, "loss": 0.1199, "step": 18237 }, { "epoch": 0.532061380477274, "grad_norm": 1.378097088732142, "learning_rate": 4.724002587978102e-06, "loss": 0.1242, "step": 18238 }, { "epoch": 0.5320905537079176, "grad_norm": 0.7790409610984101, "learning_rate": 4.7235308753528596e-06, "loss": 0.1382, "step": 18239 }, { "epoch": 0.5321197269385611, "grad_norm": 0.9516491913643963, "learning_rate": 4.723059165195868e-06, "loss": 0.1451, "step": 18240 }, { "epoch": 0.5321489001692047, "grad_norm": 1.070747120837479, "learning_rate": 4.722587457511339e-06, "loss": 0.1201, "step": 18241 }, { "epoch": 0.5321780733998484, "grad_norm": 0.890840183191396, "learning_rate": 4.72211575230348e-06, "loss": 0.1096, "step": 18242 }, { "epoch": 0.5322072466304919, "grad_norm": 0.8084631046858048, "learning_rate": 4.721644049576508e-06, "loss": 0.1327, "step": 18243 }, { "epoch": 0.5322364198611355, "grad_norm": 1.0445416958195826, "learning_rate": 4.721172349334631e-06, "loss": 0.1206, "step": 18244 }, { "epoch": 0.532265593091779, "grad_norm": 0.8798676295080163, "learning_rate": 4.72070065158206e-06, "loss": 0.145, "step": 18245 }, { "epoch": 0.5322947663224226, "grad_norm": 0.8842797310684821, "learning_rate": 4.720228956323009e-06, "loss": 0.1291, "step": 18246 }, { "epoch": 0.5323239395530661, "grad_norm": 0.9091357762333823, "learning_rate": 4.719757263561685e-06, "loss": 0.1208, "step": 18247 }, { "epoch": 0.5323531127837097, "grad_norm": 0.8923979134276224, "learning_rate": 4.7192855733023e-06, "loss": 0.1412, "step": 18248 }, { "epoch": 0.5323822860143532, "grad_norm": 0.8404394908659193, "learning_rate": 4.718813885549069e-06, "loss": 0.1361, "step": 18249 }, { "epoch": 0.5324114592449968, "grad_norm": 0.8390766493099656, "learning_rate": 4.718342200306199e-06, "loss": 0.1082, "step": 18250 }, { "epoch": 0.5324406324756403, "grad_norm": 0.9777215074669852, "learning_rate": 4.717870517577904e-06, "loss": 0.1215, "step": 18251 }, { "epoch": 0.5324698057062839, "grad_norm": 0.6881839018889654, "learning_rate": 4.717398837368392e-06, "loss": 0.1365, "step": 18252 }, { "epoch": 0.5324989789369274, "grad_norm": 0.9999322787493645, "learning_rate": 4.716927159681877e-06, "loss": 0.1379, "step": 18253 }, { "epoch": 0.532528152167571, "grad_norm": 0.8681590806963833, "learning_rate": 4.716455484522567e-06, "loss": 0.1291, "step": 18254 }, { "epoch": 0.5325573253982147, "grad_norm": 0.7197364165320143, "learning_rate": 4.715983811894678e-06, "loss": 0.1143, "step": 18255 }, { "epoch": 0.5325864986288582, "grad_norm": 0.8390255890591161, "learning_rate": 4.715512141802415e-06, "loss": 0.1334, "step": 18256 }, { "epoch": 0.5326156718595018, "grad_norm": 0.8020806753716382, "learning_rate": 4.715040474249993e-06, "loss": 0.1228, "step": 18257 }, { "epoch": 0.5326448450901453, "grad_norm": 0.8289200491529507, "learning_rate": 4.714568809241622e-06, "loss": 0.13, "step": 18258 }, { "epoch": 0.5326740183207889, "grad_norm": 0.9476852145563937, "learning_rate": 4.7140971467815115e-06, "loss": 0.1385, "step": 18259 }, { "epoch": 0.5327031915514324, "grad_norm": 0.8103467856705289, "learning_rate": 4.713625486873872e-06, "loss": 0.122, "step": 18260 }, { "epoch": 0.532732364782076, "grad_norm": 0.8168846218972035, "learning_rate": 4.713153829522918e-06, "loss": 0.1151, "step": 18261 }, { "epoch": 0.5327615380127195, "grad_norm": 0.7159185013828294, "learning_rate": 4.712682174732857e-06, "loss": 0.128, "step": 18262 }, { "epoch": 0.5327907112433631, "grad_norm": 0.7375691227334467, "learning_rate": 4.7122105225079015e-06, "loss": 0.1491, "step": 18263 }, { "epoch": 0.5328198844740066, "grad_norm": 0.9646780648256801, "learning_rate": 4.711738872852262e-06, "loss": 0.1343, "step": 18264 }, { "epoch": 0.5328490577046502, "grad_norm": 0.7199992123139404, "learning_rate": 4.711267225770149e-06, "loss": 0.1362, "step": 18265 }, { "epoch": 0.5328782309352937, "grad_norm": 0.6174057422571153, "learning_rate": 4.710795581265772e-06, "loss": 0.144, "step": 18266 }, { "epoch": 0.5329074041659373, "grad_norm": 0.8041162452665113, "learning_rate": 4.710323939343343e-06, "loss": 0.1202, "step": 18267 }, { "epoch": 0.5329365773965808, "grad_norm": 0.8354353108165564, "learning_rate": 4.709852300007075e-06, "loss": 0.1137, "step": 18268 }, { "epoch": 0.5329657506272245, "grad_norm": 0.9102347114209531, "learning_rate": 4.709380663261175e-06, "loss": 0.1598, "step": 18269 }, { "epoch": 0.5329949238578681, "grad_norm": 0.8691516424405844, "learning_rate": 4.7089090291098555e-06, "loss": 0.1469, "step": 18270 }, { "epoch": 0.5330240970885116, "grad_norm": 0.894379100194545, "learning_rate": 4.708437397557327e-06, "loss": 0.1528, "step": 18271 }, { "epoch": 0.5330532703191552, "grad_norm": 0.7686223797221675, "learning_rate": 4.707965768607797e-06, "loss": 0.1186, "step": 18272 }, { "epoch": 0.5330824435497987, "grad_norm": 0.9922473040053408, "learning_rate": 4.7074941422654825e-06, "loss": 0.1301, "step": 18273 }, { "epoch": 0.5331116167804423, "grad_norm": 0.9553222278714265, "learning_rate": 4.7070225185345885e-06, "loss": 0.1348, "step": 18274 }, { "epoch": 0.5331407900110858, "grad_norm": 0.8780426277561764, "learning_rate": 4.706550897419328e-06, "loss": 0.1292, "step": 18275 }, { "epoch": 0.5331699632417294, "grad_norm": 1.1271964222389244, "learning_rate": 4.706079278923912e-06, "loss": 0.1277, "step": 18276 }, { "epoch": 0.5331991364723729, "grad_norm": 0.7803256032983364, "learning_rate": 4.70560766305255e-06, "loss": 0.1235, "step": 18277 }, { "epoch": 0.5332283097030165, "grad_norm": 1.062307277822807, "learning_rate": 4.70513604980945e-06, "loss": 0.1201, "step": 18278 }, { "epoch": 0.53325748293366, "grad_norm": 0.8432346201306513, "learning_rate": 4.704664439198826e-06, "loss": 0.1199, "step": 18279 }, { "epoch": 0.5332866561643036, "grad_norm": 0.8053567904561258, "learning_rate": 4.704192831224888e-06, "loss": 0.1382, "step": 18280 }, { "epoch": 0.5333158293949471, "grad_norm": 0.7709820122302663, "learning_rate": 4.703721225891847e-06, "loss": 0.1163, "step": 18281 }, { "epoch": 0.5333450026255907, "grad_norm": 0.9814907406419364, "learning_rate": 4.703249623203911e-06, "loss": 0.1502, "step": 18282 }, { "epoch": 0.5333741758562344, "grad_norm": 0.8545745632077943, "learning_rate": 4.702778023165291e-06, "loss": 0.1574, "step": 18283 }, { "epoch": 0.5334033490868779, "grad_norm": 0.7053356035176296, "learning_rate": 4.7023064257801976e-06, "loss": 0.1506, "step": 18284 }, { "epoch": 0.5334325223175215, "grad_norm": 1.1326987087328027, "learning_rate": 4.7018348310528424e-06, "loss": 0.1292, "step": 18285 }, { "epoch": 0.533461695548165, "grad_norm": 0.7508903359106306, "learning_rate": 4.701363238987435e-06, "loss": 0.1339, "step": 18286 }, { "epoch": 0.5334908687788086, "grad_norm": 0.6477469308655547, "learning_rate": 4.700891649588185e-06, "loss": 0.117, "step": 18287 }, { "epoch": 0.5335200420094521, "grad_norm": 0.6742799625847177, "learning_rate": 4.700420062859303e-06, "loss": 0.1634, "step": 18288 }, { "epoch": 0.5335492152400957, "grad_norm": 1.3577615709575646, "learning_rate": 4.6999484788049985e-06, "loss": 0.1352, "step": 18289 }, { "epoch": 0.5335783884707392, "grad_norm": 0.7758935998448669, "learning_rate": 4.699476897429481e-06, "loss": 0.1581, "step": 18290 }, { "epoch": 0.5336075617013828, "grad_norm": 2.846208854867453, "learning_rate": 4.699005318736965e-06, "loss": 0.1346, "step": 18291 }, { "epoch": 0.5336367349320263, "grad_norm": 0.6612591799521408, "learning_rate": 4.698533742731655e-06, "loss": 0.1322, "step": 18292 }, { "epoch": 0.5336659081626699, "grad_norm": 0.8580813226827683, "learning_rate": 4.698062169417766e-06, "loss": 0.1117, "step": 18293 }, { "epoch": 0.5336950813933135, "grad_norm": 0.5712477496198003, "learning_rate": 4.697590598799505e-06, "loss": 0.1257, "step": 18294 }, { "epoch": 0.533724254623957, "grad_norm": 0.895768043690643, "learning_rate": 4.697119030881083e-06, "loss": 0.1391, "step": 18295 }, { "epoch": 0.5337534278546007, "grad_norm": 0.8231409957602026, "learning_rate": 4.696647465666709e-06, "loss": 0.1208, "step": 18296 }, { "epoch": 0.5337826010852442, "grad_norm": 0.794681568119074, "learning_rate": 4.6961759031605945e-06, "loss": 0.1171, "step": 18297 }, { "epoch": 0.5338117743158878, "grad_norm": 0.7995639882069969, "learning_rate": 4.695704343366951e-06, "loss": 0.1487, "step": 18298 }, { "epoch": 0.5338409475465313, "grad_norm": 0.860057475567077, "learning_rate": 4.695232786289984e-06, "loss": 0.1186, "step": 18299 }, { "epoch": 0.5338701207771749, "grad_norm": 0.9380352743996345, "learning_rate": 4.694761231933907e-06, "loss": 0.1115, "step": 18300 }, { "epoch": 0.5338992940078184, "grad_norm": 0.7989324976010658, "learning_rate": 4.694289680302929e-06, "loss": 0.1485, "step": 18301 }, { "epoch": 0.533928467238462, "grad_norm": 0.87914004930729, "learning_rate": 4.693818131401258e-06, "loss": 0.1346, "step": 18302 }, { "epoch": 0.5339576404691055, "grad_norm": 0.7506380667800656, "learning_rate": 4.693346585233108e-06, "loss": 0.1205, "step": 18303 }, { "epoch": 0.5339868136997491, "grad_norm": 1.2642888865243207, "learning_rate": 4.692875041802686e-06, "loss": 0.1198, "step": 18304 }, { "epoch": 0.5340159869303926, "grad_norm": 0.8676706384944769, "learning_rate": 4.692403501114201e-06, "loss": 0.1275, "step": 18305 }, { "epoch": 0.5340451601610362, "grad_norm": 0.7709817409464969, "learning_rate": 4.691931963171866e-06, "loss": 0.1002, "step": 18306 }, { "epoch": 0.5340743333916798, "grad_norm": 1.351751291138029, "learning_rate": 4.691460427979888e-06, "loss": 0.1518, "step": 18307 }, { "epoch": 0.5341035066223233, "grad_norm": 1.1386904341793178, "learning_rate": 4.690988895542477e-06, "loss": 0.1496, "step": 18308 }, { "epoch": 0.5341326798529669, "grad_norm": 1.2382735820396864, "learning_rate": 4.690517365863843e-06, "loss": 0.1769, "step": 18309 }, { "epoch": 0.5341618530836105, "grad_norm": 1.2838804628353189, "learning_rate": 4.690045838948197e-06, "loss": 0.1269, "step": 18310 }, { "epoch": 0.5341910263142541, "grad_norm": 0.948425471216006, "learning_rate": 4.689574314799749e-06, "loss": 0.1213, "step": 18311 }, { "epoch": 0.5342201995448976, "grad_norm": 0.8091162461170046, "learning_rate": 4.689102793422706e-06, "loss": 0.1259, "step": 18312 }, { "epoch": 0.5342493727755412, "grad_norm": 1.2537966712209563, "learning_rate": 4.688631274821279e-06, "loss": 0.1353, "step": 18313 }, { "epoch": 0.5342785460061847, "grad_norm": 0.937753720979893, "learning_rate": 4.688159758999676e-06, "loss": 0.1308, "step": 18314 }, { "epoch": 0.5343077192368283, "grad_norm": 0.8199593811915793, "learning_rate": 4.687688245962111e-06, "loss": 0.1301, "step": 18315 }, { "epoch": 0.5343368924674718, "grad_norm": 1.0522108926739333, "learning_rate": 4.68721673571279e-06, "loss": 0.1357, "step": 18316 }, { "epoch": 0.5343660656981154, "grad_norm": 0.9877584992858726, "learning_rate": 4.686745228255923e-06, "loss": 0.1278, "step": 18317 }, { "epoch": 0.534395238928759, "grad_norm": 0.6561644672374607, "learning_rate": 4.686273723595721e-06, "loss": 0.1138, "step": 18318 }, { "epoch": 0.5344244121594025, "grad_norm": 0.674583807057486, "learning_rate": 4.685802221736391e-06, "loss": 0.1439, "step": 18319 }, { "epoch": 0.5344535853900461, "grad_norm": 0.8126408168353374, "learning_rate": 4.685330722682143e-06, "loss": 0.1348, "step": 18320 }, { "epoch": 0.5344827586206896, "grad_norm": 0.9298237473943516, "learning_rate": 4.684859226437188e-06, "loss": 0.123, "step": 18321 }, { "epoch": 0.5345119318513332, "grad_norm": 0.6441497151952471, "learning_rate": 4.684387733005735e-06, "loss": 0.1285, "step": 18322 }, { "epoch": 0.5345411050819768, "grad_norm": 0.6988583308964291, "learning_rate": 4.6839162423919946e-06, "loss": 0.1261, "step": 18323 }, { "epoch": 0.5345702783126204, "grad_norm": 0.729748063937843, "learning_rate": 4.683444754600172e-06, "loss": 0.1171, "step": 18324 }, { "epoch": 0.5345994515432639, "grad_norm": 0.7203507248612669, "learning_rate": 4.6829732696344796e-06, "loss": 0.1534, "step": 18325 }, { "epoch": 0.5346286247739075, "grad_norm": 0.8065783589834921, "learning_rate": 4.6825017874991255e-06, "loss": 0.1368, "step": 18326 }, { "epoch": 0.534657798004551, "grad_norm": 0.9653649908196572, "learning_rate": 4.6820303081983205e-06, "loss": 0.1433, "step": 18327 }, { "epoch": 0.5346869712351946, "grad_norm": 0.9393678065840173, "learning_rate": 4.681558831736274e-06, "loss": 0.136, "step": 18328 }, { "epoch": 0.5347161444658382, "grad_norm": 0.808057939594074, "learning_rate": 4.681087358117193e-06, "loss": 0.1583, "step": 18329 }, { "epoch": 0.5347453176964817, "grad_norm": 0.7662604504264521, "learning_rate": 4.680615887345288e-06, "loss": 0.1311, "step": 18330 }, { "epoch": 0.5347744909271253, "grad_norm": 0.9298896011013003, "learning_rate": 4.680144419424769e-06, "loss": 0.1489, "step": 18331 }, { "epoch": 0.5348036641577688, "grad_norm": 0.7472354714700918, "learning_rate": 4.679672954359842e-06, "loss": 0.1452, "step": 18332 }, { "epoch": 0.5348328373884124, "grad_norm": 0.8432475776533255, "learning_rate": 4.679201492154721e-06, "loss": 0.1393, "step": 18333 }, { "epoch": 0.5348620106190559, "grad_norm": 0.8881170093770653, "learning_rate": 4.678730032813611e-06, "loss": 0.1253, "step": 18334 }, { "epoch": 0.5348911838496995, "grad_norm": 0.9069534303549787, "learning_rate": 4.678258576340723e-06, "loss": 0.117, "step": 18335 }, { "epoch": 0.534920357080343, "grad_norm": 0.7528468968079108, "learning_rate": 4.677787122740267e-06, "loss": 0.1175, "step": 18336 }, { "epoch": 0.5349495303109867, "grad_norm": 0.9679569993903948, "learning_rate": 4.677315672016446e-06, "loss": 0.1154, "step": 18337 }, { "epoch": 0.5349787035416302, "grad_norm": 1.2178409028124713, "learning_rate": 4.6768442241734785e-06, "loss": 0.1319, "step": 18338 }, { "epoch": 0.5350078767722738, "grad_norm": 1.094091458256245, "learning_rate": 4.676372779215568e-06, "loss": 0.1321, "step": 18339 }, { "epoch": 0.5350370500029173, "grad_norm": 0.82715505011348, "learning_rate": 4.675901337146922e-06, "loss": 0.1224, "step": 18340 }, { "epoch": 0.5350662232335609, "grad_norm": 1.0231909253261822, "learning_rate": 4.675429897971754e-06, "loss": 0.1355, "step": 18341 }, { "epoch": 0.5350953964642045, "grad_norm": 0.9893810869493288, "learning_rate": 4.674958461694269e-06, "loss": 0.147, "step": 18342 }, { "epoch": 0.535124569694848, "grad_norm": 0.7506321777001245, "learning_rate": 4.674487028318676e-06, "loss": 0.117, "step": 18343 }, { "epoch": 0.5351537429254916, "grad_norm": 1.0344483586868616, "learning_rate": 4.674015597849186e-06, "loss": 0.1283, "step": 18344 }, { "epoch": 0.5351829161561351, "grad_norm": 0.9586845928124975, "learning_rate": 4.673544170290009e-06, "loss": 0.1486, "step": 18345 }, { "epoch": 0.5352120893867787, "grad_norm": 0.7940525509400088, "learning_rate": 4.673072745645349e-06, "loss": 0.1306, "step": 18346 }, { "epoch": 0.5352412626174222, "grad_norm": 1.3334246405174768, "learning_rate": 4.672601323919419e-06, "loss": 0.1287, "step": 18347 }, { "epoch": 0.5352704358480658, "grad_norm": 1.078371798384496, "learning_rate": 4.6721299051164265e-06, "loss": 0.1197, "step": 18348 }, { "epoch": 0.5352996090787093, "grad_norm": 0.9785701120685548, "learning_rate": 4.671658489240577e-06, "loss": 0.1278, "step": 18349 }, { "epoch": 0.535328782309353, "grad_norm": 0.7759886837248762, "learning_rate": 4.671187076296085e-06, "loss": 0.109, "step": 18350 }, { "epoch": 0.5353579555399965, "grad_norm": 1.1109812964350543, "learning_rate": 4.670715666287156e-06, "loss": 0.1415, "step": 18351 }, { "epoch": 0.5353871287706401, "grad_norm": 1.2929363377866827, "learning_rate": 4.670244259217998e-06, "loss": 0.1513, "step": 18352 }, { "epoch": 0.5354163020012837, "grad_norm": 0.9022509312476248, "learning_rate": 4.669772855092822e-06, "loss": 0.1476, "step": 18353 }, { "epoch": 0.5354454752319272, "grad_norm": 0.988257571648059, "learning_rate": 4.6693014539158345e-06, "loss": 0.1361, "step": 18354 }, { "epoch": 0.5354746484625708, "grad_norm": 1.0434758164432263, "learning_rate": 4.668830055691243e-06, "loss": 0.1454, "step": 18355 }, { "epoch": 0.5355038216932143, "grad_norm": 0.9014227433768871, "learning_rate": 4.668358660423259e-06, "loss": 0.1285, "step": 18356 }, { "epoch": 0.5355329949238579, "grad_norm": 0.8525085880182496, "learning_rate": 4.66788726811609e-06, "loss": 0.1295, "step": 18357 }, { "epoch": 0.5355621681545014, "grad_norm": 0.9098629368849164, "learning_rate": 4.667415878773945e-06, "loss": 0.1453, "step": 18358 }, { "epoch": 0.535591341385145, "grad_norm": 0.8745548385921665, "learning_rate": 4.6669444924010305e-06, "loss": 0.1233, "step": 18359 }, { "epoch": 0.5356205146157885, "grad_norm": 0.8206904657070538, "learning_rate": 4.666473109001556e-06, "loss": 0.1677, "step": 18360 }, { "epoch": 0.5356496878464321, "grad_norm": 0.8218439636259399, "learning_rate": 4.666001728579729e-06, "loss": 0.1466, "step": 18361 }, { "epoch": 0.5356788610770756, "grad_norm": 0.9085453548345437, "learning_rate": 4.66553035113976e-06, "loss": 0.1272, "step": 18362 }, { "epoch": 0.5357080343077192, "grad_norm": 0.8833952600137825, "learning_rate": 4.665058976685857e-06, "loss": 0.1234, "step": 18363 }, { "epoch": 0.5357372075383628, "grad_norm": 1.0060277647472204, "learning_rate": 4.664587605222226e-06, "loss": 0.1292, "step": 18364 }, { "epoch": 0.5357663807690064, "grad_norm": 1.0899336140879554, "learning_rate": 4.6641162367530775e-06, "loss": 0.1033, "step": 18365 }, { "epoch": 0.53579555399965, "grad_norm": 1.2235143866002742, "learning_rate": 4.66364487128262e-06, "loss": 0.1169, "step": 18366 }, { "epoch": 0.5358247272302935, "grad_norm": 0.8596889715983336, "learning_rate": 4.663173508815058e-06, "loss": 0.1535, "step": 18367 }, { "epoch": 0.5358539004609371, "grad_norm": 0.9054611873990683, "learning_rate": 4.662702149354605e-06, "loss": 0.1192, "step": 18368 }, { "epoch": 0.5358830736915806, "grad_norm": 0.9634229690797912, "learning_rate": 4.662230792905465e-06, "loss": 0.118, "step": 18369 }, { "epoch": 0.5359122469222242, "grad_norm": 1.0595127571269867, "learning_rate": 4.66175943947185e-06, "loss": 0.1328, "step": 18370 }, { "epoch": 0.5359414201528677, "grad_norm": 1.0649972455942749, "learning_rate": 4.661288089057965e-06, "loss": 0.1497, "step": 18371 }, { "epoch": 0.5359705933835113, "grad_norm": 0.8622703140674189, "learning_rate": 4.660816741668019e-06, "loss": 0.1228, "step": 18372 }, { "epoch": 0.5359997666141548, "grad_norm": 0.9624169763035911, "learning_rate": 4.660345397306219e-06, "loss": 0.1315, "step": 18373 }, { "epoch": 0.5360289398447984, "grad_norm": 0.8373174658170262, "learning_rate": 4.659874055976775e-06, "loss": 0.1505, "step": 18374 }, { "epoch": 0.5360581130754419, "grad_norm": 0.7389060289683349, "learning_rate": 4.6594027176838955e-06, "loss": 0.1159, "step": 18375 }, { "epoch": 0.5360872863060855, "grad_norm": 0.6855628089539305, "learning_rate": 4.658931382431786e-06, "loss": 0.1153, "step": 18376 }, { "epoch": 0.5361164595367292, "grad_norm": 0.9392507510743571, "learning_rate": 4.658460050224656e-06, "loss": 0.1362, "step": 18377 }, { "epoch": 0.5361456327673727, "grad_norm": 0.9069703368660548, "learning_rate": 4.657988721066714e-06, "loss": 0.1269, "step": 18378 }, { "epoch": 0.5361748059980163, "grad_norm": 0.8773801036238599, "learning_rate": 4.657517394962164e-06, "loss": 0.1242, "step": 18379 }, { "epoch": 0.5362039792286598, "grad_norm": 0.7399085306556702, "learning_rate": 4.65704607191522e-06, "loss": 0.1503, "step": 18380 }, { "epoch": 0.5362331524593034, "grad_norm": 1.0119142077092582, "learning_rate": 4.656574751930085e-06, "loss": 0.118, "step": 18381 }, { "epoch": 0.5362623256899469, "grad_norm": 1.5654223764628867, "learning_rate": 4.65610343501097e-06, "loss": 0.152, "step": 18382 }, { "epoch": 0.5362914989205905, "grad_norm": 0.8274538592274344, "learning_rate": 4.655632121162082e-06, "loss": 0.1457, "step": 18383 }, { "epoch": 0.536320672151234, "grad_norm": 1.2318373194834533, "learning_rate": 4.6551608103876275e-06, "loss": 0.1415, "step": 18384 }, { "epoch": 0.5363498453818776, "grad_norm": 0.9486937306815246, "learning_rate": 4.654689502691813e-06, "loss": 0.1259, "step": 18385 }, { "epoch": 0.5363790186125211, "grad_norm": 0.7057054803619004, "learning_rate": 4.65421819807885e-06, "loss": 0.1167, "step": 18386 }, { "epoch": 0.5364081918431647, "grad_norm": 1.0210682161050608, "learning_rate": 4.653746896552944e-06, "loss": 0.1405, "step": 18387 }, { "epoch": 0.5364373650738082, "grad_norm": 0.8450355942890404, "learning_rate": 4.653275598118304e-06, "loss": 0.1375, "step": 18388 }, { "epoch": 0.5364665383044518, "grad_norm": 0.743774645346137, "learning_rate": 4.652804302779136e-06, "loss": 0.1071, "step": 18389 }, { "epoch": 0.5364957115350953, "grad_norm": 0.7219983884839402, "learning_rate": 4.652333010539648e-06, "loss": 0.1317, "step": 18390 }, { "epoch": 0.536524884765739, "grad_norm": 1.6790037943494809, "learning_rate": 4.651861721404047e-06, "loss": 0.1561, "step": 18391 }, { "epoch": 0.5365540579963826, "grad_norm": 0.8807400125325477, "learning_rate": 4.651390435376543e-06, "loss": 0.1159, "step": 18392 }, { "epoch": 0.5365832312270261, "grad_norm": 0.9634830345396149, "learning_rate": 4.650919152461342e-06, "loss": 0.1004, "step": 18393 }, { "epoch": 0.5366124044576697, "grad_norm": 0.8857535844913024, "learning_rate": 4.650447872662651e-06, "loss": 0.1382, "step": 18394 }, { "epoch": 0.5366415776883132, "grad_norm": 0.8063250823663929, "learning_rate": 4.649976595984678e-06, "loss": 0.1289, "step": 18395 }, { "epoch": 0.5366707509189568, "grad_norm": 0.8622685643994183, "learning_rate": 4.649505322431631e-06, "loss": 0.1182, "step": 18396 }, { "epoch": 0.5366999241496003, "grad_norm": 1.0892713923536639, "learning_rate": 4.649034052007714e-06, "loss": 0.1478, "step": 18397 }, { "epoch": 0.5367290973802439, "grad_norm": 0.8673382068332008, "learning_rate": 4.648562784717141e-06, "loss": 0.1265, "step": 18398 }, { "epoch": 0.5367582706108874, "grad_norm": 0.8608091835670153, "learning_rate": 4.648091520564114e-06, "loss": 0.1288, "step": 18399 }, { "epoch": 0.536787443841531, "grad_norm": 0.856921635053501, "learning_rate": 4.647620259552841e-06, "loss": 0.134, "step": 18400 }, { "epoch": 0.5368166170721745, "grad_norm": 0.8035283308925599, "learning_rate": 4.647149001687532e-06, "loss": 0.1229, "step": 18401 }, { "epoch": 0.5368457903028181, "grad_norm": 0.7661735637383169, "learning_rate": 4.6466777469723916e-06, "loss": 0.134, "step": 18402 }, { "epoch": 0.5368749635334616, "grad_norm": 1.0260182917262406, "learning_rate": 4.646206495411627e-06, "loss": 0.1318, "step": 18403 }, { "epoch": 0.5369041367641053, "grad_norm": 0.7281531016042366, "learning_rate": 4.645735247009447e-06, "loss": 0.1312, "step": 18404 }, { "epoch": 0.5369333099947489, "grad_norm": 0.9359715608461058, "learning_rate": 4.645264001770059e-06, "loss": 0.1369, "step": 18405 }, { "epoch": 0.5369624832253924, "grad_norm": 0.7474998995425077, "learning_rate": 4.6447927596976685e-06, "loss": 0.1356, "step": 18406 }, { "epoch": 0.536991656456036, "grad_norm": 0.9396636845455977, "learning_rate": 4.644321520796484e-06, "loss": 0.1235, "step": 18407 }, { "epoch": 0.5370208296866795, "grad_norm": 0.900428593419211, "learning_rate": 4.6438502850707125e-06, "loss": 0.1396, "step": 18408 }, { "epoch": 0.5370500029173231, "grad_norm": 0.7501024115615466, "learning_rate": 4.643379052524557e-06, "loss": 0.1275, "step": 18409 }, { "epoch": 0.5370791761479666, "grad_norm": 0.8911013787200006, "learning_rate": 4.642907823162232e-06, "loss": 0.1263, "step": 18410 }, { "epoch": 0.5371083493786102, "grad_norm": 1.267359403901331, "learning_rate": 4.642436596987939e-06, "loss": 0.1241, "step": 18411 }, { "epoch": 0.5371375226092537, "grad_norm": 1.2193543182524031, "learning_rate": 4.6419653740058875e-06, "loss": 0.1349, "step": 18412 }, { "epoch": 0.5371666958398973, "grad_norm": 0.869884365322348, "learning_rate": 4.6414941542202854e-06, "loss": 0.1479, "step": 18413 }, { "epoch": 0.5371958690705408, "grad_norm": 0.9270581440670198, "learning_rate": 4.6410229376353355e-06, "loss": 0.1255, "step": 18414 }, { "epoch": 0.5372250423011844, "grad_norm": 0.8335069150306736, "learning_rate": 4.6405517242552465e-06, "loss": 0.1413, "step": 18415 }, { "epoch": 0.537254215531828, "grad_norm": 0.9004884093575155, "learning_rate": 4.640080514084227e-06, "loss": 0.1376, "step": 18416 }, { "epoch": 0.5372833887624715, "grad_norm": 0.9495672066905021, "learning_rate": 4.639609307126483e-06, "loss": 0.1207, "step": 18417 }, { "epoch": 0.5373125619931152, "grad_norm": 0.8584298778242642, "learning_rate": 4.639138103386222e-06, "loss": 0.1182, "step": 18418 }, { "epoch": 0.5373417352237587, "grad_norm": 0.9689798923975219, "learning_rate": 4.638666902867649e-06, "loss": 0.1518, "step": 18419 }, { "epoch": 0.5373709084544023, "grad_norm": 0.8713494961194964, "learning_rate": 4.63819570557497e-06, "loss": 0.1625, "step": 18420 }, { "epoch": 0.5374000816850458, "grad_norm": 0.8097312640427291, "learning_rate": 4.637724511512394e-06, "loss": 0.1019, "step": 18421 }, { "epoch": 0.5374292549156894, "grad_norm": 0.7818637297060073, "learning_rate": 4.637253320684128e-06, "loss": 0.1411, "step": 18422 }, { "epoch": 0.5374584281463329, "grad_norm": 0.7725537560170543, "learning_rate": 4.636782133094379e-06, "loss": 0.1358, "step": 18423 }, { "epoch": 0.5374876013769765, "grad_norm": 0.9685417795608715, "learning_rate": 4.636310948747351e-06, "loss": 0.1161, "step": 18424 }, { "epoch": 0.53751677460762, "grad_norm": 0.8661703798486129, "learning_rate": 4.6358397676472514e-06, "loss": 0.1179, "step": 18425 }, { "epoch": 0.5375459478382636, "grad_norm": 0.9748207706114557, "learning_rate": 4.63536858979829e-06, "loss": 0.1328, "step": 18426 }, { "epoch": 0.5375751210689071, "grad_norm": 0.8068977563207552, "learning_rate": 4.634897415204665e-06, "loss": 0.1255, "step": 18427 }, { "epoch": 0.5376042942995507, "grad_norm": 0.9814118356836369, "learning_rate": 4.6344262438705945e-06, "loss": 0.1238, "step": 18428 }, { "epoch": 0.5376334675301943, "grad_norm": 0.8697629592549748, "learning_rate": 4.633955075800277e-06, "loss": 0.1236, "step": 18429 }, { "epoch": 0.5376626407608378, "grad_norm": 0.818823172399622, "learning_rate": 4.633483910997921e-06, "loss": 0.1373, "step": 18430 }, { "epoch": 0.5376918139914815, "grad_norm": 1.1620154208251865, "learning_rate": 4.633012749467735e-06, "loss": 0.1327, "step": 18431 }, { "epoch": 0.537720987222125, "grad_norm": 0.9945830533717772, "learning_rate": 4.632541591213922e-06, "loss": 0.1381, "step": 18432 }, { "epoch": 0.5377501604527686, "grad_norm": 1.0759595745860047, "learning_rate": 4.6320704362406895e-06, "loss": 0.1394, "step": 18433 }, { "epoch": 0.5377793336834121, "grad_norm": 1.0332282875284455, "learning_rate": 4.6315992845522445e-06, "loss": 0.1262, "step": 18434 }, { "epoch": 0.5378085069140557, "grad_norm": 1.0009817731707173, "learning_rate": 4.631128136152795e-06, "loss": 0.1424, "step": 18435 }, { "epoch": 0.5378376801446992, "grad_norm": 0.829985548869482, "learning_rate": 4.6306569910465435e-06, "loss": 0.1438, "step": 18436 }, { "epoch": 0.5378668533753428, "grad_norm": 0.9543572266061056, "learning_rate": 4.630185849237699e-06, "loss": 0.1326, "step": 18437 }, { "epoch": 0.5378960266059863, "grad_norm": 1.1472750665460945, "learning_rate": 4.629714710730468e-06, "loss": 0.1523, "step": 18438 }, { "epoch": 0.5379251998366299, "grad_norm": 0.9609814818823639, "learning_rate": 4.629243575529052e-06, "loss": 0.1196, "step": 18439 }, { "epoch": 0.5379543730672735, "grad_norm": 1.0839039490434084, "learning_rate": 4.628772443637664e-06, "loss": 0.1168, "step": 18440 }, { "epoch": 0.537983546297917, "grad_norm": 0.9058019196393359, "learning_rate": 4.628301315060506e-06, "loss": 0.127, "step": 18441 }, { "epoch": 0.5380127195285606, "grad_norm": 0.7880115419121816, "learning_rate": 4.627830189801785e-06, "loss": 0.1108, "step": 18442 }, { "epoch": 0.5380418927592041, "grad_norm": 0.8383131103373016, "learning_rate": 4.627359067865709e-06, "loss": 0.1246, "step": 18443 }, { "epoch": 0.5380710659898477, "grad_norm": 0.7706655732006188, "learning_rate": 4.6268879492564815e-06, "loss": 0.1326, "step": 18444 }, { "epoch": 0.5381002392204913, "grad_norm": 0.6893824246782932, "learning_rate": 4.626416833978307e-06, "loss": 0.1111, "step": 18445 }, { "epoch": 0.5381294124511349, "grad_norm": 0.7094778064231246, "learning_rate": 4.6259457220353955e-06, "loss": 0.1244, "step": 18446 }, { "epoch": 0.5381585856817784, "grad_norm": 0.816157148753955, "learning_rate": 4.625474613431951e-06, "loss": 0.1257, "step": 18447 }, { "epoch": 0.538187758912422, "grad_norm": 0.7448126542639731, "learning_rate": 4.625003508172181e-06, "loss": 0.1234, "step": 18448 }, { "epoch": 0.5382169321430655, "grad_norm": 0.7032555178388354, "learning_rate": 4.624532406260289e-06, "loss": 0.1298, "step": 18449 }, { "epoch": 0.5382461053737091, "grad_norm": 0.7566557780826755, "learning_rate": 4.6240613077004825e-06, "loss": 0.1118, "step": 18450 }, { "epoch": 0.5382752786043526, "grad_norm": 1.024552593339411, "learning_rate": 4.623590212496966e-06, "loss": 0.1501, "step": 18451 }, { "epoch": 0.5383044518349962, "grad_norm": 1.002637863063864, "learning_rate": 4.6231191206539464e-06, "loss": 0.1304, "step": 18452 }, { "epoch": 0.5383336250656398, "grad_norm": 0.8508473027450237, "learning_rate": 4.622648032175631e-06, "loss": 0.1146, "step": 18453 }, { "epoch": 0.5383627982962833, "grad_norm": 0.7604976770223463, "learning_rate": 4.622176947066223e-06, "loss": 0.1287, "step": 18454 }, { "epoch": 0.5383919715269269, "grad_norm": 0.8756015731827811, "learning_rate": 4.621705865329928e-06, "loss": 0.1435, "step": 18455 }, { "epoch": 0.5384211447575704, "grad_norm": 0.9054175597379397, "learning_rate": 4.621234786970955e-06, "loss": 0.1207, "step": 18456 }, { "epoch": 0.538450317988214, "grad_norm": 0.7993475119439174, "learning_rate": 4.620763711993504e-06, "loss": 0.1231, "step": 18457 }, { "epoch": 0.5384794912188576, "grad_norm": 1.0069903678745922, "learning_rate": 4.620292640401786e-06, "loss": 0.1306, "step": 18458 }, { "epoch": 0.5385086644495012, "grad_norm": 0.7675858475618413, "learning_rate": 4.619821572200005e-06, "loss": 0.1371, "step": 18459 }, { "epoch": 0.5385378376801447, "grad_norm": 0.9962535852556412, "learning_rate": 4.6193505073923655e-06, "loss": 0.1272, "step": 18460 }, { "epoch": 0.5385670109107883, "grad_norm": 0.7955796040305474, "learning_rate": 4.6188794459830756e-06, "loss": 0.1281, "step": 18461 }, { "epoch": 0.5385961841414318, "grad_norm": 0.803729078148684, "learning_rate": 4.618408387976337e-06, "loss": 0.131, "step": 18462 }, { "epoch": 0.5386253573720754, "grad_norm": 0.8122718679589646, "learning_rate": 4.617937333376356e-06, "loss": 0.1207, "step": 18463 }, { "epoch": 0.538654530602719, "grad_norm": 0.9027543324881537, "learning_rate": 4.617466282187341e-06, "loss": 0.1115, "step": 18464 }, { "epoch": 0.5386837038333625, "grad_norm": 1.1345576211791466, "learning_rate": 4.616995234413498e-06, "loss": 0.13, "step": 18465 }, { "epoch": 0.5387128770640061, "grad_norm": 0.9931104688525274, "learning_rate": 4.616524190059028e-06, "loss": 0.1356, "step": 18466 }, { "epoch": 0.5387420502946496, "grad_norm": 0.9092572288132816, "learning_rate": 4.616053149128137e-06, "loss": 0.1498, "step": 18467 }, { "epoch": 0.5387712235252932, "grad_norm": 0.8681668748462004, "learning_rate": 4.615582111625035e-06, "loss": 0.1703, "step": 18468 }, { "epoch": 0.5388003967559367, "grad_norm": 0.9380233133435865, "learning_rate": 4.61511107755392e-06, "loss": 0.1305, "step": 18469 }, { "epoch": 0.5388295699865803, "grad_norm": 0.8526693956400522, "learning_rate": 4.614640046919004e-06, "loss": 0.1352, "step": 18470 }, { "epoch": 0.5388587432172238, "grad_norm": 0.8469171229904489, "learning_rate": 4.6141690197244895e-06, "loss": 0.1688, "step": 18471 }, { "epoch": 0.5388879164478675, "grad_norm": 1.0020668817402003, "learning_rate": 4.613697995974582e-06, "loss": 0.1278, "step": 18472 }, { "epoch": 0.538917089678511, "grad_norm": 0.84438952777186, "learning_rate": 4.613226975673488e-06, "loss": 0.1405, "step": 18473 }, { "epoch": 0.5389462629091546, "grad_norm": 0.945618628826909, "learning_rate": 4.61275595882541e-06, "loss": 0.1465, "step": 18474 }, { "epoch": 0.5389754361397981, "grad_norm": 1.0235841023439358, "learning_rate": 4.612284945434552e-06, "loss": 0.1443, "step": 18475 }, { "epoch": 0.5390046093704417, "grad_norm": 0.6546548950283324, "learning_rate": 4.611813935505124e-06, "loss": 0.1225, "step": 18476 }, { "epoch": 0.5390337826010853, "grad_norm": 1.0493183644750592, "learning_rate": 4.611342929041327e-06, "loss": 0.1713, "step": 18477 }, { "epoch": 0.5390629558317288, "grad_norm": 0.9463567859642354, "learning_rate": 4.61087192604737e-06, "loss": 0.1094, "step": 18478 }, { "epoch": 0.5390921290623724, "grad_norm": 0.772709685839752, "learning_rate": 4.610400926527454e-06, "loss": 0.1027, "step": 18479 }, { "epoch": 0.5391213022930159, "grad_norm": 0.7836107240767829, "learning_rate": 4.609929930485785e-06, "loss": 0.1312, "step": 18480 }, { "epoch": 0.5391504755236595, "grad_norm": 1.0118977320199922, "learning_rate": 4.609458937926568e-06, "loss": 0.1449, "step": 18481 }, { "epoch": 0.539179648754303, "grad_norm": 0.7708828717475101, "learning_rate": 4.608987948854009e-06, "loss": 0.1142, "step": 18482 }, { "epoch": 0.5392088219849466, "grad_norm": 0.7905709797869556, "learning_rate": 4.608516963272314e-06, "loss": 0.118, "step": 18483 }, { "epoch": 0.5392379952155901, "grad_norm": 0.8069241387258512, "learning_rate": 4.6080459811856845e-06, "loss": 0.1328, "step": 18484 }, { "epoch": 0.5392671684462338, "grad_norm": 1.0309158279777073, "learning_rate": 4.6075750025983274e-06, "loss": 0.1236, "step": 18485 }, { "epoch": 0.5392963416768773, "grad_norm": 0.7039157364644283, "learning_rate": 4.607104027514448e-06, "loss": 0.1254, "step": 18486 }, { "epoch": 0.5393255149075209, "grad_norm": 0.8343523265323943, "learning_rate": 4.606633055938247e-06, "loss": 0.1404, "step": 18487 }, { "epoch": 0.5393546881381645, "grad_norm": 0.6620159980620091, "learning_rate": 4.606162087873934e-06, "loss": 0.1576, "step": 18488 }, { "epoch": 0.539383861368808, "grad_norm": 0.7441419946854502, "learning_rate": 4.605691123325712e-06, "loss": 0.1205, "step": 18489 }, { "epoch": 0.5394130345994516, "grad_norm": 1.0306287315275824, "learning_rate": 4.605220162297785e-06, "loss": 0.1245, "step": 18490 }, { "epoch": 0.5394422078300951, "grad_norm": 0.9544581723560933, "learning_rate": 4.60474920479436e-06, "loss": 0.1322, "step": 18491 }, { "epoch": 0.5394713810607387, "grad_norm": 0.9809156334129635, "learning_rate": 4.604278250819638e-06, "loss": 0.1248, "step": 18492 }, { "epoch": 0.5395005542913822, "grad_norm": 0.9011113477017791, "learning_rate": 4.603807300377825e-06, "loss": 0.1178, "step": 18493 }, { "epoch": 0.5395297275220258, "grad_norm": 0.8838891360815239, "learning_rate": 4.603336353473126e-06, "loss": 0.134, "step": 18494 }, { "epoch": 0.5395589007526693, "grad_norm": 0.9477505501652712, "learning_rate": 4.602865410109747e-06, "loss": 0.1588, "step": 18495 }, { "epoch": 0.5395880739833129, "grad_norm": 1.1140434308745006, "learning_rate": 4.602394470291889e-06, "loss": 0.1084, "step": 18496 }, { "epoch": 0.5396172472139564, "grad_norm": 0.8818063594020238, "learning_rate": 4.601923534023759e-06, "loss": 0.0971, "step": 18497 }, { "epoch": 0.5396464204446, "grad_norm": 0.8463094940252679, "learning_rate": 4.601452601309562e-06, "loss": 0.1281, "step": 18498 }, { "epoch": 0.5396755936752436, "grad_norm": 0.8236064778402271, "learning_rate": 4.600981672153497e-06, "loss": 0.1254, "step": 18499 }, { "epoch": 0.5397047669058872, "grad_norm": 0.762362504079061, "learning_rate": 4.600510746559776e-06, "loss": 0.1093, "step": 18500 }, { "epoch": 0.5397339401365308, "grad_norm": 0.9572674060564242, "learning_rate": 4.600039824532599e-06, "loss": 0.1193, "step": 18501 }, { "epoch": 0.5397631133671743, "grad_norm": 0.7105556118142405, "learning_rate": 4.599568906076169e-06, "loss": 0.1038, "step": 18502 }, { "epoch": 0.5397922865978179, "grad_norm": 1.4230552122953444, "learning_rate": 4.599097991194695e-06, "loss": 0.1558, "step": 18503 }, { "epoch": 0.5398214598284614, "grad_norm": 1.0784991990489363, "learning_rate": 4.598627079892378e-06, "loss": 0.1072, "step": 18504 }, { "epoch": 0.539850633059105, "grad_norm": 0.7736688661207487, "learning_rate": 4.59815617217342e-06, "loss": 0.1289, "step": 18505 }, { "epoch": 0.5398798062897485, "grad_norm": 0.9221273996752505, "learning_rate": 4.59768526804203e-06, "loss": 0.1189, "step": 18506 }, { "epoch": 0.5399089795203921, "grad_norm": 0.821146265980746, "learning_rate": 4.597214367502409e-06, "loss": 0.1321, "step": 18507 }, { "epoch": 0.5399381527510356, "grad_norm": 0.7993447388617366, "learning_rate": 4.596743470558764e-06, "loss": 0.1314, "step": 18508 }, { "epoch": 0.5399673259816792, "grad_norm": 0.7863949008157836, "learning_rate": 4.596272577215295e-06, "loss": 0.1096, "step": 18509 }, { "epoch": 0.5399964992123227, "grad_norm": 0.7868174842687745, "learning_rate": 4.595801687476209e-06, "loss": 0.1194, "step": 18510 }, { "epoch": 0.5400256724429663, "grad_norm": 0.8563130500686809, "learning_rate": 4.595330801345707e-06, "loss": 0.1379, "step": 18511 }, { "epoch": 0.5400548456736098, "grad_norm": 0.8570011607210023, "learning_rate": 4.594859918827996e-06, "loss": 0.1217, "step": 18512 }, { "epoch": 0.5400840189042535, "grad_norm": 0.7020689895705245, "learning_rate": 4.594389039927281e-06, "loss": 0.1245, "step": 18513 }, { "epoch": 0.5401131921348971, "grad_norm": 0.7365351004202181, "learning_rate": 4.593918164647763e-06, "loss": 0.1227, "step": 18514 }, { "epoch": 0.5401423653655406, "grad_norm": 0.9497995167706723, "learning_rate": 4.593447292993645e-06, "loss": 0.1248, "step": 18515 }, { "epoch": 0.5401715385961842, "grad_norm": 0.9012064912899808, "learning_rate": 4.592976424969135e-06, "loss": 0.1325, "step": 18516 }, { "epoch": 0.5402007118268277, "grad_norm": 1.0477580990927593, "learning_rate": 4.592505560578431e-06, "loss": 0.1188, "step": 18517 }, { "epoch": 0.5402298850574713, "grad_norm": 0.8948818940261655, "learning_rate": 4.592034699825743e-06, "loss": 0.1408, "step": 18518 }, { "epoch": 0.5402590582881148, "grad_norm": 0.8467170548211386, "learning_rate": 4.59156384271527e-06, "loss": 0.137, "step": 18519 }, { "epoch": 0.5402882315187584, "grad_norm": 0.811727248192837, "learning_rate": 4.591092989251219e-06, "loss": 0.1321, "step": 18520 }, { "epoch": 0.5403174047494019, "grad_norm": 1.043395801216942, "learning_rate": 4.590622139437792e-06, "loss": 0.1317, "step": 18521 }, { "epoch": 0.5403465779800455, "grad_norm": 1.1229914141576245, "learning_rate": 4.590151293279192e-06, "loss": 0.1393, "step": 18522 }, { "epoch": 0.540375751210689, "grad_norm": 0.8158467164834956, "learning_rate": 4.589680450779622e-06, "loss": 0.1455, "step": 18523 }, { "epoch": 0.5404049244413326, "grad_norm": 0.84450730775348, "learning_rate": 4.589209611943289e-06, "loss": 0.134, "step": 18524 }, { "epoch": 0.5404340976719761, "grad_norm": 0.8061892052318363, "learning_rate": 4.5887387767743955e-06, "loss": 0.1312, "step": 18525 }, { "epoch": 0.5404632709026198, "grad_norm": 0.8457112123241478, "learning_rate": 4.588267945277142e-06, "loss": 0.1329, "step": 18526 }, { "epoch": 0.5404924441332634, "grad_norm": 0.8079282516340712, "learning_rate": 4.587797117455735e-06, "loss": 0.1353, "step": 18527 }, { "epoch": 0.5405216173639069, "grad_norm": 0.9582440122427279, "learning_rate": 4.587326293314378e-06, "loss": 0.1143, "step": 18528 }, { "epoch": 0.5405507905945505, "grad_norm": 0.9462693153503788, "learning_rate": 4.586855472857269e-06, "loss": 0.1228, "step": 18529 }, { "epoch": 0.540579963825194, "grad_norm": 0.7384460324234042, "learning_rate": 4.58638465608862e-06, "loss": 0.1174, "step": 18530 }, { "epoch": 0.5406091370558376, "grad_norm": 0.7686077928824532, "learning_rate": 4.585913843012628e-06, "loss": 0.1306, "step": 18531 }, { "epoch": 0.5406383102864811, "grad_norm": 0.9531871625500782, "learning_rate": 4.5854430336335e-06, "loss": 0.1323, "step": 18532 }, { "epoch": 0.5406674835171247, "grad_norm": 0.8085206343946517, "learning_rate": 4.584972227955437e-06, "loss": 0.1663, "step": 18533 }, { "epoch": 0.5406966567477682, "grad_norm": 0.9274307278452015, "learning_rate": 4.584501425982641e-06, "loss": 0.1396, "step": 18534 }, { "epoch": 0.5407258299784118, "grad_norm": 0.8118635885139049, "learning_rate": 4.584030627719319e-06, "loss": 0.1288, "step": 18535 }, { "epoch": 0.5407550032090553, "grad_norm": 0.6881775262599032, "learning_rate": 4.5835598331696725e-06, "loss": 0.1127, "step": 18536 }, { "epoch": 0.5407841764396989, "grad_norm": 0.8751145615556514, "learning_rate": 4.5830890423379035e-06, "loss": 0.1332, "step": 18537 }, { "epoch": 0.5408133496703424, "grad_norm": 0.9482218663944888, "learning_rate": 4.582618255228218e-06, "loss": 0.1207, "step": 18538 }, { "epoch": 0.540842522900986, "grad_norm": 0.8477653576217563, "learning_rate": 4.582147471844814e-06, "loss": 0.1437, "step": 18539 }, { "epoch": 0.5408716961316297, "grad_norm": 0.7862490789715032, "learning_rate": 4.581676692191899e-06, "loss": 0.1286, "step": 18540 }, { "epoch": 0.5409008693622732, "grad_norm": 0.7373498074330986, "learning_rate": 4.581205916273675e-06, "loss": 0.1359, "step": 18541 }, { "epoch": 0.5409300425929168, "grad_norm": 0.8518693891851921, "learning_rate": 4.580735144094343e-06, "loss": 0.1095, "step": 18542 }, { "epoch": 0.5409592158235603, "grad_norm": 0.7929335141174001, "learning_rate": 4.58026437565811e-06, "loss": 0.1223, "step": 18543 }, { "epoch": 0.5409883890542039, "grad_norm": 0.949890133436839, "learning_rate": 4.579793610969175e-06, "loss": 0.1477, "step": 18544 }, { "epoch": 0.5410175622848474, "grad_norm": 0.6923494122570102, "learning_rate": 4.579322850031743e-06, "loss": 0.1268, "step": 18545 }, { "epoch": 0.541046735515491, "grad_norm": 0.9872506294900837, "learning_rate": 4.578852092850014e-06, "loss": 0.1211, "step": 18546 }, { "epoch": 0.5410759087461345, "grad_norm": 0.7901695596699595, "learning_rate": 4.578381339428197e-06, "loss": 0.1559, "step": 18547 }, { "epoch": 0.5411050819767781, "grad_norm": 0.7477654018944593, "learning_rate": 4.5779105897704874e-06, "loss": 0.1287, "step": 18548 }, { "epoch": 0.5411342552074216, "grad_norm": 1.0079866343956774, "learning_rate": 4.577439843881093e-06, "loss": 0.1306, "step": 18549 }, { "epoch": 0.5411634284380652, "grad_norm": 0.8164972594869021, "learning_rate": 4.5769691017642135e-06, "loss": 0.1234, "step": 18550 }, { "epoch": 0.5411926016687088, "grad_norm": 0.7352347377635433, "learning_rate": 4.5764983634240554e-06, "loss": 0.1489, "step": 18551 }, { "epoch": 0.5412217748993523, "grad_norm": 0.9238793751207289, "learning_rate": 4.576027628864815e-06, "loss": 0.1356, "step": 18552 }, { "epoch": 0.541250948129996, "grad_norm": 0.9410922114238712, "learning_rate": 4.575556898090701e-06, "loss": 0.1336, "step": 18553 }, { "epoch": 0.5412801213606395, "grad_norm": 0.890025217300632, "learning_rate": 4.575086171105913e-06, "loss": 0.1284, "step": 18554 }, { "epoch": 0.5413092945912831, "grad_norm": 0.7760769456198849, "learning_rate": 4.574615447914656e-06, "loss": 0.1368, "step": 18555 }, { "epoch": 0.5413384678219266, "grad_norm": 0.8683222818050699, "learning_rate": 4.574144728521129e-06, "loss": 0.1483, "step": 18556 }, { "epoch": 0.5413676410525702, "grad_norm": 0.7828015637307851, "learning_rate": 4.573674012929537e-06, "loss": 0.1134, "step": 18557 }, { "epoch": 0.5413968142832137, "grad_norm": 0.772556403125213, "learning_rate": 4.57320330114408e-06, "loss": 0.1245, "step": 18558 }, { "epoch": 0.5414259875138573, "grad_norm": 0.8124959768576329, "learning_rate": 4.572732593168963e-06, "loss": 0.1484, "step": 18559 }, { "epoch": 0.5414551607445008, "grad_norm": 0.8928366475724265, "learning_rate": 4.5722618890083886e-06, "loss": 0.1437, "step": 18560 }, { "epoch": 0.5414843339751444, "grad_norm": 0.8576042037903062, "learning_rate": 4.571791188666556e-06, "loss": 0.1161, "step": 18561 }, { "epoch": 0.541513507205788, "grad_norm": 0.6334579477388942, "learning_rate": 4.571320492147671e-06, "loss": 0.1439, "step": 18562 }, { "epoch": 0.5415426804364315, "grad_norm": 1.2423093574568127, "learning_rate": 4.570849799455935e-06, "loss": 0.1575, "step": 18563 }, { "epoch": 0.541571853667075, "grad_norm": 1.0674356324885308, "learning_rate": 4.5703791105955465e-06, "loss": 0.1241, "step": 18564 }, { "epoch": 0.5416010268977186, "grad_norm": 0.8917905360190724, "learning_rate": 4.5699084255707135e-06, "loss": 0.1091, "step": 18565 }, { "epoch": 0.5416302001283622, "grad_norm": 0.9229579882678267, "learning_rate": 4.569437744385634e-06, "loss": 0.1418, "step": 18566 }, { "epoch": 0.5416593733590058, "grad_norm": 0.7628392567268024, "learning_rate": 4.568967067044512e-06, "loss": 0.1292, "step": 18567 }, { "epoch": 0.5416885465896494, "grad_norm": 1.3246170348709403, "learning_rate": 4.56849639355155e-06, "loss": 0.0971, "step": 18568 }, { "epoch": 0.5417177198202929, "grad_norm": 0.845091498366653, "learning_rate": 4.568025723910948e-06, "loss": 0.1319, "step": 18569 }, { "epoch": 0.5417468930509365, "grad_norm": 0.6984478644385249, "learning_rate": 4.567555058126909e-06, "loss": 0.131, "step": 18570 }, { "epoch": 0.54177606628158, "grad_norm": 1.070303298446535, "learning_rate": 4.567084396203636e-06, "loss": 0.1449, "step": 18571 }, { "epoch": 0.5418052395122236, "grad_norm": 1.1262681782985426, "learning_rate": 4.566613738145329e-06, "loss": 0.1249, "step": 18572 }, { "epoch": 0.5418344127428671, "grad_norm": 0.8433029057458278, "learning_rate": 4.566143083956193e-06, "loss": 0.1589, "step": 18573 }, { "epoch": 0.5418635859735107, "grad_norm": 0.7064582794321326, "learning_rate": 4.565672433640428e-06, "loss": 0.1134, "step": 18574 }, { "epoch": 0.5418927592041543, "grad_norm": 0.951049905265706, "learning_rate": 4.565201787202234e-06, "loss": 0.1102, "step": 18575 }, { "epoch": 0.5419219324347978, "grad_norm": 0.8282929047926882, "learning_rate": 4.564731144645814e-06, "loss": 0.148, "step": 18576 }, { "epoch": 0.5419511056654414, "grad_norm": 0.7453905798825045, "learning_rate": 4.564260505975373e-06, "loss": 0.1303, "step": 18577 }, { "epoch": 0.5419802788960849, "grad_norm": 0.7995034987138877, "learning_rate": 4.5637898711951086e-06, "loss": 0.1259, "step": 18578 }, { "epoch": 0.5420094521267285, "grad_norm": 0.8275938207800875, "learning_rate": 4.563319240309225e-06, "loss": 0.1276, "step": 18579 }, { "epoch": 0.5420386253573721, "grad_norm": 0.7407051337221364, "learning_rate": 4.562848613321922e-06, "loss": 0.1536, "step": 18580 }, { "epoch": 0.5420677985880157, "grad_norm": 0.7538172452892475, "learning_rate": 4.562377990237404e-06, "loss": 0.1321, "step": 18581 }, { "epoch": 0.5420969718186592, "grad_norm": 1.905030882722415, "learning_rate": 4.561907371059868e-06, "loss": 0.1274, "step": 18582 }, { "epoch": 0.5421261450493028, "grad_norm": 1.0625206190115513, "learning_rate": 4.5614367557935205e-06, "loss": 0.1152, "step": 18583 }, { "epoch": 0.5421553182799463, "grad_norm": 1.0209787428644586, "learning_rate": 4.56096614444256e-06, "loss": 0.155, "step": 18584 }, { "epoch": 0.5421844915105899, "grad_norm": 0.7645543365119774, "learning_rate": 4.560495537011191e-06, "loss": 0.1311, "step": 18585 }, { "epoch": 0.5422136647412334, "grad_norm": 1.1025132199310623, "learning_rate": 4.560024933503611e-06, "loss": 0.1313, "step": 18586 }, { "epoch": 0.542242837971877, "grad_norm": 0.9262771547987466, "learning_rate": 4.559554333924024e-06, "loss": 0.1334, "step": 18587 }, { "epoch": 0.5422720112025206, "grad_norm": 0.8751620781825704, "learning_rate": 4.559083738276629e-06, "loss": 0.1463, "step": 18588 }, { "epoch": 0.5423011844331641, "grad_norm": 0.9838603475520283, "learning_rate": 4.55861314656563e-06, "loss": 0.1289, "step": 18589 }, { "epoch": 0.5423303576638077, "grad_norm": 1.013986579247909, "learning_rate": 4.558142558795229e-06, "loss": 0.1137, "step": 18590 }, { "epoch": 0.5423595308944512, "grad_norm": 0.9428278662199534, "learning_rate": 4.5576719749696255e-06, "loss": 0.1319, "step": 18591 }, { "epoch": 0.5423887041250948, "grad_norm": 0.9500908104131973, "learning_rate": 4.55720139509302e-06, "loss": 0.1423, "step": 18592 }, { "epoch": 0.5424178773557383, "grad_norm": 0.7443225951884858, "learning_rate": 4.556730819169617e-06, "loss": 0.1269, "step": 18593 }, { "epoch": 0.542447050586382, "grad_norm": 0.8387766165065289, "learning_rate": 4.556260247203611e-06, "loss": 0.1208, "step": 18594 }, { "epoch": 0.5424762238170255, "grad_norm": 0.8724240408580441, "learning_rate": 4.55578967919921e-06, "loss": 0.1294, "step": 18595 }, { "epoch": 0.5425053970476691, "grad_norm": 0.9397541649578579, "learning_rate": 4.555319115160613e-06, "loss": 0.1193, "step": 18596 }, { "epoch": 0.5425345702783126, "grad_norm": 0.8715328430253131, "learning_rate": 4.554848555092021e-06, "loss": 0.1328, "step": 18597 }, { "epoch": 0.5425637435089562, "grad_norm": 1.0141080824533748, "learning_rate": 4.554377998997635e-06, "loss": 0.1373, "step": 18598 }, { "epoch": 0.5425929167395998, "grad_norm": 0.977833134828352, "learning_rate": 4.553907446881655e-06, "loss": 0.1457, "step": 18599 }, { "epoch": 0.5426220899702433, "grad_norm": 0.7314597419648076, "learning_rate": 4.553436898748283e-06, "loss": 0.1321, "step": 18600 }, { "epoch": 0.5426512632008869, "grad_norm": 0.7464298405060341, "learning_rate": 4.552966354601719e-06, "loss": 0.13, "step": 18601 }, { "epoch": 0.5426804364315304, "grad_norm": 0.9192476285587113, "learning_rate": 4.552495814446165e-06, "loss": 0.124, "step": 18602 }, { "epoch": 0.542709609662174, "grad_norm": 0.9471931251716911, "learning_rate": 4.552025278285823e-06, "loss": 0.1412, "step": 18603 }, { "epoch": 0.5427387828928175, "grad_norm": 0.7028199390854473, "learning_rate": 4.551554746124891e-06, "loss": 0.1122, "step": 18604 }, { "epoch": 0.5427679561234611, "grad_norm": 0.8186137046998053, "learning_rate": 4.551084217967573e-06, "loss": 0.1385, "step": 18605 }, { "epoch": 0.5427971293541046, "grad_norm": 0.9767308062095534, "learning_rate": 4.550613693818064e-06, "loss": 0.1273, "step": 18606 }, { "epoch": 0.5428263025847483, "grad_norm": 0.8968856658488537, "learning_rate": 4.550143173680573e-06, "loss": 0.1382, "step": 18607 }, { "epoch": 0.5428554758153918, "grad_norm": 0.8372639466185194, "learning_rate": 4.549672657559294e-06, "loss": 0.114, "step": 18608 }, { "epoch": 0.5428846490460354, "grad_norm": 0.9577222514170669, "learning_rate": 4.54920214545843e-06, "loss": 0.1362, "step": 18609 }, { "epoch": 0.542913822276679, "grad_norm": 0.8554428290139974, "learning_rate": 4.5487316373821834e-06, "loss": 0.1324, "step": 18610 }, { "epoch": 0.5429429955073225, "grad_norm": 0.8378792662146597, "learning_rate": 4.548261133334753e-06, "loss": 0.1033, "step": 18611 }, { "epoch": 0.542972168737966, "grad_norm": 0.8425684330542159, "learning_rate": 4.547790633320336e-06, "loss": 0.1192, "step": 18612 }, { "epoch": 0.5430013419686096, "grad_norm": 0.86600847540191, "learning_rate": 4.547320137343138e-06, "loss": 0.1343, "step": 18613 }, { "epoch": 0.5430305151992532, "grad_norm": 0.8970330089790087, "learning_rate": 4.546849645407359e-06, "loss": 0.1306, "step": 18614 }, { "epoch": 0.5430596884298967, "grad_norm": 1.2082125477194066, "learning_rate": 4.546379157517198e-06, "loss": 0.1082, "step": 18615 }, { "epoch": 0.5430888616605403, "grad_norm": 0.8747763423161303, "learning_rate": 4.545908673676855e-06, "loss": 0.1427, "step": 18616 }, { "epoch": 0.5431180348911838, "grad_norm": 0.9298206824513096, "learning_rate": 4.545438193890531e-06, "loss": 0.1235, "step": 18617 }, { "epoch": 0.5431472081218274, "grad_norm": 0.897925886878897, "learning_rate": 4.544967718162425e-06, "loss": 0.1105, "step": 18618 }, { "epoch": 0.5431763813524709, "grad_norm": 0.7862787817646996, "learning_rate": 4.544497246496741e-06, "loss": 0.1265, "step": 18619 }, { "epoch": 0.5432055545831145, "grad_norm": 0.7829494278367473, "learning_rate": 4.544026778897676e-06, "loss": 0.1247, "step": 18620 }, { "epoch": 0.5432347278137581, "grad_norm": 0.8739761744992527, "learning_rate": 4.54355631536943e-06, "loss": 0.1229, "step": 18621 }, { "epoch": 0.5432639010444017, "grad_norm": 0.7817666258065223, "learning_rate": 4.543085855916205e-06, "loss": 0.1522, "step": 18622 }, { "epoch": 0.5432930742750453, "grad_norm": 0.8743724793823562, "learning_rate": 4.542615400542202e-06, "loss": 0.1376, "step": 18623 }, { "epoch": 0.5433222475056888, "grad_norm": 0.8417347293161281, "learning_rate": 4.542144949251615e-06, "loss": 0.1304, "step": 18624 }, { "epoch": 0.5433514207363324, "grad_norm": 0.8349549582346112, "learning_rate": 4.541674502048653e-06, "loss": 0.1122, "step": 18625 }, { "epoch": 0.5433805939669759, "grad_norm": 0.9844219872835687, "learning_rate": 4.54120405893751e-06, "loss": 0.1391, "step": 18626 }, { "epoch": 0.5434097671976195, "grad_norm": 0.9521398611024925, "learning_rate": 4.540733619922388e-06, "loss": 0.1398, "step": 18627 }, { "epoch": 0.543438940428263, "grad_norm": 1.403520958876466, "learning_rate": 4.540263185007487e-06, "loss": 0.1415, "step": 18628 }, { "epoch": 0.5434681136589066, "grad_norm": 1.0320447509130322, "learning_rate": 4.539792754197006e-06, "loss": 0.1218, "step": 18629 }, { "epoch": 0.5434972868895501, "grad_norm": 1.2343273669027426, "learning_rate": 4.539322327495144e-06, "loss": 0.1466, "step": 18630 }, { "epoch": 0.5435264601201937, "grad_norm": 0.8574991551732334, "learning_rate": 4.538851904906103e-06, "loss": 0.1337, "step": 18631 }, { "epoch": 0.5435556333508372, "grad_norm": 0.8387819903807323, "learning_rate": 4.538381486434083e-06, "loss": 0.116, "step": 18632 }, { "epoch": 0.5435848065814808, "grad_norm": 1.2056660015842662, "learning_rate": 4.537911072083284e-06, "loss": 0.1494, "step": 18633 }, { "epoch": 0.5436139798121244, "grad_norm": 0.9794726032666625, "learning_rate": 4.537440661857903e-06, "loss": 0.1473, "step": 18634 }, { "epoch": 0.543643153042768, "grad_norm": 1.100841704205615, "learning_rate": 4.536970255762142e-06, "loss": 0.1292, "step": 18635 }, { "epoch": 0.5436723262734116, "grad_norm": 0.8550148477909255, "learning_rate": 4.536499853800198e-06, "loss": 0.137, "step": 18636 }, { "epoch": 0.5437014995040551, "grad_norm": 0.8420445117887098, "learning_rate": 4.536029455976276e-06, "loss": 0.1206, "step": 18637 }, { "epoch": 0.5437306727346987, "grad_norm": 1.0208099243470854, "learning_rate": 4.53555906229457e-06, "loss": 0.1314, "step": 18638 }, { "epoch": 0.5437598459653422, "grad_norm": 0.9248521454814603, "learning_rate": 4.5350886727592824e-06, "loss": 0.119, "step": 18639 }, { "epoch": 0.5437890191959858, "grad_norm": 0.8622934723177336, "learning_rate": 4.534618287374613e-06, "loss": 0.1101, "step": 18640 }, { "epoch": 0.5438181924266293, "grad_norm": 0.930709123492009, "learning_rate": 4.53414790614476e-06, "loss": 0.1512, "step": 18641 }, { "epoch": 0.5438473656572729, "grad_norm": 1.0691718997838953, "learning_rate": 4.533677529073921e-06, "loss": 0.1463, "step": 18642 }, { "epoch": 0.5438765388879164, "grad_norm": 0.8207725725808066, "learning_rate": 4.5332071561663e-06, "loss": 0.1404, "step": 18643 }, { "epoch": 0.54390571211856, "grad_norm": 0.8619108900355196, "learning_rate": 4.532736787426093e-06, "loss": 0.1499, "step": 18644 }, { "epoch": 0.5439348853492035, "grad_norm": 0.7963081018313473, "learning_rate": 4.5322664228575024e-06, "loss": 0.1343, "step": 18645 }, { "epoch": 0.5439640585798471, "grad_norm": 1.052160880951865, "learning_rate": 4.531796062464724e-06, "loss": 0.1287, "step": 18646 }, { "epoch": 0.5439932318104906, "grad_norm": 0.8674671496535811, "learning_rate": 4.531325706251959e-06, "loss": 0.1266, "step": 18647 }, { "epoch": 0.5440224050411343, "grad_norm": 0.7351219368697155, "learning_rate": 4.530855354223405e-06, "loss": 0.1191, "step": 18648 }, { "epoch": 0.5440515782717779, "grad_norm": 0.8441911575644822, "learning_rate": 4.530385006383263e-06, "loss": 0.1256, "step": 18649 }, { "epoch": 0.5440807515024214, "grad_norm": 0.8942356063346709, "learning_rate": 4.5299146627357325e-06, "loss": 0.1166, "step": 18650 }, { "epoch": 0.544109924733065, "grad_norm": 0.8453180381197173, "learning_rate": 4.5294443232850115e-06, "loss": 0.1316, "step": 18651 }, { "epoch": 0.5441390979637085, "grad_norm": 0.9194448328534545, "learning_rate": 4.528973988035299e-06, "loss": 0.118, "step": 18652 }, { "epoch": 0.5441682711943521, "grad_norm": 0.8963468507354588, "learning_rate": 4.528503656990794e-06, "loss": 0.1229, "step": 18653 }, { "epoch": 0.5441974444249956, "grad_norm": 0.9618334380729433, "learning_rate": 4.528033330155694e-06, "loss": 0.1295, "step": 18654 }, { "epoch": 0.5442266176556392, "grad_norm": 0.8519382505129798, "learning_rate": 4.527563007534203e-06, "loss": 0.1303, "step": 18655 }, { "epoch": 0.5442557908862827, "grad_norm": 0.9808986787414548, "learning_rate": 4.527092689130515e-06, "loss": 0.1404, "step": 18656 }, { "epoch": 0.5442849641169263, "grad_norm": 0.8217557534225198, "learning_rate": 4.526622374948831e-06, "loss": 0.1258, "step": 18657 }, { "epoch": 0.5443141373475698, "grad_norm": 0.8524935676331612, "learning_rate": 4.526152064993351e-06, "loss": 0.1387, "step": 18658 }, { "epoch": 0.5443433105782134, "grad_norm": 0.7730121827305676, "learning_rate": 4.525681759268271e-06, "loss": 0.1263, "step": 18659 }, { "epoch": 0.5443724838088569, "grad_norm": 0.9901844344913672, "learning_rate": 4.525211457777789e-06, "loss": 0.1393, "step": 18660 }, { "epoch": 0.5444016570395006, "grad_norm": 0.8078128701754148, "learning_rate": 4.524741160526107e-06, "loss": 0.1363, "step": 18661 }, { "epoch": 0.5444308302701442, "grad_norm": 0.8081108537745513, "learning_rate": 4.524270867517423e-06, "loss": 0.13, "step": 18662 }, { "epoch": 0.5444600035007877, "grad_norm": 0.7951821217886085, "learning_rate": 4.523800578755936e-06, "loss": 0.1267, "step": 18663 }, { "epoch": 0.5444891767314313, "grad_norm": 0.7916764994858416, "learning_rate": 4.523330294245843e-06, "loss": 0.1246, "step": 18664 }, { "epoch": 0.5445183499620748, "grad_norm": 0.6801620602796086, "learning_rate": 4.522860013991343e-06, "loss": 0.115, "step": 18665 }, { "epoch": 0.5445475231927184, "grad_norm": 0.8694378580903237, "learning_rate": 4.522389737996634e-06, "loss": 0.1383, "step": 18666 }, { "epoch": 0.5445766964233619, "grad_norm": 0.7734560245462737, "learning_rate": 4.5219194662659175e-06, "loss": 0.1304, "step": 18667 }, { "epoch": 0.5446058696540055, "grad_norm": 0.6987330885457839, "learning_rate": 4.521449198803388e-06, "loss": 0.1278, "step": 18668 }, { "epoch": 0.544635042884649, "grad_norm": 0.7725151046455366, "learning_rate": 4.5209789356132475e-06, "loss": 0.114, "step": 18669 }, { "epoch": 0.5446642161152926, "grad_norm": 0.9217262555492821, "learning_rate": 4.520508676699692e-06, "loss": 0.1464, "step": 18670 }, { "epoch": 0.5446933893459361, "grad_norm": 0.8209866915718372, "learning_rate": 4.5200384220669204e-06, "loss": 0.1393, "step": 18671 }, { "epoch": 0.5447225625765797, "grad_norm": 0.7208983744735961, "learning_rate": 4.519568171719131e-06, "loss": 0.1274, "step": 18672 }, { "epoch": 0.5447517358072232, "grad_norm": 0.8469303619525171, "learning_rate": 4.519097925660522e-06, "loss": 0.1345, "step": 18673 }, { "epoch": 0.5447809090378668, "grad_norm": 0.8605894598785747, "learning_rate": 4.518627683895292e-06, "loss": 0.1304, "step": 18674 }, { "epoch": 0.5448100822685105, "grad_norm": 0.9702323629730008, "learning_rate": 4.518157446427641e-06, "loss": 0.1364, "step": 18675 }, { "epoch": 0.544839255499154, "grad_norm": 0.8255608939055838, "learning_rate": 4.517687213261763e-06, "loss": 0.1324, "step": 18676 }, { "epoch": 0.5448684287297976, "grad_norm": 1.2221890049807367, "learning_rate": 4.517216984401859e-06, "loss": 0.1551, "step": 18677 }, { "epoch": 0.5448976019604411, "grad_norm": 0.958857925699575, "learning_rate": 4.5167467598521255e-06, "loss": 0.1141, "step": 18678 }, { "epoch": 0.5449267751910847, "grad_norm": 0.7141807691434207, "learning_rate": 4.516276539616763e-06, "loss": 0.1264, "step": 18679 }, { "epoch": 0.5449559484217282, "grad_norm": 0.8791452586420072, "learning_rate": 4.51580632369997e-06, "loss": 0.1384, "step": 18680 }, { "epoch": 0.5449851216523718, "grad_norm": 1.1676846362190259, "learning_rate": 4.51533611210594e-06, "loss": 0.1313, "step": 18681 }, { "epoch": 0.5450142948830153, "grad_norm": 0.8867457317922832, "learning_rate": 4.514865904838873e-06, "loss": 0.1302, "step": 18682 }, { "epoch": 0.5450434681136589, "grad_norm": 0.7035972351414402, "learning_rate": 4.51439570190297e-06, "loss": 0.1455, "step": 18683 }, { "epoch": 0.5450726413443024, "grad_norm": 0.9727928675577748, "learning_rate": 4.513925503302422e-06, "loss": 0.136, "step": 18684 }, { "epoch": 0.545101814574946, "grad_norm": 0.7911244894352147, "learning_rate": 4.513455309041435e-06, "loss": 0.1173, "step": 18685 }, { "epoch": 0.5451309878055896, "grad_norm": 0.5604570224485353, "learning_rate": 4.512985119124201e-06, "loss": 0.1074, "step": 18686 }, { "epoch": 0.5451601610362331, "grad_norm": 0.8242058665485705, "learning_rate": 4.51251493355492e-06, "loss": 0.1403, "step": 18687 }, { "epoch": 0.5451893342668768, "grad_norm": 0.7932939935576612, "learning_rate": 4.512044752337791e-06, "loss": 0.1559, "step": 18688 }, { "epoch": 0.5452185074975203, "grad_norm": 0.8979089486879996, "learning_rate": 4.511574575477008e-06, "loss": 0.1326, "step": 18689 }, { "epoch": 0.5452476807281639, "grad_norm": 0.71343820691617, "learning_rate": 4.51110440297677e-06, "loss": 0.1248, "step": 18690 }, { "epoch": 0.5452768539588074, "grad_norm": 0.6444381535690982, "learning_rate": 4.510634234841276e-06, "loss": 0.1209, "step": 18691 }, { "epoch": 0.545306027189451, "grad_norm": 0.713407745145363, "learning_rate": 4.510164071074722e-06, "loss": 0.1417, "step": 18692 }, { "epoch": 0.5453352004200945, "grad_norm": 0.7842246343689397, "learning_rate": 4.509693911681309e-06, "loss": 0.142, "step": 18693 }, { "epoch": 0.5453643736507381, "grad_norm": 0.7797764613995918, "learning_rate": 4.509223756665229e-06, "loss": 0.1373, "step": 18694 }, { "epoch": 0.5453935468813816, "grad_norm": 0.9279253897875245, "learning_rate": 4.508753606030683e-06, "loss": 0.141, "step": 18695 }, { "epoch": 0.5454227201120252, "grad_norm": 1.2388409268297025, "learning_rate": 4.508283459781866e-06, "loss": 0.1272, "step": 18696 }, { "epoch": 0.5454518933426687, "grad_norm": 0.8143512484818307, "learning_rate": 4.50781331792298e-06, "loss": 0.1023, "step": 18697 }, { "epoch": 0.5454810665733123, "grad_norm": 0.8197034669807962, "learning_rate": 4.507343180458217e-06, "loss": 0.1316, "step": 18698 }, { "epoch": 0.5455102398039559, "grad_norm": 0.9597287125437736, "learning_rate": 4.5068730473917775e-06, "loss": 0.1202, "step": 18699 }, { "epoch": 0.5455394130345994, "grad_norm": 1.746329316587141, "learning_rate": 4.506402918727858e-06, "loss": 0.111, "step": 18700 }, { "epoch": 0.545568586265243, "grad_norm": 0.8173571891349378, "learning_rate": 4.505932794470655e-06, "loss": 0.1242, "step": 18701 }, { "epoch": 0.5455977594958866, "grad_norm": 0.9096773756712586, "learning_rate": 4.505462674624364e-06, "loss": 0.1152, "step": 18702 }, { "epoch": 0.5456269327265302, "grad_norm": 0.6762544356314814, "learning_rate": 4.504992559193186e-06, "loss": 0.1144, "step": 18703 }, { "epoch": 0.5456561059571737, "grad_norm": 0.7441982050700876, "learning_rate": 4.504522448181317e-06, "loss": 0.1221, "step": 18704 }, { "epoch": 0.5456852791878173, "grad_norm": 0.6798766630157737, "learning_rate": 4.504052341592953e-06, "loss": 0.1209, "step": 18705 }, { "epoch": 0.5457144524184608, "grad_norm": 0.8271821107677139, "learning_rate": 4.503582239432291e-06, "loss": 0.1394, "step": 18706 }, { "epoch": 0.5457436256491044, "grad_norm": 0.8143690873232363, "learning_rate": 4.503112141703528e-06, "loss": 0.12, "step": 18707 }, { "epoch": 0.545772798879748, "grad_norm": 0.916131360435358, "learning_rate": 4.50264204841086e-06, "loss": 0.1279, "step": 18708 }, { "epoch": 0.5458019721103915, "grad_norm": 1.0924993975321524, "learning_rate": 4.502171959558486e-06, "loss": 0.1228, "step": 18709 }, { "epoch": 0.545831145341035, "grad_norm": 0.9329447919396929, "learning_rate": 4.501701875150604e-06, "loss": 0.1409, "step": 18710 }, { "epoch": 0.5458603185716786, "grad_norm": 1.0741350878115938, "learning_rate": 4.501231795191406e-06, "loss": 0.138, "step": 18711 }, { "epoch": 0.5458894918023222, "grad_norm": 0.917101561364549, "learning_rate": 4.500761719685093e-06, "loss": 0.1525, "step": 18712 }, { "epoch": 0.5459186650329657, "grad_norm": 0.8087043155629119, "learning_rate": 4.50029164863586e-06, "loss": 0.1264, "step": 18713 }, { "epoch": 0.5459478382636093, "grad_norm": 0.855974439774273, "learning_rate": 4.499821582047902e-06, "loss": 0.1116, "step": 18714 }, { "epoch": 0.5459770114942529, "grad_norm": 0.8794473717442446, "learning_rate": 4.4993515199254196e-06, "loss": 0.1304, "step": 18715 }, { "epoch": 0.5460061847248965, "grad_norm": 0.946860807346336, "learning_rate": 4.498881462272607e-06, "loss": 0.1559, "step": 18716 }, { "epoch": 0.54603535795554, "grad_norm": 0.8348987097597831, "learning_rate": 4.49841140909366e-06, "loss": 0.1204, "step": 18717 }, { "epoch": 0.5460645311861836, "grad_norm": 0.780462143600301, "learning_rate": 4.497941360392778e-06, "loss": 0.1174, "step": 18718 }, { "epoch": 0.5460937044168271, "grad_norm": 0.765716565392337, "learning_rate": 4.4974713161741545e-06, "loss": 0.1352, "step": 18719 }, { "epoch": 0.5461228776474707, "grad_norm": 1.0252641668279476, "learning_rate": 4.497001276441986e-06, "loss": 0.1359, "step": 18720 }, { "epoch": 0.5461520508781142, "grad_norm": 0.7572138221774259, "learning_rate": 4.496531241200472e-06, "loss": 0.1298, "step": 18721 }, { "epoch": 0.5461812241087578, "grad_norm": 0.6336423310001275, "learning_rate": 4.496061210453806e-06, "loss": 0.0975, "step": 18722 }, { "epoch": 0.5462103973394014, "grad_norm": 0.8224626514752715, "learning_rate": 4.4955911842061864e-06, "loss": 0.1116, "step": 18723 }, { "epoch": 0.5462395705700449, "grad_norm": 0.9424743288141211, "learning_rate": 4.4951211624618065e-06, "loss": 0.1461, "step": 18724 }, { "epoch": 0.5462687438006885, "grad_norm": 1.1566218562939485, "learning_rate": 4.494651145224864e-06, "loss": 0.1468, "step": 18725 }, { "epoch": 0.546297917031332, "grad_norm": 0.8847982767943576, "learning_rate": 4.494181132499557e-06, "loss": 0.1124, "step": 18726 }, { "epoch": 0.5463270902619756, "grad_norm": 0.9788825134811449, "learning_rate": 4.493711124290081e-06, "loss": 0.1251, "step": 18727 }, { "epoch": 0.5463562634926191, "grad_norm": 1.0327831909363185, "learning_rate": 4.493241120600629e-06, "loss": 0.1202, "step": 18728 }, { "epoch": 0.5463854367232628, "grad_norm": 1.04227725466138, "learning_rate": 4.4927711214354005e-06, "loss": 0.1282, "step": 18729 }, { "epoch": 0.5464146099539063, "grad_norm": 1.0892908731422253, "learning_rate": 4.492301126798591e-06, "loss": 0.1474, "step": 18730 }, { "epoch": 0.5464437831845499, "grad_norm": 1.445404712102127, "learning_rate": 4.491831136694393e-06, "loss": 0.1357, "step": 18731 }, { "epoch": 0.5464729564151934, "grad_norm": 0.8457448446123691, "learning_rate": 4.491361151127008e-06, "loss": 0.1188, "step": 18732 }, { "epoch": 0.546502129645837, "grad_norm": 0.698862966818781, "learning_rate": 4.490891170100629e-06, "loss": 0.1282, "step": 18733 }, { "epoch": 0.5465313028764806, "grad_norm": 1.075784804102348, "learning_rate": 4.490421193619451e-06, "loss": 0.136, "step": 18734 }, { "epoch": 0.5465604761071241, "grad_norm": 0.9105765184802457, "learning_rate": 4.489951221687672e-06, "loss": 0.1176, "step": 18735 }, { "epoch": 0.5465896493377677, "grad_norm": 0.8338228380624473, "learning_rate": 4.489481254309486e-06, "loss": 0.1463, "step": 18736 }, { "epoch": 0.5466188225684112, "grad_norm": 0.8184887980584117, "learning_rate": 4.489011291489089e-06, "loss": 0.1343, "step": 18737 }, { "epoch": 0.5466479957990548, "grad_norm": 1.0756234626517278, "learning_rate": 4.488541333230678e-06, "loss": 0.147, "step": 18738 }, { "epoch": 0.5466771690296983, "grad_norm": 1.1569508670024704, "learning_rate": 4.488071379538447e-06, "loss": 0.138, "step": 18739 }, { "epoch": 0.5467063422603419, "grad_norm": 1.0553493069012183, "learning_rate": 4.487601430416595e-06, "loss": 0.1382, "step": 18740 }, { "epoch": 0.5467355154909854, "grad_norm": 0.73847327189784, "learning_rate": 4.487131485869313e-06, "loss": 0.1461, "step": 18741 }, { "epoch": 0.5467646887216291, "grad_norm": 1.2745943829878523, "learning_rate": 4.486661545900799e-06, "loss": 0.1039, "step": 18742 }, { "epoch": 0.5467938619522726, "grad_norm": 0.9256812935285194, "learning_rate": 4.486191610515247e-06, "loss": 0.1368, "step": 18743 }, { "epoch": 0.5468230351829162, "grad_norm": 1.0334598848100578, "learning_rate": 4.485721679716855e-06, "loss": 0.1497, "step": 18744 }, { "epoch": 0.5468522084135597, "grad_norm": 0.9181275271618506, "learning_rate": 4.485251753509818e-06, "loss": 0.126, "step": 18745 }, { "epoch": 0.5468813816442033, "grad_norm": 0.8469800679286065, "learning_rate": 4.484781831898329e-06, "loss": 0.1555, "step": 18746 }, { "epoch": 0.5469105548748469, "grad_norm": 0.8484698155359741, "learning_rate": 4.484311914886585e-06, "loss": 0.1281, "step": 18747 }, { "epoch": 0.5469397281054904, "grad_norm": 1.2317439107696468, "learning_rate": 4.483842002478783e-06, "loss": 0.1352, "step": 18748 }, { "epoch": 0.546968901336134, "grad_norm": 1.071208504436812, "learning_rate": 4.483372094679112e-06, "loss": 0.122, "step": 18749 }, { "epoch": 0.5469980745667775, "grad_norm": 0.7195957535184634, "learning_rate": 4.482902191491775e-06, "loss": 0.1161, "step": 18750 }, { "epoch": 0.5470272477974211, "grad_norm": 0.7326552436370776, "learning_rate": 4.482432292920963e-06, "loss": 0.1303, "step": 18751 }, { "epoch": 0.5470564210280646, "grad_norm": 0.7813913349897524, "learning_rate": 4.481962398970872e-06, "loss": 0.1177, "step": 18752 }, { "epoch": 0.5470855942587082, "grad_norm": 1.783801470151127, "learning_rate": 4.481492509645698e-06, "loss": 0.1144, "step": 18753 }, { "epoch": 0.5471147674893517, "grad_norm": 0.9595920176408392, "learning_rate": 4.481022624949635e-06, "loss": 0.1535, "step": 18754 }, { "epoch": 0.5471439407199953, "grad_norm": 0.7438172546724363, "learning_rate": 4.480552744886876e-06, "loss": 0.1077, "step": 18755 }, { "epoch": 0.547173113950639, "grad_norm": 0.7688152617798003, "learning_rate": 4.4800828694616195e-06, "loss": 0.1364, "step": 18756 }, { "epoch": 0.5472022871812825, "grad_norm": 0.9405447055173943, "learning_rate": 4.479612998678059e-06, "loss": 0.1283, "step": 18757 }, { "epoch": 0.547231460411926, "grad_norm": 0.7854322612157514, "learning_rate": 4.47914313254039e-06, "loss": 0.1116, "step": 18758 }, { "epoch": 0.5472606336425696, "grad_norm": 0.8431792024419208, "learning_rate": 4.478673271052806e-06, "loss": 0.1489, "step": 18759 }, { "epoch": 0.5472898068732132, "grad_norm": 0.7991888279351618, "learning_rate": 4.478203414219503e-06, "loss": 0.1252, "step": 18760 }, { "epoch": 0.5473189801038567, "grad_norm": 0.8240294087855686, "learning_rate": 4.477733562044673e-06, "loss": 0.1249, "step": 18761 }, { "epoch": 0.5473481533345003, "grad_norm": 0.9701580584668879, "learning_rate": 4.477263714532517e-06, "loss": 0.1343, "step": 18762 }, { "epoch": 0.5473773265651438, "grad_norm": 0.9308008446272982, "learning_rate": 4.476793871687224e-06, "loss": 0.1444, "step": 18763 }, { "epoch": 0.5474064997957874, "grad_norm": 0.9927858917002728, "learning_rate": 4.4763240335129895e-06, "loss": 0.1063, "step": 18764 }, { "epoch": 0.5474356730264309, "grad_norm": 0.7792231841900343, "learning_rate": 4.475854200014011e-06, "loss": 0.1179, "step": 18765 }, { "epoch": 0.5474648462570745, "grad_norm": 0.9923921341333932, "learning_rate": 4.47538437119448e-06, "loss": 0.1396, "step": 18766 }, { "epoch": 0.547494019487718, "grad_norm": 1.1559385992876254, "learning_rate": 4.474914547058591e-06, "loss": 0.154, "step": 18767 }, { "epoch": 0.5475231927183616, "grad_norm": 0.6660916757162116, "learning_rate": 4.4744447276105405e-06, "loss": 0.1185, "step": 18768 }, { "epoch": 0.5475523659490051, "grad_norm": 1.0079416046977872, "learning_rate": 4.473974912854522e-06, "loss": 0.1483, "step": 18769 }, { "epoch": 0.5475815391796488, "grad_norm": 0.9979158077808288, "learning_rate": 4.473505102794731e-06, "loss": 0.1447, "step": 18770 }, { "epoch": 0.5476107124102924, "grad_norm": 0.6353070059279827, "learning_rate": 4.4730352974353595e-06, "loss": 0.1156, "step": 18771 }, { "epoch": 0.5476398856409359, "grad_norm": 0.8250928276288193, "learning_rate": 4.472565496780603e-06, "loss": 0.1376, "step": 18772 }, { "epoch": 0.5476690588715795, "grad_norm": 0.8017297030902534, "learning_rate": 4.472095700834655e-06, "loss": 0.1239, "step": 18773 }, { "epoch": 0.547698232102223, "grad_norm": 0.827653124740946, "learning_rate": 4.471625909601712e-06, "loss": 0.1395, "step": 18774 }, { "epoch": 0.5477274053328666, "grad_norm": 0.714812532505758, "learning_rate": 4.471156123085968e-06, "loss": 0.1034, "step": 18775 }, { "epoch": 0.5477565785635101, "grad_norm": 0.94154036922442, "learning_rate": 4.470686341291614e-06, "loss": 0.1239, "step": 18776 }, { "epoch": 0.5477857517941537, "grad_norm": 0.8996131738315101, "learning_rate": 4.470216564222846e-06, "loss": 0.1094, "step": 18777 }, { "epoch": 0.5478149250247972, "grad_norm": 0.8582004083861352, "learning_rate": 4.469746791883859e-06, "loss": 0.1262, "step": 18778 }, { "epoch": 0.5478440982554408, "grad_norm": 0.9166084305535637, "learning_rate": 4.469277024278844e-06, "loss": 0.1077, "step": 18779 }, { "epoch": 0.5478732714860843, "grad_norm": 1.2600268115693165, "learning_rate": 4.468807261412e-06, "loss": 0.1574, "step": 18780 }, { "epoch": 0.5479024447167279, "grad_norm": 0.8629881857398644, "learning_rate": 4.468337503287516e-06, "loss": 0.1545, "step": 18781 }, { "epoch": 0.5479316179473714, "grad_norm": 0.9043997551778598, "learning_rate": 4.467867749909588e-06, "loss": 0.104, "step": 18782 }, { "epoch": 0.5479607911780151, "grad_norm": 1.0151971542149922, "learning_rate": 4.4673980012824106e-06, "loss": 0.1412, "step": 18783 }, { "epoch": 0.5479899644086587, "grad_norm": 1.1540785596688323, "learning_rate": 4.466928257410176e-06, "loss": 0.178, "step": 18784 }, { "epoch": 0.5480191376393022, "grad_norm": 1.4450616556343472, "learning_rate": 4.466458518297078e-06, "loss": 0.1141, "step": 18785 }, { "epoch": 0.5480483108699458, "grad_norm": 0.8877658422972836, "learning_rate": 4.465988783947311e-06, "loss": 0.1152, "step": 18786 }, { "epoch": 0.5480774841005893, "grad_norm": 1.0677561065489143, "learning_rate": 4.465519054365071e-06, "loss": 0.1195, "step": 18787 }, { "epoch": 0.5481066573312329, "grad_norm": 1.0366187645591158, "learning_rate": 4.4650493295545475e-06, "loss": 0.1147, "step": 18788 }, { "epoch": 0.5481358305618764, "grad_norm": 0.9566943292061366, "learning_rate": 4.464579609519936e-06, "loss": 0.1167, "step": 18789 }, { "epoch": 0.54816500379252, "grad_norm": 1.015505908262457, "learning_rate": 4.464109894265431e-06, "loss": 0.1141, "step": 18790 }, { "epoch": 0.5481941770231635, "grad_norm": 1.0564093438562037, "learning_rate": 4.463640183795222e-06, "loss": 0.1225, "step": 18791 }, { "epoch": 0.5482233502538071, "grad_norm": 1.5484185495907816, "learning_rate": 4.463170478113509e-06, "loss": 0.1517, "step": 18792 }, { "epoch": 0.5482525234844506, "grad_norm": 0.9621266986658162, "learning_rate": 4.462700777224479e-06, "loss": 0.1299, "step": 18793 }, { "epoch": 0.5482816967150942, "grad_norm": 1.0427061543192153, "learning_rate": 4.46223108113233e-06, "loss": 0.1438, "step": 18794 }, { "epoch": 0.5483108699457377, "grad_norm": 0.7867770643621365, "learning_rate": 4.4617613898412534e-06, "loss": 0.1329, "step": 18795 }, { "epoch": 0.5483400431763813, "grad_norm": 0.6628114665732285, "learning_rate": 4.461291703355443e-06, "loss": 0.1289, "step": 18796 }, { "epoch": 0.548369216407025, "grad_norm": 0.8931836886938811, "learning_rate": 4.460822021679089e-06, "loss": 0.1222, "step": 18797 }, { "epoch": 0.5483983896376685, "grad_norm": 0.9416935724452078, "learning_rate": 4.4603523448163894e-06, "loss": 0.1166, "step": 18798 }, { "epoch": 0.5484275628683121, "grad_norm": 0.660218947610981, "learning_rate": 4.459882672771535e-06, "loss": 0.1337, "step": 18799 }, { "epoch": 0.5484567360989556, "grad_norm": 0.83965462166203, "learning_rate": 4.45941300554872e-06, "loss": 0.1134, "step": 18800 }, { "epoch": 0.5484859093295992, "grad_norm": 1.0496612007027168, "learning_rate": 4.4589433431521356e-06, "loss": 0.1134, "step": 18801 }, { "epoch": 0.5485150825602427, "grad_norm": 0.8463714576445494, "learning_rate": 4.458473685585976e-06, "loss": 0.1342, "step": 18802 }, { "epoch": 0.5485442557908863, "grad_norm": 0.8421325209491768, "learning_rate": 4.458004032854432e-06, "loss": 0.1328, "step": 18803 }, { "epoch": 0.5485734290215298, "grad_norm": 0.9108583905917115, "learning_rate": 4.457534384961701e-06, "loss": 0.1343, "step": 18804 }, { "epoch": 0.5486026022521734, "grad_norm": 0.9689609962829522, "learning_rate": 4.457064741911974e-06, "loss": 0.1279, "step": 18805 }, { "epoch": 0.5486317754828169, "grad_norm": 0.7041795736720375, "learning_rate": 4.456595103709443e-06, "loss": 0.1352, "step": 18806 }, { "epoch": 0.5486609487134605, "grad_norm": 0.8467823049388483, "learning_rate": 4.456125470358301e-06, "loss": 0.1292, "step": 18807 }, { "epoch": 0.548690121944104, "grad_norm": 0.7456565169942629, "learning_rate": 4.455655841862742e-06, "loss": 0.1455, "step": 18808 }, { "epoch": 0.5487192951747476, "grad_norm": 0.8075679281034006, "learning_rate": 4.455186218226953e-06, "loss": 0.1343, "step": 18809 }, { "epoch": 0.5487484684053913, "grad_norm": 0.7829420297078291, "learning_rate": 4.454716599455137e-06, "loss": 0.1273, "step": 18810 }, { "epoch": 0.5487776416360348, "grad_norm": 0.8074066550865667, "learning_rate": 4.454246985551478e-06, "loss": 0.1107, "step": 18811 }, { "epoch": 0.5488068148666784, "grad_norm": 0.877346506703811, "learning_rate": 4.453777376520173e-06, "loss": 0.1243, "step": 18812 }, { "epoch": 0.5488359880973219, "grad_norm": 0.8439324148329742, "learning_rate": 4.4533077723654134e-06, "loss": 0.1441, "step": 18813 }, { "epoch": 0.5488651613279655, "grad_norm": 0.9160281794188151, "learning_rate": 4.452838173091391e-06, "loss": 0.1096, "step": 18814 }, { "epoch": 0.548894334558609, "grad_norm": 1.2818435066609408, "learning_rate": 4.452368578702297e-06, "loss": 0.1427, "step": 18815 }, { "epoch": 0.5489235077892526, "grad_norm": 0.8903131888603528, "learning_rate": 4.451898989202327e-06, "loss": 0.1206, "step": 18816 }, { "epoch": 0.5489526810198961, "grad_norm": 1.0707974982028392, "learning_rate": 4.451429404595673e-06, "loss": 0.1292, "step": 18817 }, { "epoch": 0.5489818542505397, "grad_norm": 1.068831289893814, "learning_rate": 4.450959824886525e-06, "loss": 0.1226, "step": 18818 }, { "epoch": 0.5490110274811832, "grad_norm": 0.9618543277054882, "learning_rate": 4.450490250079077e-06, "loss": 0.1305, "step": 18819 }, { "epoch": 0.5490402007118268, "grad_norm": 0.7889767428814896, "learning_rate": 4.450020680177522e-06, "loss": 0.1379, "step": 18820 }, { "epoch": 0.5490693739424704, "grad_norm": 0.9420117986656027, "learning_rate": 4.449551115186049e-06, "loss": 0.1456, "step": 18821 }, { "epoch": 0.5490985471731139, "grad_norm": 0.9483556160076674, "learning_rate": 4.4490815551088535e-06, "loss": 0.1174, "step": 18822 }, { "epoch": 0.5491277204037575, "grad_norm": 0.8420100584383265, "learning_rate": 4.448611999950126e-06, "loss": 0.12, "step": 18823 }, { "epoch": 0.5491568936344011, "grad_norm": 0.753450656429226, "learning_rate": 4.448142449714059e-06, "loss": 0.1085, "step": 18824 }, { "epoch": 0.5491860668650447, "grad_norm": 0.7376249513628443, "learning_rate": 4.447672904404846e-06, "loss": 0.1207, "step": 18825 }, { "epoch": 0.5492152400956882, "grad_norm": 0.6532449538371288, "learning_rate": 4.447203364026675e-06, "loss": 0.1288, "step": 18826 }, { "epoch": 0.5492444133263318, "grad_norm": 0.7441042072903009, "learning_rate": 4.44673382858374e-06, "loss": 0.1357, "step": 18827 }, { "epoch": 0.5492735865569753, "grad_norm": 0.7667964860350982, "learning_rate": 4.446264298080235e-06, "loss": 0.1356, "step": 18828 }, { "epoch": 0.5493027597876189, "grad_norm": 0.7521707498130297, "learning_rate": 4.44579477252035e-06, "loss": 0.1316, "step": 18829 }, { "epoch": 0.5493319330182624, "grad_norm": 0.799486513772945, "learning_rate": 4.4453252519082775e-06, "loss": 0.1362, "step": 18830 }, { "epoch": 0.549361106248906, "grad_norm": 0.7546343097194946, "learning_rate": 4.444855736248208e-06, "loss": 0.1221, "step": 18831 }, { "epoch": 0.5493902794795495, "grad_norm": 0.7752573259250796, "learning_rate": 4.444386225544334e-06, "loss": 0.1556, "step": 18832 }, { "epoch": 0.5494194527101931, "grad_norm": 0.685234704736772, "learning_rate": 4.443916719800846e-06, "loss": 0.1318, "step": 18833 }, { "epoch": 0.5494486259408367, "grad_norm": 0.8198630358754666, "learning_rate": 4.443447219021938e-06, "loss": 0.1556, "step": 18834 }, { "epoch": 0.5494777991714802, "grad_norm": 0.7886386679426491, "learning_rate": 4.442977723211801e-06, "loss": 0.1151, "step": 18835 }, { "epoch": 0.5495069724021238, "grad_norm": 0.9014790405505484, "learning_rate": 4.442508232374625e-06, "loss": 0.1099, "step": 18836 }, { "epoch": 0.5495361456327674, "grad_norm": 0.8411352161674372, "learning_rate": 4.442038746514603e-06, "loss": 0.1483, "step": 18837 }, { "epoch": 0.549565318863411, "grad_norm": 0.6923289850293326, "learning_rate": 4.441569265635927e-06, "loss": 0.1189, "step": 18838 }, { "epoch": 0.5495944920940545, "grad_norm": 0.78649112074162, "learning_rate": 4.441099789742783e-06, "loss": 0.103, "step": 18839 }, { "epoch": 0.5496236653246981, "grad_norm": 0.7811436076507703, "learning_rate": 4.440630318839371e-06, "loss": 0.1345, "step": 18840 }, { "epoch": 0.5496528385553416, "grad_norm": 0.7639822127543663, "learning_rate": 4.4401608529298755e-06, "loss": 0.1333, "step": 18841 }, { "epoch": 0.5496820117859852, "grad_norm": 0.9188100326365559, "learning_rate": 4.439691392018492e-06, "loss": 0.1568, "step": 18842 }, { "epoch": 0.5497111850166287, "grad_norm": 0.7253342840516087, "learning_rate": 4.439221936109409e-06, "loss": 0.1204, "step": 18843 }, { "epoch": 0.5497403582472723, "grad_norm": 0.9615458196395555, "learning_rate": 4.438752485206819e-06, "loss": 0.1289, "step": 18844 }, { "epoch": 0.5497695314779159, "grad_norm": 0.8532075732843499, "learning_rate": 4.438283039314912e-06, "loss": 0.1226, "step": 18845 }, { "epoch": 0.5497987047085594, "grad_norm": 0.8816727263528346, "learning_rate": 4.437813598437881e-06, "loss": 0.1205, "step": 18846 }, { "epoch": 0.549827877939203, "grad_norm": 0.9298808247796456, "learning_rate": 4.437344162579917e-06, "loss": 0.1403, "step": 18847 }, { "epoch": 0.5498570511698465, "grad_norm": 0.7830926199444167, "learning_rate": 4.4368747317452075e-06, "loss": 0.1482, "step": 18848 }, { "epoch": 0.5498862244004901, "grad_norm": 0.7605544103110462, "learning_rate": 4.436405305937947e-06, "loss": 0.1236, "step": 18849 }, { "epoch": 0.5499153976311336, "grad_norm": 0.8261668690686542, "learning_rate": 4.435935885162327e-06, "loss": 0.1159, "step": 18850 }, { "epoch": 0.5499445708617773, "grad_norm": 1.0097377594927257, "learning_rate": 4.435466469422533e-06, "loss": 0.1247, "step": 18851 }, { "epoch": 0.5499737440924208, "grad_norm": 0.9728248677311301, "learning_rate": 4.434997058722762e-06, "loss": 0.1353, "step": 18852 }, { "epoch": 0.5500029173230644, "grad_norm": 0.7422217850705474, "learning_rate": 4.434527653067203e-06, "loss": 0.113, "step": 18853 }, { "epoch": 0.5500320905537079, "grad_norm": 0.9094787481848825, "learning_rate": 4.434058252460045e-06, "loss": 0.1263, "step": 18854 }, { "epoch": 0.5500612637843515, "grad_norm": 0.6841078949401904, "learning_rate": 4.433588856905481e-06, "loss": 0.1326, "step": 18855 }, { "epoch": 0.550090437014995, "grad_norm": 0.8828866013868408, "learning_rate": 4.4331194664077e-06, "loss": 0.1367, "step": 18856 }, { "epoch": 0.5501196102456386, "grad_norm": 0.7830079930268402, "learning_rate": 4.432650080970891e-06, "loss": 0.1462, "step": 18857 }, { "epoch": 0.5501487834762822, "grad_norm": 0.8156306303494512, "learning_rate": 4.432180700599248e-06, "loss": 0.1238, "step": 18858 }, { "epoch": 0.5501779567069257, "grad_norm": 0.7823258402850047, "learning_rate": 4.431711325296961e-06, "loss": 0.1343, "step": 18859 }, { "epoch": 0.5502071299375693, "grad_norm": 0.8022561333768476, "learning_rate": 4.43124195506822e-06, "loss": 0.1164, "step": 18860 }, { "epoch": 0.5502363031682128, "grad_norm": 0.8583481960707582, "learning_rate": 4.430772589917214e-06, "loss": 0.1213, "step": 18861 }, { "epoch": 0.5502654763988564, "grad_norm": 0.954864727716638, "learning_rate": 4.4303032298481344e-06, "loss": 0.1365, "step": 18862 }, { "epoch": 0.5502946496294999, "grad_norm": 0.8819247378086011, "learning_rate": 4.429833874865171e-06, "loss": 0.1229, "step": 18863 }, { "epoch": 0.5503238228601436, "grad_norm": 0.9175827146331453, "learning_rate": 4.429364524972516e-06, "loss": 0.1375, "step": 18864 }, { "epoch": 0.5503529960907871, "grad_norm": 0.7254745234518502, "learning_rate": 4.428895180174358e-06, "loss": 0.1235, "step": 18865 }, { "epoch": 0.5503821693214307, "grad_norm": 0.9335599154474736, "learning_rate": 4.428425840474888e-06, "loss": 0.1351, "step": 18866 }, { "epoch": 0.5504113425520742, "grad_norm": 1.048875958370967, "learning_rate": 4.427956505878294e-06, "loss": 0.148, "step": 18867 }, { "epoch": 0.5504405157827178, "grad_norm": 0.6447775964429916, "learning_rate": 4.42748717638877e-06, "loss": 0.1099, "step": 18868 }, { "epoch": 0.5504696890133614, "grad_norm": 0.9248577677453754, "learning_rate": 4.4270178520105e-06, "loss": 0.1192, "step": 18869 }, { "epoch": 0.5504988622440049, "grad_norm": 0.8270878868959397, "learning_rate": 4.426548532747681e-06, "loss": 0.1286, "step": 18870 }, { "epoch": 0.5505280354746485, "grad_norm": 0.7425708562237897, "learning_rate": 4.426079218604499e-06, "loss": 0.1607, "step": 18871 }, { "epoch": 0.550557208705292, "grad_norm": 1.4173539162091753, "learning_rate": 4.4256099095851455e-06, "loss": 0.1403, "step": 18872 }, { "epoch": 0.5505863819359356, "grad_norm": 0.9591979772681544, "learning_rate": 4.42514060569381e-06, "loss": 0.1249, "step": 18873 }, { "epoch": 0.5506155551665791, "grad_norm": 0.9629079639441862, "learning_rate": 4.424671306934681e-06, "loss": 0.1343, "step": 18874 }, { "epoch": 0.5506447283972227, "grad_norm": 0.6543564246871194, "learning_rate": 4.424202013311947e-06, "loss": 0.1111, "step": 18875 }, { "epoch": 0.5506739016278662, "grad_norm": 0.9133388080681468, "learning_rate": 4.423732724829802e-06, "loss": 0.1119, "step": 18876 }, { "epoch": 0.5507030748585098, "grad_norm": 0.8416084456937348, "learning_rate": 4.423263441492436e-06, "loss": 0.1339, "step": 18877 }, { "epoch": 0.5507322480891534, "grad_norm": 0.7324154434440577, "learning_rate": 4.4227941633040335e-06, "loss": 0.1335, "step": 18878 }, { "epoch": 0.550761421319797, "grad_norm": 0.8143888661673182, "learning_rate": 4.422324890268787e-06, "loss": 0.1416, "step": 18879 }, { "epoch": 0.5507905945504405, "grad_norm": 0.8755486013829306, "learning_rate": 4.421855622390887e-06, "loss": 0.1256, "step": 18880 }, { "epoch": 0.5508197677810841, "grad_norm": 0.9647428164434501, "learning_rate": 4.42138635967452e-06, "loss": 0.1195, "step": 18881 }, { "epoch": 0.5508489410117277, "grad_norm": 0.6968924654914538, "learning_rate": 4.420917102123879e-06, "loss": 0.1073, "step": 18882 }, { "epoch": 0.5508781142423712, "grad_norm": 0.6776839132508738, "learning_rate": 4.420447849743152e-06, "loss": 0.1218, "step": 18883 }, { "epoch": 0.5509072874730148, "grad_norm": 0.9780392719740577, "learning_rate": 4.419978602536529e-06, "loss": 0.1301, "step": 18884 }, { "epoch": 0.5509364607036583, "grad_norm": 1.269366287466131, "learning_rate": 4.419509360508198e-06, "loss": 0.1227, "step": 18885 }, { "epoch": 0.5509656339343019, "grad_norm": 0.988261489618055, "learning_rate": 4.419040123662348e-06, "loss": 0.1373, "step": 18886 }, { "epoch": 0.5509948071649454, "grad_norm": 0.998681512237093, "learning_rate": 4.418570892003169e-06, "loss": 0.1255, "step": 18887 }, { "epoch": 0.551023980395589, "grad_norm": 0.9859257190037307, "learning_rate": 4.418101665534851e-06, "loss": 0.1302, "step": 18888 }, { "epoch": 0.5510531536262325, "grad_norm": 0.7435861505082985, "learning_rate": 4.417632444261582e-06, "loss": 0.1201, "step": 18889 }, { "epoch": 0.5510823268568761, "grad_norm": 0.9044645211984617, "learning_rate": 4.417163228187552e-06, "loss": 0.1296, "step": 18890 }, { "epoch": 0.5511115000875197, "grad_norm": 0.6673412854286197, "learning_rate": 4.41669401731695e-06, "loss": 0.1198, "step": 18891 }, { "epoch": 0.5511406733181633, "grad_norm": 1.015387950005781, "learning_rate": 4.416224811653963e-06, "loss": 0.1262, "step": 18892 }, { "epoch": 0.5511698465488069, "grad_norm": 0.7409449697376059, "learning_rate": 4.415755611202782e-06, "loss": 0.1135, "step": 18893 }, { "epoch": 0.5511990197794504, "grad_norm": 0.8683402773180618, "learning_rate": 4.415286415967596e-06, "loss": 0.1353, "step": 18894 }, { "epoch": 0.551228193010094, "grad_norm": 0.8628280057573453, "learning_rate": 4.414817225952594e-06, "loss": 0.1204, "step": 18895 }, { "epoch": 0.5512573662407375, "grad_norm": 0.6847709045708442, "learning_rate": 4.414348041161963e-06, "loss": 0.1202, "step": 18896 }, { "epoch": 0.5512865394713811, "grad_norm": 0.8020461498396281, "learning_rate": 4.413878861599893e-06, "loss": 0.1239, "step": 18897 }, { "epoch": 0.5513157127020246, "grad_norm": 1.0200723866121209, "learning_rate": 4.413409687270574e-06, "loss": 0.1431, "step": 18898 }, { "epoch": 0.5513448859326682, "grad_norm": 1.0289337290145673, "learning_rate": 4.412940518178191e-06, "loss": 0.1361, "step": 18899 }, { "epoch": 0.5513740591633117, "grad_norm": 1.095416605994027, "learning_rate": 4.412471354326936e-06, "loss": 0.1303, "step": 18900 }, { "epoch": 0.5514032323939553, "grad_norm": 1.0664787186450981, "learning_rate": 4.412002195720996e-06, "loss": 0.1484, "step": 18901 }, { "epoch": 0.5514324056245988, "grad_norm": 1.1820260823233966, "learning_rate": 4.41153304236456e-06, "loss": 0.1453, "step": 18902 }, { "epoch": 0.5514615788552424, "grad_norm": 1.0926917410889156, "learning_rate": 4.411063894261818e-06, "loss": 0.1195, "step": 18903 }, { "epoch": 0.5514907520858859, "grad_norm": 0.8201506636812573, "learning_rate": 4.410594751416956e-06, "loss": 0.1208, "step": 18904 }, { "epoch": 0.5515199253165296, "grad_norm": 0.9268618846094715, "learning_rate": 4.410125613834162e-06, "loss": 0.1236, "step": 18905 }, { "epoch": 0.5515490985471732, "grad_norm": 0.8709982156274102, "learning_rate": 4.409656481517627e-06, "loss": 0.1423, "step": 18906 }, { "epoch": 0.5515782717778167, "grad_norm": 1.0799782334146752, "learning_rate": 4.409187354471539e-06, "loss": 0.151, "step": 18907 }, { "epoch": 0.5516074450084603, "grad_norm": 0.8726156079233057, "learning_rate": 4.4087182327000845e-06, "loss": 0.1437, "step": 18908 }, { "epoch": 0.5516366182391038, "grad_norm": 0.880339981058879, "learning_rate": 4.408249116207452e-06, "loss": 0.133, "step": 18909 }, { "epoch": 0.5516657914697474, "grad_norm": 1.0609898160043045, "learning_rate": 4.407780004997831e-06, "loss": 0.1209, "step": 18910 }, { "epoch": 0.5516949647003909, "grad_norm": 0.8067179625470575, "learning_rate": 4.407310899075406e-06, "loss": 0.1126, "step": 18911 }, { "epoch": 0.5517241379310345, "grad_norm": 0.8214607414672347, "learning_rate": 4.406841798444371e-06, "loss": 0.1423, "step": 18912 }, { "epoch": 0.551753311161678, "grad_norm": 0.9296961843547017, "learning_rate": 4.40637270310891e-06, "loss": 0.1279, "step": 18913 }, { "epoch": 0.5517824843923216, "grad_norm": 1.0150414366375669, "learning_rate": 4.4059036130732115e-06, "loss": 0.1354, "step": 18914 }, { "epoch": 0.5518116576229651, "grad_norm": 0.8882836837030985, "learning_rate": 4.4054345283414645e-06, "loss": 0.1278, "step": 18915 }, { "epoch": 0.5518408308536087, "grad_norm": 0.8148474926750665, "learning_rate": 4.404965448917855e-06, "loss": 0.1229, "step": 18916 }, { "epoch": 0.5518700040842522, "grad_norm": 0.9414039923913972, "learning_rate": 4.4044963748065716e-06, "loss": 0.111, "step": 18917 }, { "epoch": 0.5518991773148959, "grad_norm": 0.8041724061132699, "learning_rate": 4.404027306011804e-06, "loss": 0.1262, "step": 18918 }, { "epoch": 0.5519283505455395, "grad_norm": 0.9318898626723681, "learning_rate": 4.403558242537737e-06, "loss": 0.1236, "step": 18919 }, { "epoch": 0.551957523776183, "grad_norm": 1.1528299941047244, "learning_rate": 4.40308918438856e-06, "loss": 0.1519, "step": 18920 }, { "epoch": 0.5519866970068266, "grad_norm": 0.8343848843723077, "learning_rate": 4.402620131568461e-06, "loss": 0.1046, "step": 18921 }, { "epoch": 0.5520158702374701, "grad_norm": 0.8668240869909689, "learning_rate": 4.402151084081625e-06, "loss": 0.1454, "step": 18922 }, { "epoch": 0.5520450434681137, "grad_norm": 0.6814790320732358, "learning_rate": 4.401682041932243e-06, "loss": 0.1235, "step": 18923 }, { "epoch": 0.5520742166987572, "grad_norm": 0.8920669265290863, "learning_rate": 4.4012130051245e-06, "loss": 0.1225, "step": 18924 }, { "epoch": 0.5521033899294008, "grad_norm": 0.7313361740763353, "learning_rate": 4.400743973662586e-06, "loss": 0.1241, "step": 18925 }, { "epoch": 0.5521325631600443, "grad_norm": 0.7913429969906784, "learning_rate": 4.400274947550685e-06, "loss": 0.1375, "step": 18926 }, { "epoch": 0.5521617363906879, "grad_norm": 0.8398671674310144, "learning_rate": 4.3998059267929875e-06, "loss": 0.1427, "step": 18927 }, { "epoch": 0.5521909096213314, "grad_norm": 0.9810835566100664, "learning_rate": 4.3993369113936765e-06, "loss": 0.1091, "step": 18928 }, { "epoch": 0.552220082851975, "grad_norm": 0.6773255354104609, "learning_rate": 4.3988679013569455e-06, "loss": 0.1066, "step": 18929 }, { "epoch": 0.5522492560826185, "grad_norm": 0.7687159863426298, "learning_rate": 4.398398896686977e-06, "loss": 0.1345, "step": 18930 }, { "epoch": 0.5522784293132621, "grad_norm": 0.854676951390436, "learning_rate": 4.39792989738796e-06, "loss": 0.1345, "step": 18931 }, { "epoch": 0.5523076025439058, "grad_norm": 0.6842095574049585, "learning_rate": 4.39746090346408e-06, "loss": 0.0965, "step": 18932 }, { "epoch": 0.5523367757745493, "grad_norm": 0.7438169389739349, "learning_rate": 4.396991914919528e-06, "loss": 0.1126, "step": 18933 }, { "epoch": 0.5523659490051929, "grad_norm": 0.7650484211780397, "learning_rate": 4.3965229317584846e-06, "loss": 0.1277, "step": 18934 }, { "epoch": 0.5523951222358364, "grad_norm": 0.9242249311460413, "learning_rate": 4.396053953985142e-06, "loss": 0.1393, "step": 18935 }, { "epoch": 0.55242429546648, "grad_norm": 0.8117106767305521, "learning_rate": 4.395584981603686e-06, "loss": 0.1209, "step": 18936 }, { "epoch": 0.5524534686971235, "grad_norm": 0.6625364024823419, "learning_rate": 4.395116014618303e-06, "loss": 0.1255, "step": 18937 }, { "epoch": 0.5524826419277671, "grad_norm": 1.0487966281576853, "learning_rate": 4.39464705303318e-06, "loss": 0.1352, "step": 18938 }, { "epoch": 0.5525118151584106, "grad_norm": 0.8082036924009987, "learning_rate": 4.394178096852503e-06, "loss": 0.1269, "step": 18939 }, { "epoch": 0.5525409883890542, "grad_norm": 0.822389565897582, "learning_rate": 4.393709146080458e-06, "loss": 0.1131, "step": 18940 }, { "epoch": 0.5525701616196977, "grad_norm": 1.035314430128407, "learning_rate": 4.393240200721234e-06, "loss": 0.1193, "step": 18941 }, { "epoch": 0.5525993348503413, "grad_norm": 0.9707725354811764, "learning_rate": 4.392771260779018e-06, "loss": 0.1564, "step": 18942 }, { "epoch": 0.5526285080809848, "grad_norm": 0.9636631736749641, "learning_rate": 4.392302326257995e-06, "loss": 0.1372, "step": 18943 }, { "epoch": 0.5526576813116284, "grad_norm": 1.0086431948465115, "learning_rate": 4.39183339716235e-06, "loss": 0.1284, "step": 18944 }, { "epoch": 0.5526868545422721, "grad_norm": 0.8029281205526639, "learning_rate": 4.391364473496273e-06, "loss": 0.1226, "step": 18945 }, { "epoch": 0.5527160277729156, "grad_norm": 0.8376520467839277, "learning_rate": 4.390895555263946e-06, "loss": 0.1369, "step": 18946 }, { "epoch": 0.5527452010035592, "grad_norm": 0.8621087899813299, "learning_rate": 4.390426642469561e-06, "loss": 0.1348, "step": 18947 }, { "epoch": 0.5527743742342027, "grad_norm": 1.2560347738458584, "learning_rate": 4.3899577351173005e-06, "loss": 0.1398, "step": 18948 }, { "epoch": 0.5528035474648463, "grad_norm": 0.8478268100757564, "learning_rate": 4.389488833211351e-06, "loss": 0.1362, "step": 18949 }, { "epoch": 0.5528327206954898, "grad_norm": 0.9655586142279446, "learning_rate": 4.389019936755902e-06, "loss": 0.1502, "step": 18950 }, { "epoch": 0.5528618939261334, "grad_norm": 0.7419266281202673, "learning_rate": 4.388551045755135e-06, "loss": 0.1205, "step": 18951 }, { "epoch": 0.5528910671567769, "grad_norm": 0.8711008103723668, "learning_rate": 4.388082160213237e-06, "loss": 0.1182, "step": 18952 }, { "epoch": 0.5529202403874205, "grad_norm": 0.7856337236933707, "learning_rate": 4.387613280134397e-06, "loss": 0.1286, "step": 18953 }, { "epoch": 0.552949413618064, "grad_norm": 1.0391416184434505, "learning_rate": 4.3871444055228e-06, "loss": 0.1136, "step": 18954 }, { "epoch": 0.5529785868487076, "grad_norm": 0.750986333806838, "learning_rate": 4.386675536382631e-06, "loss": 0.1304, "step": 18955 }, { "epoch": 0.5530077600793512, "grad_norm": 1.249896122674722, "learning_rate": 4.3862066727180765e-06, "loss": 0.1202, "step": 18956 }, { "epoch": 0.5530369333099947, "grad_norm": 0.8660143694255064, "learning_rate": 4.385737814533322e-06, "loss": 0.1326, "step": 18957 }, { "epoch": 0.5530661065406383, "grad_norm": 0.7776825646497837, "learning_rate": 4.385268961832553e-06, "loss": 0.124, "step": 18958 }, { "epoch": 0.5530952797712819, "grad_norm": 0.7515619600990452, "learning_rate": 4.384800114619957e-06, "loss": 0.1178, "step": 18959 }, { "epoch": 0.5531244530019255, "grad_norm": 0.8631802248534036, "learning_rate": 4.384331272899718e-06, "loss": 0.1156, "step": 18960 }, { "epoch": 0.553153626232569, "grad_norm": 0.7442139612714272, "learning_rate": 4.383862436676023e-06, "loss": 0.1208, "step": 18961 }, { "epoch": 0.5531827994632126, "grad_norm": 1.070272932987149, "learning_rate": 4.383393605953057e-06, "loss": 0.1499, "step": 18962 }, { "epoch": 0.5532119726938561, "grad_norm": 0.869308725151239, "learning_rate": 4.382924780735007e-06, "loss": 0.1342, "step": 18963 }, { "epoch": 0.5532411459244997, "grad_norm": 0.9966991000253045, "learning_rate": 4.3824559610260545e-06, "loss": 0.1653, "step": 18964 }, { "epoch": 0.5532703191551432, "grad_norm": 0.708567997809416, "learning_rate": 4.381987146830389e-06, "loss": 0.1305, "step": 18965 }, { "epoch": 0.5532994923857868, "grad_norm": 1.0003084806399476, "learning_rate": 4.381518338152195e-06, "loss": 0.1593, "step": 18966 }, { "epoch": 0.5533286656164303, "grad_norm": 0.8213124015520988, "learning_rate": 4.381049534995658e-06, "loss": 0.1015, "step": 18967 }, { "epoch": 0.5533578388470739, "grad_norm": 0.8604842656028665, "learning_rate": 4.380580737364962e-06, "loss": 0.1271, "step": 18968 }, { "epoch": 0.5533870120777175, "grad_norm": 0.7736718020630616, "learning_rate": 4.380111945264294e-06, "loss": 0.1241, "step": 18969 }, { "epoch": 0.553416185308361, "grad_norm": 0.8683832202446158, "learning_rate": 4.379643158697837e-06, "loss": 0.1601, "step": 18970 }, { "epoch": 0.5534453585390046, "grad_norm": 0.6773294747814594, "learning_rate": 4.3791743776697795e-06, "loss": 0.1154, "step": 18971 }, { "epoch": 0.5534745317696482, "grad_norm": 0.9849618752089566, "learning_rate": 4.378705602184306e-06, "loss": 0.14, "step": 18972 }, { "epoch": 0.5535037050002918, "grad_norm": 0.7972406615254845, "learning_rate": 4.3782368322455985e-06, "loss": 0.1167, "step": 18973 }, { "epoch": 0.5535328782309353, "grad_norm": 0.8624819543776043, "learning_rate": 4.377768067857845e-06, "loss": 0.166, "step": 18974 }, { "epoch": 0.5535620514615789, "grad_norm": 0.7968597633134373, "learning_rate": 4.37729930902523e-06, "loss": 0.1241, "step": 18975 }, { "epoch": 0.5535912246922224, "grad_norm": 0.8832363884273774, "learning_rate": 4.376830555751935e-06, "loss": 0.1237, "step": 18976 }, { "epoch": 0.553620397922866, "grad_norm": 0.8994375649384976, "learning_rate": 4.376361808042152e-06, "loss": 0.1342, "step": 18977 }, { "epoch": 0.5536495711535095, "grad_norm": 0.8993620114859945, "learning_rate": 4.37589306590006e-06, "loss": 0.1263, "step": 18978 }, { "epoch": 0.5536787443841531, "grad_norm": 0.8789984648277301, "learning_rate": 4.375424329329847e-06, "loss": 0.1319, "step": 18979 }, { "epoch": 0.5537079176147967, "grad_norm": 0.700312367315295, "learning_rate": 4.374955598335696e-06, "loss": 0.1208, "step": 18980 }, { "epoch": 0.5537370908454402, "grad_norm": 0.9916636045107491, "learning_rate": 4.374486872921792e-06, "loss": 0.1459, "step": 18981 }, { "epoch": 0.5537662640760838, "grad_norm": 0.7780606346331178, "learning_rate": 4.374018153092319e-06, "loss": 0.1222, "step": 18982 }, { "epoch": 0.5537954373067273, "grad_norm": 0.8056706125102487, "learning_rate": 4.373549438851463e-06, "loss": 0.1285, "step": 18983 }, { "epoch": 0.5538246105373709, "grad_norm": 0.842929800563625, "learning_rate": 4.373080730203408e-06, "loss": 0.1195, "step": 18984 }, { "epoch": 0.5538537837680144, "grad_norm": 0.6976156825232113, "learning_rate": 4.37261202715234e-06, "loss": 0.1091, "step": 18985 }, { "epoch": 0.5538829569986581, "grad_norm": 0.7394111334881569, "learning_rate": 4.372143329702441e-06, "loss": 0.1117, "step": 18986 }, { "epoch": 0.5539121302293016, "grad_norm": 0.7721938028571832, "learning_rate": 4.371674637857896e-06, "loss": 0.126, "step": 18987 }, { "epoch": 0.5539413034599452, "grad_norm": 0.8670435034129449, "learning_rate": 4.371205951622889e-06, "loss": 0.1457, "step": 18988 }, { "epoch": 0.5539704766905887, "grad_norm": 0.7389260962426617, "learning_rate": 4.370737271001607e-06, "loss": 0.125, "step": 18989 }, { "epoch": 0.5539996499212323, "grad_norm": 0.8865940446498207, "learning_rate": 4.3702685959982326e-06, "loss": 0.1394, "step": 18990 }, { "epoch": 0.5540288231518758, "grad_norm": 0.9553162359285411, "learning_rate": 4.369799926616949e-06, "loss": 0.1293, "step": 18991 }, { "epoch": 0.5540579963825194, "grad_norm": 0.8915793746933419, "learning_rate": 4.369331262861942e-06, "loss": 0.1063, "step": 18992 }, { "epoch": 0.554087169613163, "grad_norm": 0.871306471435351, "learning_rate": 4.368862604737395e-06, "loss": 0.1222, "step": 18993 }, { "epoch": 0.5541163428438065, "grad_norm": 0.8436957957319473, "learning_rate": 4.368393952247489e-06, "loss": 0.1238, "step": 18994 }, { "epoch": 0.5541455160744501, "grad_norm": 0.8121602703143961, "learning_rate": 4.367925305396414e-06, "loss": 0.1301, "step": 18995 }, { "epoch": 0.5541746893050936, "grad_norm": 0.736501040908026, "learning_rate": 4.36745666418835e-06, "loss": 0.1354, "step": 18996 }, { "epoch": 0.5542038625357372, "grad_norm": 0.9146198369061643, "learning_rate": 4.366988028627484e-06, "loss": 0.1357, "step": 18997 }, { "epoch": 0.5542330357663807, "grad_norm": 1.0272053538843235, "learning_rate": 4.366519398717995e-06, "loss": 0.1233, "step": 18998 }, { "epoch": 0.5542622089970244, "grad_norm": 3.134792651135183, "learning_rate": 4.366050774464071e-06, "loss": 0.1599, "step": 18999 }, { "epoch": 0.5542913822276679, "grad_norm": 0.6636392571345271, "learning_rate": 4.365582155869892e-06, "loss": 0.1047, "step": 19000 }, { "epoch": 0.5543205554583115, "grad_norm": 0.8338426930577785, "learning_rate": 4.365113542939646e-06, "loss": 0.122, "step": 19001 }, { "epoch": 0.554349728688955, "grad_norm": 1.0225088274138192, "learning_rate": 4.364644935677516e-06, "loss": 0.1524, "step": 19002 }, { "epoch": 0.5543789019195986, "grad_norm": 0.8356938730103732, "learning_rate": 4.364176334087683e-06, "loss": 0.1135, "step": 19003 }, { "epoch": 0.5544080751502422, "grad_norm": 0.827849731435746, "learning_rate": 4.363707738174331e-06, "loss": 0.1275, "step": 19004 }, { "epoch": 0.5544372483808857, "grad_norm": 0.758940524020363, "learning_rate": 4.363239147941647e-06, "loss": 0.1048, "step": 19005 }, { "epoch": 0.5544664216115293, "grad_norm": 0.9455837093178442, "learning_rate": 4.362770563393808e-06, "loss": 0.1345, "step": 19006 }, { "epoch": 0.5544955948421728, "grad_norm": 1.010327736798248, "learning_rate": 4.362301984535005e-06, "loss": 0.1163, "step": 19007 }, { "epoch": 0.5545247680728164, "grad_norm": 1.5037815722126615, "learning_rate": 4.361833411369415e-06, "loss": 0.1444, "step": 19008 }, { "epoch": 0.5545539413034599, "grad_norm": 1.0660801195113891, "learning_rate": 4.361364843901226e-06, "loss": 0.1411, "step": 19009 }, { "epoch": 0.5545831145341035, "grad_norm": 0.8827088248975826, "learning_rate": 4.360896282134619e-06, "loss": 0.1292, "step": 19010 }, { "epoch": 0.554612287764747, "grad_norm": 0.8612085820432255, "learning_rate": 4.360427726073776e-06, "loss": 0.1429, "step": 19011 }, { "epoch": 0.5546414609953906, "grad_norm": 0.9299876808958056, "learning_rate": 4.359959175722881e-06, "loss": 0.1332, "step": 19012 }, { "epoch": 0.5546706342260342, "grad_norm": 1.0010153111031155, "learning_rate": 4.3594906310861195e-06, "loss": 0.1662, "step": 19013 }, { "epoch": 0.5546998074566778, "grad_norm": 0.8387242150310642, "learning_rate": 4.359022092167672e-06, "loss": 0.1316, "step": 19014 }, { "epoch": 0.5547289806873213, "grad_norm": 0.8570485680581119, "learning_rate": 4.358553558971723e-06, "loss": 0.1184, "step": 19015 }, { "epoch": 0.5547581539179649, "grad_norm": 0.8463602905838344, "learning_rate": 4.358085031502455e-06, "loss": 0.1257, "step": 19016 }, { "epoch": 0.5547873271486085, "grad_norm": 0.9155141260660282, "learning_rate": 4.35761650976405e-06, "loss": 0.1378, "step": 19017 }, { "epoch": 0.554816500379252, "grad_norm": 0.8735324406153037, "learning_rate": 4.35714799376069e-06, "loss": 0.1275, "step": 19018 }, { "epoch": 0.5548456736098956, "grad_norm": 0.6755818049046284, "learning_rate": 4.3566794834965616e-06, "loss": 0.1326, "step": 19019 }, { "epoch": 0.5548748468405391, "grad_norm": 0.9367120460741722, "learning_rate": 4.3562109789758435e-06, "loss": 0.1461, "step": 19020 }, { "epoch": 0.5549040200711827, "grad_norm": 0.8767740890279596, "learning_rate": 4.355742480202721e-06, "loss": 0.1312, "step": 19021 }, { "epoch": 0.5549331933018262, "grad_norm": 0.8035646964027161, "learning_rate": 4.355273987181376e-06, "loss": 0.1407, "step": 19022 }, { "epoch": 0.5549623665324698, "grad_norm": 0.807426950519346, "learning_rate": 4.354805499915991e-06, "loss": 0.1398, "step": 19023 }, { "epoch": 0.5549915397631133, "grad_norm": 0.9118753614808602, "learning_rate": 4.354337018410747e-06, "loss": 0.1381, "step": 19024 }, { "epoch": 0.5550207129937569, "grad_norm": 0.576043163429003, "learning_rate": 4.353868542669828e-06, "loss": 0.0961, "step": 19025 }, { "epoch": 0.5550498862244004, "grad_norm": 0.6594052129528744, "learning_rate": 4.353400072697418e-06, "loss": 0.1216, "step": 19026 }, { "epoch": 0.5550790594550441, "grad_norm": 0.7585226301717503, "learning_rate": 4.352931608497698e-06, "loss": 0.1382, "step": 19027 }, { "epoch": 0.5551082326856877, "grad_norm": 0.6750017083918501, "learning_rate": 4.3524631500748495e-06, "loss": 0.1293, "step": 19028 }, { "epoch": 0.5551374059163312, "grad_norm": 0.6961170966180397, "learning_rate": 4.351994697433055e-06, "loss": 0.1114, "step": 19029 }, { "epoch": 0.5551665791469748, "grad_norm": 0.7360047297017995, "learning_rate": 4.351526250576496e-06, "loss": 0.1234, "step": 19030 }, { "epoch": 0.5551957523776183, "grad_norm": 0.74104320568258, "learning_rate": 4.351057809509357e-06, "loss": 0.1342, "step": 19031 }, { "epoch": 0.5552249256082619, "grad_norm": 1.1616859823897157, "learning_rate": 4.35058937423582e-06, "loss": 0.1241, "step": 19032 }, { "epoch": 0.5552540988389054, "grad_norm": 0.7534353068898046, "learning_rate": 4.350120944760065e-06, "loss": 0.1146, "step": 19033 }, { "epoch": 0.555283272069549, "grad_norm": 0.7359607953267799, "learning_rate": 4.349652521086275e-06, "loss": 0.1346, "step": 19034 }, { "epoch": 0.5553124453001925, "grad_norm": 0.7887782773626593, "learning_rate": 4.349184103218633e-06, "loss": 0.1226, "step": 19035 }, { "epoch": 0.5553416185308361, "grad_norm": 0.7908862976458872, "learning_rate": 4.348715691161317e-06, "loss": 0.1244, "step": 19036 }, { "epoch": 0.5553707917614796, "grad_norm": 0.7437358248556468, "learning_rate": 4.348247284918515e-06, "loss": 0.1386, "step": 19037 }, { "epoch": 0.5553999649921232, "grad_norm": 1.1328661443383496, "learning_rate": 4.347778884494405e-06, "loss": 0.1319, "step": 19038 }, { "epoch": 0.5554291382227667, "grad_norm": 0.712735036495089, "learning_rate": 4.347310489893169e-06, "loss": 0.1029, "step": 19039 }, { "epoch": 0.5554583114534104, "grad_norm": 0.7744272683956761, "learning_rate": 4.346842101118991e-06, "loss": 0.1421, "step": 19040 }, { "epoch": 0.555487484684054, "grad_norm": 0.778418849903219, "learning_rate": 4.346373718176049e-06, "loss": 0.1211, "step": 19041 }, { "epoch": 0.5555166579146975, "grad_norm": 0.9379960028819022, "learning_rate": 4.345905341068525e-06, "loss": 0.1376, "step": 19042 }, { "epoch": 0.5555458311453411, "grad_norm": 0.7194867018599489, "learning_rate": 4.345436969800603e-06, "loss": 0.1185, "step": 19043 }, { "epoch": 0.5555750043759846, "grad_norm": 0.7764692817658038, "learning_rate": 4.344968604376465e-06, "loss": 0.1294, "step": 19044 }, { "epoch": 0.5556041776066282, "grad_norm": 1.0001105838790252, "learning_rate": 4.34450024480029e-06, "loss": 0.1275, "step": 19045 }, { "epoch": 0.5556333508372717, "grad_norm": 0.724988975142468, "learning_rate": 4.34403189107626e-06, "loss": 0.1137, "step": 19046 }, { "epoch": 0.5556625240679153, "grad_norm": 0.7300662716982012, "learning_rate": 4.343563543208557e-06, "loss": 0.1538, "step": 19047 }, { "epoch": 0.5556916972985588, "grad_norm": 0.8290228078879128, "learning_rate": 4.343095201201361e-06, "loss": 0.1399, "step": 19048 }, { "epoch": 0.5557208705292024, "grad_norm": 0.8955325333341915, "learning_rate": 4.342626865058856e-06, "loss": 0.1302, "step": 19049 }, { "epoch": 0.5557500437598459, "grad_norm": 0.7533257227519368, "learning_rate": 4.34215853478522e-06, "loss": 0.1293, "step": 19050 }, { "epoch": 0.5557792169904895, "grad_norm": 0.8504209059512653, "learning_rate": 4.341690210384636e-06, "loss": 0.1293, "step": 19051 }, { "epoch": 0.555808390221133, "grad_norm": 0.8174216954485575, "learning_rate": 4.341221891861286e-06, "loss": 0.1199, "step": 19052 }, { "epoch": 0.5558375634517766, "grad_norm": 0.9584904670466751, "learning_rate": 4.340753579219349e-06, "loss": 0.1335, "step": 19053 }, { "epoch": 0.5558667366824203, "grad_norm": 0.7971548088341041, "learning_rate": 4.340285272463005e-06, "loss": 0.132, "step": 19054 }, { "epoch": 0.5558959099130638, "grad_norm": 1.372057586506304, "learning_rate": 4.339816971596438e-06, "loss": 0.1306, "step": 19055 }, { "epoch": 0.5559250831437074, "grad_norm": 1.035451505593037, "learning_rate": 4.339348676623826e-06, "loss": 0.1601, "step": 19056 }, { "epoch": 0.5559542563743509, "grad_norm": 0.6946171225751947, "learning_rate": 4.3388803875493536e-06, "loss": 0.1075, "step": 19057 }, { "epoch": 0.5559834296049945, "grad_norm": 0.748041898500976, "learning_rate": 4.338412104377198e-06, "loss": 0.1052, "step": 19058 }, { "epoch": 0.556012602835638, "grad_norm": 0.8847162211447822, "learning_rate": 4.337943827111542e-06, "loss": 0.1302, "step": 19059 }, { "epoch": 0.5560417760662816, "grad_norm": 0.9788611723862909, "learning_rate": 4.337475555756563e-06, "loss": 0.1221, "step": 19060 }, { "epoch": 0.5560709492969251, "grad_norm": 0.7934920805269242, "learning_rate": 4.3370072903164466e-06, "loss": 0.1163, "step": 19061 }, { "epoch": 0.5561001225275687, "grad_norm": 0.8060432545753894, "learning_rate": 4.33653903079537e-06, "loss": 0.1303, "step": 19062 }, { "epoch": 0.5561292957582122, "grad_norm": 0.8694569583121312, "learning_rate": 4.3360707771975154e-06, "loss": 0.1045, "step": 19063 }, { "epoch": 0.5561584689888558, "grad_norm": 0.9722308929764426, "learning_rate": 4.335602529527061e-06, "loss": 0.1185, "step": 19064 }, { "epoch": 0.5561876422194993, "grad_norm": 0.7641553324482699, "learning_rate": 4.335134287788191e-06, "loss": 0.1224, "step": 19065 }, { "epoch": 0.5562168154501429, "grad_norm": 1.0635532818329378, "learning_rate": 4.334666051985079e-06, "loss": 0.1439, "step": 19066 }, { "epoch": 0.5562459886807866, "grad_norm": 0.6870946879967289, "learning_rate": 4.334197822121913e-06, "loss": 0.1094, "step": 19067 }, { "epoch": 0.5562751619114301, "grad_norm": 0.8234267835962014, "learning_rate": 4.333729598202869e-06, "loss": 0.1229, "step": 19068 }, { "epoch": 0.5563043351420737, "grad_norm": 0.9770794180998047, "learning_rate": 4.333261380232129e-06, "loss": 0.1426, "step": 19069 }, { "epoch": 0.5563335083727172, "grad_norm": 0.9482585300221368, "learning_rate": 4.3327931682138725e-06, "loss": 0.1423, "step": 19070 }, { "epoch": 0.5563626816033608, "grad_norm": 0.9467254690847229, "learning_rate": 4.3323249621522785e-06, "loss": 0.1296, "step": 19071 }, { "epoch": 0.5563918548340043, "grad_norm": 0.8977881860793262, "learning_rate": 4.331856762051526e-06, "loss": 0.1211, "step": 19072 }, { "epoch": 0.5564210280646479, "grad_norm": 0.9323535480483929, "learning_rate": 4.331388567915799e-06, "loss": 0.1155, "step": 19073 }, { "epoch": 0.5564502012952914, "grad_norm": 0.8592387935246127, "learning_rate": 4.330920379749274e-06, "loss": 0.1324, "step": 19074 }, { "epoch": 0.556479374525935, "grad_norm": 1.0401763103605797, "learning_rate": 4.330452197556134e-06, "loss": 0.1341, "step": 19075 }, { "epoch": 0.5565085477565785, "grad_norm": 0.8327513069803256, "learning_rate": 4.329984021340557e-06, "loss": 0.1264, "step": 19076 }, { "epoch": 0.5565377209872221, "grad_norm": 0.7398323747336847, "learning_rate": 4.329515851106721e-06, "loss": 0.1326, "step": 19077 }, { "epoch": 0.5565668942178656, "grad_norm": 0.8631278579939401, "learning_rate": 4.329047686858807e-06, "loss": 0.1504, "step": 19078 }, { "epoch": 0.5565960674485092, "grad_norm": 0.9173061038077363, "learning_rate": 4.328579528600997e-06, "loss": 0.1046, "step": 19079 }, { "epoch": 0.5566252406791528, "grad_norm": 0.8053387977027799, "learning_rate": 4.328111376337468e-06, "loss": 0.1299, "step": 19080 }, { "epoch": 0.5566544139097964, "grad_norm": 0.7943544478077899, "learning_rate": 4.3276432300723995e-06, "loss": 0.1152, "step": 19081 }, { "epoch": 0.55668358714044, "grad_norm": 0.684900982092863, "learning_rate": 4.327175089809973e-06, "loss": 0.1134, "step": 19082 }, { "epoch": 0.5567127603710835, "grad_norm": 1.0556320969726258, "learning_rate": 4.3267069555543665e-06, "loss": 0.1289, "step": 19083 }, { "epoch": 0.5567419336017271, "grad_norm": 0.8466057433921833, "learning_rate": 4.326238827309758e-06, "loss": 0.1095, "step": 19084 }, { "epoch": 0.5567711068323706, "grad_norm": 0.7338247525685471, "learning_rate": 4.3257707050803285e-06, "loss": 0.1267, "step": 19085 }, { "epoch": 0.5568002800630142, "grad_norm": 0.7557776443026418, "learning_rate": 4.325302588870258e-06, "loss": 0.11, "step": 19086 }, { "epoch": 0.5568294532936577, "grad_norm": 0.8162613754395548, "learning_rate": 4.324834478683726e-06, "loss": 0.1316, "step": 19087 }, { "epoch": 0.5568586265243013, "grad_norm": 0.7112984568497187, "learning_rate": 4.32436637452491e-06, "loss": 0.1252, "step": 19088 }, { "epoch": 0.5568877997549448, "grad_norm": 0.945271187261099, "learning_rate": 4.32389827639799e-06, "loss": 0.1239, "step": 19089 }, { "epoch": 0.5569169729855884, "grad_norm": 0.8873115463856694, "learning_rate": 4.323430184307143e-06, "loss": 0.1198, "step": 19090 }, { "epoch": 0.556946146216232, "grad_norm": 0.8925186333145753, "learning_rate": 4.3229620982565505e-06, "loss": 0.1345, "step": 19091 }, { "epoch": 0.5569753194468755, "grad_norm": 1.267998504717158, "learning_rate": 4.322494018250392e-06, "loss": 0.1319, "step": 19092 }, { "epoch": 0.5570044926775191, "grad_norm": 0.913261745842966, "learning_rate": 4.322025944292845e-06, "loss": 0.119, "step": 19093 }, { "epoch": 0.5570336659081627, "grad_norm": 0.7564078256122583, "learning_rate": 4.321557876388087e-06, "loss": 0.1337, "step": 19094 }, { "epoch": 0.5570628391388063, "grad_norm": 0.9747580617501804, "learning_rate": 4.321089814540301e-06, "loss": 0.1181, "step": 19095 }, { "epoch": 0.5570920123694498, "grad_norm": 0.9551944684281286, "learning_rate": 4.320621758753659e-06, "loss": 0.1315, "step": 19096 }, { "epoch": 0.5571211856000934, "grad_norm": 0.7604880208963859, "learning_rate": 4.320153709032347e-06, "loss": 0.1283, "step": 19097 }, { "epoch": 0.5571503588307369, "grad_norm": 0.8382857647702464, "learning_rate": 4.319685665380539e-06, "loss": 0.1295, "step": 19098 }, { "epoch": 0.5571795320613805, "grad_norm": 1.203360923454032, "learning_rate": 4.319217627802415e-06, "loss": 0.1235, "step": 19099 }, { "epoch": 0.557208705292024, "grad_norm": 0.7630826778051315, "learning_rate": 4.318749596302155e-06, "loss": 0.1329, "step": 19100 }, { "epoch": 0.5572378785226676, "grad_norm": 0.8794901446033415, "learning_rate": 4.318281570883935e-06, "loss": 0.1294, "step": 19101 }, { "epoch": 0.5572670517533111, "grad_norm": 0.9443425549623128, "learning_rate": 4.3178135515519336e-06, "loss": 0.1419, "step": 19102 }, { "epoch": 0.5572962249839547, "grad_norm": 0.8137379571082658, "learning_rate": 4.317345538310331e-06, "loss": 0.1355, "step": 19103 }, { "epoch": 0.5573253982145983, "grad_norm": 0.8853168441729572, "learning_rate": 4.316877531163304e-06, "loss": 0.1333, "step": 19104 }, { "epoch": 0.5573545714452418, "grad_norm": 0.8321714517997488, "learning_rate": 4.3164095301150325e-06, "loss": 0.1289, "step": 19105 }, { "epoch": 0.5573837446758854, "grad_norm": 0.9104922555381546, "learning_rate": 4.315941535169692e-06, "loss": 0.1392, "step": 19106 }, { "epoch": 0.5574129179065289, "grad_norm": 0.7053054006086941, "learning_rate": 4.315473546331463e-06, "loss": 0.1003, "step": 19107 }, { "epoch": 0.5574420911371726, "grad_norm": 0.7700656288780094, "learning_rate": 4.315005563604521e-06, "loss": 0.1179, "step": 19108 }, { "epoch": 0.5574712643678161, "grad_norm": 0.7747077764319444, "learning_rate": 4.314537586993048e-06, "loss": 0.1391, "step": 19109 }, { "epoch": 0.5575004375984597, "grad_norm": 0.8512534505870097, "learning_rate": 4.314069616501219e-06, "loss": 0.116, "step": 19110 }, { "epoch": 0.5575296108291032, "grad_norm": 1.1915308090824996, "learning_rate": 4.313601652133213e-06, "loss": 0.1532, "step": 19111 }, { "epoch": 0.5575587840597468, "grad_norm": 0.7630239928530291, "learning_rate": 4.3131336938932085e-06, "loss": 0.1264, "step": 19112 }, { "epoch": 0.5575879572903903, "grad_norm": 0.9180348198619798, "learning_rate": 4.312665741785379e-06, "loss": 0.1414, "step": 19113 }, { "epoch": 0.5576171305210339, "grad_norm": 0.9850176224492503, "learning_rate": 4.312197795813909e-06, "loss": 0.1146, "step": 19114 }, { "epoch": 0.5576463037516775, "grad_norm": 0.7933498422687582, "learning_rate": 4.311729855982972e-06, "loss": 0.1441, "step": 19115 }, { "epoch": 0.557675476982321, "grad_norm": 0.9139270458622918, "learning_rate": 4.311261922296746e-06, "loss": 0.1252, "step": 19116 }, { "epoch": 0.5577046502129646, "grad_norm": 1.2188145977938503, "learning_rate": 4.310793994759411e-06, "loss": 0.1584, "step": 19117 }, { "epoch": 0.5577338234436081, "grad_norm": 0.9690350110217198, "learning_rate": 4.310326073375141e-06, "loss": 0.1238, "step": 19118 }, { "epoch": 0.5577629966742517, "grad_norm": 1.1264086158332027, "learning_rate": 4.309858158148114e-06, "loss": 0.1283, "step": 19119 }, { "epoch": 0.5577921699048952, "grad_norm": 0.8654237947195278, "learning_rate": 4.30939024908251e-06, "loss": 0.1215, "step": 19120 }, { "epoch": 0.5578213431355389, "grad_norm": 0.7326600570244524, "learning_rate": 4.308922346182505e-06, "loss": 0.1098, "step": 19121 }, { "epoch": 0.5578505163661824, "grad_norm": 0.7202669208213778, "learning_rate": 4.308454449452277e-06, "loss": 0.1186, "step": 19122 }, { "epoch": 0.557879689596826, "grad_norm": 1.0690084370132689, "learning_rate": 4.3079865588960014e-06, "loss": 0.1271, "step": 19123 }, { "epoch": 0.5579088628274695, "grad_norm": 1.1007812742797274, "learning_rate": 4.307518674517858e-06, "loss": 0.1361, "step": 19124 }, { "epoch": 0.5579380360581131, "grad_norm": 0.8787765921807975, "learning_rate": 4.3070507963220195e-06, "loss": 0.1235, "step": 19125 }, { "epoch": 0.5579672092887566, "grad_norm": 0.8996433276085365, "learning_rate": 4.3065829243126685e-06, "loss": 0.1509, "step": 19126 }, { "epoch": 0.5579963825194002, "grad_norm": 0.8878376716646226, "learning_rate": 4.306115058493981e-06, "loss": 0.1583, "step": 19127 }, { "epoch": 0.5580255557500438, "grad_norm": 1.0487540761799035, "learning_rate": 4.305647198870131e-06, "loss": 0.1302, "step": 19128 }, { "epoch": 0.5580547289806873, "grad_norm": 0.8405901697246108, "learning_rate": 4.305179345445297e-06, "loss": 0.1521, "step": 19129 }, { "epoch": 0.5580839022113309, "grad_norm": 0.888072930235096, "learning_rate": 4.304711498223656e-06, "loss": 0.1411, "step": 19130 }, { "epoch": 0.5581130754419744, "grad_norm": 0.9305898421135123, "learning_rate": 4.304243657209383e-06, "loss": 0.1314, "step": 19131 }, { "epoch": 0.558142248672618, "grad_norm": 0.8085351979341941, "learning_rate": 4.30377582240666e-06, "loss": 0.1215, "step": 19132 }, { "epoch": 0.5581714219032615, "grad_norm": 0.8572758553739566, "learning_rate": 4.303307993819657e-06, "loss": 0.1419, "step": 19133 }, { "epoch": 0.5582005951339051, "grad_norm": 0.922083938679305, "learning_rate": 4.302840171452556e-06, "loss": 0.1329, "step": 19134 }, { "epoch": 0.5582297683645487, "grad_norm": 0.8911884182439094, "learning_rate": 4.302372355309532e-06, "loss": 0.1179, "step": 19135 }, { "epoch": 0.5582589415951923, "grad_norm": 0.7111043411041209, "learning_rate": 4.301904545394761e-06, "loss": 0.141, "step": 19136 }, { "epoch": 0.5582881148258358, "grad_norm": 1.0692574535871096, "learning_rate": 4.301436741712417e-06, "loss": 0.1394, "step": 19137 }, { "epoch": 0.5583172880564794, "grad_norm": 1.1114995483858565, "learning_rate": 4.30096894426668e-06, "loss": 0.1555, "step": 19138 }, { "epoch": 0.558346461287123, "grad_norm": 0.9175644226687056, "learning_rate": 4.3005011530617275e-06, "loss": 0.1215, "step": 19139 }, { "epoch": 0.5583756345177665, "grad_norm": 0.7594943854632864, "learning_rate": 4.300033368101732e-06, "loss": 0.1245, "step": 19140 }, { "epoch": 0.5584048077484101, "grad_norm": 0.980480244557107, "learning_rate": 4.299565589390872e-06, "loss": 0.1371, "step": 19141 }, { "epoch": 0.5584339809790536, "grad_norm": 0.9394728822043553, "learning_rate": 4.299097816933323e-06, "loss": 0.1414, "step": 19142 }, { "epoch": 0.5584631542096972, "grad_norm": 0.8717233172501493, "learning_rate": 4.29863005073326e-06, "loss": 0.17, "step": 19143 }, { "epoch": 0.5584923274403407, "grad_norm": 0.825546529492615, "learning_rate": 4.2981622907948625e-06, "loss": 0.1597, "step": 19144 }, { "epoch": 0.5585215006709843, "grad_norm": 1.3135694938829492, "learning_rate": 4.297694537122304e-06, "loss": 0.121, "step": 19145 }, { "epoch": 0.5585506739016278, "grad_norm": 0.808713343832058, "learning_rate": 4.297226789719761e-06, "loss": 0.1132, "step": 19146 }, { "epoch": 0.5585798471322714, "grad_norm": 0.7237285483951936, "learning_rate": 4.29675904859141e-06, "loss": 0.1202, "step": 19147 }, { "epoch": 0.558609020362915, "grad_norm": 0.943673714823329, "learning_rate": 4.296291313741425e-06, "loss": 0.1301, "step": 19148 }, { "epoch": 0.5586381935935586, "grad_norm": 0.8394084451252932, "learning_rate": 4.295823585173983e-06, "loss": 0.1457, "step": 19149 }, { "epoch": 0.5586673668242021, "grad_norm": 0.7437816288736493, "learning_rate": 4.29535586289326e-06, "loss": 0.1091, "step": 19150 }, { "epoch": 0.5586965400548457, "grad_norm": 0.7471769505431773, "learning_rate": 4.294888146903433e-06, "loss": 0.1285, "step": 19151 }, { "epoch": 0.5587257132854893, "grad_norm": 0.9376425501717874, "learning_rate": 4.294420437208677e-06, "loss": 0.1166, "step": 19152 }, { "epoch": 0.5587548865161328, "grad_norm": 0.9560575967084503, "learning_rate": 4.2939527338131654e-06, "loss": 0.1237, "step": 19153 }, { "epoch": 0.5587840597467764, "grad_norm": 1.322551694300141, "learning_rate": 4.293485036721075e-06, "loss": 0.1278, "step": 19154 }, { "epoch": 0.5588132329774199, "grad_norm": 1.3297083397872205, "learning_rate": 4.293017345936581e-06, "loss": 0.1293, "step": 19155 }, { "epoch": 0.5588424062080635, "grad_norm": 0.8807202298756728, "learning_rate": 4.29254966146386e-06, "loss": 0.1148, "step": 19156 }, { "epoch": 0.558871579438707, "grad_norm": 0.884337355599638, "learning_rate": 4.292081983307088e-06, "loss": 0.1433, "step": 19157 }, { "epoch": 0.5589007526693506, "grad_norm": 0.8870220382296279, "learning_rate": 4.291614311470438e-06, "loss": 0.1265, "step": 19158 }, { "epoch": 0.5589299258999941, "grad_norm": 1.1241527600915935, "learning_rate": 4.291146645958087e-06, "loss": 0.1561, "step": 19159 }, { "epoch": 0.5589590991306377, "grad_norm": 1.0388864135881575, "learning_rate": 4.29067898677421e-06, "loss": 0.139, "step": 19160 }, { "epoch": 0.5589882723612812, "grad_norm": 0.7738380209356532, "learning_rate": 4.2902113339229774e-06, "loss": 0.1153, "step": 19161 }, { "epoch": 0.5590174455919249, "grad_norm": 0.819588683616226, "learning_rate": 4.2897436874085735e-06, "loss": 0.1336, "step": 19162 }, { "epoch": 0.5590466188225685, "grad_norm": 1.1735502412721954, "learning_rate": 4.289276047235167e-06, "loss": 0.1464, "step": 19163 }, { "epoch": 0.559075792053212, "grad_norm": 0.8005612962686929, "learning_rate": 4.2888084134069335e-06, "loss": 0.1264, "step": 19164 }, { "epoch": 0.5591049652838556, "grad_norm": 0.8830956224221792, "learning_rate": 4.28834078592805e-06, "loss": 0.1118, "step": 19165 }, { "epoch": 0.5591341385144991, "grad_norm": 0.9955461519220135, "learning_rate": 4.28787316480269e-06, "loss": 0.133, "step": 19166 }, { "epoch": 0.5591633117451427, "grad_norm": 0.7082792680425669, "learning_rate": 4.287405550035026e-06, "loss": 0.1243, "step": 19167 }, { "epoch": 0.5591924849757862, "grad_norm": 0.7681017037077732, "learning_rate": 4.286937941629237e-06, "loss": 0.1194, "step": 19168 }, { "epoch": 0.5592216582064298, "grad_norm": 0.8187444008121079, "learning_rate": 4.286470339589497e-06, "loss": 0.1609, "step": 19169 }, { "epoch": 0.5592508314370733, "grad_norm": 0.7842607479180885, "learning_rate": 4.286002743919977e-06, "loss": 0.1358, "step": 19170 }, { "epoch": 0.5592800046677169, "grad_norm": 0.8434677490486692, "learning_rate": 4.2855351546248555e-06, "loss": 0.1118, "step": 19171 }, { "epoch": 0.5593091778983604, "grad_norm": 0.6987501642828052, "learning_rate": 4.285067571708307e-06, "loss": 0.1221, "step": 19172 }, { "epoch": 0.559338351129004, "grad_norm": 0.9471406513575809, "learning_rate": 4.2845999951744995e-06, "loss": 0.1206, "step": 19173 }, { "epoch": 0.5593675243596475, "grad_norm": 1.0593603487317498, "learning_rate": 4.284132425027617e-06, "loss": 0.1422, "step": 19174 }, { "epoch": 0.5593966975902912, "grad_norm": 0.7583069093625302, "learning_rate": 4.283664861271829e-06, "loss": 0.1103, "step": 19175 }, { "epoch": 0.5594258708209348, "grad_norm": 0.8101424809285023, "learning_rate": 4.283197303911308e-06, "loss": 0.1276, "step": 19176 }, { "epoch": 0.5594550440515783, "grad_norm": 0.943264193812396, "learning_rate": 4.282729752950233e-06, "loss": 0.1434, "step": 19177 }, { "epoch": 0.5594842172822219, "grad_norm": 0.824926061015487, "learning_rate": 4.282262208392775e-06, "loss": 0.1203, "step": 19178 }, { "epoch": 0.5595133905128654, "grad_norm": 0.7954025563054928, "learning_rate": 4.281794670243106e-06, "loss": 0.1026, "step": 19179 }, { "epoch": 0.559542563743509, "grad_norm": 0.8435891204851553, "learning_rate": 4.281327138505404e-06, "loss": 0.1344, "step": 19180 }, { "epoch": 0.5595717369741525, "grad_norm": 0.8847702424844895, "learning_rate": 4.2808596131838425e-06, "loss": 0.1279, "step": 19181 }, { "epoch": 0.5596009102047961, "grad_norm": 1.370102585499779, "learning_rate": 4.280392094282596e-06, "loss": 0.1199, "step": 19182 }, { "epoch": 0.5596300834354396, "grad_norm": 0.6293253879548771, "learning_rate": 4.2799245818058345e-06, "loss": 0.1456, "step": 19183 }, { "epoch": 0.5596592566660832, "grad_norm": 1.211199340368652, "learning_rate": 4.279457075757736e-06, "loss": 0.1244, "step": 19184 }, { "epoch": 0.5596884298967267, "grad_norm": 0.8115015571584155, "learning_rate": 4.278989576142471e-06, "loss": 0.1265, "step": 19185 }, { "epoch": 0.5597176031273703, "grad_norm": 0.7559107067971785, "learning_rate": 4.278522082964216e-06, "loss": 0.1106, "step": 19186 }, { "epoch": 0.5597467763580138, "grad_norm": 0.7125935775902695, "learning_rate": 4.278054596227144e-06, "loss": 0.1351, "step": 19187 }, { "epoch": 0.5597759495886574, "grad_norm": 0.8288329012682432, "learning_rate": 4.277587115935429e-06, "loss": 0.12, "step": 19188 }, { "epoch": 0.5598051228193011, "grad_norm": 0.7560790380864659, "learning_rate": 4.277119642093242e-06, "loss": 0.1122, "step": 19189 }, { "epoch": 0.5598342960499446, "grad_norm": 0.8941174187321133, "learning_rate": 4.276652174704761e-06, "loss": 0.1538, "step": 19190 }, { "epoch": 0.5598634692805882, "grad_norm": 0.8135442525526468, "learning_rate": 4.276184713774152e-06, "loss": 0.1106, "step": 19191 }, { "epoch": 0.5598926425112317, "grad_norm": 0.9668779603652046, "learning_rate": 4.275717259305596e-06, "loss": 0.1329, "step": 19192 }, { "epoch": 0.5599218157418753, "grad_norm": 0.9930314287723587, "learning_rate": 4.275249811303265e-06, "loss": 0.1079, "step": 19193 }, { "epoch": 0.5599509889725188, "grad_norm": 0.7501561529837268, "learning_rate": 4.274782369771328e-06, "loss": 0.1409, "step": 19194 }, { "epoch": 0.5599801622031624, "grad_norm": 1.0002464672194624, "learning_rate": 4.2743149347139624e-06, "loss": 0.1446, "step": 19195 }, { "epoch": 0.5600093354338059, "grad_norm": 0.8574637381494113, "learning_rate": 4.27384750613534e-06, "loss": 0.1245, "step": 19196 }, { "epoch": 0.5600385086644495, "grad_norm": 0.7935332889830304, "learning_rate": 4.273380084039631e-06, "loss": 0.1222, "step": 19197 }, { "epoch": 0.560067681895093, "grad_norm": 0.8225165613848285, "learning_rate": 4.2729126684310136e-06, "loss": 0.1355, "step": 19198 }, { "epoch": 0.5600968551257366, "grad_norm": 1.0342259246357455, "learning_rate": 4.272445259313659e-06, "loss": 0.1423, "step": 19199 }, { "epoch": 0.5601260283563801, "grad_norm": 0.9432765790313263, "learning_rate": 4.271977856691738e-06, "loss": 0.1444, "step": 19200 }, { "epoch": 0.5601552015870237, "grad_norm": 0.803940543852876, "learning_rate": 4.271510460569425e-06, "loss": 0.1287, "step": 19201 }, { "epoch": 0.5601843748176674, "grad_norm": 0.9773705130442183, "learning_rate": 4.271043070950894e-06, "loss": 0.1278, "step": 19202 }, { "epoch": 0.5602135480483109, "grad_norm": 0.7344465941221047, "learning_rate": 4.270575687840312e-06, "loss": 0.1094, "step": 19203 }, { "epoch": 0.5602427212789545, "grad_norm": 0.8838434826550736, "learning_rate": 4.270108311241861e-06, "loss": 0.1309, "step": 19204 }, { "epoch": 0.560271894509598, "grad_norm": 0.8994255625391175, "learning_rate": 4.269640941159707e-06, "loss": 0.141, "step": 19205 }, { "epoch": 0.5603010677402416, "grad_norm": 0.9732707594536684, "learning_rate": 4.269173577598025e-06, "loss": 0.1323, "step": 19206 }, { "epoch": 0.5603302409708851, "grad_norm": 0.7352430477521541, "learning_rate": 4.268706220560988e-06, "loss": 0.1206, "step": 19207 }, { "epoch": 0.5603594142015287, "grad_norm": 1.1259196559999454, "learning_rate": 4.268238870052765e-06, "loss": 0.1341, "step": 19208 }, { "epoch": 0.5603885874321722, "grad_norm": 1.0097374425626127, "learning_rate": 4.26777152607753e-06, "loss": 0.1619, "step": 19209 }, { "epoch": 0.5604177606628158, "grad_norm": 0.9265512576160485, "learning_rate": 4.2673041886394575e-06, "loss": 0.1451, "step": 19210 }, { "epoch": 0.5604469338934593, "grad_norm": 0.9950666042745812, "learning_rate": 4.266836857742718e-06, "loss": 0.1206, "step": 19211 }, { "epoch": 0.5604761071241029, "grad_norm": 1.1128922622827953, "learning_rate": 4.266369533391485e-06, "loss": 0.1251, "step": 19212 }, { "epoch": 0.5605052803547464, "grad_norm": 0.9989686876612985, "learning_rate": 4.265902215589929e-06, "loss": 0.1301, "step": 19213 }, { "epoch": 0.56053445358539, "grad_norm": 0.7475467204018343, "learning_rate": 4.265434904342223e-06, "loss": 0.1113, "step": 19214 }, { "epoch": 0.5605636268160336, "grad_norm": 0.9076958632300759, "learning_rate": 4.264967599652537e-06, "loss": 0.1591, "step": 19215 }, { "epoch": 0.5605928000466772, "grad_norm": 1.1850395940739917, "learning_rate": 4.264500301525047e-06, "loss": 0.1467, "step": 19216 }, { "epoch": 0.5606219732773208, "grad_norm": 0.8431486816914163, "learning_rate": 4.264033009963922e-06, "loss": 0.1459, "step": 19217 }, { "epoch": 0.5606511465079643, "grad_norm": 0.8584084368418102, "learning_rate": 4.263565724973335e-06, "loss": 0.1377, "step": 19218 }, { "epoch": 0.5606803197386079, "grad_norm": 0.9314764561785516, "learning_rate": 4.2630984465574565e-06, "loss": 0.1095, "step": 19219 }, { "epoch": 0.5607094929692514, "grad_norm": 0.8418468200486386, "learning_rate": 4.262631174720461e-06, "loss": 0.1238, "step": 19220 }, { "epoch": 0.560738666199895, "grad_norm": 0.8403701956729118, "learning_rate": 4.262163909466514e-06, "loss": 0.1225, "step": 19221 }, { "epoch": 0.5607678394305385, "grad_norm": 1.029647917924995, "learning_rate": 4.261696650799796e-06, "loss": 0.1478, "step": 19222 }, { "epoch": 0.5607970126611821, "grad_norm": 0.6898737406749272, "learning_rate": 4.2612293987244724e-06, "loss": 0.1488, "step": 19223 }, { "epoch": 0.5608261858918256, "grad_norm": 0.899877041059888, "learning_rate": 4.2607621532447165e-06, "loss": 0.1485, "step": 19224 }, { "epoch": 0.5608553591224692, "grad_norm": 0.7953760056174247, "learning_rate": 4.260294914364701e-06, "loss": 0.1338, "step": 19225 }, { "epoch": 0.5608845323531128, "grad_norm": 0.5909386877772858, "learning_rate": 4.259827682088594e-06, "loss": 0.123, "step": 19226 }, { "epoch": 0.5609137055837563, "grad_norm": 0.6730042108219717, "learning_rate": 4.259360456420568e-06, "loss": 0.1089, "step": 19227 }, { "epoch": 0.5609428788143999, "grad_norm": 0.8464745304806932, "learning_rate": 4.258893237364796e-06, "loss": 0.1294, "step": 19228 }, { "epoch": 0.5609720520450435, "grad_norm": 0.6476066777922179, "learning_rate": 4.25842602492545e-06, "loss": 0.1055, "step": 19229 }, { "epoch": 0.5610012252756871, "grad_norm": 1.07844491737013, "learning_rate": 4.257958819106698e-06, "loss": 0.141, "step": 19230 }, { "epoch": 0.5610303985063306, "grad_norm": 0.7563219447049566, "learning_rate": 4.257491619912712e-06, "loss": 0.1366, "step": 19231 }, { "epoch": 0.5610595717369742, "grad_norm": 0.7512824230749573, "learning_rate": 4.257024427347665e-06, "loss": 0.1219, "step": 19232 }, { "epoch": 0.5610887449676177, "grad_norm": 0.7878563773671461, "learning_rate": 4.256557241415724e-06, "loss": 0.1316, "step": 19233 }, { "epoch": 0.5611179181982613, "grad_norm": 0.7928603201442495, "learning_rate": 4.256090062121065e-06, "loss": 0.1207, "step": 19234 }, { "epoch": 0.5611470914289048, "grad_norm": 0.8322238051894852, "learning_rate": 4.255622889467855e-06, "loss": 0.1445, "step": 19235 }, { "epoch": 0.5611762646595484, "grad_norm": 0.8572578634645005, "learning_rate": 4.255155723460267e-06, "loss": 0.1194, "step": 19236 }, { "epoch": 0.561205437890192, "grad_norm": 1.0490382995322687, "learning_rate": 4.254688564102471e-06, "loss": 0.1489, "step": 19237 }, { "epoch": 0.5612346111208355, "grad_norm": 0.7490484906454387, "learning_rate": 4.254221411398637e-06, "loss": 0.1455, "step": 19238 }, { "epoch": 0.5612637843514791, "grad_norm": 0.7888836109931429, "learning_rate": 4.253754265352936e-06, "loss": 0.1337, "step": 19239 }, { "epoch": 0.5612929575821226, "grad_norm": 0.7320539147918943, "learning_rate": 4.253287125969539e-06, "loss": 0.1223, "step": 19240 }, { "epoch": 0.5613221308127662, "grad_norm": 0.9379249120352995, "learning_rate": 4.252819993252616e-06, "loss": 0.1507, "step": 19241 }, { "epoch": 0.5613513040434097, "grad_norm": 0.6936519850642795, "learning_rate": 4.252352867206339e-06, "loss": 0.1166, "step": 19242 }, { "epoch": 0.5613804772740534, "grad_norm": 0.7324403816158332, "learning_rate": 4.251885747834876e-06, "loss": 0.1036, "step": 19243 }, { "epoch": 0.5614096505046969, "grad_norm": 1.0492171542944357, "learning_rate": 4.251418635142399e-06, "loss": 0.1256, "step": 19244 }, { "epoch": 0.5614388237353405, "grad_norm": 0.6966023763959978, "learning_rate": 4.250951529133076e-06, "loss": 0.1481, "step": 19245 }, { "epoch": 0.561467996965984, "grad_norm": 0.8778409784254735, "learning_rate": 4.25048442981108e-06, "loss": 0.128, "step": 19246 }, { "epoch": 0.5614971701966276, "grad_norm": 0.8473416623500546, "learning_rate": 4.250017337180582e-06, "loss": 0.1057, "step": 19247 }, { "epoch": 0.5615263434272711, "grad_norm": 0.6725343787609986, "learning_rate": 4.249550251245748e-06, "loss": 0.1228, "step": 19248 }, { "epoch": 0.5615555166579147, "grad_norm": 0.7639821480113129, "learning_rate": 4.2490831720107514e-06, "loss": 0.1308, "step": 19249 }, { "epoch": 0.5615846898885583, "grad_norm": 0.8546762631832446, "learning_rate": 4.248616099479761e-06, "loss": 0.1282, "step": 19250 }, { "epoch": 0.5616138631192018, "grad_norm": 0.7126137860181593, "learning_rate": 4.248149033656944e-06, "loss": 0.1381, "step": 19251 }, { "epoch": 0.5616430363498454, "grad_norm": 0.8070827937807697, "learning_rate": 4.247681974546476e-06, "loss": 0.1258, "step": 19252 }, { "epoch": 0.5616722095804889, "grad_norm": 0.8765076472300789, "learning_rate": 4.247214922152523e-06, "loss": 0.1285, "step": 19253 }, { "epoch": 0.5617013828111325, "grad_norm": 0.7976205097523037, "learning_rate": 4.246747876479255e-06, "loss": 0.1351, "step": 19254 }, { "epoch": 0.561730556041776, "grad_norm": 0.7308835986813395, "learning_rate": 4.246280837530843e-06, "loss": 0.1523, "step": 19255 }, { "epoch": 0.5617597292724196, "grad_norm": 0.8392817980715473, "learning_rate": 4.245813805311455e-06, "loss": 0.1277, "step": 19256 }, { "epoch": 0.5617889025030632, "grad_norm": 0.6953263947730987, "learning_rate": 4.245346779825261e-06, "loss": 0.1427, "step": 19257 }, { "epoch": 0.5618180757337068, "grad_norm": 0.7919335657714905, "learning_rate": 4.244879761076431e-06, "loss": 0.1386, "step": 19258 }, { "epoch": 0.5618472489643503, "grad_norm": 0.7850194959804362, "learning_rate": 4.244412749069136e-06, "loss": 0.141, "step": 19259 }, { "epoch": 0.5618764221949939, "grad_norm": 0.8078149370581779, "learning_rate": 4.2439457438075415e-06, "loss": 0.1231, "step": 19260 }, { "epoch": 0.5619055954256374, "grad_norm": 0.8211010678317224, "learning_rate": 4.243478745295819e-06, "loss": 0.1432, "step": 19261 }, { "epoch": 0.561934768656281, "grad_norm": 0.7373873299260599, "learning_rate": 4.243011753538139e-06, "loss": 0.1204, "step": 19262 }, { "epoch": 0.5619639418869246, "grad_norm": 0.9526800720565995, "learning_rate": 4.242544768538667e-06, "loss": 0.1326, "step": 19263 }, { "epoch": 0.5619931151175681, "grad_norm": 0.8399968829161019, "learning_rate": 4.2420777903015765e-06, "loss": 0.1221, "step": 19264 }, { "epoch": 0.5620222883482117, "grad_norm": 0.8356319202526975, "learning_rate": 4.241610818831034e-06, "loss": 0.1368, "step": 19265 }, { "epoch": 0.5620514615788552, "grad_norm": 0.8500670034443689, "learning_rate": 4.241143854131209e-06, "loss": 0.1593, "step": 19266 }, { "epoch": 0.5620806348094988, "grad_norm": 0.7891021294403209, "learning_rate": 4.240676896206272e-06, "loss": 0.1282, "step": 19267 }, { "epoch": 0.5621098080401423, "grad_norm": 0.7078533875865706, "learning_rate": 4.240209945060389e-06, "loss": 0.1212, "step": 19268 }, { "epoch": 0.5621389812707859, "grad_norm": 1.0192459342862867, "learning_rate": 4.239743000697729e-06, "loss": 0.1327, "step": 19269 }, { "epoch": 0.5621681545014295, "grad_norm": 1.266716519773206, "learning_rate": 4.2392760631224635e-06, "loss": 0.1189, "step": 19270 }, { "epoch": 0.5621973277320731, "grad_norm": 0.9068333846726923, "learning_rate": 4.2388091323387595e-06, "loss": 0.1145, "step": 19271 }, { "epoch": 0.5622265009627166, "grad_norm": 0.7834339806082172, "learning_rate": 4.238342208350786e-06, "loss": 0.1341, "step": 19272 }, { "epoch": 0.5622556741933602, "grad_norm": 1.0955118180741146, "learning_rate": 4.237875291162712e-06, "loss": 0.1173, "step": 19273 }, { "epoch": 0.5622848474240038, "grad_norm": 1.1646390911415243, "learning_rate": 4.237408380778705e-06, "loss": 0.1154, "step": 19274 }, { "epoch": 0.5623140206546473, "grad_norm": 0.6779539983296421, "learning_rate": 4.236941477202932e-06, "loss": 0.124, "step": 19275 }, { "epoch": 0.5623431938852909, "grad_norm": 1.0207774300517027, "learning_rate": 4.236474580439565e-06, "loss": 0.1137, "step": 19276 }, { "epoch": 0.5623723671159344, "grad_norm": 0.871406900651337, "learning_rate": 4.236007690492772e-06, "loss": 0.1435, "step": 19277 }, { "epoch": 0.562401540346578, "grad_norm": 0.9679563557813425, "learning_rate": 4.2355408073667185e-06, "loss": 0.1303, "step": 19278 }, { "epoch": 0.5624307135772215, "grad_norm": 0.9602230166493325, "learning_rate": 4.235073931065574e-06, "loss": 0.1423, "step": 19279 }, { "epoch": 0.5624598868078651, "grad_norm": 0.7242996991793469, "learning_rate": 4.234607061593508e-06, "loss": 0.1333, "step": 19280 }, { "epoch": 0.5624890600385086, "grad_norm": 0.9829676891679778, "learning_rate": 4.234140198954686e-06, "loss": 0.1326, "step": 19281 }, { "epoch": 0.5625182332691522, "grad_norm": 0.8301560375608132, "learning_rate": 4.233673343153278e-06, "loss": 0.1111, "step": 19282 }, { "epoch": 0.5625474064997957, "grad_norm": 0.7619115022470225, "learning_rate": 4.233206494193452e-06, "loss": 0.147, "step": 19283 }, { "epoch": 0.5625765797304394, "grad_norm": 0.7479633971938229, "learning_rate": 4.232739652079374e-06, "loss": 0.1466, "step": 19284 }, { "epoch": 0.562605752961083, "grad_norm": 0.8556559103942638, "learning_rate": 4.232272816815215e-06, "loss": 0.1273, "step": 19285 }, { "epoch": 0.5626349261917265, "grad_norm": 0.7495067380402984, "learning_rate": 4.23180598840514e-06, "loss": 0.1483, "step": 19286 }, { "epoch": 0.5626640994223701, "grad_norm": 0.7616907057061205, "learning_rate": 4.2313391668533175e-06, "loss": 0.1179, "step": 19287 }, { "epoch": 0.5626932726530136, "grad_norm": 1.0785160648338838, "learning_rate": 4.230872352163915e-06, "loss": 0.1388, "step": 19288 }, { "epoch": 0.5627224458836572, "grad_norm": 0.9011776848206084, "learning_rate": 4.230405544341103e-06, "loss": 0.1333, "step": 19289 }, { "epoch": 0.5627516191143007, "grad_norm": 0.859255093890849, "learning_rate": 4.229938743389045e-06, "loss": 0.1311, "step": 19290 }, { "epoch": 0.5627807923449443, "grad_norm": 0.8478954436826731, "learning_rate": 4.229471949311909e-06, "loss": 0.1317, "step": 19291 }, { "epoch": 0.5628099655755878, "grad_norm": 0.6805753181191991, "learning_rate": 4.229005162113866e-06, "loss": 0.1081, "step": 19292 }, { "epoch": 0.5628391388062314, "grad_norm": 1.1931656618607633, "learning_rate": 4.228538381799077e-06, "loss": 0.1148, "step": 19293 }, { "epoch": 0.5628683120368749, "grad_norm": 0.7888856696280375, "learning_rate": 4.228071608371717e-06, "loss": 0.1324, "step": 19294 }, { "epoch": 0.5628974852675185, "grad_norm": 0.8647763043827412, "learning_rate": 4.227604841835948e-06, "loss": 0.1335, "step": 19295 }, { "epoch": 0.562926658498162, "grad_norm": 1.0025047658229442, "learning_rate": 4.227138082195939e-06, "loss": 0.1331, "step": 19296 }, { "epoch": 0.5629558317288057, "grad_norm": 0.8419858458809792, "learning_rate": 4.226671329455856e-06, "loss": 0.1216, "step": 19297 }, { "epoch": 0.5629850049594493, "grad_norm": 0.9431387653014118, "learning_rate": 4.226204583619868e-06, "loss": 0.1449, "step": 19298 }, { "epoch": 0.5630141781900928, "grad_norm": 1.0837020257541334, "learning_rate": 4.225737844692138e-06, "loss": 0.1242, "step": 19299 }, { "epoch": 0.5630433514207364, "grad_norm": 0.8552785217938188, "learning_rate": 4.225271112676837e-06, "loss": 0.1235, "step": 19300 }, { "epoch": 0.5630725246513799, "grad_norm": 0.7848543573552564, "learning_rate": 4.224804387578131e-06, "loss": 0.1374, "step": 19301 }, { "epoch": 0.5631016978820235, "grad_norm": 0.8830283325085005, "learning_rate": 4.224337669400188e-06, "loss": 0.1422, "step": 19302 }, { "epoch": 0.563130871112667, "grad_norm": 0.9473238107477748, "learning_rate": 4.223870958147171e-06, "loss": 0.1308, "step": 19303 }, { "epoch": 0.5631600443433106, "grad_norm": 0.8144301208569833, "learning_rate": 4.22340425382325e-06, "loss": 0.1341, "step": 19304 }, { "epoch": 0.5631892175739541, "grad_norm": 0.912128089231077, "learning_rate": 4.222937556432588e-06, "loss": 0.1387, "step": 19305 }, { "epoch": 0.5632183908045977, "grad_norm": 1.0681510926519333, "learning_rate": 4.222470865979356e-06, "loss": 0.1506, "step": 19306 }, { "epoch": 0.5632475640352412, "grad_norm": 1.0162766848870537, "learning_rate": 4.2220041824677194e-06, "loss": 0.1311, "step": 19307 }, { "epoch": 0.5632767372658848, "grad_norm": 0.8632845629407573, "learning_rate": 4.221537505901843e-06, "loss": 0.1523, "step": 19308 }, { "epoch": 0.5633059104965283, "grad_norm": 0.9065762068080127, "learning_rate": 4.221070836285893e-06, "loss": 0.117, "step": 19309 }, { "epoch": 0.5633350837271719, "grad_norm": 0.9711847233830024, "learning_rate": 4.220604173624036e-06, "loss": 0.1302, "step": 19310 }, { "epoch": 0.5633642569578156, "grad_norm": 0.9597026554698108, "learning_rate": 4.22013751792044e-06, "loss": 0.139, "step": 19311 }, { "epoch": 0.5633934301884591, "grad_norm": 0.9967824934046925, "learning_rate": 4.219670869179271e-06, "loss": 0.1472, "step": 19312 }, { "epoch": 0.5634226034191027, "grad_norm": 0.920229970603909, "learning_rate": 4.219204227404693e-06, "loss": 0.129, "step": 19313 }, { "epoch": 0.5634517766497462, "grad_norm": 0.8940024553661092, "learning_rate": 4.218737592600873e-06, "loss": 0.1218, "step": 19314 }, { "epoch": 0.5634809498803898, "grad_norm": 0.9780462343756766, "learning_rate": 4.218270964771979e-06, "loss": 0.1385, "step": 19315 }, { "epoch": 0.5635101231110333, "grad_norm": 0.7950557499939994, "learning_rate": 4.217804343922173e-06, "loss": 0.1597, "step": 19316 }, { "epoch": 0.5635392963416769, "grad_norm": 1.1356372175957845, "learning_rate": 4.217337730055624e-06, "loss": 0.1535, "step": 19317 }, { "epoch": 0.5635684695723204, "grad_norm": 0.926451669767488, "learning_rate": 4.216871123176498e-06, "loss": 0.106, "step": 19318 }, { "epoch": 0.563597642802964, "grad_norm": 0.9967468346324517, "learning_rate": 4.21640452328896e-06, "loss": 0.1296, "step": 19319 }, { "epoch": 0.5636268160336075, "grad_norm": 1.1076081985968935, "learning_rate": 4.215937930397173e-06, "loss": 0.1277, "step": 19320 }, { "epoch": 0.5636559892642511, "grad_norm": 0.8887507150342753, "learning_rate": 4.215471344505307e-06, "loss": 0.1316, "step": 19321 }, { "epoch": 0.5636851624948946, "grad_norm": 0.8451591389386952, "learning_rate": 4.215004765617522e-06, "loss": 0.117, "step": 19322 }, { "epoch": 0.5637143357255382, "grad_norm": 1.0267875290904167, "learning_rate": 4.21453819373799e-06, "loss": 0.1291, "step": 19323 }, { "epoch": 0.5637435089561819, "grad_norm": 1.0539471781510779, "learning_rate": 4.214071628870874e-06, "loss": 0.1288, "step": 19324 }, { "epoch": 0.5637726821868254, "grad_norm": 0.8185991522525249, "learning_rate": 4.213605071020338e-06, "loss": 0.1271, "step": 19325 }, { "epoch": 0.563801855417469, "grad_norm": 1.2374200149474295, "learning_rate": 4.213138520190548e-06, "loss": 0.1244, "step": 19326 }, { "epoch": 0.5638310286481125, "grad_norm": 1.0267053419097691, "learning_rate": 4.212671976385671e-06, "loss": 0.1441, "step": 19327 }, { "epoch": 0.5638602018787561, "grad_norm": 0.7783242735958762, "learning_rate": 4.212205439609868e-06, "loss": 0.1242, "step": 19328 }, { "epoch": 0.5638893751093996, "grad_norm": 1.0591920770891756, "learning_rate": 4.211738909867309e-06, "loss": 0.1207, "step": 19329 }, { "epoch": 0.5639185483400432, "grad_norm": 0.9302342414014498, "learning_rate": 4.211272387162155e-06, "loss": 0.1296, "step": 19330 }, { "epoch": 0.5639477215706867, "grad_norm": 0.8021722036322011, "learning_rate": 4.210805871498575e-06, "loss": 0.1134, "step": 19331 }, { "epoch": 0.5639768948013303, "grad_norm": 0.8071094129094918, "learning_rate": 4.210339362880731e-06, "loss": 0.1402, "step": 19332 }, { "epoch": 0.5640060680319738, "grad_norm": 1.104945245826796, "learning_rate": 4.209872861312788e-06, "loss": 0.1511, "step": 19333 }, { "epoch": 0.5640352412626174, "grad_norm": 1.158047827388993, "learning_rate": 4.209406366798911e-06, "loss": 0.1208, "step": 19334 }, { "epoch": 0.564064414493261, "grad_norm": 0.7866838173179109, "learning_rate": 4.208939879343266e-06, "loss": 0.1337, "step": 19335 }, { "epoch": 0.5640935877239045, "grad_norm": 0.9365522579795171, "learning_rate": 4.208473398950016e-06, "loss": 0.139, "step": 19336 }, { "epoch": 0.564122760954548, "grad_norm": 1.0414850520402614, "learning_rate": 4.208006925623329e-06, "loss": 0.1389, "step": 19337 }, { "epoch": 0.5641519341851917, "grad_norm": 0.8206167461272429, "learning_rate": 4.207540459367365e-06, "loss": 0.1438, "step": 19338 }, { "epoch": 0.5641811074158353, "grad_norm": 1.0190539656046538, "learning_rate": 4.207074000186291e-06, "loss": 0.1191, "step": 19339 }, { "epoch": 0.5642102806464788, "grad_norm": 1.101606784957744, "learning_rate": 4.20660754808427e-06, "loss": 0.1266, "step": 19340 }, { "epoch": 0.5642394538771224, "grad_norm": 0.8097156329379198, "learning_rate": 4.20614110306547e-06, "loss": 0.1157, "step": 19341 }, { "epoch": 0.5642686271077659, "grad_norm": 1.0365985115715617, "learning_rate": 4.205674665134051e-06, "loss": 0.111, "step": 19342 }, { "epoch": 0.5642978003384095, "grad_norm": 0.9027453339980929, "learning_rate": 4.205208234294179e-06, "loss": 0.1238, "step": 19343 }, { "epoch": 0.564326973569053, "grad_norm": 0.9278063332020796, "learning_rate": 4.204741810550018e-06, "loss": 0.1302, "step": 19344 }, { "epoch": 0.5643561467996966, "grad_norm": 0.9972173466492001, "learning_rate": 4.204275393905734e-06, "loss": 0.1413, "step": 19345 }, { "epoch": 0.5643853200303401, "grad_norm": 0.8389099482289466, "learning_rate": 4.203808984365487e-06, "loss": 0.1205, "step": 19346 }, { "epoch": 0.5644144932609837, "grad_norm": 0.8919946807713786, "learning_rate": 4.203342581933444e-06, "loss": 0.1502, "step": 19347 }, { "epoch": 0.5644436664916272, "grad_norm": 0.9556445667187019, "learning_rate": 4.202876186613769e-06, "loss": 0.1357, "step": 19348 }, { "epoch": 0.5644728397222708, "grad_norm": 0.9808822001374411, "learning_rate": 4.2024097984106254e-06, "loss": 0.13, "step": 19349 }, { "epoch": 0.5645020129529144, "grad_norm": 0.692672238631294, "learning_rate": 4.201943417328176e-06, "loss": 0.1101, "step": 19350 }, { "epoch": 0.564531186183558, "grad_norm": 1.051160649074656, "learning_rate": 4.2014770433705856e-06, "loss": 0.1102, "step": 19351 }, { "epoch": 0.5645603594142016, "grad_norm": 0.7781561212765957, "learning_rate": 4.201010676542016e-06, "loss": 0.1401, "step": 19352 }, { "epoch": 0.5645895326448451, "grad_norm": 0.9035506015524689, "learning_rate": 4.200544316846633e-06, "loss": 0.143, "step": 19353 }, { "epoch": 0.5646187058754887, "grad_norm": 0.9088489089560662, "learning_rate": 4.200077964288601e-06, "loss": 0.1137, "step": 19354 }, { "epoch": 0.5646478791061322, "grad_norm": 0.7259117047558888, "learning_rate": 4.199611618872081e-06, "loss": 0.1347, "step": 19355 }, { "epoch": 0.5646770523367758, "grad_norm": 0.9965548007664105, "learning_rate": 4.199145280601238e-06, "loss": 0.1262, "step": 19356 }, { "epoch": 0.5647062255674193, "grad_norm": 0.8341149270966275, "learning_rate": 4.1986789494802345e-06, "loss": 0.1371, "step": 19357 }, { "epoch": 0.5647353987980629, "grad_norm": 0.7118071097143382, "learning_rate": 4.198212625513232e-06, "loss": 0.1264, "step": 19358 }, { "epoch": 0.5647645720287064, "grad_norm": 0.9634689955515569, "learning_rate": 4.197746308704399e-06, "loss": 0.1302, "step": 19359 }, { "epoch": 0.56479374525935, "grad_norm": 0.9066515208854374, "learning_rate": 4.1972799990578934e-06, "loss": 0.1461, "step": 19360 }, { "epoch": 0.5648229184899936, "grad_norm": 0.9874836309580389, "learning_rate": 4.1968136965778805e-06, "loss": 0.1336, "step": 19361 }, { "epoch": 0.5648520917206371, "grad_norm": 0.9584657928992774, "learning_rate": 4.196347401268525e-06, "loss": 0.1101, "step": 19362 }, { "epoch": 0.5648812649512807, "grad_norm": 0.8677620847531943, "learning_rate": 4.195881113133986e-06, "loss": 0.1432, "step": 19363 }, { "epoch": 0.5649104381819242, "grad_norm": 0.8374568733142175, "learning_rate": 4.1954148321784285e-06, "loss": 0.1142, "step": 19364 }, { "epoch": 0.5649396114125679, "grad_norm": 0.6635602068403426, "learning_rate": 4.1949485584060155e-06, "loss": 0.1411, "step": 19365 }, { "epoch": 0.5649687846432114, "grad_norm": 0.8396410835996723, "learning_rate": 4.19448229182091e-06, "loss": 0.1189, "step": 19366 }, { "epoch": 0.564997957873855, "grad_norm": 0.8711314366463256, "learning_rate": 4.194016032427275e-06, "loss": 0.13, "step": 19367 }, { "epoch": 0.5650271311044985, "grad_norm": 0.7495674259968295, "learning_rate": 4.193549780229273e-06, "loss": 0.0982, "step": 19368 }, { "epoch": 0.5650563043351421, "grad_norm": 0.994828317206556, "learning_rate": 4.193083535231064e-06, "loss": 0.1193, "step": 19369 }, { "epoch": 0.5650854775657856, "grad_norm": 0.8031371144023249, "learning_rate": 4.192617297436812e-06, "loss": 0.1428, "step": 19370 }, { "epoch": 0.5651146507964292, "grad_norm": 1.078601565853731, "learning_rate": 4.192151066850682e-06, "loss": 0.1676, "step": 19371 }, { "epoch": 0.5651438240270727, "grad_norm": 0.9624650306696038, "learning_rate": 4.191684843476834e-06, "loss": 0.1523, "step": 19372 }, { "epoch": 0.5651729972577163, "grad_norm": 0.757665184886242, "learning_rate": 4.191218627319431e-06, "loss": 0.1431, "step": 19373 }, { "epoch": 0.5652021704883599, "grad_norm": 0.694972450711904, "learning_rate": 4.190752418382635e-06, "loss": 0.1185, "step": 19374 }, { "epoch": 0.5652313437190034, "grad_norm": 0.8081352924546684, "learning_rate": 4.190286216670608e-06, "loss": 0.1151, "step": 19375 }, { "epoch": 0.565260516949647, "grad_norm": 0.903095796203024, "learning_rate": 4.189820022187511e-06, "loss": 0.1361, "step": 19376 }, { "epoch": 0.5652896901802905, "grad_norm": 1.3446850188910406, "learning_rate": 4.189353834937509e-06, "loss": 0.1439, "step": 19377 }, { "epoch": 0.5653188634109342, "grad_norm": 0.8425675405138412, "learning_rate": 4.188887654924761e-06, "loss": 0.1177, "step": 19378 }, { "epoch": 0.5653480366415777, "grad_norm": 0.7948812385773764, "learning_rate": 4.1884214821534334e-06, "loss": 0.1101, "step": 19379 }, { "epoch": 0.5653772098722213, "grad_norm": 0.7755867743772799, "learning_rate": 4.187955316627683e-06, "loss": 0.1312, "step": 19380 }, { "epoch": 0.5654063831028648, "grad_norm": 1.140046919943364, "learning_rate": 4.187489158351674e-06, "loss": 0.1558, "step": 19381 }, { "epoch": 0.5654355563335084, "grad_norm": 0.8086468288477293, "learning_rate": 4.187023007329566e-06, "loss": 0.1373, "step": 19382 }, { "epoch": 0.565464729564152, "grad_norm": 0.7529757380862866, "learning_rate": 4.186556863565524e-06, "loss": 0.1286, "step": 19383 }, { "epoch": 0.5654939027947955, "grad_norm": 1.3346562066612684, "learning_rate": 4.18609072706371e-06, "loss": 0.1224, "step": 19384 }, { "epoch": 0.565523076025439, "grad_norm": 0.7560453375313204, "learning_rate": 4.185624597828282e-06, "loss": 0.1085, "step": 19385 }, { "epoch": 0.5655522492560826, "grad_norm": 0.7212282462561252, "learning_rate": 4.185158475863403e-06, "loss": 0.1275, "step": 19386 }, { "epoch": 0.5655814224867262, "grad_norm": 0.7374292874245314, "learning_rate": 4.184692361173236e-06, "loss": 0.1114, "step": 19387 }, { "epoch": 0.5656105957173697, "grad_norm": 0.9391013064283164, "learning_rate": 4.184226253761937e-06, "loss": 0.1291, "step": 19388 }, { "epoch": 0.5656397689480133, "grad_norm": 0.7647587171548582, "learning_rate": 4.183760153633675e-06, "loss": 0.1228, "step": 19389 }, { "epoch": 0.5656689421786568, "grad_norm": 0.9987100527782217, "learning_rate": 4.183294060792606e-06, "loss": 0.1368, "step": 19390 }, { "epoch": 0.5656981154093004, "grad_norm": 0.8860530588359792, "learning_rate": 4.182827975242894e-06, "loss": 0.134, "step": 19391 }, { "epoch": 0.565727288639944, "grad_norm": 1.0873575091132723, "learning_rate": 4.182361896988699e-06, "loss": 0.1132, "step": 19392 }, { "epoch": 0.5657564618705876, "grad_norm": 0.842081833139939, "learning_rate": 4.18189582603418e-06, "loss": 0.14, "step": 19393 }, { "epoch": 0.5657856351012311, "grad_norm": 0.8752384891404035, "learning_rate": 4.1814297623835e-06, "loss": 0.1287, "step": 19394 }, { "epoch": 0.5658148083318747, "grad_norm": 0.8753769206838534, "learning_rate": 4.18096370604082e-06, "loss": 0.1291, "step": 19395 }, { "epoch": 0.5658439815625183, "grad_norm": 1.5124093889687904, "learning_rate": 4.1804976570103e-06, "loss": 0.1511, "step": 19396 }, { "epoch": 0.5658731547931618, "grad_norm": 0.8637210905796692, "learning_rate": 4.180031615296103e-06, "loss": 0.1413, "step": 19397 }, { "epoch": 0.5659023280238054, "grad_norm": 0.9549525126186816, "learning_rate": 4.179565580902387e-06, "loss": 0.1313, "step": 19398 }, { "epoch": 0.5659315012544489, "grad_norm": 0.7949894880690509, "learning_rate": 4.179099553833314e-06, "loss": 0.1209, "step": 19399 }, { "epoch": 0.5659606744850925, "grad_norm": 0.8830952411828071, "learning_rate": 4.178633534093043e-06, "loss": 0.1121, "step": 19400 }, { "epoch": 0.565989847715736, "grad_norm": 0.8183061514539053, "learning_rate": 4.178167521685737e-06, "loss": 0.1196, "step": 19401 }, { "epoch": 0.5660190209463796, "grad_norm": 0.8907210383788373, "learning_rate": 4.177701516615555e-06, "loss": 0.1609, "step": 19402 }, { "epoch": 0.5660481941770231, "grad_norm": 0.7892265729820176, "learning_rate": 4.177235518886657e-06, "loss": 0.1333, "step": 19403 }, { "epoch": 0.5660773674076667, "grad_norm": 0.7893912092104693, "learning_rate": 4.176769528503205e-06, "loss": 0.1162, "step": 19404 }, { "epoch": 0.5661065406383103, "grad_norm": 0.8847707860248508, "learning_rate": 4.176303545469358e-06, "loss": 0.1257, "step": 19405 }, { "epoch": 0.5661357138689539, "grad_norm": 1.0207630139430863, "learning_rate": 4.175837569789274e-06, "loss": 0.1441, "step": 19406 }, { "epoch": 0.5661648870995974, "grad_norm": 0.714577325556803, "learning_rate": 4.175371601467117e-06, "loss": 0.1155, "step": 19407 }, { "epoch": 0.566194060330241, "grad_norm": 0.9138541251039776, "learning_rate": 4.1749056405070455e-06, "loss": 0.1312, "step": 19408 }, { "epoch": 0.5662232335608846, "grad_norm": 0.8673457643088875, "learning_rate": 4.1744396869132205e-06, "loss": 0.1313, "step": 19409 }, { "epoch": 0.5662524067915281, "grad_norm": 1.0043121989356136, "learning_rate": 4.1739737406898e-06, "loss": 0.1313, "step": 19410 }, { "epoch": 0.5662815800221717, "grad_norm": 0.8286278643874674, "learning_rate": 4.173507801840945e-06, "loss": 0.1325, "step": 19411 }, { "epoch": 0.5663107532528152, "grad_norm": 1.0226878988018635, "learning_rate": 4.173041870370813e-06, "loss": 0.1376, "step": 19412 }, { "epoch": 0.5663399264834588, "grad_norm": 0.9393169483809087, "learning_rate": 4.1725759462835674e-06, "loss": 0.1266, "step": 19413 }, { "epoch": 0.5663690997141023, "grad_norm": 0.8350351829037099, "learning_rate": 4.172110029583368e-06, "loss": 0.1118, "step": 19414 }, { "epoch": 0.5663982729447459, "grad_norm": 1.0055518985598915, "learning_rate": 4.171644120274371e-06, "loss": 0.1327, "step": 19415 }, { "epoch": 0.5664274461753894, "grad_norm": 0.8402808204010609, "learning_rate": 4.171178218360737e-06, "loss": 0.1214, "step": 19416 }, { "epoch": 0.566456619406033, "grad_norm": 0.9098556148388088, "learning_rate": 4.170712323846628e-06, "loss": 0.1282, "step": 19417 }, { "epoch": 0.5664857926366765, "grad_norm": 0.9596209886171775, "learning_rate": 4.170246436736198e-06, "loss": 0.1071, "step": 19418 }, { "epoch": 0.5665149658673202, "grad_norm": 0.8622785281876078, "learning_rate": 4.169780557033612e-06, "loss": 0.1278, "step": 19419 }, { "epoch": 0.5665441390979638, "grad_norm": 0.7480267302996062, "learning_rate": 4.169314684743027e-06, "loss": 0.1252, "step": 19420 }, { "epoch": 0.5665733123286073, "grad_norm": 0.8038013696081622, "learning_rate": 4.168848819868601e-06, "loss": 0.1316, "step": 19421 }, { "epoch": 0.5666024855592509, "grad_norm": 1.1371605648032899, "learning_rate": 4.168382962414496e-06, "loss": 0.127, "step": 19422 }, { "epoch": 0.5666316587898944, "grad_norm": 1.2734743904923689, "learning_rate": 4.167917112384869e-06, "loss": 0.1498, "step": 19423 }, { "epoch": 0.566660832020538, "grad_norm": 1.0318764991978846, "learning_rate": 4.167451269783878e-06, "loss": 0.1565, "step": 19424 }, { "epoch": 0.5666900052511815, "grad_norm": 0.9522348872724322, "learning_rate": 4.166985434615683e-06, "loss": 0.1351, "step": 19425 }, { "epoch": 0.5667191784818251, "grad_norm": 0.9402296360804302, "learning_rate": 4.166519606884445e-06, "loss": 0.1187, "step": 19426 }, { "epoch": 0.5667483517124686, "grad_norm": 0.9181844994976865, "learning_rate": 4.166053786594322e-06, "loss": 0.1351, "step": 19427 }, { "epoch": 0.5667775249431122, "grad_norm": 0.8209798989749533, "learning_rate": 4.16558797374947e-06, "loss": 0.1388, "step": 19428 }, { "epoch": 0.5668066981737557, "grad_norm": 0.898912837960602, "learning_rate": 4.165122168354049e-06, "loss": 0.1321, "step": 19429 }, { "epoch": 0.5668358714043993, "grad_norm": 0.9807008826655649, "learning_rate": 4.164656370412218e-06, "loss": 0.1385, "step": 19430 }, { "epoch": 0.5668650446350428, "grad_norm": 0.8622015768296055, "learning_rate": 4.164190579928137e-06, "loss": 0.1286, "step": 19431 }, { "epoch": 0.5668942178656865, "grad_norm": 0.7912732824512433, "learning_rate": 4.163724796905961e-06, "loss": 0.1061, "step": 19432 }, { "epoch": 0.56692339109633, "grad_norm": 0.833736347636523, "learning_rate": 4.163259021349852e-06, "loss": 0.1215, "step": 19433 }, { "epoch": 0.5669525643269736, "grad_norm": 0.7638380756488233, "learning_rate": 4.162793253263967e-06, "loss": 0.1131, "step": 19434 }, { "epoch": 0.5669817375576172, "grad_norm": 0.7640489158978119, "learning_rate": 4.162327492652463e-06, "loss": 0.1177, "step": 19435 }, { "epoch": 0.5670109107882607, "grad_norm": 1.0578159091295236, "learning_rate": 4.161861739519498e-06, "loss": 0.139, "step": 19436 }, { "epoch": 0.5670400840189043, "grad_norm": 0.810072989231744, "learning_rate": 4.161395993869232e-06, "loss": 0.1262, "step": 19437 }, { "epoch": 0.5670692572495478, "grad_norm": 0.9400030183406941, "learning_rate": 4.160930255705824e-06, "loss": 0.1678, "step": 19438 }, { "epoch": 0.5670984304801914, "grad_norm": 0.6954126633205726, "learning_rate": 4.16046452503343e-06, "loss": 0.1263, "step": 19439 }, { "epoch": 0.5671276037108349, "grad_norm": 0.7525121649948221, "learning_rate": 4.159998801856207e-06, "loss": 0.1477, "step": 19440 }, { "epoch": 0.5671567769414785, "grad_norm": 0.9595718520300346, "learning_rate": 4.1595330861783145e-06, "loss": 0.1292, "step": 19441 }, { "epoch": 0.567185950172122, "grad_norm": 0.7302068471788169, "learning_rate": 4.15906737800391e-06, "loss": 0.1202, "step": 19442 }, { "epoch": 0.5672151234027656, "grad_norm": 0.7817238203916237, "learning_rate": 4.158601677337151e-06, "loss": 0.1483, "step": 19443 }, { "epoch": 0.5672442966334091, "grad_norm": 0.6014948314245426, "learning_rate": 4.158135984182197e-06, "loss": 0.1125, "step": 19444 }, { "epoch": 0.5672734698640527, "grad_norm": 0.9995087962528116, "learning_rate": 4.157670298543203e-06, "loss": 0.1526, "step": 19445 }, { "epoch": 0.5673026430946964, "grad_norm": 0.8340204683298592, "learning_rate": 4.157204620424326e-06, "loss": 0.1301, "step": 19446 }, { "epoch": 0.5673318163253399, "grad_norm": 1.2041059476698694, "learning_rate": 4.156738949829728e-06, "loss": 0.1205, "step": 19447 }, { "epoch": 0.5673609895559835, "grad_norm": 0.7920740428638681, "learning_rate": 4.156273286763559e-06, "loss": 0.1312, "step": 19448 }, { "epoch": 0.567390162786627, "grad_norm": 0.7831400996347803, "learning_rate": 4.155807631229984e-06, "loss": 0.1199, "step": 19449 }, { "epoch": 0.5674193360172706, "grad_norm": 0.8910101049727717, "learning_rate": 4.155341983233156e-06, "loss": 0.1336, "step": 19450 }, { "epoch": 0.5674485092479141, "grad_norm": 0.730621387194202, "learning_rate": 4.154876342777234e-06, "loss": 0.1135, "step": 19451 }, { "epoch": 0.5674776824785577, "grad_norm": 0.7208364369338753, "learning_rate": 4.154410709866374e-06, "loss": 0.1188, "step": 19452 }, { "epoch": 0.5675068557092012, "grad_norm": 0.8334785944597244, "learning_rate": 4.153945084504733e-06, "loss": 0.1418, "step": 19453 }, { "epoch": 0.5675360289398448, "grad_norm": 0.805382737106822, "learning_rate": 4.153479466696467e-06, "loss": 0.1191, "step": 19454 }, { "epoch": 0.5675652021704883, "grad_norm": 1.1381496668325919, "learning_rate": 4.153013856445736e-06, "loss": 0.1263, "step": 19455 }, { "epoch": 0.5675943754011319, "grad_norm": 0.892138197681285, "learning_rate": 4.152548253756694e-06, "loss": 0.1167, "step": 19456 }, { "epoch": 0.5676235486317754, "grad_norm": 0.8309719989376817, "learning_rate": 4.152082658633501e-06, "loss": 0.1322, "step": 19457 }, { "epoch": 0.567652721862419, "grad_norm": 0.856713290710731, "learning_rate": 4.15161707108031e-06, "loss": 0.129, "step": 19458 }, { "epoch": 0.5676818950930627, "grad_norm": 0.9598761879946143, "learning_rate": 4.151151491101279e-06, "loss": 0.123, "step": 19459 }, { "epoch": 0.5677110683237062, "grad_norm": 1.011005379721408, "learning_rate": 4.150685918700565e-06, "loss": 0.1304, "step": 19460 }, { "epoch": 0.5677402415543498, "grad_norm": 0.8848972179178022, "learning_rate": 4.150220353882325e-06, "loss": 0.146, "step": 19461 }, { "epoch": 0.5677694147849933, "grad_norm": 0.7194590108526638, "learning_rate": 4.149754796650714e-06, "loss": 0.1146, "step": 19462 }, { "epoch": 0.5677985880156369, "grad_norm": 0.9134909870263048, "learning_rate": 4.14928924700989e-06, "loss": 0.1268, "step": 19463 }, { "epoch": 0.5678277612462804, "grad_norm": 0.6926311306615194, "learning_rate": 4.148823704964009e-06, "loss": 0.1002, "step": 19464 }, { "epoch": 0.567856934476924, "grad_norm": 0.8663371143277986, "learning_rate": 4.148358170517226e-06, "loss": 0.1044, "step": 19465 }, { "epoch": 0.5678861077075675, "grad_norm": 0.8497971341096269, "learning_rate": 4.147892643673696e-06, "loss": 0.1273, "step": 19466 }, { "epoch": 0.5679152809382111, "grad_norm": 0.906068927483276, "learning_rate": 4.147427124437579e-06, "loss": 0.1627, "step": 19467 }, { "epoch": 0.5679444541688546, "grad_norm": 0.9524640293613119, "learning_rate": 4.146961612813029e-06, "loss": 0.1137, "step": 19468 }, { "epoch": 0.5679736273994982, "grad_norm": 0.8292784079540674, "learning_rate": 4.1464961088042035e-06, "loss": 0.1472, "step": 19469 }, { "epoch": 0.5680028006301417, "grad_norm": 0.8191775191554431, "learning_rate": 4.146030612415256e-06, "loss": 0.1408, "step": 19470 }, { "epoch": 0.5680319738607853, "grad_norm": 0.9411945831136156, "learning_rate": 4.145565123650342e-06, "loss": 0.1292, "step": 19471 }, { "epoch": 0.5680611470914289, "grad_norm": 0.8442802249400984, "learning_rate": 4.1450996425136184e-06, "loss": 0.1166, "step": 19472 }, { "epoch": 0.5680903203220725, "grad_norm": 0.9372724918897899, "learning_rate": 4.144634169009243e-06, "loss": 0.1366, "step": 19473 }, { "epoch": 0.5681194935527161, "grad_norm": 0.7893050022050192, "learning_rate": 4.1441687031413695e-06, "loss": 0.1188, "step": 19474 }, { "epoch": 0.5681486667833596, "grad_norm": 0.7700591593083678, "learning_rate": 4.143703244914152e-06, "loss": 0.1133, "step": 19475 }, { "epoch": 0.5681778400140032, "grad_norm": 0.9653185623356487, "learning_rate": 4.143237794331749e-06, "loss": 0.1221, "step": 19476 }, { "epoch": 0.5682070132446467, "grad_norm": 1.0229825103184051, "learning_rate": 4.142772351398314e-06, "loss": 0.1407, "step": 19477 }, { "epoch": 0.5682361864752903, "grad_norm": 0.9353775145276803, "learning_rate": 4.142306916118e-06, "loss": 0.1001, "step": 19478 }, { "epoch": 0.5682653597059338, "grad_norm": 0.8305059203098825, "learning_rate": 4.141841488494969e-06, "loss": 0.1303, "step": 19479 }, { "epoch": 0.5682945329365774, "grad_norm": 0.879799059625502, "learning_rate": 4.1413760685333714e-06, "loss": 0.1237, "step": 19480 }, { "epoch": 0.5683237061672209, "grad_norm": 0.786556193206934, "learning_rate": 4.140910656237363e-06, "loss": 0.1327, "step": 19481 }, { "epoch": 0.5683528793978645, "grad_norm": 0.6985346634610555, "learning_rate": 4.1404452516111e-06, "loss": 0.1044, "step": 19482 }, { "epoch": 0.568382052628508, "grad_norm": 0.873493223399064, "learning_rate": 4.139979854658735e-06, "loss": 0.1329, "step": 19483 }, { "epoch": 0.5684112258591516, "grad_norm": 0.833281339853357, "learning_rate": 4.139514465384424e-06, "loss": 0.1522, "step": 19484 }, { "epoch": 0.5684403990897952, "grad_norm": 0.7788355824123007, "learning_rate": 4.139049083792324e-06, "loss": 0.1097, "step": 19485 }, { "epoch": 0.5684695723204388, "grad_norm": 0.8406256788633911, "learning_rate": 4.1385837098865874e-06, "loss": 0.1326, "step": 19486 }, { "epoch": 0.5684987455510824, "grad_norm": 0.5863643463359757, "learning_rate": 4.138118343671372e-06, "loss": 0.1214, "step": 19487 }, { "epoch": 0.5685279187817259, "grad_norm": 0.813258095451415, "learning_rate": 4.137652985150829e-06, "loss": 0.1474, "step": 19488 }, { "epoch": 0.5685570920123695, "grad_norm": 0.745323317296243, "learning_rate": 4.137187634329114e-06, "loss": 0.1179, "step": 19489 }, { "epoch": 0.568586265243013, "grad_norm": 0.7667490617432795, "learning_rate": 4.13672229121038e-06, "loss": 0.1435, "step": 19490 }, { "epoch": 0.5686154384736566, "grad_norm": 0.8094175844997595, "learning_rate": 4.136256955798786e-06, "loss": 0.1411, "step": 19491 }, { "epoch": 0.5686446117043001, "grad_norm": 0.6636350950499006, "learning_rate": 4.135791628098483e-06, "loss": 0.1429, "step": 19492 }, { "epoch": 0.5686737849349437, "grad_norm": 0.7253760280737869, "learning_rate": 4.135326308113625e-06, "loss": 0.1086, "step": 19493 }, { "epoch": 0.5687029581655872, "grad_norm": 0.8037861905000112, "learning_rate": 4.13486099584837e-06, "loss": 0.1229, "step": 19494 }, { "epoch": 0.5687321313962308, "grad_norm": 0.8480607830202082, "learning_rate": 4.134395691306868e-06, "loss": 0.1313, "step": 19495 }, { "epoch": 0.5687613046268744, "grad_norm": 0.7440739430597881, "learning_rate": 4.133930394493272e-06, "loss": 0.1035, "step": 19496 }, { "epoch": 0.5687904778575179, "grad_norm": 0.7217697385956728, "learning_rate": 4.1334651054117404e-06, "loss": 0.1252, "step": 19497 }, { "epoch": 0.5688196510881615, "grad_norm": 0.7103858199884849, "learning_rate": 4.132999824066426e-06, "loss": 0.1448, "step": 19498 }, { "epoch": 0.568848824318805, "grad_norm": 0.8064217001025907, "learning_rate": 4.132534550461484e-06, "loss": 0.1435, "step": 19499 }, { "epoch": 0.5688779975494487, "grad_norm": 0.7874102313578355, "learning_rate": 4.1320692846010645e-06, "loss": 0.1089, "step": 19500 }, { "epoch": 0.5689071707800922, "grad_norm": 0.8832892212667347, "learning_rate": 4.131604026489322e-06, "loss": 0.1293, "step": 19501 }, { "epoch": 0.5689363440107358, "grad_norm": 0.8844026983146775, "learning_rate": 4.131138776130413e-06, "loss": 0.1291, "step": 19502 }, { "epoch": 0.5689655172413793, "grad_norm": 0.7443751926414968, "learning_rate": 4.130673533528489e-06, "loss": 0.1262, "step": 19503 }, { "epoch": 0.5689946904720229, "grad_norm": 0.7058878160954616, "learning_rate": 4.130208298687705e-06, "loss": 0.1101, "step": 19504 }, { "epoch": 0.5690238637026664, "grad_norm": 0.7290644904028006, "learning_rate": 4.129743071612214e-06, "loss": 0.1101, "step": 19505 }, { "epoch": 0.56905303693331, "grad_norm": 0.890508438194241, "learning_rate": 4.129277852306169e-06, "loss": 0.1313, "step": 19506 }, { "epoch": 0.5690822101639536, "grad_norm": 0.7590479402233686, "learning_rate": 4.128812640773721e-06, "loss": 0.1234, "step": 19507 }, { "epoch": 0.5691113833945971, "grad_norm": 0.6992276627467836, "learning_rate": 4.128347437019028e-06, "loss": 0.1051, "step": 19508 }, { "epoch": 0.5691405566252407, "grad_norm": 0.77158180994644, "learning_rate": 4.127882241046241e-06, "loss": 0.1197, "step": 19509 }, { "epoch": 0.5691697298558842, "grad_norm": 0.7894896966601396, "learning_rate": 4.127417052859513e-06, "loss": 0.1358, "step": 19510 }, { "epoch": 0.5691989030865278, "grad_norm": 0.7574734746844207, "learning_rate": 4.126951872462997e-06, "loss": 0.1215, "step": 19511 }, { "epoch": 0.5692280763171713, "grad_norm": 0.8685129970062757, "learning_rate": 4.1264866998608476e-06, "loss": 0.1421, "step": 19512 }, { "epoch": 0.5692572495478149, "grad_norm": 0.870514576773545, "learning_rate": 4.126021535057213e-06, "loss": 0.1387, "step": 19513 }, { "epoch": 0.5692864227784585, "grad_norm": 0.8267546453740713, "learning_rate": 4.125556378056252e-06, "loss": 0.118, "step": 19514 }, { "epoch": 0.5693155960091021, "grad_norm": 0.7105776430547567, "learning_rate": 4.125091228862115e-06, "loss": 0.1399, "step": 19515 }, { "epoch": 0.5693447692397456, "grad_norm": 0.939084837621374, "learning_rate": 4.124626087478954e-06, "loss": 0.1272, "step": 19516 }, { "epoch": 0.5693739424703892, "grad_norm": 0.8669677573714532, "learning_rate": 4.124160953910923e-06, "loss": 0.1237, "step": 19517 }, { "epoch": 0.5694031157010327, "grad_norm": 0.9734128272672662, "learning_rate": 4.1236958281621735e-06, "loss": 0.1136, "step": 19518 }, { "epoch": 0.5694322889316763, "grad_norm": 0.9261766815643392, "learning_rate": 4.123230710236857e-06, "loss": 0.1129, "step": 19519 }, { "epoch": 0.5694614621623199, "grad_norm": 1.0987557919086146, "learning_rate": 4.122765600139129e-06, "loss": 0.1273, "step": 19520 }, { "epoch": 0.5694906353929634, "grad_norm": 1.0520802567736014, "learning_rate": 4.122300497873141e-06, "loss": 0.1248, "step": 19521 }, { "epoch": 0.569519808623607, "grad_norm": 0.7404176701961205, "learning_rate": 4.121835403443044e-06, "loss": 0.1204, "step": 19522 }, { "epoch": 0.5695489818542505, "grad_norm": 0.8641522715390688, "learning_rate": 4.1213703168529905e-06, "loss": 0.1306, "step": 19523 }, { "epoch": 0.5695781550848941, "grad_norm": 0.7196633741119154, "learning_rate": 4.120905238107134e-06, "loss": 0.1193, "step": 19524 }, { "epoch": 0.5696073283155376, "grad_norm": 0.8359834060471374, "learning_rate": 4.120440167209623e-06, "loss": 0.106, "step": 19525 }, { "epoch": 0.5696365015461812, "grad_norm": 0.7430056591631837, "learning_rate": 4.119975104164616e-06, "loss": 0.1342, "step": 19526 }, { "epoch": 0.5696656747768248, "grad_norm": 0.78401092675671, "learning_rate": 4.119510048976258e-06, "loss": 0.1089, "step": 19527 }, { "epoch": 0.5696948480074684, "grad_norm": 0.737065396274654, "learning_rate": 4.119045001648705e-06, "loss": 0.1071, "step": 19528 }, { "epoch": 0.5697240212381119, "grad_norm": 0.8720979256625068, "learning_rate": 4.11857996218611e-06, "loss": 0.1268, "step": 19529 }, { "epoch": 0.5697531944687555, "grad_norm": 0.9327166649629202, "learning_rate": 4.118114930592621e-06, "loss": 0.14, "step": 19530 }, { "epoch": 0.569782367699399, "grad_norm": 0.8726103788430802, "learning_rate": 4.1176499068723895e-06, "loss": 0.1158, "step": 19531 }, { "epoch": 0.5698115409300426, "grad_norm": 1.0831975917678167, "learning_rate": 4.117184891029571e-06, "loss": 0.1213, "step": 19532 }, { "epoch": 0.5698407141606862, "grad_norm": 0.6362389314916833, "learning_rate": 4.116719883068315e-06, "loss": 0.119, "step": 19533 }, { "epoch": 0.5698698873913297, "grad_norm": 1.022191520372765, "learning_rate": 4.116254882992774e-06, "loss": 0.1398, "step": 19534 }, { "epoch": 0.5698990606219733, "grad_norm": 0.7810039189547336, "learning_rate": 4.115789890807097e-06, "loss": 0.1176, "step": 19535 }, { "epoch": 0.5699282338526168, "grad_norm": 0.9425448543692836, "learning_rate": 4.115324906515438e-06, "loss": 0.1217, "step": 19536 }, { "epoch": 0.5699574070832604, "grad_norm": 0.6867779750577592, "learning_rate": 4.114859930121944e-06, "loss": 0.1335, "step": 19537 }, { "epoch": 0.5699865803139039, "grad_norm": 0.7868889556038507, "learning_rate": 4.1143949616307725e-06, "loss": 0.1545, "step": 19538 }, { "epoch": 0.5700157535445475, "grad_norm": 0.7451587901962556, "learning_rate": 4.1139300010460705e-06, "loss": 0.1317, "step": 19539 }, { "epoch": 0.570044926775191, "grad_norm": 0.9621247044829014, "learning_rate": 4.11346504837199e-06, "loss": 0.1348, "step": 19540 }, { "epoch": 0.5700741000058347, "grad_norm": 0.8226151313164166, "learning_rate": 4.113000103612681e-06, "loss": 0.144, "step": 19541 }, { "epoch": 0.5701032732364782, "grad_norm": 0.7845961869456818, "learning_rate": 4.112535166772297e-06, "loss": 0.138, "step": 19542 }, { "epoch": 0.5701324464671218, "grad_norm": 0.7521358174870113, "learning_rate": 4.112070237854984e-06, "loss": 0.12, "step": 19543 }, { "epoch": 0.5701616196977654, "grad_norm": 0.7869389188886107, "learning_rate": 4.111605316864899e-06, "loss": 0.1438, "step": 19544 }, { "epoch": 0.5701907929284089, "grad_norm": 0.891113661257238, "learning_rate": 4.1111404038061895e-06, "loss": 0.1334, "step": 19545 }, { "epoch": 0.5702199661590525, "grad_norm": 0.8058385390983227, "learning_rate": 4.110675498683005e-06, "loss": 0.1496, "step": 19546 }, { "epoch": 0.570249139389696, "grad_norm": 0.891585900716879, "learning_rate": 4.1102106014994994e-06, "loss": 0.1202, "step": 19547 }, { "epoch": 0.5702783126203396, "grad_norm": 0.8010862708971639, "learning_rate": 4.109745712259819e-06, "loss": 0.1373, "step": 19548 }, { "epoch": 0.5703074858509831, "grad_norm": 0.6890893052186399, "learning_rate": 4.109280830968116e-06, "loss": 0.1086, "step": 19549 }, { "epoch": 0.5703366590816267, "grad_norm": 0.9517040546287387, "learning_rate": 4.108815957628542e-06, "loss": 0.1444, "step": 19550 }, { "epoch": 0.5703658323122702, "grad_norm": 0.740405320141188, "learning_rate": 4.108351092245248e-06, "loss": 0.1496, "step": 19551 }, { "epoch": 0.5703950055429138, "grad_norm": 0.803242782272997, "learning_rate": 4.107886234822381e-06, "loss": 0.1322, "step": 19552 }, { "epoch": 0.5704241787735573, "grad_norm": 0.9112748207641433, "learning_rate": 4.107421385364093e-06, "loss": 0.1404, "step": 19553 }, { "epoch": 0.570453352004201, "grad_norm": 0.8018854481429659, "learning_rate": 4.106956543874534e-06, "loss": 0.1231, "step": 19554 }, { "epoch": 0.5704825252348446, "grad_norm": 0.983309130673319, "learning_rate": 4.106491710357851e-06, "loss": 0.1554, "step": 19555 }, { "epoch": 0.5705116984654881, "grad_norm": 0.9327751668367343, "learning_rate": 4.106026884818201e-06, "loss": 0.1263, "step": 19556 }, { "epoch": 0.5705408716961317, "grad_norm": 0.8019383722303732, "learning_rate": 4.105562067259726e-06, "loss": 0.1025, "step": 19557 }, { "epoch": 0.5705700449267752, "grad_norm": 0.7746129361333384, "learning_rate": 4.1050972576865824e-06, "loss": 0.1471, "step": 19558 }, { "epoch": 0.5705992181574188, "grad_norm": 0.9636544945578799, "learning_rate": 4.104632456102916e-06, "loss": 0.1232, "step": 19559 }, { "epoch": 0.5706283913880623, "grad_norm": 0.7462668806815042, "learning_rate": 4.104167662512877e-06, "loss": 0.1153, "step": 19560 }, { "epoch": 0.5706575646187059, "grad_norm": 1.3059020968712054, "learning_rate": 4.103702876920614e-06, "loss": 0.1338, "step": 19561 }, { "epoch": 0.5706867378493494, "grad_norm": 0.8451109525651166, "learning_rate": 4.103238099330279e-06, "loss": 0.1371, "step": 19562 }, { "epoch": 0.570715911079993, "grad_norm": 0.9030798489373518, "learning_rate": 4.102773329746019e-06, "loss": 0.1467, "step": 19563 }, { "epoch": 0.5707450843106365, "grad_norm": 0.8529831916062731, "learning_rate": 4.102308568171987e-06, "loss": 0.1363, "step": 19564 }, { "epoch": 0.5707742575412801, "grad_norm": 0.860329533972752, "learning_rate": 4.101843814612328e-06, "loss": 0.1321, "step": 19565 }, { "epoch": 0.5708034307719236, "grad_norm": 0.8707392296522488, "learning_rate": 4.101379069071193e-06, "loss": 0.1359, "step": 19566 }, { "epoch": 0.5708326040025672, "grad_norm": 1.100558595633123, "learning_rate": 4.100914331552731e-06, "loss": 0.1099, "step": 19567 }, { "epoch": 0.5708617772332109, "grad_norm": 1.036221664048366, "learning_rate": 4.100449602061091e-06, "loss": 0.1076, "step": 19568 }, { "epoch": 0.5708909504638544, "grad_norm": 0.670950051219072, "learning_rate": 4.0999848806004235e-06, "loss": 0.132, "step": 19569 }, { "epoch": 0.570920123694498, "grad_norm": 0.8106256922639987, "learning_rate": 4.099520167174876e-06, "loss": 0.1364, "step": 19570 }, { "epoch": 0.5709492969251415, "grad_norm": 0.8510338740647666, "learning_rate": 4.0990554617885965e-06, "loss": 0.1247, "step": 19571 }, { "epoch": 0.5709784701557851, "grad_norm": 0.8043332398153833, "learning_rate": 4.098590764445737e-06, "loss": 0.1202, "step": 19572 }, { "epoch": 0.5710076433864286, "grad_norm": 1.0271611484031353, "learning_rate": 4.0981260751504394e-06, "loss": 0.1464, "step": 19573 }, { "epoch": 0.5710368166170722, "grad_norm": 0.9118783140012819, "learning_rate": 4.097661393906861e-06, "loss": 0.1546, "step": 19574 }, { "epoch": 0.5710659898477157, "grad_norm": 0.7993818994107283, "learning_rate": 4.097196720719146e-06, "loss": 0.1238, "step": 19575 }, { "epoch": 0.5710951630783593, "grad_norm": 0.8956912654468556, "learning_rate": 4.096732055591442e-06, "loss": 0.1291, "step": 19576 }, { "epoch": 0.5711243363090028, "grad_norm": 1.2382410713120202, "learning_rate": 4.096267398527899e-06, "loss": 0.146, "step": 19577 }, { "epoch": 0.5711535095396464, "grad_norm": 0.8038197701147217, "learning_rate": 4.095802749532665e-06, "loss": 0.1366, "step": 19578 }, { "epoch": 0.5711826827702899, "grad_norm": 0.9035561096904182, "learning_rate": 4.095338108609887e-06, "loss": 0.1472, "step": 19579 }, { "epoch": 0.5712118560009335, "grad_norm": 0.9322470645117651, "learning_rate": 4.0948734757637145e-06, "loss": 0.1278, "step": 19580 }, { "epoch": 0.5712410292315772, "grad_norm": 0.7384723552150703, "learning_rate": 4.094408850998298e-06, "loss": 0.1332, "step": 19581 }, { "epoch": 0.5712702024622207, "grad_norm": 0.7524179745350091, "learning_rate": 4.093944234317781e-06, "loss": 0.1198, "step": 19582 }, { "epoch": 0.5712993756928643, "grad_norm": 0.8528641395279695, "learning_rate": 4.093479625726314e-06, "loss": 0.1108, "step": 19583 }, { "epoch": 0.5713285489235078, "grad_norm": 0.7550134162929518, "learning_rate": 4.093015025228045e-06, "loss": 0.145, "step": 19584 }, { "epoch": 0.5713577221541514, "grad_norm": 0.7579223993536571, "learning_rate": 4.092550432827119e-06, "loss": 0.1188, "step": 19585 }, { "epoch": 0.5713868953847949, "grad_norm": 0.8729008044637647, "learning_rate": 4.092085848527689e-06, "loss": 0.1212, "step": 19586 }, { "epoch": 0.5714160686154385, "grad_norm": 0.6287918891569605, "learning_rate": 4.091621272333899e-06, "loss": 0.1219, "step": 19587 }, { "epoch": 0.571445241846082, "grad_norm": 0.9254186992449724, "learning_rate": 4.091156704249897e-06, "loss": 0.1547, "step": 19588 }, { "epoch": 0.5714744150767256, "grad_norm": 0.86534759301468, "learning_rate": 4.090692144279832e-06, "loss": 0.1208, "step": 19589 }, { "epoch": 0.5715035883073691, "grad_norm": 0.7307745436742721, "learning_rate": 4.0902275924278494e-06, "loss": 0.1158, "step": 19590 }, { "epoch": 0.5715327615380127, "grad_norm": 0.7691039628841517, "learning_rate": 4.0897630486980975e-06, "loss": 0.151, "step": 19591 }, { "epoch": 0.5715619347686562, "grad_norm": 0.9721164989121035, "learning_rate": 4.089298513094724e-06, "loss": 0.1145, "step": 19592 }, { "epoch": 0.5715911079992998, "grad_norm": 0.9281174769800552, "learning_rate": 4.088833985621876e-06, "loss": 0.1523, "step": 19593 }, { "epoch": 0.5716202812299433, "grad_norm": 0.8749704575707429, "learning_rate": 4.0883694662837015e-06, "loss": 0.1438, "step": 19594 }, { "epoch": 0.571649454460587, "grad_norm": 0.8404845980285226, "learning_rate": 4.087904955084346e-06, "loss": 0.1105, "step": 19595 }, { "epoch": 0.5716786276912306, "grad_norm": 0.6669387434807595, "learning_rate": 4.087440452027958e-06, "loss": 0.1161, "step": 19596 }, { "epoch": 0.5717078009218741, "grad_norm": 0.8796561362075194, "learning_rate": 4.086975957118682e-06, "loss": 0.1091, "step": 19597 }, { "epoch": 0.5717369741525177, "grad_norm": 0.8085861200201783, "learning_rate": 4.0865114703606675e-06, "loss": 0.1313, "step": 19598 }, { "epoch": 0.5717661473831612, "grad_norm": 0.9494960421333559, "learning_rate": 4.0860469917580625e-06, "loss": 0.1413, "step": 19599 }, { "epoch": 0.5717953206138048, "grad_norm": 1.016402525698968, "learning_rate": 4.085582521315011e-06, "loss": 0.1327, "step": 19600 }, { "epoch": 0.5718244938444483, "grad_norm": 0.8810885581902279, "learning_rate": 4.085118059035659e-06, "loss": 0.1316, "step": 19601 }, { "epoch": 0.5718536670750919, "grad_norm": 1.295599828780416, "learning_rate": 4.084653604924156e-06, "loss": 0.134, "step": 19602 }, { "epoch": 0.5718828403057354, "grad_norm": 0.8965368382451819, "learning_rate": 4.084189158984644e-06, "loss": 0.1147, "step": 19603 }, { "epoch": 0.571912013536379, "grad_norm": 0.7570945154397575, "learning_rate": 4.083724721221276e-06, "loss": 0.1389, "step": 19604 }, { "epoch": 0.5719411867670225, "grad_norm": 0.7008189567842064, "learning_rate": 4.083260291638194e-06, "loss": 0.1326, "step": 19605 }, { "epoch": 0.5719703599976661, "grad_norm": 0.8957014734114848, "learning_rate": 4.082795870239546e-06, "loss": 0.1096, "step": 19606 }, { "epoch": 0.5719995332283097, "grad_norm": 0.9645052959832401, "learning_rate": 4.082331457029477e-06, "loss": 0.1223, "step": 19607 }, { "epoch": 0.5720287064589533, "grad_norm": 0.8505067561797681, "learning_rate": 4.081867052012133e-06, "loss": 0.1306, "step": 19608 }, { "epoch": 0.5720578796895969, "grad_norm": 1.1036601142215465, "learning_rate": 4.081402655191661e-06, "loss": 0.1362, "step": 19609 }, { "epoch": 0.5720870529202404, "grad_norm": 1.0219819072813023, "learning_rate": 4.080938266572206e-06, "loss": 0.1426, "step": 19610 }, { "epoch": 0.572116226150884, "grad_norm": 0.7664604229633002, "learning_rate": 4.080473886157917e-06, "loss": 0.1237, "step": 19611 }, { "epoch": 0.5721453993815275, "grad_norm": 4.583120225789219, "learning_rate": 4.080009513952937e-06, "loss": 0.1405, "step": 19612 }, { "epoch": 0.5721745726121711, "grad_norm": 1.1148858841638805, "learning_rate": 4.079545149961411e-06, "loss": 0.1221, "step": 19613 }, { "epoch": 0.5722037458428146, "grad_norm": 1.1982993190446523, "learning_rate": 4.079080794187488e-06, "loss": 0.1311, "step": 19614 }, { "epoch": 0.5722329190734582, "grad_norm": 0.7716507717834155, "learning_rate": 4.078616446635309e-06, "loss": 0.1475, "step": 19615 }, { "epoch": 0.5722620923041017, "grad_norm": 1.3592832215293333, "learning_rate": 4.078152107309025e-06, "loss": 0.1188, "step": 19616 }, { "epoch": 0.5722912655347453, "grad_norm": 0.8898626691653115, "learning_rate": 4.0776877762127786e-06, "loss": 0.1348, "step": 19617 }, { "epoch": 0.5723204387653889, "grad_norm": 0.7912808512138472, "learning_rate": 4.077223453350715e-06, "loss": 0.1321, "step": 19618 }, { "epoch": 0.5723496119960324, "grad_norm": 0.7352657412409985, "learning_rate": 4.076759138726981e-06, "loss": 0.1377, "step": 19619 }, { "epoch": 0.572378785226676, "grad_norm": 1.0250168066845917, "learning_rate": 4.07629483234572e-06, "loss": 0.1192, "step": 19620 }, { "epoch": 0.5724079584573195, "grad_norm": 0.9129310045646087, "learning_rate": 4.075830534211077e-06, "loss": 0.1102, "step": 19621 }, { "epoch": 0.5724371316879632, "grad_norm": 1.0136528664078577, "learning_rate": 4.075366244327201e-06, "loss": 0.1046, "step": 19622 }, { "epoch": 0.5724663049186067, "grad_norm": 0.9626198268203121, "learning_rate": 4.074901962698233e-06, "loss": 0.1152, "step": 19623 }, { "epoch": 0.5724954781492503, "grad_norm": 0.9867479245628938, "learning_rate": 4.07443768932832e-06, "loss": 0.1452, "step": 19624 }, { "epoch": 0.5725246513798938, "grad_norm": 1.1981208406394535, "learning_rate": 4.073973424221606e-06, "loss": 0.1342, "step": 19625 }, { "epoch": 0.5725538246105374, "grad_norm": 1.103538368144145, "learning_rate": 4.073509167382237e-06, "loss": 0.1255, "step": 19626 }, { "epoch": 0.5725829978411809, "grad_norm": 0.9205918551145647, "learning_rate": 4.073044918814355e-06, "loss": 0.1307, "step": 19627 }, { "epoch": 0.5726121710718245, "grad_norm": 0.75994579522841, "learning_rate": 4.072580678522108e-06, "loss": 0.1203, "step": 19628 }, { "epoch": 0.572641344302468, "grad_norm": 0.7800455976184955, "learning_rate": 4.07211644650964e-06, "loss": 0.1274, "step": 19629 }, { "epoch": 0.5726705175331116, "grad_norm": 0.7479296977296234, "learning_rate": 4.071652222781095e-06, "loss": 0.1067, "step": 19630 }, { "epoch": 0.5726996907637552, "grad_norm": 0.935377227041055, "learning_rate": 4.071188007340616e-06, "loss": 0.144, "step": 19631 }, { "epoch": 0.5727288639943987, "grad_norm": 1.086846693480443, "learning_rate": 4.07072380019235e-06, "loss": 0.1249, "step": 19632 }, { "epoch": 0.5727580372250423, "grad_norm": 0.7920414969886806, "learning_rate": 4.070259601340438e-06, "loss": 0.1228, "step": 19633 }, { "epoch": 0.5727872104556858, "grad_norm": 0.9641235197837658, "learning_rate": 4.069795410789028e-06, "loss": 0.13, "step": 19634 }, { "epoch": 0.5728163836863295, "grad_norm": 1.4582623863303132, "learning_rate": 4.069331228542262e-06, "loss": 0.1367, "step": 19635 }, { "epoch": 0.572845556916973, "grad_norm": 0.7596637382779614, "learning_rate": 4.0688670546042846e-06, "loss": 0.1163, "step": 19636 }, { "epoch": 0.5728747301476166, "grad_norm": 0.9131849863806129, "learning_rate": 4.0684028889792414e-06, "loss": 0.1384, "step": 19637 }, { "epoch": 0.5729039033782601, "grad_norm": 0.9595573296767251, "learning_rate": 4.067938731671273e-06, "loss": 0.1303, "step": 19638 }, { "epoch": 0.5729330766089037, "grad_norm": 1.3266108150947125, "learning_rate": 4.0674745826845245e-06, "loss": 0.1196, "step": 19639 }, { "epoch": 0.5729622498395472, "grad_norm": 0.8504605757416285, "learning_rate": 4.0670104420231415e-06, "loss": 0.1297, "step": 19640 }, { "epoch": 0.5729914230701908, "grad_norm": 0.9627670484871081, "learning_rate": 4.066546309691267e-06, "loss": 0.1481, "step": 19641 }, { "epoch": 0.5730205963008344, "grad_norm": 0.8181036989347299, "learning_rate": 4.066082185693044e-06, "loss": 0.1216, "step": 19642 }, { "epoch": 0.5730497695314779, "grad_norm": 0.8190177665164526, "learning_rate": 4.065618070032616e-06, "loss": 0.144, "step": 19643 }, { "epoch": 0.5730789427621215, "grad_norm": 0.9405307383188206, "learning_rate": 4.065153962714128e-06, "loss": 0.1456, "step": 19644 }, { "epoch": 0.573108115992765, "grad_norm": 0.8718385256673635, "learning_rate": 4.064689863741718e-06, "loss": 0.1353, "step": 19645 }, { "epoch": 0.5731372892234086, "grad_norm": 0.8696890164201897, "learning_rate": 4.0642257731195386e-06, "loss": 0.1209, "step": 19646 }, { "epoch": 0.5731664624540521, "grad_norm": 0.9092921157919418, "learning_rate": 4.063761690851726e-06, "loss": 0.1263, "step": 19647 }, { "epoch": 0.5731956356846957, "grad_norm": 0.8009606291683495, "learning_rate": 4.063297616942425e-06, "loss": 0.1369, "step": 19648 }, { "epoch": 0.5732248089153393, "grad_norm": 0.7824878711572439, "learning_rate": 4.062833551395781e-06, "loss": 0.1216, "step": 19649 }, { "epoch": 0.5732539821459829, "grad_norm": 0.9747094430619917, "learning_rate": 4.062369494215935e-06, "loss": 0.1305, "step": 19650 }, { "epoch": 0.5732831553766264, "grad_norm": 0.7065736982856796, "learning_rate": 4.061905445407028e-06, "loss": 0.1564, "step": 19651 }, { "epoch": 0.57331232860727, "grad_norm": 0.9489454903107976, "learning_rate": 4.061441404973207e-06, "loss": 0.1394, "step": 19652 }, { "epoch": 0.5733415018379135, "grad_norm": 0.9276366532392195, "learning_rate": 4.0609773729186126e-06, "loss": 0.1237, "step": 19653 }, { "epoch": 0.5733706750685571, "grad_norm": 0.8067987872644518, "learning_rate": 4.060513349247389e-06, "loss": 0.1281, "step": 19654 }, { "epoch": 0.5733998482992007, "grad_norm": 0.7668286174008708, "learning_rate": 4.060049333963677e-06, "loss": 0.1068, "step": 19655 }, { "epoch": 0.5734290215298442, "grad_norm": 1.0409118486005096, "learning_rate": 4.059585327071622e-06, "loss": 0.1418, "step": 19656 }, { "epoch": 0.5734581947604878, "grad_norm": 0.7098214044664539, "learning_rate": 4.059121328575361e-06, "loss": 0.1323, "step": 19657 }, { "epoch": 0.5734873679911313, "grad_norm": 0.8026493491423402, "learning_rate": 4.058657338479043e-06, "loss": 0.1486, "step": 19658 }, { "epoch": 0.5735165412217749, "grad_norm": 0.9952965179770774, "learning_rate": 4.058193356786808e-06, "loss": 0.1272, "step": 19659 }, { "epoch": 0.5735457144524184, "grad_norm": 1.004014463737942, "learning_rate": 4.057729383502797e-06, "loss": 0.1364, "step": 19660 }, { "epoch": 0.573574887683062, "grad_norm": 0.9945880244164974, "learning_rate": 4.057265418631152e-06, "loss": 0.1146, "step": 19661 }, { "epoch": 0.5736040609137056, "grad_norm": 0.7428496129006911, "learning_rate": 4.056801462176018e-06, "loss": 0.1241, "step": 19662 }, { "epoch": 0.5736332341443492, "grad_norm": 0.9291305056916853, "learning_rate": 4.056337514141534e-06, "loss": 0.1445, "step": 19663 }, { "epoch": 0.5736624073749927, "grad_norm": 0.9821001367157365, "learning_rate": 4.055873574531844e-06, "loss": 0.1317, "step": 19664 }, { "epoch": 0.5736915806056363, "grad_norm": 0.7703487540931252, "learning_rate": 4.055409643351089e-06, "loss": 0.1415, "step": 19665 }, { "epoch": 0.5737207538362799, "grad_norm": 1.0669377839748413, "learning_rate": 4.054945720603412e-06, "loss": 0.1429, "step": 19666 }, { "epoch": 0.5737499270669234, "grad_norm": 0.7835812262511627, "learning_rate": 4.054481806292954e-06, "loss": 0.116, "step": 19667 }, { "epoch": 0.573779100297567, "grad_norm": 1.0901860140491364, "learning_rate": 4.054017900423857e-06, "loss": 0.1337, "step": 19668 }, { "epoch": 0.5738082735282105, "grad_norm": 0.7255252750423824, "learning_rate": 4.05355400300026e-06, "loss": 0.1477, "step": 19669 }, { "epoch": 0.5738374467588541, "grad_norm": 0.8568672255002934, "learning_rate": 4.0530901140263086e-06, "loss": 0.1453, "step": 19670 }, { "epoch": 0.5738666199894976, "grad_norm": 0.8489136617872457, "learning_rate": 4.052626233506144e-06, "loss": 0.1052, "step": 19671 }, { "epoch": 0.5738957932201412, "grad_norm": 0.8732673269822866, "learning_rate": 4.052162361443905e-06, "loss": 0.1258, "step": 19672 }, { "epoch": 0.5739249664507847, "grad_norm": 0.7741536434094117, "learning_rate": 4.051698497843733e-06, "loss": 0.1313, "step": 19673 }, { "epoch": 0.5739541396814283, "grad_norm": 0.906186888405836, "learning_rate": 4.0512346427097725e-06, "loss": 0.1226, "step": 19674 }, { "epoch": 0.5739833129120718, "grad_norm": 0.8287054882482325, "learning_rate": 4.05077079604616e-06, "loss": 0.1276, "step": 19675 }, { "epoch": 0.5740124861427155, "grad_norm": 0.7243895749605813, "learning_rate": 4.050306957857041e-06, "loss": 0.1492, "step": 19676 }, { "epoch": 0.574041659373359, "grad_norm": 0.7510583587606688, "learning_rate": 4.049843128146555e-06, "loss": 0.1217, "step": 19677 }, { "epoch": 0.5740708326040026, "grad_norm": 0.9605537640545965, "learning_rate": 4.0493793069188425e-06, "loss": 0.137, "step": 19678 }, { "epoch": 0.5741000058346462, "grad_norm": 0.6980832287768168, "learning_rate": 4.0489154941780455e-06, "loss": 0.1043, "step": 19679 }, { "epoch": 0.5741291790652897, "grad_norm": 0.9968242679545044, "learning_rate": 4.048451689928302e-06, "loss": 0.1273, "step": 19680 }, { "epoch": 0.5741583522959333, "grad_norm": 0.9241971501512688, "learning_rate": 4.047987894173755e-06, "loss": 0.107, "step": 19681 }, { "epoch": 0.5741875255265768, "grad_norm": 0.7666690507839959, "learning_rate": 4.047524106918545e-06, "loss": 0.1272, "step": 19682 }, { "epoch": 0.5742166987572204, "grad_norm": 0.8163723685752113, "learning_rate": 4.047060328166813e-06, "loss": 0.121, "step": 19683 }, { "epoch": 0.5742458719878639, "grad_norm": 0.7700777990182223, "learning_rate": 4.0465965579227e-06, "loss": 0.1152, "step": 19684 }, { "epoch": 0.5742750452185075, "grad_norm": 0.855520884905867, "learning_rate": 4.046132796190344e-06, "loss": 0.1389, "step": 19685 }, { "epoch": 0.574304218449151, "grad_norm": 0.7351823772848916, "learning_rate": 4.045669042973886e-06, "loss": 0.1194, "step": 19686 }, { "epoch": 0.5743333916797946, "grad_norm": 0.7907259457199423, "learning_rate": 4.045205298277466e-06, "loss": 0.1168, "step": 19687 }, { "epoch": 0.5743625649104381, "grad_norm": 0.8827805223309084, "learning_rate": 4.044741562105227e-06, "loss": 0.1407, "step": 19688 }, { "epoch": 0.5743917381410818, "grad_norm": 0.9773334181207157, "learning_rate": 4.044277834461308e-06, "loss": 0.1307, "step": 19689 }, { "epoch": 0.5744209113717254, "grad_norm": 0.812223302005619, "learning_rate": 4.043814115349848e-06, "loss": 0.1304, "step": 19690 }, { "epoch": 0.5744500846023689, "grad_norm": 1.0275950849839577, "learning_rate": 4.043350404774986e-06, "loss": 0.1404, "step": 19691 }, { "epoch": 0.5744792578330125, "grad_norm": 0.8937972180652621, "learning_rate": 4.042886702740865e-06, "loss": 0.1111, "step": 19692 }, { "epoch": 0.574508431063656, "grad_norm": 0.7054348300623637, "learning_rate": 4.042423009251622e-06, "loss": 0.1158, "step": 19693 }, { "epoch": 0.5745376042942996, "grad_norm": 0.9100222587938743, "learning_rate": 4.041959324311397e-06, "loss": 0.1319, "step": 19694 }, { "epoch": 0.5745667775249431, "grad_norm": 0.9097507057876928, "learning_rate": 4.041495647924331e-06, "loss": 0.1177, "step": 19695 }, { "epoch": 0.5745959507555867, "grad_norm": 0.7082270876472646, "learning_rate": 4.041031980094563e-06, "loss": 0.1385, "step": 19696 }, { "epoch": 0.5746251239862302, "grad_norm": 0.9263768877658136, "learning_rate": 4.040568320826234e-06, "loss": 0.1304, "step": 19697 }, { "epoch": 0.5746542972168738, "grad_norm": 1.0005896366809788, "learning_rate": 4.0401046701234795e-06, "loss": 0.1345, "step": 19698 }, { "epoch": 0.5746834704475173, "grad_norm": 0.9350469304411964, "learning_rate": 4.039641027990443e-06, "loss": 0.1477, "step": 19699 }, { "epoch": 0.5747126436781609, "grad_norm": 0.9217337418915221, "learning_rate": 4.039177394431262e-06, "loss": 0.1346, "step": 19700 }, { "epoch": 0.5747418169088044, "grad_norm": 0.7962343875316714, "learning_rate": 4.038713769450076e-06, "loss": 0.1296, "step": 19701 }, { "epoch": 0.574770990139448, "grad_norm": 0.8335662634199607, "learning_rate": 4.038250153051024e-06, "loss": 0.1381, "step": 19702 }, { "epoch": 0.5748001633700917, "grad_norm": 1.0278937183408707, "learning_rate": 4.0377865452382444e-06, "loss": 0.1231, "step": 19703 }, { "epoch": 0.5748293366007352, "grad_norm": 0.7782265105030279, "learning_rate": 4.037322946015876e-06, "loss": 0.1235, "step": 19704 }, { "epoch": 0.5748585098313788, "grad_norm": 0.8048237422203832, "learning_rate": 4.03685935538806e-06, "loss": 0.1368, "step": 19705 }, { "epoch": 0.5748876830620223, "grad_norm": 0.9199416165871753, "learning_rate": 4.036395773358934e-06, "loss": 0.1317, "step": 19706 }, { "epoch": 0.5749168562926659, "grad_norm": 0.799419880980573, "learning_rate": 4.035932199932636e-06, "loss": 0.1103, "step": 19707 }, { "epoch": 0.5749460295233094, "grad_norm": 1.0907092432324073, "learning_rate": 4.0354686351133055e-06, "loss": 0.1421, "step": 19708 }, { "epoch": 0.574975202753953, "grad_norm": 1.0981836394400115, "learning_rate": 4.035005078905081e-06, "loss": 0.121, "step": 19709 }, { "epoch": 0.5750043759845965, "grad_norm": 0.8183032746130807, "learning_rate": 4.034541531312099e-06, "loss": 0.1446, "step": 19710 }, { "epoch": 0.5750335492152401, "grad_norm": 0.8314492661481785, "learning_rate": 4.034077992338501e-06, "loss": 0.1396, "step": 19711 }, { "epoch": 0.5750627224458836, "grad_norm": 0.9401287982859894, "learning_rate": 4.0336144619884236e-06, "loss": 0.1409, "step": 19712 }, { "epoch": 0.5750918956765272, "grad_norm": 0.8138107409108649, "learning_rate": 4.0331509402660066e-06, "loss": 0.1176, "step": 19713 }, { "epoch": 0.5751210689071707, "grad_norm": 0.7815657198400805, "learning_rate": 4.032687427175387e-06, "loss": 0.1194, "step": 19714 }, { "epoch": 0.5751502421378143, "grad_norm": 0.9751414630695675, "learning_rate": 4.0322239227207025e-06, "loss": 0.155, "step": 19715 }, { "epoch": 0.575179415368458, "grad_norm": 0.752295778092462, "learning_rate": 4.031760426906091e-06, "loss": 0.1291, "step": 19716 }, { "epoch": 0.5752085885991015, "grad_norm": 0.9306219494970849, "learning_rate": 4.031296939735693e-06, "loss": 0.1291, "step": 19717 }, { "epoch": 0.5752377618297451, "grad_norm": 0.7988325175986996, "learning_rate": 4.0308334612136435e-06, "loss": 0.1179, "step": 19718 }, { "epoch": 0.5752669350603886, "grad_norm": 1.0465718626053495, "learning_rate": 4.030369991344083e-06, "loss": 0.1448, "step": 19719 }, { "epoch": 0.5752961082910322, "grad_norm": 0.8277484942582721, "learning_rate": 4.029906530131147e-06, "loss": 0.1443, "step": 19720 }, { "epoch": 0.5753252815216757, "grad_norm": 0.8403907840133241, "learning_rate": 4.0294430775789735e-06, "loss": 0.1201, "step": 19721 }, { "epoch": 0.5753544547523193, "grad_norm": 0.7523851084725544, "learning_rate": 4.028979633691699e-06, "loss": 0.1275, "step": 19722 }, { "epoch": 0.5753836279829628, "grad_norm": 0.742304283264885, "learning_rate": 4.028516198473465e-06, "loss": 0.1091, "step": 19723 }, { "epoch": 0.5754128012136064, "grad_norm": 0.8433274774611195, "learning_rate": 4.028052771928406e-06, "loss": 0.1223, "step": 19724 }, { "epoch": 0.5754419744442499, "grad_norm": 0.8413010352489224, "learning_rate": 4.027589354060659e-06, "loss": 0.1155, "step": 19725 }, { "epoch": 0.5754711476748935, "grad_norm": 0.854479311239559, "learning_rate": 4.027125944874364e-06, "loss": 0.1365, "step": 19726 }, { "epoch": 0.575500320905537, "grad_norm": 0.8284366003951291, "learning_rate": 4.0266625443736555e-06, "loss": 0.1311, "step": 19727 }, { "epoch": 0.5755294941361806, "grad_norm": 0.8834802623640456, "learning_rate": 4.0261991525626696e-06, "loss": 0.127, "step": 19728 }, { "epoch": 0.5755586673668242, "grad_norm": 0.6915509347636353, "learning_rate": 4.025735769445546e-06, "loss": 0.141, "step": 19729 }, { "epoch": 0.5755878405974678, "grad_norm": 0.96567692670703, "learning_rate": 4.025272395026421e-06, "loss": 0.1306, "step": 19730 }, { "epoch": 0.5756170138281114, "grad_norm": 1.0906606888868646, "learning_rate": 4.024809029309433e-06, "loss": 0.1569, "step": 19731 }, { "epoch": 0.5756461870587549, "grad_norm": 0.6957580102975305, "learning_rate": 4.024345672298716e-06, "loss": 0.1121, "step": 19732 }, { "epoch": 0.5756753602893985, "grad_norm": 0.7463641590409569, "learning_rate": 4.023882323998408e-06, "loss": 0.1072, "step": 19733 }, { "epoch": 0.575704533520042, "grad_norm": 0.8857683952020571, "learning_rate": 4.023418984412644e-06, "loss": 0.1393, "step": 19734 }, { "epoch": 0.5757337067506856, "grad_norm": 0.8309607274026838, "learning_rate": 4.022955653545563e-06, "loss": 0.1102, "step": 19735 }, { "epoch": 0.5757628799813291, "grad_norm": 1.0748862367884913, "learning_rate": 4.0224923314013025e-06, "loss": 0.1309, "step": 19736 }, { "epoch": 0.5757920532119727, "grad_norm": 0.8220081336407107, "learning_rate": 4.022029017983996e-06, "loss": 0.116, "step": 19737 }, { "epoch": 0.5758212264426162, "grad_norm": 0.7963877708342001, "learning_rate": 4.0215657132977806e-06, "loss": 0.135, "step": 19738 }, { "epoch": 0.5758503996732598, "grad_norm": 0.9767353315039462, "learning_rate": 4.021102417346794e-06, "loss": 0.1059, "step": 19739 }, { "epoch": 0.5758795729039033, "grad_norm": 1.1487649578395094, "learning_rate": 4.020639130135169e-06, "loss": 0.1413, "step": 19740 }, { "epoch": 0.5759087461345469, "grad_norm": 1.0001431320504899, "learning_rate": 4.020175851667047e-06, "loss": 0.1264, "step": 19741 }, { "epoch": 0.5759379193651905, "grad_norm": 0.8337775920306253, "learning_rate": 4.019712581946559e-06, "loss": 0.1492, "step": 19742 }, { "epoch": 0.5759670925958341, "grad_norm": 0.9615227523095687, "learning_rate": 4.019249320977844e-06, "loss": 0.1351, "step": 19743 }, { "epoch": 0.5759962658264777, "grad_norm": 0.8406770901161117, "learning_rate": 4.018786068765037e-06, "loss": 0.122, "step": 19744 }, { "epoch": 0.5760254390571212, "grad_norm": 1.0603003567825613, "learning_rate": 4.018322825312273e-06, "loss": 0.1402, "step": 19745 }, { "epoch": 0.5760546122877648, "grad_norm": 0.834167142924995, "learning_rate": 4.017859590623688e-06, "loss": 0.1243, "step": 19746 }, { "epoch": 0.5760837855184083, "grad_norm": 0.8171894066121539, "learning_rate": 4.01739636470342e-06, "loss": 0.1172, "step": 19747 }, { "epoch": 0.5761129587490519, "grad_norm": 1.0158128756718992, "learning_rate": 4.016933147555601e-06, "loss": 0.1216, "step": 19748 }, { "epoch": 0.5761421319796954, "grad_norm": 0.7361794378816956, "learning_rate": 4.01646993918437e-06, "loss": 0.1103, "step": 19749 }, { "epoch": 0.576171305210339, "grad_norm": 1.0196663055710706, "learning_rate": 4.016006739593859e-06, "loss": 0.1125, "step": 19750 }, { "epoch": 0.5762004784409825, "grad_norm": 0.8065011504438061, "learning_rate": 4.015543548788206e-06, "loss": 0.1121, "step": 19751 }, { "epoch": 0.5762296516716261, "grad_norm": 1.0641140286394402, "learning_rate": 4.015080366771543e-06, "loss": 0.121, "step": 19752 }, { "epoch": 0.5762588249022697, "grad_norm": 0.82255712865244, "learning_rate": 4.0146171935480105e-06, "loss": 0.1077, "step": 19753 }, { "epoch": 0.5762879981329132, "grad_norm": 0.7435633035051162, "learning_rate": 4.014154029121739e-06, "loss": 0.1296, "step": 19754 }, { "epoch": 0.5763171713635568, "grad_norm": 0.943976291171618, "learning_rate": 4.013690873496864e-06, "loss": 0.1329, "step": 19755 }, { "epoch": 0.5763463445942003, "grad_norm": 0.8523598476593407, "learning_rate": 4.013227726677524e-06, "loss": 0.1241, "step": 19756 }, { "epoch": 0.576375517824844, "grad_norm": 0.8075599513321965, "learning_rate": 4.01276458866785e-06, "loss": 0.101, "step": 19757 }, { "epoch": 0.5764046910554875, "grad_norm": 1.1751377581481242, "learning_rate": 4.012301459471976e-06, "loss": 0.127, "step": 19758 }, { "epoch": 0.5764338642861311, "grad_norm": 1.0134788032131126, "learning_rate": 4.011838339094041e-06, "loss": 0.1468, "step": 19759 }, { "epoch": 0.5764630375167746, "grad_norm": 0.7576605016473313, "learning_rate": 4.011375227538176e-06, "loss": 0.1254, "step": 19760 }, { "epoch": 0.5764922107474182, "grad_norm": 0.8019530711713517, "learning_rate": 4.0109121248085196e-06, "loss": 0.1349, "step": 19761 }, { "epoch": 0.5765213839780617, "grad_norm": 0.8733164061042527, "learning_rate": 4.010449030909202e-06, "loss": 0.1369, "step": 19762 }, { "epoch": 0.5765505572087053, "grad_norm": 1.1058875053330164, "learning_rate": 4.009985945844359e-06, "loss": 0.14, "step": 19763 }, { "epoch": 0.5765797304393488, "grad_norm": 0.7559257276682203, "learning_rate": 4.009522869618124e-06, "loss": 0.1101, "step": 19764 }, { "epoch": 0.5766089036699924, "grad_norm": 0.7963224403194995, "learning_rate": 4.009059802234633e-06, "loss": 0.1282, "step": 19765 }, { "epoch": 0.576638076900636, "grad_norm": 1.0287969571924067, "learning_rate": 4.008596743698022e-06, "loss": 0.1228, "step": 19766 }, { "epoch": 0.5766672501312795, "grad_norm": 1.0658700001395847, "learning_rate": 4.00813369401242e-06, "loss": 0.1485, "step": 19767 }, { "epoch": 0.5766964233619231, "grad_norm": 0.9506619467707396, "learning_rate": 4.007670653181965e-06, "loss": 0.121, "step": 19768 }, { "epoch": 0.5767255965925666, "grad_norm": 1.6668110885406686, "learning_rate": 4.00720762121079e-06, "loss": 0.1381, "step": 19769 }, { "epoch": 0.5767547698232102, "grad_norm": 1.105236162752622, "learning_rate": 4.006744598103025e-06, "loss": 0.119, "step": 19770 }, { "epoch": 0.5767839430538538, "grad_norm": 1.0975762739219408, "learning_rate": 4.00628158386281e-06, "loss": 0.1288, "step": 19771 }, { "epoch": 0.5768131162844974, "grad_norm": 0.9598289749935085, "learning_rate": 4.005818578494275e-06, "loss": 0.1288, "step": 19772 }, { "epoch": 0.5768422895151409, "grad_norm": 1.2119157824993114, "learning_rate": 4.005355582001555e-06, "loss": 0.135, "step": 19773 }, { "epoch": 0.5768714627457845, "grad_norm": 1.4749088699914519, "learning_rate": 4.0048925943887835e-06, "loss": 0.1347, "step": 19774 }, { "epoch": 0.576900635976428, "grad_norm": 1.125350444889824, "learning_rate": 4.004429615660092e-06, "loss": 0.138, "step": 19775 }, { "epoch": 0.5769298092070716, "grad_norm": 0.8212775273239675, "learning_rate": 4.003966645819615e-06, "loss": 0.1323, "step": 19776 }, { "epoch": 0.5769589824377152, "grad_norm": 1.1248482628024037, "learning_rate": 4.003503684871486e-06, "loss": 0.1376, "step": 19777 }, { "epoch": 0.5769881556683587, "grad_norm": 1.460473657981357, "learning_rate": 4.003040732819839e-06, "loss": 0.1257, "step": 19778 }, { "epoch": 0.5770173288990023, "grad_norm": 0.97374372816712, "learning_rate": 4.002577789668807e-06, "loss": 0.1224, "step": 19779 }, { "epoch": 0.5770465021296458, "grad_norm": 0.728353761558451, "learning_rate": 4.002114855422522e-06, "loss": 0.1182, "step": 19780 }, { "epoch": 0.5770756753602894, "grad_norm": 0.8199051259301026, "learning_rate": 4.001651930085117e-06, "loss": 0.1288, "step": 19781 }, { "epoch": 0.5771048485909329, "grad_norm": 0.9040974601117048, "learning_rate": 4.0011890136607236e-06, "loss": 0.1167, "step": 19782 }, { "epoch": 0.5771340218215765, "grad_norm": 0.9500231380617581, "learning_rate": 4.000726106153479e-06, "loss": 0.1563, "step": 19783 }, { "epoch": 0.5771631950522201, "grad_norm": 0.9962617092536757, "learning_rate": 4.000263207567512e-06, "loss": 0.1391, "step": 19784 }, { "epoch": 0.5771923682828637, "grad_norm": 0.8647576649946964, "learning_rate": 3.999800317906956e-06, "loss": 0.1415, "step": 19785 }, { "epoch": 0.5772215415135072, "grad_norm": 0.9644638617361798, "learning_rate": 3.999337437175946e-06, "loss": 0.1261, "step": 19786 }, { "epoch": 0.5772507147441508, "grad_norm": 1.0077523994658744, "learning_rate": 3.998874565378611e-06, "loss": 0.1204, "step": 19787 }, { "epoch": 0.5772798879747943, "grad_norm": 0.9483469434896953, "learning_rate": 3.998411702519083e-06, "loss": 0.1432, "step": 19788 }, { "epoch": 0.5773090612054379, "grad_norm": 0.6702345503281127, "learning_rate": 3.997948848601498e-06, "loss": 0.1091, "step": 19789 }, { "epoch": 0.5773382344360815, "grad_norm": 0.868699841119976, "learning_rate": 3.997486003629987e-06, "loss": 0.1255, "step": 19790 }, { "epoch": 0.577367407666725, "grad_norm": 0.9848451210062822, "learning_rate": 3.997023167608682e-06, "loss": 0.1401, "step": 19791 }, { "epoch": 0.5773965808973686, "grad_norm": 0.8777929448891568, "learning_rate": 3.996560340541714e-06, "loss": 0.1117, "step": 19792 }, { "epoch": 0.5774257541280121, "grad_norm": 0.7907585752637132, "learning_rate": 3.996097522433216e-06, "loss": 0.1235, "step": 19793 }, { "epoch": 0.5774549273586557, "grad_norm": 0.9023159792247771, "learning_rate": 3.995634713287317e-06, "loss": 0.1403, "step": 19794 }, { "epoch": 0.5774841005892992, "grad_norm": 0.8529545228579224, "learning_rate": 3.995171913108154e-06, "loss": 0.1048, "step": 19795 }, { "epoch": 0.5775132738199428, "grad_norm": 0.858297678135177, "learning_rate": 3.994709121899858e-06, "loss": 0.1578, "step": 19796 }, { "epoch": 0.5775424470505863, "grad_norm": 1.002419466038017, "learning_rate": 3.994246339666557e-06, "loss": 0.14, "step": 19797 }, { "epoch": 0.57757162028123, "grad_norm": 1.0851157227082862, "learning_rate": 3.993783566412384e-06, "loss": 0.1296, "step": 19798 }, { "epoch": 0.5776007935118735, "grad_norm": 0.9576563622048736, "learning_rate": 3.9933208021414725e-06, "loss": 0.1259, "step": 19799 }, { "epoch": 0.5776299667425171, "grad_norm": 1.0849308098215573, "learning_rate": 3.9928580468579495e-06, "loss": 0.142, "step": 19800 }, { "epoch": 0.5776591399731607, "grad_norm": 0.9446703660505869, "learning_rate": 3.992395300565953e-06, "loss": 0.1231, "step": 19801 }, { "epoch": 0.5776883132038042, "grad_norm": 1.1123231866657721, "learning_rate": 3.991932563269609e-06, "loss": 0.1571, "step": 19802 }, { "epoch": 0.5777174864344478, "grad_norm": 0.7499829305532338, "learning_rate": 3.991469834973051e-06, "loss": 0.0973, "step": 19803 }, { "epoch": 0.5777466596650913, "grad_norm": 0.7121285357367448, "learning_rate": 3.991007115680411e-06, "loss": 0.1288, "step": 19804 }, { "epoch": 0.5777758328957349, "grad_norm": 1.0362350184472247, "learning_rate": 3.990544405395817e-06, "loss": 0.1182, "step": 19805 }, { "epoch": 0.5778050061263784, "grad_norm": 2.8101228898403883, "learning_rate": 3.9900817041234e-06, "loss": 0.1465, "step": 19806 }, { "epoch": 0.577834179357022, "grad_norm": 0.9996293048392291, "learning_rate": 3.989619011867294e-06, "loss": 0.1161, "step": 19807 }, { "epoch": 0.5778633525876655, "grad_norm": 0.688654226170998, "learning_rate": 3.989156328631629e-06, "loss": 0.1117, "step": 19808 }, { "epoch": 0.5778925258183091, "grad_norm": 1.0035116856754207, "learning_rate": 3.9886936544205354e-06, "loss": 0.1198, "step": 19809 }, { "epoch": 0.5779216990489526, "grad_norm": 1.1308080666264633, "learning_rate": 3.988230989238142e-06, "loss": 0.1564, "step": 19810 }, { "epoch": 0.5779508722795963, "grad_norm": 0.8816919204467307, "learning_rate": 3.987768333088581e-06, "loss": 0.1276, "step": 19811 }, { "epoch": 0.5779800455102398, "grad_norm": 0.7839434874727919, "learning_rate": 3.987305685975982e-06, "loss": 0.1115, "step": 19812 }, { "epoch": 0.5780092187408834, "grad_norm": 1.1028254583540187, "learning_rate": 3.9868430479044775e-06, "loss": 0.1545, "step": 19813 }, { "epoch": 0.578038391971527, "grad_norm": 0.8066229557752148, "learning_rate": 3.9863804188781965e-06, "loss": 0.1058, "step": 19814 }, { "epoch": 0.5780675652021705, "grad_norm": 1.069983565154359, "learning_rate": 3.985917798901268e-06, "loss": 0.1703, "step": 19815 }, { "epoch": 0.5780967384328141, "grad_norm": 0.9194799344167955, "learning_rate": 3.985455187977825e-06, "loss": 0.1419, "step": 19816 }, { "epoch": 0.5781259116634576, "grad_norm": 0.6317555119245367, "learning_rate": 3.984992586111995e-06, "loss": 0.0971, "step": 19817 }, { "epoch": 0.5781550848941012, "grad_norm": 0.7721207344944646, "learning_rate": 3.984529993307907e-06, "loss": 0.1067, "step": 19818 }, { "epoch": 0.5781842581247447, "grad_norm": 2.79177121614945, "learning_rate": 3.984067409569694e-06, "loss": 0.1371, "step": 19819 }, { "epoch": 0.5782134313553883, "grad_norm": 1.0237265107365645, "learning_rate": 3.983604834901485e-06, "loss": 0.1339, "step": 19820 }, { "epoch": 0.5782426045860318, "grad_norm": 0.8966166671272845, "learning_rate": 3.983142269307411e-06, "loss": 0.1154, "step": 19821 }, { "epoch": 0.5782717778166754, "grad_norm": 0.8076250089700812, "learning_rate": 3.982679712791599e-06, "loss": 0.1393, "step": 19822 }, { "epoch": 0.5783009510473189, "grad_norm": 0.8761378344490751, "learning_rate": 3.982217165358179e-06, "loss": 0.1224, "step": 19823 }, { "epoch": 0.5783301242779625, "grad_norm": 1.0008965283306264, "learning_rate": 3.98175462701128e-06, "loss": 0.105, "step": 19824 }, { "epoch": 0.5783592975086062, "grad_norm": 0.7722542211462903, "learning_rate": 3.981292097755034e-06, "loss": 0.1454, "step": 19825 }, { "epoch": 0.5783884707392497, "grad_norm": 0.653741040135133, "learning_rate": 3.98082957759357e-06, "loss": 0.1124, "step": 19826 }, { "epoch": 0.5784176439698933, "grad_norm": 0.8022381275982864, "learning_rate": 3.980367066531015e-06, "loss": 0.1221, "step": 19827 }, { "epoch": 0.5784468172005368, "grad_norm": 0.7649152233724401, "learning_rate": 3.9799045645715e-06, "loss": 0.1011, "step": 19828 }, { "epoch": 0.5784759904311804, "grad_norm": 0.7485276808645908, "learning_rate": 3.979442071719154e-06, "loss": 0.126, "step": 19829 }, { "epoch": 0.5785051636618239, "grad_norm": 1.19238550015347, "learning_rate": 3.978979587978102e-06, "loss": 0.1185, "step": 19830 }, { "epoch": 0.5785343368924675, "grad_norm": 0.8311169837843856, "learning_rate": 3.978517113352481e-06, "loss": 0.1173, "step": 19831 }, { "epoch": 0.578563510123111, "grad_norm": 0.9373734781385317, "learning_rate": 3.978054647846413e-06, "loss": 0.1265, "step": 19832 }, { "epoch": 0.5785926833537546, "grad_norm": 0.7231410845994454, "learning_rate": 3.97759219146403e-06, "loss": 0.1215, "step": 19833 }, { "epoch": 0.5786218565843981, "grad_norm": 0.720753605115125, "learning_rate": 3.977129744209461e-06, "loss": 0.1323, "step": 19834 }, { "epoch": 0.5786510298150417, "grad_norm": 0.7621961911100231, "learning_rate": 3.976667306086831e-06, "loss": 0.123, "step": 19835 }, { "epoch": 0.5786802030456852, "grad_norm": 0.940698061770366, "learning_rate": 3.976204877100272e-06, "loss": 0.1346, "step": 19836 }, { "epoch": 0.5787093762763288, "grad_norm": 0.693390098201076, "learning_rate": 3.975742457253911e-06, "loss": 0.1208, "step": 19837 }, { "epoch": 0.5787385495069725, "grad_norm": 0.7374938419856453, "learning_rate": 3.975280046551877e-06, "loss": 0.1317, "step": 19838 }, { "epoch": 0.578767722737616, "grad_norm": 0.8774612806914714, "learning_rate": 3.9748176449983e-06, "loss": 0.1503, "step": 19839 }, { "epoch": 0.5787968959682596, "grad_norm": 0.734475246427361, "learning_rate": 3.974355252597304e-06, "loss": 0.1126, "step": 19840 }, { "epoch": 0.5788260691989031, "grad_norm": 0.886860910766383, "learning_rate": 3.973892869353021e-06, "loss": 0.1195, "step": 19841 }, { "epoch": 0.5788552424295467, "grad_norm": 1.041479930540893, "learning_rate": 3.973430495269576e-06, "loss": 0.131, "step": 19842 }, { "epoch": 0.5788844156601902, "grad_norm": 0.7043167579534947, "learning_rate": 3.9729681303510995e-06, "loss": 0.1039, "step": 19843 }, { "epoch": 0.5789135888908338, "grad_norm": 0.7283009365961199, "learning_rate": 3.972505774601718e-06, "loss": 0.1043, "step": 19844 }, { "epoch": 0.5789427621214773, "grad_norm": 1.1139644686340868, "learning_rate": 3.97204342802556e-06, "loss": 0.1422, "step": 19845 }, { "epoch": 0.5789719353521209, "grad_norm": 0.8158675132918699, "learning_rate": 3.971581090626754e-06, "loss": 0.1109, "step": 19846 }, { "epoch": 0.5790011085827644, "grad_norm": 0.939665905198642, "learning_rate": 3.971118762409425e-06, "loss": 0.1268, "step": 19847 }, { "epoch": 0.579030281813408, "grad_norm": 0.8274508180598745, "learning_rate": 3.970656443377701e-06, "loss": 0.1298, "step": 19848 }, { "epoch": 0.5790594550440515, "grad_norm": 1.0217875282158446, "learning_rate": 3.970194133535712e-06, "loss": 0.1423, "step": 19849 }, { "epoch": 0.5790886282746951, "grad_norm": 0.8823670079489304, "learning_rate": 3.9697318328875835e-06, "loss": 0.1324, "step": 19850 }, { "epoch": 0.5791178015053386, "grad_norm": 0.808542118756704, "learning_rate": 3.969269541437444e-06, "loss": 0.1325, "step": 19851 }, { "epoch": 0.5791469747359823, "grad_norm": 0.8857303744791257, "learning_rate": 3.96880725918942e-06, "loss": 0.1634, "step": 19852 }, { "epoch": 0.5791761479666259, "grad_norm": 0.6887971717287216, "learning_rate": 3.968344986147637e-06, "loss": 0.1265, "step": 19853 }, { "epoch": 0.5792053211972694, "grad_norm": 0.9153616050952632, "learning_rate": 3.967882722316224e-06, "loss": 0.1317, "step": 19854 }, { "epoch": 0.579234494427913, "grad_norm": 0.884300541454304, "learning_rate": 3.967420467699309e-06, "loss": 0.1263, "step": 19855 }, { "epoch": 0.5792636676585565, "grad_norm": 0.7977231072198944, "learning_rate": 3.9669582223010175e-06, "loss": 0.1207, "step": 19856 }, { "epoch": 0.5792928408892001, "grad_norm": 0.7568274865395707, "learning_rate": 3.966495986125476e-06, "loss": 0.1203, "step": 19857 }, { "epoch": 0.5793220141198436, "grad_norm": 0.7097346775430343, "learning_rate": 3.966033759176811e-06, "loss": 0.1226, "step": 19858 }, { "epoch": 0.5793511873504872, "grad_norm": 0.9743664825150998, "learning_rate": 3.965571541459153e-06, "loss": 0.1163, "step": 19859 }, { "epoch": 0.5793803605811307, "grad_norm": 0.7941952176074787, "learning_rate": 3.96510933297662e-06, "loss": 0.124, "step": 19860 }, { "epoch": 0.5794095338117743, "grad_norm": 1.0758974343281034, "learning_rate": 3.964647133733347e-06, "loss": 0.1318, "step": 19861 }, { "epoch": 0.5794387070424178, "grad_norm": 0.9089754466127158, "learning_rate": 3.964184943733457e-06, "loss": 0.1609, "step": 19862 }, { "epoch": 0.5794678802730614, "grad_norm": 0.8361578672702864, "learning_rate": 3.963722762981076e-06, "loss": 0.1447, "step": 19863 }, { "epoch": 0.579497053503705, "grad_norm": 0.7861331954177537, "learning_rate": 3.963260591480332e-06, "loss": 0.1047, "step": 19864 }, { "epoch": 0.5795262267343486, "grad_norm": 0.8176470971516183, "learning_rate": 3.962798429235349e-06, "loss": 0.1518, "step": 19865 }, { "epoch": 0.5795553999649922, "grad_norm": 0.7700184030086775, "learning_rate": 3.9623362762502525e-06, "loss": 0.1481, "step": 19866 }, { "epoch": 0.5795845731956357, "grad_norm": 0.8709540450684102, "learning_rate": 3.961874132529172e-06, "loss": 0.1272, "step": 19867 }, { "epoch": 0.5796137464262793, "grad_norm": 0.8419206805683189, "learning_rate": 3.961411998076231e-06, "loss": 0.1287, "step": 19868 }, { "epoch": 0.5796429196569228, "grad_norm": 0.7653976739221687, "learning_rate": 3.960949872895556e-06, "loss": 0.1256, "step": 19869 }, { "epoch": 0.5796720928875664, "grad_norm": 0.8034744332979992, "learning_rate": 3.960487756991272e-06, "loss": 0.12, "step": 19870 }, { "epoch": 0.5797012661182099, "grad_norm": 0.8200857262003038, "learning_rate": 3.9600256503675054e-06, "loss": 0.147, "step": 19871 }, { "epoch": 0.5797304393488535, "grad_norm": 0.7411616313533512, "learning_rate": 3.95956355302838e-06, "loss": 0.1353, "step": 19872 }, { "epoch": 0.579759612579497, "grad_norm": 0.6780823267716775, "learning_rate": 3.959101464978026e-06, "loss": 0.1358, "step": 19873 }, { "epoch": 0.5797887858101406, "grad_norm": 0.9806931616582027, "learning_rate": 3.958639386220564e-06, "loss": 0.1378, "step": 19874 }, { "epoch": 0.5798179590407841, "grad_norm": 0.8598138366783364, "learning_rate": 3.9581773167601205e-06, "loss": 0.1326, "step": 19875 }, { "epoch": 0.5798471322714277, "grad_norm": 0.7139813517210786, "learning_rate": 3.957715256600822e-06, "loss": 0.142, "step": 19876 }, { "epoch": 0.5798763055020713, "grad_norm": 0.7566984482264986, "learning_rate": 3.957253205746793e-06, "loss": 0.143, "step": 19877 }, { "epoch": 0.5799054787327148, "grad_norm": 0.7622060336518932, "learning_rate": 3.956791164202158e-06, "loss": 0.116, "step": 19878 }, { "epoch": 0.5799346519633585, "grad_norm": 0.7702675669435379, "learning_rate": 3.9563291319710416e-06, "loss": 0.1187, "step": 19879 }, { "epoch": 0.579963825194002, "grad_norm": 0.7526465653630454, "learning_rate": 3.95586710905757e-06, "loss": 0.1215, "step": 19880 }, { "epoch": 0.5799929984246456, "grad_norm": 0.7270363122337156, "learning_rate": 3.955405095465869e-06, "loss": 0.1308, "step": 19881 }, { "epoch": 0.5800221716552891, "grad_norm": 0.7641247848398072, "learning_rate": 3.9549430912000605e-06, "loss": 0.1318, "step": 19882 }, { "epoch": 0.5800513448859327, "grad_norm": 0.9114894132623551, "learning_rate": 3.954481096264272e-06, "loss": 0.1183, "step": 19883 }, { "epoch": 0.5800805181165762, "grad_norm": 0.9135341903234376, "learning_rate": 3.954019110662624e-06, "loss": 0.1168, "step": 19884 }, { "epoch": 0.5801096913472198, "grad_norm": 0.7507602960813876, "learning_rate": 3.953557134399245e-06, "loss": 0.108, "step": 19885 }, { "epoch": 0.5801388645778633, "grad_norm": 0.755358982570575, "learning_rate": 3.95309516747826e-06, "loss": 0.1369, "step": 19886 }, { "epoch": 0.5801680378085069, "grad_norm": 0.7412791489904513, "learning_rate": 3.95263320990379e-06, "loss": 0.1288, "step": 19887 }, { "epoch": 0.5801972110391505, "grad_norm": 0.9177539700374737, "learning_rate": 3.95217126167996e-06, "loss": 0.1324, "step": 19888 }, { "epoch": 0.580226384269794, "grad_norm": 0.721199355245318, "learning_rate": 3.951709322810896e-06, "loss": 0.108, "step": 19889 }, { "epoch": 0.5802555575004376, "grad_norm": 0.9920530278610725, "learning_rate": 3.9512473933007185e-06, "loss": 0.1269, "step": 19890 }, { "epoch": 0.5802847307310811, "grad_norm": 0.9430818114541443, "learning_rate": 3.950785473153557e-06, "loss": 0.1365, "step": 19891 }, { "epoch": 0.5803139039617248, "grad_norm": 0.8576843877655337, "learning_rate": 3.950323562373531e-06, "loss": 0.1185, "step": 19892 }, { "epoch": 0.5803430771923683, "grad_norm": 0.7132914824219657, "learning_rate": 3.949861660964766e-06, "loss": 0.1158, "step": 19893 }, { "epoch": 0.5803722504230119, "grad_norm": 0.9576143100258052, "learning_rate": 3.949399768931386e-06, "loss": 0.1146, "step": 19894 }, { "epoch": 0.5804014236536554, "grad_norm": 0.7590347442726993, "learning_rate": 3.948937886277511e-06, "loss": 0.1212, "step": 19895 }, { "epoch": 0.580430596884299, "grad_norm": 0.7955627615021786, "learning_rate": 3.948476013007271e-06, "loss": 0.1384, "step": 19896 }, { "epoch": 0.5804597701149425, "grad_norm": 0.794198650050654, "learning_rate": 3.948014149124785e-06, "loss": 0.1275, "step": 19897 }, { "epoch": 0.5804889433455861, "grad_norm": 0.8803440218187136, "learning_rate": 3.947552294634177e-06, "loss": 0.1427, "step": 19898 }, { "epoch": 0.5805181165762296, "grad_norm": 1.1473174799434247, "learning_rate": 3.947090449539573e-06, "loss": 0.1209, "step": 19899 }, { "epoch": 0.5805472898068732, "grad_norm": 0.7649924331445636, "learning_rate": 3.946628613845092e-06, "loss": 0.1184, "step": 19900 }, { "epoch": 0.5805764630375168, "grad_norm": 0.8568457914856835, "learning_rate": 3.9461667875548594e-06, "loss": 0.1421, "step": 19901 }, { "epoch": 0.5806056362681603, "grad_norm": 0.9987021030237287, "learning_rate": 3.945704970672998e-06, "loss": 0.1465, "step": 19902 }, { "epoch": 0.5806348094988039, "grad_norm": 0.7834915474880942, "learning_rate": 3.9452431632036326e-06, "loss": 0.1352, "step": 19903 }, { "epoch": 0.5806639827294474, "grad_norm": 0.9804178230128983, "learning_rate": 3.944781365150883e-06, "loss": 0.1137, "step": 19904 }, { "epoch": 0.580693155960091, "grad_norm": 0.7419456224991688, "learning_rate": 3.944319576518874e-06, "loss": 0.1191, "step": 19905 }, { "epoch": 0.5807223291907346, "grad_norm": 0.8857004541264865, "learning_rate": 3.943857797311729e-06, "loss": 0.149, "step": 19906 }, { "epoch": 0.5807515024213782, "grad_norm": 0.8853359666038874, "learning_rate": 3.943396027533566e-06, "loss": 0.1426, "step": 19907 }, { "epoch": 0.5807806756520217, "grad_norm": 0.7817563314982279, "learning_rate": 3.942934267188514e-06, "loss": 0.1339, "step": 19908 }, { "epoch": 0.5808098488826653, "grad_norm": 1.0440011384952868, "learning_rate": 3.942472516280691e-06, "loss": 0.1477, "step": 19909 }, { "epoch": 0.5808390221133088, "grad_norm": 0.7198382432564843, "learning_rate": 3.942010774814222e-06, "loss": 0.1496, "step": 19910 }, { "epoch": 0.5808681953439524, "grad_norm": 0.9719886487899805, "learning_rate": 3.941549042793229e-06, "loss": 0.1397, "step": 19911 }, { "epoch": 0.580897368574596, "grad_norm": 1.0668822065325398, "learning_rate": 3.941087320221832e-06, "loss": 0.1228, "step": 19912 }, { "epoch": 0.5809265418052395, "grad_norm": 0.8799220268546974, "learning_rate": 3.940625607104154e-06, "loss": 0.1179, "step": 19913 }, { "epoch": 0.5809557150358831, "grad_norm": 1.131297759725469, "learning_rate": 3.940163903444319e-06, "loss": 0.1092, "step": 19914 }, { "epoch": 0.5809848882665266, "grad_norm": 0.9415100809001666, "learning_rate": 3.939702209246446e-06, "loss": 0.1262, "step": 19915 }, { "epoch": 0.5810140614971702, "grad_norm": 1.1233275530287412, "learning_rate": 3.939240524514662e-06, "loss": 0.1227, "step": 19916 }, { "epoch": 0.5810432347278137, "grad_norm": 0.7489122847542726, "learning_rate": 3.9387788492530826e-06, "loss": 0.1117, "step": 19917 }, { "epoch": 0.5810724079584573, "grad_norm": 0.6935638746989583, "learning_rate": 3.938317183465833e-06, "loss": 0.088, "step": 19918 }, { "epoch": 0.5811015811891009, "grad_norm": 1.0277083187623963, "learning_rate": 3.937855527157033e-06, "loss": 0.1526, "step": 19919 }, { "epoch": 0.5811307544197445, "grad_norm": 1.0545003707528808, "learning_rate": 3.937393880330806e-06, "loss": 0.1373, "step": 19920 }, { "epoch": 0.581159927650388, "grad_norm": 0.7525256930274601, "learning_rate": 3.9369322429912736e-06, "loss": 0.1257, "step": 19921 }, { "epoch": 0.5811891008810316, "grad_norm": 0.7158008025312903, "learning_rate": 3.936470615142557e-06, "loss": 0.1226, "step": 19922 }, { "epoch": 0.5812182741116751, "grad_norm": 0.8203859749882335, "learning_rate": 3.936008996788775e-06, "loss": 0.1379, "step": 19923 }, { "epoch": 0.5812474473423187, "grad_norm": 1.0118663819334237, "learning_rate": 3.935547387934052e-06, "loss": 0.1094, "step": 19924 }, { "epoch": 0.5812766205729623, "grad_norm": 0.8037094045417538, "learning_rate": 3.935085788582506e-06, "loss": 0.1192, "step": 19925 }, { "epoch": 0.5813057938036058, "grad_norm": 0.7028004467414166, "learning_rate": 3.9346241987382615e-06, "loss": 0.1217, "step": 19926 }, { "epoch": 0.5813349670342494, "grad_norm": 0.7599614576893323, "learning_rate": 3.9341626184054375e-06, "loss": 0.1378, "step": 19927 }, { "epoch": 0.5813641402648929, "grad_norm": 1.0005815611395625, "learning_rate": 3.9337010475881545e-06, "loss": 0.1408, "step": 19928 }, { "epoch": 0.5813933134955365, "grad_norm": 0.7408872546255835, "learning_rate": 3.933239486290536e-06, "loss": 0.1369, "step": 19929 }, { "epoch": 0.58142248672618, "grad_norm": 0.8496162025289706, "learning_rate": 3.932777934516699e-06, "loss": 0.1431, "step": 19930 }, { "epoch": 0.5814516599568236, "grad_norm": 0.7909946780810766, "learning_rate": 3.932316392270765e-06, "loss": 0.1363, "step": 19931 }, { "epoch": 0.5814808331874671, "grad_norm": 0.8744586560589046, "learning_rate": 3.931854859556857e-06, "loss": 0.1303, "step": 19932 }, { "epoch": 0.5815100064181108, "grad_norm": 0.7510692926728375, "learning_rate": 3.931393336379094e-06, "loss": 0.1408, "step": 19933 }, { "epoch": 0.5815391796487543, "grad_norm": 0.6833111703741482, "learning_rate": 3.930931822741596e-06, "loss": 0.1371, "step": 19934 }, { "epoch": 0.5815683528793979, "grad_norm": 0.9106610941026719, "learning_rate": 3.9304703186484825e-06, "loss": 0.1399, "step": 19935 }, { "epoch": 0.5815975261100415, "grad_norm": 0.7618252838441384, "learning_rate": 3.930008824103876e-06, "loss": 0.1341, "step": 19936 }, { "epoch": 0.581626699340685, "grad_norm": 0.893523887261781, "learning_rate": 3.929547339111892e-06, "loss": 0.1183, "step": 19937 }, { "epoch": 0.5816558725713286, "grad_norm": 0.8366860430902492, "learning_rate": 3.9290858636766585e-06, "loss": 0.1307, "step": 19938 }, { "epoch": 0.5816850458019721, "grad_norm": 0.7336512921920025, "learning_rate": 3.928624397802288e-06, "loss": 0.1271, "step": 19939 }, { "epoch": 0.5817142190326157, "grad_norm": 0.7648675122049943, "learning_rate": 3.928162941492904e-06, "loss": 0.1225, "step": 19940 }, { "epoch": 0.5817433922632592, "grad_norm": 0.7354919159711172, "learning_rate": 3.927701494752626e-06, "loss": 0.1223, "step": 19941 }, { "epoch": 0.5817725654939028, "grad_norm": 0.6273991668830573, "learning_rate": 3.927240057585573e-06, "loss": 0.1085, "step": 19942 }, { "epoch": 0.5818017387245463, "grad_norm": 0.6459800492029086, "learning_rate": 3.926778629995862e-06, "loss": 0.1314, "step": 19943 }, { "epoch": 0.5818309119551899, "grad_norm": 0.7482739992861783, "learning_rate": 3.9263172119876166e-06, "loss": 0.1223, "step": 19944 }, { "epoch": 0.5818600851858334, "grad_norm": 0.8085054685461269, "learning_rate": 3.9258558035649556e-06, "loss": 0.1402, "step": 19945 }, { "epoch": 0.5818892584164771, "grad_norm": 0.6800301775320966, "learning_rate": 3.925394404731998e-06, "loss": 0.122, "step": 19946 }, { "epoch": 0.5819184316471206, "grad_norm": 0.7043598093254362, "learning_rate": 3.9249330154928625e-06, "loss": 0.1174, "step": 19947 }, { "epoch": 0.5819476048777642, "grad_norm": 0.7787487097948912, "learning_rate": 3.924471635851667e-06, "loss": 0.1071, "step": 19948 }, { "epoch": 0.5819767781084078, "grad_norm": 0.7955993349512235, "learning_rate": 3.924010265812532e-06, "loss": 0.1036, "step": 19949 }, { "epoch": 0.5820059513390513, "grad_norm": 0.8019184844104764, "learning_rate": 3.923548905379577e-06, "loss": 0.1464, "step": 19950 }, { "epoch": 0.5820351245696949, "grad_norm": 0.9414282098573326, "learning_rate": 3.923087554556922e-06, "loss": 0.1165, "step": 19951 }, { "epoch": 0.5820642978003384, "grad_norm": 0.7954506286146843, "learning_rate": 3.9226262133486824e-06, "loss": 0.107, "step": 19952 }, { "epoch": 0.582093471030982, "grad_norm": 0.7379893339804852, "learning_rate": 3.922164881758979e-06, "loss": 0.1326, "step": 19953 }, { "epoch": 0.5821226442616255, "grad_norm": 0.929430262482613, "learning_rate": 3.921703559791932e-06, "loss": 0.1403, "step": 19954 }, { "epoch": 0.5821518174922691, "grad_norm": 0.943504225127908, "learning_rate": 3.921242247451654e-06, "loss": 0.1331, "step": 19955 }, { "epoch": 0.5821809907229126, "grad_norm": 0.7250821337711371, "learning_rate": 3.920780944742272e-06, "loss": 0.1208, "step": 19956 }, { "epoch": 0.5822101639535562, "grad_norm": 0.8756263308111629, "learning_rate": 3.920319651667898e-06, "loss": 0.1127, "step": 19957 }, { "epoch": 0.5822393371841997, "grad_norm": 0.7321426688668755, "learning_rate": 3.919858368232653e-06, "loss": 0.1482, "step": 19958 }, { "epoch": 0.5822685104148433, "grad_norm": 0.7992597580466819, "learning_rate": 3.919397094440655e-06, "loss": 0.1094, "step": 19959 }, { "epoch": 0.582297683645487, "grad_norm": 0.9162254415320166, "learning_rate": 3.9189358302960215e-06, "loss": 0.1248, "step": 19960 }, { "epoch": 0.5823268568761305, "grad_norm": 0.7029670268063505, "learning_rate": 3.91847457580287e-06, "loss": 0.1121, "step": 19961 }, { "epoch": 0.5823560301067741, "grad_norm": 0.8335174589105804, "learning_rate": 3.91801333096532e-06, "loss": 0.136, "step": 19962 }, { "epoch": 0.5823852033374176, "grad_norm": 0.9310187708467518, "learning_rate": 3.917552095787489e-06, "loss": 0.1252, "step": 19963 }, { "epoch": 0.5824143765680612, "grad_norm": 0.7710969883012689, "learning_rate": 3.9170908702734945e-06, "loss": 0.1256, "step": 19964 }, { "epoch": 0.5824435497987047, "grad_norm": 0.9074498699170812, "learning_rate": 3.916629654427454e-06, "loss": 0.1324, "step": 19965 }, { "epoch": 0.5824727230293483, "grad_norm": 1.0343579747381952, "learning_rate": 3.916168448253485e-06, "loss": 0.1456, "step": 19966 }, { "epoch": 0.5825018962599918, "grad_norm": 0.9765773529326656, "learning_rate": 3.915707251755704e-06, "loss": 0.129, "step": 19967 }, { "epoch": 0.5825310694906354, "grad_norm": 0.8459793182395423, "learning_rate": 3.915246064938233e-06, "loss": 0.1153, "step": 19968 }, { "epoch": 0.5825602427212789, "grad_norm": 0.7714183250959891, "learning_rate": 3.9147848878051845e-06, "loss": 0.1139, "step": 19969 }, { "epoch": 0.5825894159519225, "grad_norm": 0.970932464693246, "learning_rate": 3.914323720360677e-06, "loss": 0.1081, "step": 19970 }, { "epoch": 0.582618589182566, "grad_norm": 1.3982589816857298, "learning_rate": 3.91386256260883e-06, "loss": 0.1436, "step": 19971 }, { "epoch": 0.5826477624132096, "grad_norm": 0.8520444859948432, "learning_rate": 3.913401414553757e-06, "loss": 0.1153, "step": 19972 }, { "epoch": 0.5826769356438533, "grad_norm": 0.8259940838057555, "learning_rate": 3.9129402761995765e-06, "loss": 0.1334, "step": 19973 }, { "epoch": 0.5827061088744968, "grad_norm": 0.9727286286622902, "learning_rate": 3.912479147550406e-06, "loss": 0.1415, "step": 19974 }, { "epoch": 0.5827352821051404, "grad_norm": 0.998387208620422, "learning_rate": 3.912018028610362e-06, "loss": 0.12, "step": 19975 }, { "epoch": 0.5827644553357839, "grad_norm": 0.8039708771766947, "learning_rate": 3.911556919383563e-06, "loss": 0.1429, "step": 19976 }, { "epoch": 0.5827936285664275, "grad_norm": 0.7643786701800359, "learning_rate": 3.911095819874123e-06, "loss": 0.146, "step": 19977 }, { "epoch": 0.582822801797071, "grad_norm": 1.0885862466447291, "learning_rate": 3.910634730086159e-06, "loss": 0.1229, "step": 19978 }, { "epoch": 0.5828519750277146, "grad_norm": 0.8779849257664843, "learning_rate": 3.910173650023787e-06, "loss": 0.1146, "step": 19979 }, { "epoch": 0.5828811482583581, "grad_norm": 0.9798843494074476, "learning_rate": 3.909712579691126e-06, "loss": 0.1373, "step": 19980 }, { "epoch": 0.5829103214890017, "grad_norm": 0.9199995043051882, "learning_rate": 3.909251519092292e-06, "loss": 0.1625, "step": 19981 }, { "epoch": 0.5829394947196452, "grad_norm": 0.9244682248536849, "learning_rate": 3.908790468231398e-06, "loss": 0.1235, "step": 19982 }, { "epoch": 0.5829686679502888, "grad_norm": 0.6955839158370702, "learning_rate": 3.9083294271125635e-06, "loss": 0.127, "step": 19983 }, { "epoch": 0.5829978411809323, "grad_norm": 0.8609939111882231, "learning_rate": 3.907868395739904e-06, "loss": 0.1421, "step": 19984 }, { "epoch": 0.5830270144115759, "grad_norm": 1.751689791296209, "learning_rate": 3.907407374117531e-06, "loss": 0.1224, "step": 19985 }, { "epoch": 0.5830561876422194, "grad_norm": 0.7213866183718433, "learning_rate": 3.906946362249567e-06, "loss": 0.1072, "step": 19986 }, { "epoch": 0.5830853608728631, "grad_norm": 0.6711112849067568, "learning_rate": 3.9064853601401255e-06, "loss": 0.125, "step": 19987 }, { "epoch": 0.5831145341035067, "grad_norm": 0.8294815817653309, "learning_rate": 3.90602436779332e-06, "loss": 0.1147, "step": 19988 }, { "epoch": 0.5831437073341502, "grad_norm": 1.0764632780837011, "learning_rate": 3.90556338521327e-06, "loss": 0.0988, "step": 19989 }, { "epoch": 0.5831728805647938, "grad_norm": 0.7551939346681211, "learning_rate": 3.905102412404087e-06, "loss": 0.1312, "step": 19990 }, { "epoch": 0.5832020537954373, "grad_norm": 0.6482365702037921, "learning_rate": 3.904641449369887e-06, "loss": 0.1206, "step": 19991 }, { "epoch": 0.5832312270260809, "grad_norm": 0.8071205374283903, "learning_rate": 3.904180496114789e-06, "loss": 0.1345, "step": 19992 }, { "epoch": 0.5832604002567244, "grad_norm": 0.7723152671545592, "learning_rate": 3.903719552642906e-06, "loss": 0.1, "step": 19993 }, { "epoch": 0.583289573487368, "grad_norm": 0.9115210649858577, "learning_rate": 3.9032586189583525e-06, "loss": 0.1317, "step": 19994 }, { "epoch": 0.5833187467180115, "grad_norm": 0.7362211880106496, "learning_rate": 3.902797695065244e-06, "loss": 0.1253, "step": 19995 }, { "epoch": 0.5833479199486551, "grad_norm": 0.8636060424256389, "learning_rate": 3.902336780967697e-06, "loss": 0.1338, "step": 19996 }, { "epoch": 0.5833770931792986, "grad_norm": 0.7829876879955818, "learning_rate": 3.901875876669822e-06, "loss": 0.1127, "step": 19997 }, { "epoch": 0.5834062664099422, "grad_norm": 0.7388444336577626, "learning_rate": 3.90141498217574e-06, "loss": 0.1361, "step": 19998 }, { "epoch": 0.5834354396405858, "grad_norm": 0.8067589149653539, "learning_rate": 3.900954097489562e-06, "loss": 0.1207, "step": 19999 }, { "epoch": 0.5834646128712293, "grad_norm": 0.7230169779803474, "learning_rate": 3.900493222615403e-06, "loss": 0.1104, "step": 20000 }, { "epoch": 0.583493786101873, "grad_norm": 0.877173890972481, "learning_rate": 3.900032357557379e-06, "loss": 0.1243, "step": 20001 }, { "epoch": 0.5835229593325165, "grad_norm": 0.9922213644489261, "learning_rate": 3.899571502319603e-06, "loss": 0.1247, "step": 20002 }, { "epoch": 0.5835521325631601, "grad_norm": 0.9334965449708817, "learning_rate": 3.899110656906189e-06, "loss": 0.1386, "step": 20003 }, { "epoch": 0.5835813057938036, "grad_norm": 1.0450135546821684, "learning_rate": 3.898649821321253e-06, "loss": 0.126, "step": 20004 }, { "epoch": 0.5836104790244472, "grad_norm": 1.0209668161040246, "learning_rate": 3.898188995568908e-06, "loss": 0.1247, "step": 20005 }, { "epoch": 0.5836396522550907, "grad_norm": 1.007674244420397, "learning_rate": 3.8977281796532706e-06, "loss": 0.1202, "step": 20006 }, { "epoch": 0.5836688254857343, "grad_norm": 0.9367676131305241, "learning_rate": 3.8972673735784516e-06, "loss": 0.12, "step": 20007 }, { "epoch": 0.5836979987163778, "grad_norm": 1.0124107800044146, "learning_rate": 3.896806577348566e-06, "loss": 0.1151, "step": 20008 }, { "epoch": 0.5837271719470214, "grad_norm": 0.9649067586170705, "learning_rate": 3.896345790967726e-06, "loss": 0.1217, "step": 20009 }, { "epoch": 0.583756345177665, "grad_norm": 0.7424227106621085, "learning_rate": 3.89588501444005e-06, "loss": 0.1095, "step": 20010 }, { "epoch": 0.5837855184083085, "grad_norm": 0.8772521777826231, "learning_rate": 3.895424247769649e-06, "loss": 0.1238, "step": 20011 }, { "epoch": 0.583814691638952, "grad_norm": 0.8659507469262135, "learning_rate": 3.8949634909606365e-06, "loss": 0.1365, "step": 20012 }, { "epoch": 0.5838438648695956, "grad_norm": 1.0415473076502995, "learning_rate": 3.894502744017126e-06, "loss": 0.1318, "step": 20013 }, { "epoch": 0.5838730381002393, "grad_norm": 0.7313353263417107, "learning_rate": 3.894042006943231e-06, "loss": 0.1189, "step": 20014 }, { "epoch": 0.5839022113308828, "grad_norm": 0.9722540423042687, "learning_rate": 3.893581279743064e-06, "loss": 0.1349, "step": 20015 }, { "epoch": 0.5839313845615264, "grad_norm": 0.850733822255877, "learning_rate": 3.89312056242074e-06, "loss": 0.1171, "step": 20016 }, { "epoch": 0.5839605577921699, "grad_norm": 1.5434614652881848, "learning_rate": 3.892659854980371e-06, "loss": 0.1322, "step": 20017 }, { "epoch": 0.5839897310228135, "grad_norm": 1.5228709237166698, "learning_rate": 3.892199157426071e-06, "loss": 0.1516, "step": 20018 }, { "epoch": 0.584018904253457, "grad_norm": 1.2995402203326651, "learning_rate": 3.891738469761953e-06, "loss": 0.1332, "step": 20019 }, { "epoch": 0.5840480774841006, "grad_norm": 1.4594074075054007, "learning_rate": 3.891277791992129e-06, "loss": 0.1363, "step": 20020 }, { "epoch": 0.5840772507147441, "grad_norm": 0.6573844072446806, "learning_rate": 3.890817124120711e-06, "loss": 0.1178, "step": 20021 }, { "epoch": 0.5841064239453877, "grad_norm": 0.8124837233294876, "learning_rate": 3.890356466151813e-06, "loss": 0.1315, "step": 20022 }, { "epoch": 0.5841355971760313, "grad_norm": 0.9847557728491084, "learning_rate": 3.889895818089549e-06, "loss": 0.1227, "step": 20023 }, { "epoch": 0.5841647704066748, "grad_norm": 1.2453805876828943, "learning_rate": 3.889435179938029e-06, "loss": 0.1255, "step": 20024 }, { "epoch": 0.5841939436373184, "grad_norm": 0.7276105532760212, "learning_rate": 3.888974551701368e-06, "loss": 0.1197, "step": 20025 }, { "epoch": 0.5842231168679619, "grad_norm": 0.6886842346855506, "learning_rate": 3.888513933383676e-06, "loss": 0.1254, "step": 20026 }, { "epoch": 0.5842522900986055, "grad_norm": 1.0024426718891248, "learning_rate": 3.888053324989065e-06, "loss": 0.1412, "step": 20027 }, { "epoch": 0.5842814633292491, "grad_norm": 0.8664652350890629, "learning_rate": 3.88759272652165e-06, "loss": 0.1127, "step": 20028 }, { "epoch": 0.5843106365598927, "grad_norm": 0.6224970984708146, "learning_rate": 3.887132137985542e-06, "loss": 0.1213, "step": 20029 }, { "epoch": 0.5843398097905362, "grad_norm": 0.6913691028308873, "learning_rate": 3.886671559384851e-06, "loss": 0.1414, "step": 20030 }, { "epoch": 0.5843689830211798, "grad_norm": 0.8798904964005513, "learning_rate": 3.8862109907236935e-06, "loss": 0.1183, "step": 20031 }, { "epoch": 0.5843981562518233, "grad_norm": 0.9806387333938295, "learning_rate": 3.8857504320061765e-06, "loss": 0.1095, "step": 20032 }, { "epoch": 0.5844273294824669, "grad_norm": 0.8494375148451374, "learning_rate": 3.8852898832364125e-06, "loss": 0.135, "step": 20033 }, { "epoch": 0.5844565027131104, "grad_norm": 0.7349521172006768, "learning_rate": 3.884829344418515e-06, "loss": 0.127, "step": 20034 }, { "epoch": 0.584485675943754, "grad_norm": 1.0602459365016692, "learning_rate": 3.884368815556595e-06, "loss": 0.1218, "step": 20035 }, { "epoch": 0.5845148491743976, "grad_norm": 1.1043666239761065, "learning_rate": 3.883908296654766e-06, "loss": 0.1204, "step": 20036 }, { "epoch": 0.5845440224050411, "grad_norm": 0.7194028575697635, "learning_rate": 3.883447787717134e-06, "loss": 0.1221, "step": 20037 }, { "epoch": 0.5845731956356847, "grad_norm": 1.0477697364098046, "learning_rate": 3.882987288747816e-06, "loss": 0.1359, "step": 20038 }, { "epoch": 0.5846023688663282, "grad_norm": 1.2162683390570899, "learning_rate": 3.8825267997509184e-06, "loss": 0.1384, "step": 20039 }, { "epoch": 0.5846315420969718, "grad_norm": 0.9315426408156291, "learning_rate": 3.882066320730556e-06, "loss": 0.1326, "step": 20040 }, { "epoch": 0.5846607153276154, "grad_norm": 0.7159074593237017, "learning_rate": 3.88160585169084e-06, "loss": 0.1297, "step": 20041 }, { "epoch": 0.584689888558259, "grad_norm": 1.2336832947245413, "learning_rate": 3.881145392635879e-06, "loss": 0.1199, "step": 20042 }, { "epoch": 0.5847190617889025, "grad_norm": 1.0889179166800855, "learning_rate": 3.880684943569785e-06, "loss": 0.1216, "step": 20043 }, { "epoch": 0.5847482350195461, "grad_norm": 0.8037398815818094, "learning_rate": 3.880224504496669e-06, "loss": 0.1291, "step": 20044 }, { "epoch": 0.5847774082501896, "grad_norm": 0.8383273262411762, "learning_rate": 3.87976407542064e-06, "loss": 0.1423, "step": 20045 }, { "epoch": 0.5848065814808332, "grad_norm": 0.9656367754583141, "learning_rate": 3.87930365634581e-06, "loss": 0.122, "step": 20046 }, { "epoch": 0.5848357547114768, "grad_norm": 0.7820117746304088, "learning_rate": 3.87884324727629e-06, "loss": 0.1492, "step": 20047 }, { "epoch": 0.5848649279421203, "grad_norm": 0.9874843732922406, "learning_rate": 3.87838284821619e-06, "loss": 0.1233, "step": 20048 }, { "epoch": 0.5848941011727639, "grad_norm": 0.7807569775875209, "learning_rate": 3.877922459169621e-06, "loss": 0.1324, "step": 20049 }, { "epoch": 0.5849232744034074, "grad_norm": 0.908533675249824, "learning_rate": 3.877462080140691e-06, "loss": 0.138, "step": 20050 }, { "epoch": 0.584952447634051, "grad_norm": 0.9268027110355939, "learning_rate": 3.877001711133511e-06, "loss": 0.1443, "step": 20051 }, { "epoch": 0.5849816208646945, "grad_norm": 1.1052806011889693, "learning_rate": 3.8765413521521925e-06, "loss": 0.1353, "step": 20052 }, { "epoch": 0.5850107940953381, "grad_norm": 0.7436349984137494, "learning_rate": 3.876081003200846e-06, "loss": 0.1137, "step": 20053 }, { "epoch": 0.5850399673259816, "grad_norm": 0.7905624130436862, "learning_rate": 3.875620664283578e-06, "loss": 0.1256, "step": 20054 }, { "epoch": 0.5850691405566253, "grad_norm": 0.7727231524808063, "learning_rate": 3.875160335404502e-06, "loss": 0.1109, "step": 20055 }, { "epoch": 0.5850983137872688, "grad_norm": 1.1785316990071035, "learning_rate": 3.874700016567726e-06, "loss": 0.1388, "step": 20056 }, { "epoch": 0.5851274870179124, "grad_norm": 0.8946554632140041, "learning_rate": 3.874239707777356e-06, "loss": 0.1067, "step": 20057 }, { "epoch": 0.585156660248556, "grad_norm": 0.8262004130191329, "learning_rate": 3.873779409037509e-06, "loss": 0.1297, "step": 20058 }, { "epoch": 0.5851858334791995, "grad_norm": 0.7765629391008582, "learning_rate": 3.873319120352289e-06, "loss": 0.1197, "step": 20059 }, { "epoch": 0.585215006709843, "grad_norm": 0.8418836592297371, "learning_rate": 3.872858841725808e-06, "loss": 0.1304, "step": 20060 }, { "epoch": 0.5852441799404866, "grad_norm": 0.7772474308397072, "learning_rate": 3.872398573162174e-06, "loss": 0.0986, "step": 20061 }, { "epoch": 0.5852733531711302, "grad_norm": 0.7997763202848225, "learning_rate": 3.871938314665496e-06, "loss": 0.1454, "step": 20062 }, { "epoch": 0.5853025264017737, "grad_norm": 0.7620179197154986, "learning_rate": 3.871478066239882e-06, "loss": 0.1159, "step": 20063 }, { "epoch": 0.5853316996324173, "grad_norm": 0.7601146672815218, "learning_rate": 3.871017827889444e-06, "loss": 0.1409, "step": 20064 }, { "epoch": 0.5853608728630608, "grad_norm": 0.9971389348944414, "learning_rate": 3.870557599618289e-06, "loss": 0.1192, "step": 20065 }, { "epoch": 0.5853900460937044, "grad_norm": 0.7898776749792827, "learning_rate": 3.8700973814305275e-06, "loss": 0.1396, "step": 20066 }, { "epoch": 0.5854192193243479, "grad_norm": 0.7245572167616001, "learning_rate": 3.869637173330265e-06, "loss": 0.1278, "step": 20067 }, { "epoch": 0.5854483925549916, "grad_norm": 0.81545613368151, "learning_rate": 3.869176975321613e-06, "loss": 0.1119, "step": 20068 }, { "epoch": 0.5854775657856351, "grad_norm": 0.8334207358753285, "learning_rate": 3.868716787408677e-06, "loss": 0.1344, "step": 20069 }, { "epoch": 0.5855067390162787, "grad_norm": 0.7354282710283488, "learning_rate": 3.8682566095955695e-06, "loss": 0.129, "step": 20070 }, { "epoch": 0.5855359122469223, "grad_norm": 0.7748654416289245, "learning_rate": 3.867796441886397e-06, "loss": 0.1007, "step": 20071 }, { "epoch": 0.5855650854775658, "grad_norm": 0.9861834514467656, "learning_rate": 3.867336284285267e-06, "loss": 0.1262, "step": 20072 }, { "epoch": 0.5855942587082094, "grad_norm": 0.8149178327824725, "learning_rate": 3.866876136796288e-06, "loss": 0.1329, "step": 20073 }, { "epoch": 0.5856234319388529, "grad_norm": 0.8383302629967043, "learning_rate": 3.86641599942357e-06, "loss": 0.1189, "step": 20074 }, { "epoch": 0.5856526051694965, "grad_norm": 1.0343052504712773, "learning_rate": 3.865955872171217e-06, "loss": 0.1247, "step": 20075 }, { "epoch": 0.58568177840014, "grad_norm": 0.7977024950722141, "learning_rate": 3.865495755043339e-06, "loss": 0.1204, "step": 20076 }, { "epoch": 0.5857109516307836, "grad_norm": 1.2251561082309965, "learning_rate": 3.865035648044046e-06, "loss": 0.1416, "step": 20077 }, { "epoch": 0.5857401248614271, "grad_norm": 0.8435546083933906, "learning_rate": 3.864575551177443e-06, "loss": 0.1272, "step": 20078 }, { "epoch": 0.5857692980920707, "grad_norm": 0.7883942391601387, "learning_rate": 3.864115464447639e-06, "loss": 0.1247, "step": 20079 }, { "epoch": 0.5857984713227142, "grad_norm": 0.8557367358849178, "learning_rate": 3.86365538785874e-06, "loss": 0.1291, "step": 20080 }, { "epoch": 0.5858276445533578, "grad_norm": 0.9048262984258508, "learning_rate": 3.863195321414855e-06, "loss": 0.1155, "step": 20081 }, { "epoch": 0.5858568177840014, "grad_norm": 0.7213752903120728, "learning_rate": 3.86273526512009e-06, "loss": 0.1472, "step": 20082 }, { "epoch": 0.585885991014645, "grad_norm": 0.7683928580871993, "learning_rate": 3.862275218978554e-06, "loss": 0.1478, "step": 20083 }, { "epoch": 0.5859151642452886, "grad_norm": 0.9852625062479899, "learning_rate": 3.861815182994353e-06, "loss": 0.1472, "step": 20084 }, { "epoch": 0.5859443374759321, "grad_norm": 0.9549895964272472, "learning_rate": 3.861355157171594e-06, "loss": 0.1159, "step": 20085 }, { "epoch": 0.5859735107065757, "grad_norm": 0.7935749292525924, "learning_rate": 3.860895141514384e-06, "loss": 0.1007, "step": 20086 }, { "epoch": 0.5860026839372192, "grad_norm": 0.8734585237837397, "learning_rate": 3.860435136026831e-06, "loss": 0.1254, "step": 20087 }, { "epoch": 0.5860318571678628, "grad_norm": 0.9321607175072781, "learning_rate": 3.859975140713042e-06, "loss": 0.1262, "step": 20088 }, { "epoch": 0.5860610303985063, "grad_norm": 0.8740670292774305, "learning_rate": 3.859515155577122e-06, "loss": 0.1186, "step": 20089 }, { "epoch": 0.5860902036291499, "grad_norm": 0.885447387603564, "learning_rate": 3.859055180623178e-06, "loss": 0.1444, "step": 20090 }, { "epoch": 0.5861193768597934, "grad_norm": 1.1201388672989014, "learning_rate": 3.858595215855318e-06, "loss": 0.1217, "step": 20091 }, { "epoch": 0.586148550090437, "grad_norm": 0.8479948512544705, "learning_rate": 3.858135261277645e-06, "loss": 0.1169, "step": 20092 }, { "epoch": 0.5861777233210805, "grad_norm": 0.8185512498549288, "learning_rate": 3.85767531689427e-06, "loss": 0.1064, "step": 20093 }, { "epoch": 0.5862068965517241, "grad_norm": 0.9016533709889478, "learning_rate": 3.857215382709296e-06, "loss": 0.1252, "step": 20094 }, { "epoch": 0.5862360697823678, "grad_norm": 0.9268200388301004, "learning_rate": 3.856755458726831e-06, "loss": 0.1119, "step": 20095 }, { "epoch": 0.5862652430130113, "grad_norm": 0.8720738752910677, "learning_rate": 3.8562955449509814e-06, "loss": 0.1365, "step": 20096 }, { "epoch": 0.5862944162436549, "grad_norm": 0.9482182903925894, "learning_rate": 3.85583564138585e-06, "loss": 0.1183, "step": 20097 }, { "epoch": 0.5863235894742984, "grad_norm": 0.8620664503063633, "learning_rate": 3.855375748035545e-06, "loss": 0.1304, "step": 20098 }, { "epoch": 0.586352762704942, "grad_norm": 0.9452053302308003, "learning_rate": 3.854915864904173e-06, "loss": 0.15, "step": 20099 }, { "epoch": 0.5863819359355855, "grad_norm": 1.0589915214326235, "learning_rate": 3.854455991995838e-06, "loss": 0.1411, "step": 20100 }, { "epoch": 0.5864111091662291, "grad_norm": 0.857230424040825, "learning_rate": 3.853996129314649e-06, "loss": 0.1176, "step": 20101 }, { "epoch": 0.5864402823968726, "grad_norm": 0.9205323384255356, "learning_rate": 3.853536276864707e-06, "loss": 0.1379, "step": 20102 }, { "epoch": 0.5864694556275162, "grad_norm": 1.048169501315199, "learning_rate": 3.853076434650119e-06, "loss": 0.133, "step": 20103 }, { "epoch": 0.5864986288581597, "grad_norm": 1.0178603010415677, "learning_rate": 3.8526166026749904e-06, "loss": 0.1203, "step": 20104 }, { "epoch": 0.5865278020888033, "grad_norm": 1.096479765021856, "learning_rate": 3.852156780943428e-06, "loss": 0.132, "step": 20105 }, { "epoch": 0.5865569753194468, "grad_norm": 1.0845604859818005, "learning_rate": 3.851696969459536e-06, "loss": 0.1407, "step": 20106 }, { "epoch": 0.5865861485500904, "grad_norm": 0.872584285654691, "learning_rate": 3.851237168227419e-06, "loss": 0.1172, "step": 20107 }, { "epoch": 0.586615321780734, "grad_norm": 1.036295087112203, "learning_rate": 3.850777377251183e-06, "loss": 0.1551, "step": 20108 }, { "epoch": 0.5866444950113776, "grad_norm": 1.1641155943704615, "learning_rate": 3.850317596534932e-06, "loss": 0.1396, "step": 20109 }, { "epoch": 0.5866736682420212, "grad_norm": 1.0923985167320642, "learning_rate": 3.849857826082769e-06, "loss": 0.1247, "step": 20110 }, { "epoch": 0.5867028414726647, "grad_norm": 0.987160959863191, "learning_rate": 3.849398065898802e-06, "loss": 0.131, "step": 20111 }, { "epoch": 0.5867320147033083, "grad_norm": 1.253080785508952, "learning_rate": 3.848938315987135e-06, "loss": 0.129, "step": 20112 }, { "epoch": 0.5867611879339518, "grad_norm": 1.1538753717837746, "learning_rate": 3.848478576351873e-06, "loss": 0.1245, "step": 20113 }, { "epoch": 0.5867903611645954, "grad_norm": 1.0333923564407248, "learning_rate": 3.848018846997117e-06, "loss": 0.1422, "step": 20114 }, { "epoch": 0.5868195343952389, "grad_norm": 0.6461112413466147, "learning_rate": 3.847559127926975e-06, "loss": 0.1214, "step": 20115 }, { "epoch": 0.5868487076258825, "grad_norm": 0.8272750820515089, "learning_rate": 3.847099419145549e-06, "loss": 0.1309, "step": 20116 }, { "epoch": 0.586877880856526, "grad_norm": 0.9896185719377043, "learning_rate": 3.846639720656944e-06, "loss": 0.1581, "step": 20117 }, { "epoch": 0.5869070540871696, "grad_norm": 0.7564900045154057, "learning_rate": 3.846180032465267e-06, "loss": 0.1131, "step": 20118 }, { "epoch": 0.5869362273178131, "grad_norm": 0.8645903721445637, "learning_rate": 3.845720354574617e-06, "loss": 0.1087, "step": 20119 }, { "epoch": 0.5869654005484567, "grad_norm": 0.8617417665144481, "learning_rate": 3.845260686989101e-06, "loss": 0.1454, "step": 20120 }, { "epoch": 0.5869945737791002, "grad_norm": 0.6203208205541115, "learning_rate": 3.844801029712822e-06, "loss": 0.1068, "step": 20121 }, { "epoch": 0.5870237470097439, "grad_norm": 0.7631944690684509, "learning_rate": 3.844341382749881e-06, "loss": 0.1434, "step": 20122 }, { "epoch": 0.5870529202403875, "grad_norm": 0.8825347793520131, "learning_rate": 3.843881746104387e-06, "loss": 0.1086, "step": 20123 }, { "epoch": 0.587082093471031, "grad_norm": 1.6912087255054722, "learning_rate": 3.84342211978044e-06, "loss": 0.1271, "step": 20124 }, { "epoch": 0.5871112667016746, "grad_norm": 0.8616899578604179, "learning_rate": 3.842962503782145e-06, "loss": 0.123, "step": 20125 }, { "epoch": 0.5871404399323181, "grad_norm": 0.6871094352328266, "learning_rate": 3.842502898113604e-06, "loss": 0.1384, "step": 20126 }, { "epoch": 0.5871696131629617, "grad_norm": 0.9791209284560194, "learning_rate": 3.842043302778921e-06, "loss": 0.1303, "step": 20127 }, { "epoch": 0.5871987863936052, "grad_norm": 0.6875342564927575, "learning_rate": 3.8415837177821976e-06, "loss": 0.1083, "step": 20128 }, { "epoch": 0.5872279596242488, "grad_norm": 0.8012390508740657, "learning_rate": 3.841124143127539e-06, "loss": 0.1147, "step": 20129 }, { "epoch": 0.5872571328548923, "grad_norm": 0.8787926292352068, "learning_rate": 3.840664578819047e-06, "loss": 0.1489, "step": 20130 }, { "epoch": 0.5872863060855359, "grad_norm": 0.8967703054374653, "learning_rate": 3.8402050248608266e-06, "loss": 0.1322, "step": 20131 }, { "epoch": 0.5873154793161794, "grad_norm": 0.8685471794163647, "learning_rate": 3.839745481256979e-06, "loss": 0.1346, "step": 20132 }, { "epoch": 0.587344652546823, "grad_norm": 0.8787694749575461, "learning_rate": 3.839285948011605e-06, "loss": 0.126, "step": 20133 }, { "epoch": 0.5873738257774666, "grad_norm": 0.8927126506899811, "learning_rate": 3.838826425128809e-06, "loss": 0.1205, "step": 20134 }, { "epoch": 0.5874029990081101, "grad_norm": 0.8486994366293353, "learning_rate": 3.838366912612694e-06, "loss": 0.1326, "step": 20135 }, { "epoch": 0.5874321722387538, "grad_norm": 0.7935926414817781, "learning_rate": 3.837907410467363e-06, "loss": 0.1266, "step": 20136 }, { "epoch": 0.5874613454693973, "grad_norm": 0.7376191128550542, "learning_rate": 3.837447918696915e-06, "loss": 0.1424, "step": 20137 }, { "epoch": 0.5874905187000409, "grad_norm": 1.0005943673066164, "learning_rate": 3.836988437305457e-06, "loss": 0.1177, "step": 20138 }, { "epoch": 0.5875196919306844, "grad_norm": 0.9030270167580045, "learning_rate": 3.836528966297087e-06, "loss": 0.1331, "step": 20139 }, { "epoch": 0.587548865161328, "grad_norm": 0.6526625704100362, "learning_rate": 3.836069505675909e-06, "loss": 0.1193, "step": 20140 }, { "epoch": 0.5875780383919715, "grad_norm": 1.019786180214768, "learning_rate": 3.835610055446024e-06, "loss": 0.1357, "step": 20141 }, { "epoch": 0.5876072116226151, "grad_norm": 1.0235832849047515, "learning_rate": 3.835150615611535e-06, "loss": 0.1444, "step": 20142 }, { "epoch": 0.5876363848532586, "grad_norm": 0.6907795557456458, "learning_rate": 3.8346911861765444e-06, "loss": 0.114, "step": 20143 }, { "epoch": 0.5876655580839022, "grad_norm": 0.6595963428949111, "learning_rate": 3.83423176714515e-06, "loss": 0.1151, "step": 20144 }, { "epoch": 0.5876947313145457, "grad_norm": 0.9794914247080527, "learning_rate": 3.833772358521458e-06, "loss": 0.1326, "step": 20145 }, { "epoch": 0.5877239045451893, "grad_norm": 0.7917853262638629, "learning_rate": 3.833312960309567e-06, "loss": 0.1344, "step": 20146 }, { "epoch": 0.5877530777758329, "grad_norm": 0.7057080667836103, "learning_rate": 3.83285357251358e-06, "loss": 0.122, "step": 20147 }, { "epoch": 0.5877822510064764, "grad_norm": 0.927868335098306, "learning_rate": 3.832394195137599e-06, "loss": 0.1672, "step": 20148 }, { "epoch": 0.5878114242371201, "grad_norm": 0.7746056396313836, "learning_rate": 3.8319348281857215e-06, "loss": 0.1141, "step": 20149 }, { "epoch": 0.5878405974677636, "grad_norm": 0.827359989904886, "learning_rate": 3.831475471662052e-06, "loss": 0.0981, "step": 20150 }, { "epoch": 0.5878697706984072, "grad_norm": 0.8608432342331177, "learning_rate": 3.831016125570692e-06, "loss": 0.1292, "step": 20151 }, { "epoch": 0.5878989439290507, "grad_norm": 0.7438997762371768, "learning_rate": 3.830556789915737e-06, "loss": 0.1526, "step": 20152 }, { "epoch": 0.5879281171596943, "grad_norm": 0.9969349527665727, "learning_rate": 3.830097464701296e-06, "loss": 0.1371, "step": 20153 }, { "epoch": 0.5879572903903378, "grad_norm": 0.7159327366780542, "learning_rate": 3.829638149931464e-06, "loss": 0.1135, "step": 20154 }, { "epoch": 0.5879864636209814, "grad_norm": 1.1501666243945665, "learning_rate": 3.829178845610343e-06, "loss": 0.1361, "step": 20155 }, { "epoch": 0.588015636851625, "grad_norm": 1.1771798621100007, "learning_rate": 3.8287195517420345e-06, "loss": 0.1157, "step": 20156 }, { "epoch": 0.5880448100822685, "grad_norm": 0.7381653491865249, "learning_rate": 3.828260268330638e-06, "loss": 0.1118, "step": 20157 }, { "epoch": 0.588073983312912, "grad_norm": 0.763061630725222, "learning_rate": 3.827800995380252e-06, "loss": 0.1444, "step": 20158 }, { "epoch": 0.5881031565435556, "grad_norm": 1.3101333418024994, "learning_rate": 3.827341732894981e-06, "loss": 0.1232, "step": 20159 }, { "epoch": 0.5881323297741992, "grad_norm": 0.93697661865873, "learning_rate": 3.826882480878923e-06, "loss": 0.0984, "step": 20160 }, { "epoch": 0.5881615030048427, "grad_norm": 0.8497757827537434, "learning_rate": 3.8264232393361785e-06, "loss": 0.1178, "step": 20161 }, { "epoch": 0.5881906762354863, "grad_norm": 1.1924786070328814, "learning_rate": 3.825964008270847e-06, "loss": 0.1347, "step": 20162 }, { "epoch": 0.5882198494661299, "grad_norm": 1.0225547594735644, "learning_rate": 3.825504787687027e-06, "loss": 0.0987, "step": 20163 }, { "epoch": 0.5882490226967735, "grad_norm": 1.0026292756298758, "learning_rate": 3.82504557758882e-06, "loss": 0.1282, "step": 20164 }, { "epoch": 0.588278195927417, "grad_norm": 0.8335342189841055, "learning_rate": 3.824586377980328e-06, "loss": 0.1291, "step": 20165 }, { "epoch": 0.5883073691580606, "grad_norm": 1.302804691078596, "learning_rate": 3.824127188865647e-06, "loss": 0.1572, "step": 20166 }, { "epoch": 0.5883365423887041, "grad_norm": 0.985718268969789, "learning_rate": 3.823668010248877e-06, "loss": 0.1274, "step": 20167 }, { "epoch": 0.5883657156193477, "grad_norm": 1.597761621760707, "learning_rate": 3.82320884213412e-06, "loss": 0.1257, "step": 20168 }, { "epoch": 0.5883948888499912, "grad_norm": 0.6882805232081349, "learning_rate": 3.822749684525472e-06, "loss": 0.1345, "step": 20169 }, { "epoch": 0.5884240620806348, "grad_norm": 0.8339342206647082, "learning_rate": 3.822290537427033e-06, "loss": 0.1423, "step": 20170 }, { "epoch": 0.5884532353112784, "grad_norm": 1.2302623407076574, "learning_rate": 3.8218314008429045e-06, "loss": 0.1219, "step": 20171 }, { "epoch": 0.5884824085419219, "grad_norm": 1.0556585914346928, "learning_rate": 3.821372274777183e-06, "loss": 0.1431, "step": 20172 }, { "epoch": 0.5885115817725655, "grad_norm": 0.8827344637910692, "learning_rate": 3.82091315923397e-06, "loss": 0.1443, "step": 20173 }, { "epoch": 0.588540755003209, "grad_norm": 0.674349674596774, "learning_rate": 3.820454054217362e-06, "loss": 0.1351, "step": 20174 }, { "epoch": 0.5885699282338526, "grad_norm": 0.8529289725548811, "learning_rate": 3.8199949597314586e-06, "loss": 0.1523, "step": 20175 }, { "epoch": 0.5885991014644962, "grad_norm": 0.9926984196468841, "learning_rate": 3.819535875780357e-06, "loss": 0.1385, "step": 20176 }, { "epoch": 0.5886282746951398, "grad_norm": 0.6518237954321938, "learning_rate": 3.8190768023681585e-06, "loss": 0.1275, "step": 20177 }, { "epoch": 0.5886574479257833, "grad_norm": 0.6130187369350059, "learning_rate": 3.818617739498962e-06, "loss": 0.1051, "step": 20178 }, { "epoch": 0.5886866211564269, "grad_norm": 0.8051082249111425, "learning_rate": 3.818158687176862e-06, "loss": 0.1431, "step": 20179 }, { "epoch": 0.5887157943870704, "grad_norm": 0.6799712927962633, "learning_rate": 3.81769964540596e-06, "loss": 0.1361, "step": 20180 }, { "epoch": 0.588744967617714, "grad_norm": 0.6967539028836852, "learning_rate": 3.817240614190354e-06, "loss": 0.1174, "step": 20181 }, { "epoch": 0.5887741408483576, "grad_norm": 0.9381637487702315, "learning_rate": 3.816781593534139e-06, "loss": 0.1125, "step": 20182 }, { "epoch": 0.5888033140790011, "grad_norm": 0.6395438147681456, "learning_rate": 3.816322583441419e-06, "loss": 0.1104, "step": 20183 }, { "epoch": 0.5888324873096447, "grad_norm": 0.6855004873407058, "learning_rate": 3.815863583916286e-06, "loss": 0.1201, "step": 20184 }, { "epoch": 0.5888616605402882, "grad_norm": 0.9913520040827561, "learning_rate": 3.815404594962841e-06, "loss": 0.1391, "step": 20185 }, { "epoch": 0.5888908337709318, "grad_norm": 0.9338284679029946, "learning_rate": 3.814945616585182e-06, "loss": 0.1347, "step": 20186 }, { "epoch": 0.5889200070015753, "grad_norm": 0.8037720846213434, "learning_rate": 3.8144866487874043e-06, "loss": 0.1076, "step": 20187 }, { "epoch": 0.5889491802322189, "grad_norm": 0.9015038126636395, "learning_rate": 3.8140276915736056e-06, "loss": 0.1379, "step": 20188 }, { "epoch": 0.5889783534628624, "grad_norm": 1.079556128494006, "learning_rate": 3.8135687449478865e-06, "loss": 0.1487, "step": 20189 }, { "epoch": 0.5890075266935061, "grad_norm": 0.9209749360880155, "learning_rate": 3.8131098089143415e-06, "loss": 0.14, "step": 20190 }, { "epoch": 0.5890366999241496, "grad_norm": 0.8223976212634202, "learning_rate": 3.8126508834770703e-06, "loss": 0.1298, "step": 20191 }, { "epoch": 0.5890658731547932, "grad_norm": 0.8623040227317921, "learning_rate": 3.812191968640167e-06, "loss": 0.1412, "step": 20192 }, { "epoch": 0.5890950463854367, "grad_norm": 0.8455280296843426, "learning_rate": 3.811733064407731e-06, "loss": 0.1179, "step": 20193 }, { "epoch": 0.5891242196160803, "grad_norm": 1.0134298386784872, "learning_rate": 3.811274170783857e-06, "loss": 0.1312, "step": 20194 }, { "epoch": 0.5891533928467239, "grad_norm": 0.8541302059261543, "learning_rate": 3.8108152877726457e-06, "loss": 0.13, "step": 20195 }, { "epoch": 0.5891825660773674, "grad_norm": 0.7675668199312625, "learning_rate": 3.8103564153781904e-06, "loss": 0.105, "step": 20196 }, { "epoch": 0.589211739308011, "grad_norm": 1.0805143242330255, "learning_rate": 3.809897553604589e-06, "loss": 0.1329, "step": 20197 }, { "epoch": 0.5892409125386545, "grad_norm": 0.9027691343169723, "learning_rate": 3.80943870245594e-06, "loss": 0.1416, "step": 20198 }, { "epoch": 0.5892700857692981, "grad_norm": 1.0002746479320108, "learning_rate": 3.808979861936336e-06, "loss": 0.1142, "step": 20199 }, { "epoch": 0.5892992589999416, "grad_norm": 0.8930213473446856, "learning_rate": 3.808521032049875e-06, "loss": 0.1113, "step": 20200 }, { "epoch": 0.5893284322305852, "grad_norm": 0.9342242631978366, "learning_rate": 3.8080622128006547e-06, "loss": 0.1224, "step": 20201 }, { "epoch": 0.5893576054612287, "grad_norm": 1.0747903739228073, "learning_rate": 3.80760340419277e-06, "loss": 0.1172, "step": 20202 }, { "epoch": 0.5893867786918724, "grad_norm": 1.1361561748983318, "learning_rate": 3.807144606230319e-06, "loss": 0.1368, "step": 20203 }, { "epoch": 0.589415951922516, "grad_norm": 0.8532972712206477, "learning_rate": 3.806685818917395e-06, "loss": 0.1186, "step": 20204 }, { "epoch": 0.5894451251531595, "grad_norm": 1.0131116558195778, "learning_rate": 3.8062270422580953e-06, "loss": 0.1144, "step": 20205 }, { "epoch": 0.589474298383803, "grad_norm": 0.7055321851541815, "learning_rate": 3.805768276256514e-06, "loss": 0.1386, "step": 20206 }, { "epoch": 0.5895034716144466, "grad_norm": 1.0382676058333964, "learning_rate": 3.80530952091675e-06, "loss": 0.1033, "step": 20207 }, { "epoch": 0.5895326448450902, "grad_norm": 0.7754527877530077, "learning_rate": 3.804850776242899e-06, "loss": 0.1192, "step": 20208 }, { "epoch": 0.5895618180757337, "grad_norm": 0.8098385858799838, "learning_rate": 3.8043920422390527e-06, "loss": 0.1183, "step": 20209 }, { "epoch": 0.5895909913063773, "grad_norm": 1.0213690853970159, "learning_rate": 3.80393331890931e-06, "loss": 0.1437, "step": 20210 }, { "epoch": 0.5896201645370208, "grad_norm": 0.8024946265682966, "learning_rate": 3.8034746062577653e-06, "loss": 0.1345, "step": 20211 }, { "epoch": 0.5896493377676644, "grad_norm": 0.7137791715372281, "learning_rate": 3.803015904288511e-06, "loss": 0.108, "step": 20212 }, { "epoch": 0.5896785109983079, "grad_norm": 0.9698080163434466, "learning_rate": 3.8025572130056475e-06, "loss": 0.1161, "step": 20213 }, { "epoch": 0.5897076842289515, "grad_norm": 0.7361753419335797, "learning_rate": 3.8020985324132663e-06, "loss": 0.1239, "step": 20214 }, { "epoch": 0.589736857459595, "grad_norm": 0.9412898368112532, "learning_rate": 3.801639862515464e-06, "loss": 0.1362, "step": 20215 }, { "epoch": 0.5897660306902386, "grad_norm": 0.6458254491982539, "learning_rate": 3.8011812033163365e-06, "loss": 0.1246, "step": 20216 }, { "epoch": 0.5897952039208822, "grad_norm": 0.8289219559663683, "learning_rate": 3.800722554819975e-06, "loss": 0.1179, "step": 20217 }, { "epoch": 0.5898243771515258, "grad_norm": 1.0031912845215238, "learning_rate": 3.8002639170304755e-06, "loss": 0.1178, "step": 20218 }, { "epoch": 0.5898535503821694, "grad_norm": 0.6587167737029508, "learning_rate": 3.7998052899519346e-06, "loss": 0.1119, "step": 20219 }, { "epoch": 0.5898827236128129, "grad_norm": 0.7147696398102091, "learning_rate": 3.7993466735884456e-06, "loss": 0.124, "step": 20220 }, { "epoch": 0.5899118968434565, "grad_norm": 0.8562683931200171, "learning_rate": 3.798888067944103e-06, "loss": 0.1329, "step": 20221 }, { "epoch": 0.5899410700741, "grad_norm": 0.8384721329675718, "learning_rate": 3.7984294730230008e-06, "loss": 0.133, "step": 20222 }, { "epoch": 0.5899702433047436, "grad_norm": 0.7354471706626688, "learning_rate": 3.797970888829233e-06, "loss": 0.1045, "step": 20223 }, { "epoch": 0.5899994165353871, "grad_norm": 0.7309064317536617, "learning_rate": 3.7975123153668935e-06, "loss": 0.1199, "step": 20224 }, { "epoch": 0.5900285897660307, "grad_norm": 0.9422277741528954, "learning_rate": 3.797053752640079e-06, "loss": 0.117, "step": 20225 }, { "epoch": 0.5900577629966742, "grad_norm": 1.1053133172270788, "learning_rate": 3.7965952006528805e-06, "loss": 0.1219, "step": 20226 }, { "epoch": 0.5900869362273178, "grad_norm": 0.8682861779230187, "learning_rate": 3.796136659409393e-06, "loss": 0.1234, "step": 20227 }, { "epoch": 0.5901161094579613, "grad_norm": 0.8578679187059338, "learning_rate": 3.7956781289137103e-06, "loss": 0.1213, "step": 20228 }, { "epoch": 0.5901452826886049, "grad_norm": 0.6627094752494258, "learning_rate": 3.795219609169925e-06, "loss": 0.1202, "step": 20229 }, { "epoch": 0.5901744559192486, "grad_norm": 1.0334651298328659, "learning_rate": 3.7947611001821307e-06, "loss": 0.126, "step": 20230 }, { "epoch": 0.5902036291498921, "grad_norm": 1.0303606584877127, "learning_rate": 3.7943026019544226e-06, "loss": 0.1172, "step": 20231 }, { "epoch": 0.5902328023805357, "grad_norm": 1.1767317178537413, "learning_rate": 3.7938441144908926e-06, "loss": 0.1068, "step": 20232 }, { "epoch": 0.5902619756111792, "grad_norm": 0.9603041475692659, "learning_rate": 3.7933856377956357e-06, "loss": 0.1325, "step": 20233 }, { "epoch": 0.5902911488418228, "grad_norm": 0.9127724785613851, "learning_rate": 3.7929271718727426e-06, "loss": 0.1272, "step": 20234 }, { "epoch": 0.5903203220724663, "grad_norm": 0.8904540092492622, "learning_rate": 3.792468716726308e-06, "loss": 0.1143, "step": 20235 }, { "epoch": 0.5903494953031099, "grad_norm": 1.1342747403471634, "learning_rate": 3.792010272360423e-06, "loss": 0.1251, "step": 20236 }, { "epoch": 0.5903786685337534, "grad_norm": 0.9276026984423652, "learning_rate": 3.7915518387791833e-06, "loss": 0.1208, "step": 20237 }, { "epoch": 0.590407841764397, "grad_norm": 0.8160098687053883, "learning_rate": 3.7910934159866807e-06, "loss": 0.1347, "step": 20238 }, { "epoch": 0.5904370149950405, "grad_norm": 1.1614221942977292, "learning_rate": 3.790635003987007e-06, "loss": 0.1322, "step": 20239 }, { "epoch": 0.5904661882256841, "grad_norm": 1.071437658293513, "learning_rate": 3.7901766027842553e-06, "loss": 0.1377, "step": 20240 }, { "epoch": 0.5904953614563276, "grad_norm": 0.8739567106959935, "learning_rate": 3.7897182123825196e-06, "loss": 0.1337, "step": 20241 }, { "epoch": 0.5905245346869712, "grad_norm": 0.9832205175421125, "learning_rate": 3.7892598327858863e-06, "loss": 0.1408, "step": 20242 }, { "epoch": 0.5905537079176147, "grad_norm": 1.1821731010630248, "learning_rate": 3.788801463998456e-06, "loss": 0.1191, "step": 20243 }, { "epoch": 0.5905828811482584, "grad_norm": 0.896523533953455, "learning_rate": 3.7883431060243163e-06, "loss": 0.1076, "step": 20244 }, { "epoch": 0.590612054378902, "grad_norm": 0.7510175788776267, "learning_rate": 3.78788475886756e-06, "loss": 0.1536, "step": 20245 }, { "epoch": 0.5906412276095455, "grad_norm": 1.1033535775488357, "learning_rate": 3.78742642253228e-06, "loss": 0.1198, "step": 20246 }, { "epoch": 0.5906704008401891, "grad_norm": 0.8632607001897823, "learning_rate": 3.7869680970225663e-06, "loss": 0.1434, "step": 20247 }, { "epoch": 0.5906995740708326, "grad_norm": 0.7615522216793309, "learning_rate": 3.786509782342511e-06, "loss": 0.1354, "step": 20248 }, { "epoch": 0.5907287473014762, "grad_norm": 0.8663061346334333, "learning_rate": 3.7860514784962084e-06, "loss": 0.1346, "step": 20249 }, { "epoch": 0.5907579205321197, "grad_norm": 0.7234468548634531, "learning_rate": 3.7855931854877474e-06, "loss": 0.1108, "step": 20250 }, { "epoch": 0.5907870937627633, "grad_norm": 1.1542971298041533, "learning_rate": 3.785134903321222e-06, "loss": 0.1193, "step": 20251 }, { "epoch": 0.5908162669934068, "grad_norm": 0.720546737061622, "learning_rate": 3.784676632000721e-06, "loss": 0.127, "step": 20252 }, { "epoch": 0.5908454402240504, "grad_norm": 1.0360990966121526, "learning_rate": 3.784218371530337e-06, "loss": 0.1263, "step": 20253 }, { "epoch": 0.5908746134546939, "grad_norm": 0.7628493944623506, "learning_rate": 3.7837601219141605e-06, "loss": 0.1075, "step": 20254 }, { "epoch": 0.5909037866853375, "grad_norm": 0.7982711688955447, "learning_rate": 3.783301883156285e-06, "loss": 0.1101, "step": 20255 }, { "epoch": 0.590932959915981, "grad_norm": 0.7200743169359279, "learning_rate": 3.782843655260799e-06, "loss": 0.128, "step": 20256 }, { "epoch": 0.5909621331466246, "grad_norm": 0.825527860327397, "learning_rate": 3.782385438231794e-06, "loss": 0.1177, "step": 20257 }, { "epoch": 0.5909913063772683, "grad_norm": 1.1544435995553002, "learning_rate": 3.7819272320733626e-06, "loss": 0.134, "step": 20258 }, { "epoch": 0.5910204796079118, "grad_norm": 0.797619386105925, "learning_rate": 3.7814690367895923e-06, "loss": 0.1252, "step": 20259 }, { "epoch": 0.5910496528385554, "grad_norm": 1.0506363430902557, "learning_rate": 3.7810108523845744e-06, "loss": 0.1334, "step": 20260 }, { "epoch": 0.5910788260691989, "grad_norm": 0.8632340134026724, "learning_rate": 3.7805526788624027e-06, "loss": 0.1578, "step": 20261 }, { "epoch": 0.5911079992998425, "grad_norm": 1.0704684836764167, "learning_rate": 3.780094516227165e-06, "loss": 0.1328, "step": 20262 }, { "epoch": 0.591137172530486, "grad_norm": 0.92312155686962, "learning_rate": 3.779636364482953e-06, "loss": 0.1267, "step": 20263 }, { "epoch": 0.5911663457611296, "grad_norm": 0.7941396087589555, "learning_rate": 3.779178223633856e-06, "loss": 0.111, "step": 20264 }, { "epoch": 0.5911955189917731, "grad_norm": 1.0645081561050787, "learning_rate": 3.778720093683964e-06, "loss": 0.1471, "step": 20265 }, { "epoch": 0.5912246922224167, "grad_norm": 0.9816676630504501, "learning_rate": 3.7782619746373663e-06, "loss": 0.1235, "step": 20266 }, { "epoch": 0.5912538654530602, "grad_norm": 0.777847199885689, "learning_rate": 3.777803866498155e-06, "loss": 0.1236, "step": 20267 }, { "epoch": 0.5912830386837038, "grad_norm": 0.9568779142795496, "learning_rate": 3.77734576927042e-06, "loss": 0.1009, "step": 20268 }, { "epoch": 0.5913122119143474, "grad_norm": 0.775840409936639, "learning_rate": 3.776887682958249e-06, "loss": 0.1359, "step": 20269 }, { "epoch": 0.5913413851449909, "grad_norm": 0.9228157617525361, "learning_rate": 3.776429607565733e-06, "loss": 0.1215, "step": 20270 }, { "epoch": 0.5913705583756346, "grad_norm": 0.8301160033415184, "learning_rate": 3.775971543096963e-06, "loss": 0.097, "step": 20271 }, { "epoch": 0.5913997316062781, "grad_norm": 1.0905985737349984, "learning_rate": 3.775513489556023e-06, "loss": 0.1402, "step": 20272 }, { "epoch": 0.5914289048369217, "grad_norm": 0.9260101498628446, "learning_rate": 3.775055446947009e-06, "loss": 0.1274, "step": 20273 }, { "epoch": 0.5914580780675652, "grad_norm": 0.8318914638435035, "learning_rate": 3.7745974152740074e-06, "loss": 0.1186, "step": 20274 }, { "epoch": 0.5914872512982088, "grad_norm": 1.0085084473662826, "learning_rate": 3.7741393945411075e-06, "loss": 0.1046, "step": 20275 }, { "epoch": 0.5915164245288523, "grad_norm": 0.9674246822945007, "learning_rate": 3.773681384752399e-06, "loss": 0.1372, "step": 20276 }, { "epoch": 0.5915455977594959, "grad_norm": 0.8177735756142149, "learning_rate": 3.773223385911969e-06, "loss": 0.1512, "step": 20277 }, { "epoch": 0.5915747709901394, "grad_norm": 0.8951644185692315, "learning_rate": 3.7727653980239077e-06, "loss": 0.1027, "step": 20278 }, { "epoch": 0.591603944220783, "grad_norm": 0.7730806463621648, "learning_rate": 3.7723074210923046e-06, "loss": 0.11, "step": 20279 }, { "epoch": 0.5916331174514265, "grad_norm": 1.4916102140596397, "learning_rate": 3.7718494551212477e-06, "loss": 0.1219, "step": 20280 }, { "epoch": 0.5916622906820701, "grad_norm": 1.2629613847233272, "learning_rate": 3.7713915001148264e-06, "loss": 0.1394, "step": 20281 }, { "epoch": 0.5916914639127137, "grad_norm": 0.8404674415570916, "learning_rate": 3.770933556077128e-06, "loss": 0.131, "step": 20282 }, { "epoch": 0.5917206371433572, "grad_norm": 0.8222340325811418, "learning_rate": 3.7704756230122404e-06, "loss": 0.1198, "step": 20283 }, { "epoch": 0.5917498103740008, "grad_norm": 1.2953859987497278, "learning_rate": 3.7700177009242533e-06, "loss": 0.1464, "step": 20284 }, { "epoch": 0.5917789836046444, "grad_norm": 0.7463448766649863, "learning_rate": 3.769559789817256e-06, "loss": 0.1227, "step": 20285 }, { "epoch": 0.591808156835288, "grad_norm": 0.9141728419939459, "learning_rate": 3.769101889695334e-06, "loss": 0.1088, "step": 20286 }, { "epoch": 0.5918373300659315, "grad_norm": 0.7300132060866088, "learning_rate": 3.768644000562577e-06, "loss": 0.1378, "step": 20287 }, { "epoch": 0.5918665032965751, "grad_norm": 0.9292285150265088, "learning_rate": 3.768186122423073e-06, "loss": 0.1101, "step": 20288 }, { "epoch": 0.5918956765272186, "grad_norm": 0.8716206320290674, "learning_rate": 3.767728255280906e-06, "loss": 0.1297, "step": 20289 }, { "epoch": 0.5919248497578622, "grad_norm": 0.8383145328399962, "learning_rate": 3.7672703991401706e-06, "loss": 0.1149, "step": 20290 }, { "epoch": 0.5919540229885057, "grad_norm": 0.7973474333886468, "learning_rate": 3.7668125540049493e-06, "loss": 0.1439, "step": 20291 }, { "epoch": 0.5919831962191493, "grad_norm": 0.7862309807261225, "learning_rate": 3.766354719879331e-06, "loss": 0.124, "step": 20292 }, { "epoch": 0.5920123694497929, "grad_norm": 0.8835775439221308, "learning_rate": 3.7658968967674046e-06, "loss": 0.1385, "step": 20293 }, { "epoch": 0.5920415426804364, "grad_norm": 0.8011348997365383, "learning_rate": 3.765439084673255e-06, "loss": 0.1302, "step": 20294 }, { "epoch": 0.59207071591108, "grad_norm": 1.027977766405424, "learning_rate": 3.76498128360097e-06, "loss": 0.1491, "step": 20295 }, { "epoch": 0.5920998891417235, "grad_norm": 0.8544333249902301, "learning_rate": 3.7645234935546377e-06, "loss": 0.1159, "step": 20296 }, { "epoch": 0.5921290623723671, "grad_norm": 0.8805906780075904, "learning_rate": 3.7640657145383445e-06, "loss": 0.1137, "step": 20297 }, { "epoch": 0.5921582356030107, "grad_norm": 0.7795037875714507, "learning_rate": 3.7636079465561793e-06, "loss": 0.1275, "step": 20298 }, { "epoch": 0.5921874088336543, "grad_norm": 0.8167336207242685, "learning_rate": 3.763150189612226e-06, "loss": 0.1294, "step": 20299 }, { "epoch": 0.5922165820642978, "grad_norm": 0.6971774326622011, "learning_rate": 3.7626924437105723e-06, "loss": 0.1266, "step": 20300 }, { "epoch": 0.5922457552949414, "grad_norm": 0.6563811155516861, "learning_rate": 3.762234708855304e-06, "loss": 0.1172, "step": 20301 }, { "epoch": 0.5922749285255849, "grad_norm": 0.7529173622208494, "learning_rate": 3.76177698505051e-06, "loss": 0.1161, "step": 20302 }, { "epoch": 0.5923041017562285, "grad_norm": 0.8713120732052216, "learning_rate": 3.761319272300276e-06, "loss": 0.1219, "step": 20303 }, { "epoch": 0.592333274986872, "grad_norm": 0.7023739943710798, "learning_rate": 3.7608615706086876e-06, "loss": 0.1213, "step": 20304 }, { "epoch": 0.5923624482175156, "grad_norm": 1.068413761264487, "learning_rate": 3.760403879979831e-06, "loss": 0.1267, "step": 20305 }, { "epoch": 0.5923916214481592, "grad_norm": 1.0622620883514793, "learning_rate": 3.759946200417793e-06, "loss": 0.1373, "step": 20306 }, { "epoch": 0.5924207946788027, "grad_norm": 0.8729555445934811, "learning_rate": 3.759488531926657e-06, "loss": 0.1254, "step": 20307 }, { "epoch": 0.5924499679094463, "grad_norm": 0.8807063874024929, "learning_rate": 3.7590308745105143e-06, "loss": 0.1369, "step": 20308 }, { "epoch": 0.5924791411400898, "grad_norm": 0.7730701181240264, "learning_rate": 3.7585732281734467e-06, "loss": 0.1226, "step": 20309 }, { "epoch": 0.5925083143707334, "grad_norm": 0.6801049083834891, "learning_rate": 3.7581155929195405e-06, "loss": 0.1325, "step": 20310 }, { "epoch": 0.5925374876013769, "grad_norm": 0.9171360209832026, "learning_rate": 3.7576579687528836e-06, "loss": 0.1416, "step": 20311 }, { "epoch": 0.5925666608320206, "grad_norm": 0.9015598603877577, "learning_rate": 3.757200355677558e-06, "loss": 0.1365, "step": 20312 }, { "epoch": 0.5925958340626641, "grad_norm": 0.9509602034197917, "learning_rate": 3.75674275369765e-06, "loss": 0.1193, "step": 20313 }, { "epoch": 0.5926250072933077, "grad_norm": 0.8274027554620362, "learning_rate": 3.7562851628172476e-06, "loss": 0.109, "step": 20314 }, { "epoch": 0.5926541805239512, "grad_norm": 2.385593261944973, "learning_rate": 3.755827583040435e-06, "loss": 0.1096, "step": 20315 }, { "epoch": 0.5926833537545948, "grad_norm": 0.8304798709331231, "learning_rate": 3.7553700143712956e-06, "loss": 0.1445, "step": 20316 }, { "epoch": 0.5927125269852384, "grad_norm": 0.9432083259294172, "learning_rate": 3.7549124568139158e-06, "loss": 0.1224, "step": 20317 }, { "epoch": 0.5927417002158819, "grad_norm": 0.9228415072757776, "learning_rate": 3.754454910372381e-06, "loss": 0.1417, "step": 20318 }, { "epoch": 0.5927708734465255, "grad_norm": 0.8394270709799782, "learning_rate": 3.7539973750507723e-06, "loss": 0.1356, "step": 20319 }, { "epoch": 0.592800046677169, "grad_norm": 1.0490922762574508, "learning_rate": 3.753539850853181e-06, "loss": 0.1182, "step": 20320 }, { "epoch": 0.5928292199078126, "grad_norm": 0.9272445567929332, "learning_rate": 3.753082337783688e-06, "loss": 0.0953, "step": 20321 }, { "epoch": 0.5928583931384561, "grad_norm": 0.7105217279422499, "learning_rate": 3.7526248358463768e-06, "loss": 0.1458, "step": 20322 }, { "epoch": 0.5928875663690997, "grad_norm": 0.9120023752836961, "learning_rate": 3.7521673450453356e-06, "loss": 0.1424, "step": 20323 }, { "epoch": 0.5929167395997432, "grad_norm": 1.257870197846429, "learning_rate": 3.7517098653846446e-06, "loss": 0.1058, "step": 20324 }, { "epoch": 0.5929459128303869, "grad_norm": 0.8575419280848129, "learning_rate": 3.751252396868389e-06, "loss": 0.118, "step": 20325 }, { "epoch": 0.5929750860610304, "grad_norm": 0.7128499351579052, "learning_rate": 3.750794939500655e-06, "loss": 0.1314, "step": 20326 }, { "epoch": 0.593004259291674, "grad_norm": 0.8097750720264704, "learning_rate": 3.7503374932855258e-06, "loss": 0.1234, "step": 20327 }, { "epoch": 0.5930334325223175, "grad_norm": 0.9747084859732462, "learning_rate": 3.7498800582270863e-06, "loss": 0.1073, "step": 20328 }, { "epoch": 0.5930626057529611, "grad_norm": 0.7648578301821678, "learning_rate": 3.7494226343294177e-06, "loss": 0.1, "step": 20329 }, { "epoch": 0.5930917789836047, "grad_norm": 0.7565341007173764, "learning_rate": 3.7489652215966055e-06, "loss": 0.1393, "step": 20330 }, { "epoch": 0.5931209522142482, "grad_norm": 0.7887501873350983, "learning_rate": 3.7485078200327317e-06, "loss": 0.1316, "step": 20331 }, { "epoch": 0.5931501254448918, "grad_norm": 0.8692053319094313, "learning_rate": 3.748050429641883e-06, "loss": 0.1152, "step": 20332 }, { "epoch": 0.5931792986755353, "grad_norm": 0.7751738676907003, "learning_rate": 3.747593050428142e-06, "loss": 0.1413, "step": 20333 }, { "epoch": 0.5932084719061789, "grad_norm": 0.7288789974986442, "learning_rate": 3.7471356823955908e-06, "loss": 0.1356, "step": 20334 }, { "epoch": 0.5932376451368224, "grad_norm": 0.8715625909154425, "learning_rate": 3.7466783255483125e-06, "loss": 0.1503, "step": 20335 }, { "epoch": 0.593266818367466, "grad_norm": 0.8948276633084514, "learning_rate": 3.746220979890392e-06, "loss": 0.1303, "step": 20336 }, { "epoch": 0.5932959915981095, "grad_norm": 0.7820814106919346, "learning_rate": 3.7457636454259084e-06, "loss": 0.1221, "step": 20337 }, { "epoch": 0.5933251648287531, "grad_norm": 0.9603909685091399, "learning_rate": 3.74530632215895e-06, "loss": 0.1262, "step": 20338 }, { "epoch": 0.5933543380593967, "grad_norm": 0.9199513407352496, "learning_rate": 3.744849010093597e-06, "loss": 0.1262, "step": 20339 }, { "epoch": 0.5933835112900403, "grad_norm": 0.9135260477282455, "learning_rate": 3.7443917092339323e-06, "loss": 0.1241, "step": 20340 }, { "epoch": 0.5934126845206839, "grad_norm": 0.9318084696620351, "learning_rate": 3.7439344195840393e-06, "loss": 0.1205, "step": 20341 }, { "epoch": 0.5934418577513274, "grad_norm": 0.8841853727203788, "learning_rate": 3.7434771411479993e-06, "loss": 0.1069, "step": 20342 }, { "epoch": 0.593471030981971, "grad_norm": 1.23356435704127, "learning_rate": 3.743019873929894e-06, "loss": 0.138, "step": 20343 }, { "epoch": 0.5935002042126145, "grad_norm": 1.300083953889085, "learning_rate": 3.7425626179338087e-06, "loss": 0.1369, "step": 20344 }, { "epoch": 0.5935293774432581, "grad_norm": 0.9076827024539956, "learning_rate": 3.7421053731638247e-06, "loss": 0.1724, "step": 20345 }, { "epoch": 0.5935585506739016, "grad_norm": 0.9152453874334994, "learning_rate": 3.7416481396240233e-06, "loss": 0.1179, "step": 20346 }, { "epoch": 0.5935877239045452, "grad_norm": 0.7506870356234908, "learning_rate": 3.7411909173184863e-06, "loss": 0.1167, "step": 20347 }, { "epoch": 0.5936168971351887, "grad_norm": 0.8966013397136234, "learning_rate": 3.740733706251298e-06, "loss": 0.1177, "step": 20348 }, { "epoch": 0.5936460703658323, "grad_norm": 0.8199959588363075, "learning_rate": 3.7402765064265346e-06, "loss": 0.1291, "step": 20349 }, { "epoch": 0.5936752435964758, "grad_norm": 0.9856671762568906, "learning_rate": 3.7398193178482855e-06, "loss": 0.1545, "step": 20350 }, { "epoch": 0.5937044168271194, "grad_norm": 0.7912230412079806, "learning_rate": 3.739362140520627e-06, "loss": 0.1406, "step": 20351 }, { "epoch": 0.593733590057763, "grad_norm": 0.8040817049542729, "learning_rate": 3.7389049744476437e-06, "loss": 0.1257, "step": 20352 }, { "epoch": 0.5937627632884066, "grad_norm": 0.7683089581911615, "learning_rate": 3.738447819633415e-06, "loss": 0.1199, "step": 20353 }, { "epoch": 0.5937919365190502, "grad_norm": 0.709091046344398, "learning_rate": 3.7379906760820234e-06, "loss": 0.107, "step": 20354 }, { "epoch": 0.5938211097496937, "grad_norm": 0.7930700175595372, "learning_rate": 3.737533543797548e-06, "loss": 0.132, "step": 20355 }, { "epoch": 0.5938502829803373, "grad_norm": 0.829812465390992, "learning_rate": 3.7370764227840734e-06, "loss": 0.1611, "step": 20356 }, { "epoch": 0.5938794562109808, "grad_norm": 0.7824138393830351, "learning_rate": 3.7366193130456784e-06, "loss": 0.1019, "step": 20357 }, { "epoch": 0.5939086294416244, "grad_norm": 0.8153222821715773, "learning_rate": 3.736162214586446e-06, "loss": 0.1364, "step": 20358 }, { "epoch": 0.5939378026722679, "grad_norm": 0.8966206591182314, "learning_rate": 3.7357051274104545e-06, "loss": 0.1107, "step": 20359 }, { "epoch": 0.5939669759029115, "grad_norm": 0.8034161455187806, "learning_rate": 3.735248051521786e-06, "loss": 0.1104, "step": 20360 }, { "epoch": 0.593996149133555, "grad_norm": 0.7245629777622508, "learning_rate": 3.734790986924519e-06, "loss": 0.1009, "step": 20361 }, { "epoch": 0.5940253223641986, "grad_norm": 0.8347839313674297, "learning_rate": 3.734333933622738e-06, "loss": 0.1052, "step": 20362 }, { "epoch": 0.5940544955948421, "grad_norm": 0.8366940053896308, "learning_rate": 3.7338768916205224e-06, "loss": 0.1295, "step": 20363 }, { "epoch": 0.5940836688254857, "grad_norm": 0.7912055817415566, "learning_rate": 3.7334198609219506e-06, "loss": 0.1364, "step": 20364 }, { "epoch": 0.5941128420561292, "grad_norm": 1.3096844057058885, "learning_rate": 3.7329628415311043e-06, "loss": 0.1215, "step": 20365 }, { "epoch": 0.5941420152867729, "grad_norm": 0.8284737979793699, "learning_rate": 3.7325058334520637e-06, "loss": 0.1393, "step": 20366 }, { "epoch": 0.5941711885174165, "grad_norm": 1.3668484993926355, "learning_rate": 3.7320488366889064e-06, "loss": 0.113, "step": 20367 }, { "epoch": 0.59420036174806, "grad_norm": 0.8733517250331496, "learning_rate": 3.731591851245716e-06, "loss": 0.1456, "step": 20368 }, { "epoch": 0.5942295349787036, "grad_norm": 0.7996803319961718, "learning_rate": 3.73113487712657e-06, "loss": 0.1246, "step": 20369 }, { "epoch": 0.5942587082093471, "grad_norm": 0.7079043012081149, "learning_rate": 3.73067791433555e-06, "loss": 0.135, "step": 20370 }, { "epoch": 0.5942878814399907, "grad_norm": 0.7480051648978058, "learning_rate": 3.7302209628767345e-06, "loss": 0.133, "step": 20371 }, { "epoch": 0.5943170546706342, "grad_norm": 0.7366026184962297, "learning_rate": 3.7297640227542024e-06, "loss": 0.1169, "step": 20372 }, { "epoch": 0.5943462279012778, "grad_norm": 2.1814451346277037, "learning_rate": 3.7293070939720332e-06, "loss": 0.1251, "step": 20373 }, { "epoch": 0.5943754011319213, "grad_norm": 0.959928786799977, "learning_rate": 3.7288501765343076e-06, "loss": 0.1413, "step": 20374 }, { "epoch": 0.5944045743625649, "grad_norm": 0.8054432663471339, "learning_rate": 3.7283932704451053e-06, "loss": 0.1348, "step": 20375 }, { "epoch": 0.5944337475932084, "grad_norm": 0.6711341243653186, "learning_rate": 3.727936375708503e-06, "loss": 0.1143, "step": 20376 }, { "epoch": 0.594462920823852, "grad_norm": 0.8048865207066975, "learning_rate": 3.727479492328582e-06, "loss": 0.1355, "step": 20377 }, { "epoch": 0.5944920940544955, "grad_norm": 0.8756069044306218, "learning_rate": 3.7270226203094207e-06, "loss": 0.1065, "step": 20378 }, { "epoch": 0.5945212672851392, "grad_norm": 0.7645233073994955, "learning_rate": 3.726565759655094e-06, "loss": 0.1356, "step": 20379 }, { "epoch": 0.5945504405157828, "grad_norm": 0.8286671678293144, "learning_rate": 3.726108910369688e-06, "loss": 0.1275, "step": 20380 }, { "epoch": 0.5945796137464263, "grad_norm": 0.7099918736793572, "learning_rate": 3.7256520724572766e-06, "loss": 0.1474, "step": 20381 }, { "epoch": 0.5946087869770699, "grad_norm": 0.8368279277975266, "learning_rate": 3.7251952459219385e-06, "loss": 0.0914, "step": 20382 }, { "epoch": 0.5946379602077134, "grad_norm": 0.6257342084471068, "learning_rate": 3.724738430767755e-06, "loss": 0.1382, "step": 20383 }, { "epoch": 0.594667133438357, "grad_norm": 0.7840624078300055, "learning_rate": 3.7242816269988e-06, "loss": 0.1343, "step": 20384 }, { "epoch": 0.5946963066690005, "grad_norm": 0.8047747085774777, "learning_rate": 3.7238248346191543e-06, "loss": 0.1164, "step": 20385 }, { "epoch": 0.5947254798996441, "grad_norm": 1.0750330604736558, "learning_rate": 3.7233680536328965e-06, "loss": 0.1133, "step": 20386 }, { "epoch": 0.5947546531302876, "grad_norm": 0.8052074439890694, "learning_rate": 3.7229112840441036e-06, "loss": 0.1426, "step": 20387 }, { "epoch": 0.5947838263609312, "grad_norm": 1.0046242506305367, "learning_rate": 3.722454525856855e-06, "loss": 0.1271, "step": 20388 }, { "epoch": 0.5948129995915747, "grad_norm": 0.920190064571862, "learning_rate": 3.7219977790752265e-06, "loss": 0.1186, "step": 20389 }, { "epoch": 0.5948421728222183, "grad_norm": 0.9141840259196852, "learning_rate": 3.721541043703297e-06, "loss": 0.131, "step": 20390 }, { "epoch": 0.5948713460528618, "grad_norm": 0.8071523413765483, "learning_rate": 3.7210843197451423e-06, "loss": 0.1568, "step": 20391 }, { "epoch": 0.5949005192835054, "grad_norm": 0.9220774603287075, "learning_rate": 3.720627607204843e-06, "loss": 0.1442, "step": 20392 }, { "epoch": 0.5949296925141491, "grad_norm": 0.9029026910952078, "learning_rate": 3.720170906086476e-06, "loss": 0.0972, "step": 20393 }, { "epoch": 0.5949588657447926, "grad_norm": 0.8324401036230402, "learning_rate": 3.719714216394117e-06, "loss": 0.1127, "step": 20394 }, { "epoch": 0.5949880389754362, "grad_norm": 0.9037690580022587, "learning_rate": 3.719257538131843e-06, "loss": 0.1183, "step": 20395 }, { "epoch": 0.5950172122060797, "grad_norm": 0.8559684972197111, "learning_rate": 3.718800871303733e-06, "loss": 0.1096, "step": 20396 }, { "epoch": 0.5950463854367233, "grad_norm": 1.1706300170802248, "learning_rate": 3.7183442159138618e-06, "loss": 0.1319, "step": 20397 }, { "epoch": 0.5950755586673668, "grad_norm": 0.8423446476329747, "learning_rate": 3.717887571966308e-06, "loss": 0.1399, "step": 20398 }, { "epoch": 0.5951047318980104, "grad_norm": 0.957491801776679, "learning_rate": 3.7174309394651476e-06, "loss": 0.1199, "step": 20399 }, { "epoch": 0.5951339051286539, "grad_norm": 0.9615955937963768, "learning_rate": 3.716974318414458e-06, "loss": 0.1058, "step": 20400 }, { "epoch": 0.5951630783592975, "grad_norm": 0.7055027976814059, "learning_rate": 3.7165177088183158e-06, "loss": 0.1325, "step": 20401 }, { "epoch": 0.595192251589941, "grad_norm": 0.8306744452301431, "learning_rate": 3.716061110680797e-06, "loss": 0.1214, "step": 20402 }, { "epoch": 0.5952214248205846, "grad_norm": 0.8514220181183535, "learning_rate": 3.7156045240059766e-06, "loss": 0.1371, "step": 20403 }, { "epoch": 0.5952505980512282, "grad_norm": 1.0421853182694367, "learning_rate": 3.7151479487979335e-06, "loss": 0.1287, "step": 20404 }, { "epoch": 0.5952797712818717, "grad_norm": 0.7951703747004137, "learning_rate": 3.7146913850607435e-06, "loss": 0.1174, "step": 20405 }, { "epoch": 0.5953089445125154, "grad_norm": 0.9022842613203131, "learning_rate": 3.714234832798481e-06, "loss": 0.1211, "step": 20406 }, { "epoch": 0.5953381177431589, "grad_norm": 0.8525782163889262, "learning_rate": 3.7137782920152237e-06, "loss": 0.125, "step": 20407 }, { "epoch": 0.5953672909738025, "grad_norm": 0.8668321566287852, "learning_rate": 3.7133217627150475e-06, "loss": 0.1264, "step": 20408 }, { "epoch": 0.595396464204446, "grad_norm": 0.8136735881886328, "learning_rate": 3.712865244902024e-06, "loss": 0.1459, "step": 20409 }, { "epoch": 0.5954256374350896, "grad_norm": 0.832175816541259, "learning_rate": 3.7124087385802353e-06, "loss": 0.1179, "step": 20410 }, { "epoch": 0.5954548106657331, "grad_norm": 0.8724492284315477, "learning_rate": 3.7119522437537537e-06, "loss": 0.1288, "step": 20411 }, { "epoch": 0.5954839838963767, "grad_norm": 1.0214453828923034, "learning_rate": 3.7114957604266546e-06, "loss": 0.133, "step": 20412 }, { "epoch": 0.5955131571270202, "grad_norm": 0.8232700476169499, "learning_rate": 3.7110392886030145e-06, "loss": 0.1303, "step": 20413 }, { "epoch": 0.5955423303576638, "grad_norm": 0.799274066246224, "learning_rate": 3.710582828286907e-06, "loss": 0.1184, "step": 20414 }, { "epoch": 0.5955715035883073, "grad_norm": 0.8350031645601355, "learning_rate": 3.7101263794824072e-06, "loss": 0.1084, "step": 20415 }, { "epoch": 0.5956006768189509, "grad_norm": 0.8646161730243356, "learning_rate": 3.7096699421935926e-06, "loss": 0.1508, "step": 20416 }, { "epoch": 0.5956298500495945, "grad_norm": 0.8859231506998803, "learning_rate": 3.709213516424537e-06, "loss": 0.1399, "step": 20417 }, { "epoch": 0.595659023280238, "grad_norm": 0.8511175372845571, "learning_rate": 3.708757102179315e-06, "loss": 0.1257, "step": 20418 }, { "epoch": 0.5956881965108816, "grad_norm": 0.9045200428975965, "learning_rate": 3.708300699462001e-06, "loss": 0.1426, "step": 20419 }, { "epoch": 0.5957173697415252, "grad_norm": 0.8359579178889355, "learning_rate": 3.7078443082766694e-06, "loss": 0.1264, "step": 20420 }, { "epoch": 0.5957465429721688, "grad_norm": 0.8293232457778126, "learning_rate": 3.707387928627395e-06, "loss": 0.1243, "step": 20421 }, { "epoch": 0.5957757162028123, "grad_norm": 0.8543332688610659, "learning_rate": 3.706931560518253e-06, "loss": 0.1369, "step": 20422 }, { "epoch": 0.5958048894334559, "grad_norm": 0.8685431899547782, "learning_rate": 3.706475203953319e-06, "loss": 0.1125, "step": 20423 }, { "epoch": 0.5958340626640994, "grad_norm": 0.8912930805152341, "learning_rate": 3.706018858936664e-06, "loss": 0.1213, "step": 20424 }, { "epoch": 0.595863235894743, "grad_norm": 0.9540886112928165, "learning_rate": 3.7055625254723645e-06, "loss": 0.1108, "step": 20425 }, { "epoch": 0.5958924091253865, "grad_norm": 0.6292913574709981, "learning_rate": 3.705106203564494e-06, "loss": 0.1246, "step": 20426 }, { "epoch": 0.5959215823560301, "grad_norm": 0.866460610219473, "learning_rate": 3.7046498932171247e-06, "loss": 0.1202, "step": 20427 }, { "epoch": 0.5959507555866737, "grad_norm": 0.9358369037365718, "learning_rate": 3.7041935944343325e-06, "loss": 0.1225, "step": 20428 }, { "epoch": 0.5959799288173172, "grad_norm": 0.8857018143086642, "learning_rate": 3.703737307220191e-06, "loss": 0.1124, "step": 20429 }, { "epoch": 0.5960091020479608, "grad_norm": 0.8307551087365714, "learning_rate": 3.7032810315787726e-06, "loss": 0.1405, "step": 20430 }, { "epoch": 0.5960382752786043, "grad_norm": 0.8077905351829049, "learning_rate": 3.7028247675141538e-06, "loss": 0.1342, "step": 20431 }, { "epoch": 0.5960674485092479, "grad_norm": 0.768252473178892, "learning_rate": 3.702368515030404e-06, "loss": 0.0898, "step": 20432 }, { "epoch": 0.5960966217398915, "grad_norm": 1.1228798295974916, "learning_rate": 3.701912274131597e-06, "loss": 0.1441, "step": 20433 }, { "epoch": 0.5961257949705351, "grad_norm": 0.653153642098277, "learning_rate": 3.7014560448218094e-06, "loss": 0.1148, "step": 20434 }, { "epoch": 0.5961549682011786, "grad_norm": 0.7372247471430156, "learning_rate": 3.7009998271051127e-06, "loss": 0.1184, "step": 20435 }, { "epoch": 0.5961841414318222, "grad_norm": 0.8465129941024829, "learning_rate": 3.700543620985578e-06, "loss": 0.114, "step": 20436 }, { "epoch": 0.5962133146624657, "grad_norm": 0.8628217830651215, "learning_rate": 3.7000874264672804e-06, "loss": 0.1313, "step": 20437 }, { "epoch": 0.5962424878931093, "grad_norm": 0.738228192358053, "learning_rate": 3.6996312435542925e-06, "loss": 0.127, "step": 20438 }, { "epoch": 0.5962716611237528, "grad_norm": 1.0597878518786803, "learning_rate": 3.6991750722506835e-06, "loss": 0.1202, "step": 20439 }, { "epoch": 0.5963008343543964, "grad_norm": 0.810686714995001, "learning_rate": 3.6987189125605315e-06, "loss": 0.1302, "step": 20440 }, { "epoch": 0.59633000758504, "grad_norm": 0.8079389368908894, "learning_rate": 3.6982627644879065e-06, "loss": 0.1522, "step": 20441 }, { "epoch": 0.5963591808156835, "grad_norm": 0.8620911802258356, "learning_rate": 3.6978066280368797e-06, "loss": 0.1086, "step": 20442 }, { "epoch": 0.5963883540463271, "grad_norm": 0.8940846739395645, "learning_rate": 3.6973505032115262e-06, "loss": 0.135, "step": 20443 }, { "epoch": 0.5964175272769706, "grad_norm": 0.7280652859703205, "learning_rate": 3.696894390015915e-06, "loss": 0.1066, "step": 20444 }, { "epoch": 0.5964467005076142, "grad_norm": 0.8332426661569426, "learning_rate": 3.6964382884541188e-06, "loss": 0.1127, "step": 20445 }, { "epoch": 0.5964758737382577, "grad_norm": 0.8961981703523535, "learning_rate": 3.695982198530211e-06, "loss": 0.132, "step": 20446 }, { "epoch": 0.5965050469689014, "grad_norm": 0.6293797067487202, "learning_rate": 3.695526120248264e-06, "loss": 0.118, "step": 20447 }, { "epoch": 0.5965342201995449, "grad_norm": 0.9985226237685431, "learning_rate": 3.6950700536123486e-06, "loss": 0.1149, "step": 20448 }, { "epoch": 0.5965633934301885, "grad_norm": 0.8470849790946957, "learning_rate": 3.694613998626535e-06, "loss": 0.1281, "step": 20449 }, { "epoch": 0.596592566660832, "grad_norm": 0.7890509438732369, "learning_rate": 3.694157955294896e-06, "loss": 0.1316, "step": 20450 }, { "epoch": 0.5966217398914756, "grad_norm": 0.9085179037130472, "learning_rate": 3.693701923621502e-06, "loss": 0.1322, "step": 20451 }, { "epoch": 0.5966509131221192, "grad_norm": 0.8370162621157942, "learning_rate": 3.6932459036104272e-06, "loss": 0.1048, "step": 20452 }, { "epoch": 0.5966800863527627, "grad_norm": 1.0677576295377924, "learning_rate": 3.6927898952657417e-06, "loss": 0.1245, "step": 20453 }, { "epoch": 0.5967092595834063, "grad_norm": 0.8045087334684989, "learning_rate": 3.6923338985915146e-06, "loss": 0.1382, "step": 20454 }, { "epoch": 0.5967384328140498, "grad_norm": 0.9289775206330065, "learning_rate": 3.691877913591818e-06, "loss": 0.1314, "step": 20455 }, { "epoch": 0.5967676060446934, "grad_norm": 0.7931801109386174, "learning_rate": 3.691421940270725e-06, "loss": 0.1128, "step": 20456 }, { "epoch": 0.5967967792753369, "grad_norm": 0.7868190286090775, "learning_rate": 3.6909659786323016e-06, "loss": 0.1203, "step": 20457 }, { "epoch": 0.5968259525059805, "grad_norm": 0.9195037410372153, "learning_rate": 3.6905100286806228e-06, "loss": 0.1307, "step": 20458 }, { "epoch": 0.596855125736624, "grad_norm": 0.9351941867488012, "learning_rate": 3.6900540904197583e-06, "loss": 0.1211, "step": 20459 }, { "epoch": 0.5968842989672677, "grad_norm": 0.7102082820615029, "learning_rate": 3.689598163853779e-06, "loss": 0.1374, "step": 20460 }, { "epoch": 0.5969134721979112, "grad_norm": 1.232348357686735, "learning_rate": 3.6891422489867535e-06, "loss": 0.1262, "step": 20461 }, { "epoch": 0.5969426454285548, "grad_norm": 1.6203371497122976, "learning_rate": 3.688686345822753e-06, "loss": 0.1398, "step": 20462 }, { "epoch": 0.5969718186591983, "grad_norm": 1.0860417899502368, "learning_rate": 3.6882304543658465e-06, "loss": 0.1697, "step": 20463 }, { "epoch": 0.5970009918898419, "grad_norm": 0.8871256628348626, "learning_rate": 3.6877745746201064e-06, "loss": 0.1386, "step": 20464 }, { "epoch": 0.5970301651204855, "grad_norm": 0.7836428040188675, "learning_rate": 3.6873187065896033e-06, "loss": 0.1205, "step": 20465 }, { "epoch": 0.597059338351129, "grad_norm": 0.9965053353182319, "learning_rate": 3.686862850278403e-06, "loss": 0.1118, "step": 20466 }, { "epoch": 0.5970885115817726, "grad_norm": 0.9808097879999046, "learning_rate": 3.6864070056905786e-06, "loss": 0.1331, "step": 20467 }, { "epoch": 0.5971176848124161, "grad_norm": 0.909625164618019, "learning_rate": 3.6859511728302006e-06, "loss": 0.1262, "step": 20468 }, { "epoch": 0.5971468580430597, "grad_norm": 0.7804522237800623, "learning_rate": 3.6854953517013326e-06, "loss": 0.1348, "step": 20469 }, { "epoch": 0.5971760312737032, "grad_norm": 0.9893818890694739, "learning_rate": 3.685039542308052e-06, "loss": 0.1477, "step": 20470 }, { "epoch": 0.5972052045043468, "grad_norm": 0.8875647411801023, "learning_rate": 3.684583744654423e-06, "loss": 0.1088, "step": 20471 }, { "epoch": 0.5972343777349903, "grad_norm": 0.7961423523856973, "learning_rate": 3.6841279587445165e-06, "loss": 0.1369, "step": 20472 }, { "epoch": 0.5972635509656339, "grad_norm": 1.0075475240222815, "learning_rate": 3.6836721845824032e-06, "loss": 0.1452, "step": 20473 }, { "epoch": 0.5972927241962775, "grad_norm": 0.9706868154006772, "learning_rate": 3.6832164221721465e-06, "loss": 0.1286, "step": 20474 }, { "epoch": 0.5973218974269211, "grad_norm": 0.6751922044837719, "learning_rate": 3.682760671517823e-06, "loss": 0.1046, "step": 20475 }, { "epoch": 0.5973510706575647, "grad_norm": 0.9000148407740457, "learning_rate": 3.6823049326234963e-06, "loss": 0.1278, "step": 20476 }, { "epoch": 0.5973802438882082, "grad_norm": 1.144890886810307, "learning_rate": 3.6818492054932363e-06, "loss": 0.1329, "step": 20477 }, { "epoch": 0.5974094171188518, "grad_norm": 1.039175940067144, "learning_rate": 3.6813934901311134e-06, "loss": 0.1077, "step": 20478 }, { "epoch": 0.5974385903494953, "grad_norm": 0.7314679239854645, "learning_rate": 3.6809377865411933e-06, "loss": 0.1235, "step": 20479 }, { "epoch": 0.5974677635801389, "grad_norm": 0.8729613152525328, "learning_rate": 3.6804820947275444e-06, "loss": 0.1319, "step": 20480 }, { "epoch": 0.5974969368107824, "grad_norm": 0.785173835529341, "learning_rate": 3.680026414694238e-06, "loss": 0.1206, "step": 20481 }, { "epoch": 0.597526110041426, "grad_norm": 1.184177483031644, "learning_rate": 3.679570746445341e-06, "loss": 0.122, "step": 20482 }, { "epoch": 0.5975552832720695, "grad_norm": 1.1185632815041635, "learning_rate": 3.6791150899849215e-06, "loss": 0.1345, "step": 20483 }, { "epoch": 0.5975844565027131, "grad_norm": 0.9808900238407227, "learning_rate": 3.6786594453170467e-06, "loss": 0.1472, "step": 20484 }, { "epoch": 0.5976136297333566, "grad_norm": 0.8160310435880171, "learning_rate": 3.678203812445784e-06, "loss": 0.1449, "step": 20485 }, { "epoch": 0.5976428029640002, "grad_norm": 0.8488171059555756, "learning_rate": 3.677748191375202e-06, "loss": 0.1224, "step": 20486 }, { "epoch": 0.5976719761946439, "grad_norm": 0.7615093566664713, "learning_rate": 3.67729258210937e-06, "loss": 0.1262, "step": 20487 }, { "epoch": 0.5977011494252874, "grad_norm": 1.0336629755885731, "learning_rate": 3.6768369846523534e-06, "loss": 0.1148, "step": 20488 }, { "epoch": 0.597730322655931, "grad_norm": 0.9502916717067837, "learning_rate": 3.6763813990082205e-06, "loss": 0.1562, "step": 20489 }, { "epoch": 0.5977594958865745, "grad_norm": 0.9356259514588079, "learning_rate": 3.675925825181039e-06, "loss": 0.1532, "step": 20490 }, { "epoch": 0.5977886691172181, "grad_norm": 1.0665564011160675, "learning_rate": 3.675470263174875e-06, "loss": 0.1394, "step": 20491 }, { "epoch": 0.5978178423478616, "grad_norm": 0.8358045200038936, "learning_rate": 3.6750147129937954e-06, "loss": 0.1385, "step": 20492 }, { "epoch": 0.5978470155785052, "grad_norm": 0.6542279822197059, "learning_rate": 3.6745591746418687e-06, "loss": 0.1317, "step": 20493 }, { "epoch": 0.5978761888091487, "grad_norm": 0.7857079650083013, "learning_rate": 3.6741036481231618e-06, "loss": 0.1396, "step": 20494 }, { "epoch": 0.5979053620397923, "grad_norm": 0.9921570766625235, "learning_rate": 3.673648133441742e-06, "loss": 0.1661, "step": 20495 }, { "epoch": 0.5979345352704358, "grad_norm": 0.7417332212474258, "learning_rate": 3.673192630601673e-06, "loss": 0.1136, "step": 20496 }, { "epoch": 0.5979637085010794, "grad_norm": 0.8751843957608968, "learning_rate": 3.672737139607024e-06, "loss": 0.1314, "step": 20497 }, { "epoch": 0.5979928817317229, "grad_norm": 0.9085993574172754, "learning_rate": 3.6722816604618603e-06, "loss": 0.1469, "step": 20498 }, { "epoch": 0.5980220549623665, "grad_norm": 1.0359512202969643, "learning_rate": 3.6718261931702504e-06, "loss": 0.1243, "step": 20499 }, { "epoch": 0.59805122819301, "grad_norm": 0.95895574487634, "learning_rate": 3.6713707377362594e-06, "loss": 0.1032, "step": 20500 }, { "epoch": 0.5980804014236537, "grad_norm": 1.2815863745803195, "learning_rate": 3.6709152941639526e-06, "loss": 0.1241, "step": 20501 }, { "epoch": 0.5981095746542973, "grad_norm": 0.8772051140004502, "learning_rate": 3.6704598624573967e-06, "loss": 0.1327, "step": 20502 }, { "epoch": 0.5981387478849408, "grad_norm": 0.8344933951998865, "learning_rate": 3.670004442620659e-06, "loss": 0.1216, "step": 20503 }, { "epoch": 0.5981679211155844, "grad_norm": 0.9100034234769037, "learning_rate": 3.6695490346578007e-06, "loss": 0.1233, "step": 20504 }, { "epoch": 0.5981970943462279, "grad_norm": 0.7762673031814327, "learning_rate": 3.6690936385728943e-06, "loss": 0.102, "step": 20505 }, { "epoch": 0.5982262675768715, "grad_norm": 0.7129530037666477, "learning_rate": 3.668638254370001e-06, "loss": 0.1178, "step": 20506 }, { "epoch": 0.598255440807515, "grad_norm": 1.052894058234126, "learning_rate": 3.668182882053188e-06, "loss": 0.1162, "step": 20507 }, { "epoch": 0.5982846140381586, "grad_norm": 0.7718272057497901, "learning_rate": 3.667727521626521e-06, "loss": 0.1546, "step": 20508 }, { "epoch": 0.5983137872688021, "grad_norm": 0.821713021135635, "learning_rate": 3.667272173094063e-06, "loss": 0.1322, "step": 20509 }, { "epoch": 0.5983429604994457, "grad_norm": 0.8207207203971407, "learning_rate": 3.666816836459881e-06, "loss": 0.1062, "step": 20510 }, { "epoch": 0.5983721337300892, "grad_norm": 0.8450759428460285, "learning_rate": 3.6663615117280405e-06, "loss": 0.1352, "step": 20511 }, { "epoch": 0.5984013069607328, "grad_norm": 0.7236166763696078, "learning_rate": 3.6659061989026057e-06, "loss": 0.1138, "step": 20512 }, { "epoch": 0.5984304801913763, "grad_norm": 0.708566754879243, "learning_rate": 3.6654508979876433e-06, "loss": 0.1234, "step": 20513 }, { "epoch": 0.5984596534220199, "grad_norm": 0.8334590853831855, "learning_rate": 3.6649956089872163e-06, "loss": 0.1172, "step": 20514 }, { "epoch": 0.5984888266526636, "grad_norm": 0.9573498502570469, "learning_rate": 3.6645403319053885e-06, "loss": 0.1459, "step": 20515 }, { "epoch": 0.5985179998833071, "grad_norm": 0.9034634695812083, "learning_rate": 3.664085066746226e-06, "loss": 0.1428, "step": 20516 }, { "epoch": 0.5985471731139507, "grad_norm": 1.005677962838466, "learning_rate": 3.6636298135137945e-06, "loss": 0.1189, "step": 20517 }, { "epoch": 0.5985763463445942, "grad_norm": 0.8234564040175449, "learning_rate": 3.663174572212156e-06, "loss": 0.1008, "step": 20518 }, { "epoch": 0.5986055195752378, "grad_norm": 0.8627707595548206, "learning_rate": 3.6627193428453755e-06, "loss": 0.1269, "step": 20519 }, { "epoch": 0.5986346928058813, "grad_norm": 0.9130325309868798, "learning_rate": 3.6622641254175193e-06, "loss": 0.1239, "step": 20520 }, { "epoch": 0.5986638660365249, "grad_norm": 1.0175108021003532, "learning_rate": 3.6618089199326477e-06, "loss": 0.1027, "step": 20521 }, { "epoch": 0.5986930392671684, "grad_norm": 1.0057138131731775, "learning_rate": 3.661353726394826e-06, "loss": 0.122, "step": 20522 }, { "epoch": 0.598722212497812, "grad_norm": 0.9933031439251812, "learning_rate": 3.6608985448081204e-06, "loss": 0.1498, "step": 20523 }, { "epoch": 0.5987513857284555, "grad_norm": 0.8647899552852462, "learning_rate": 3.660443375176592e-06, "loss": 0.112, "step": 20524 }, { "epoch": 0.5987805589590991, "grad_norm": 0.7840596306832815, "learning_rate": 3.6599882175043074e-06, "loss": 0.1216, "step": 20525 }, { "epoch": 0.5988097321897426, "grad_norm": 0.8525807806613944, "learning_rate": 3.659533071795326e-06, "loss": 0.1341, "step": 20526 }, { "epoch": 0.5988389054203862, "grad_norm": 0.8781698906984328, "learning_rate": 3.659077938053714e-06, "loss": 0.1418, "step": 20527 }, { "epoch": 0.5988680786510299, "grad_norm": 0.9022492130214876, "learning_rate": 3.6586228162835326e-06, "loss": 0.1437, "step": 20528 }, { "epoch": 0.5988972518816734, "grad_norm": 0.6684622309029933, "learning_rate": 3.6581677064888476e-06, "loss": 0.1105, "step": 20529 }, { "epoch": 0.598926425112317, "grad_norm": 0.8036002649106786, "learning_rate": 3.6577126086737225e-06, "loss": 0.1027, "step": 20530 }, { "epoch": 0.5989555983429605, "grad_norm": 1.061772593087311, "learning_rate": 3.657257522842217e-06, "loss": 0.1421, "step": 20531 }, { "epoch": 0.5989847715736041, "grad_norm": 0.9983704893430387, "learning_rate": 3.6568024489983967e-06, "loss": 0.1146, "step": 20532 }, { "epoch": 0.5990139448042476, "grad_norm": 0.9157293895801865, "learning_rate": 3.6563473871463238e-06, "loss": 0.113, "step": 20533 }, { "epoch": 0.5990431180348912, "grad_norm": 0.8104014835120488, "learning_rate": 3.655892337290058e-06, "loss": 0.1291, "step": 20534 }, { "epoch": 0.5990722912655347, "grad_norm": 0.7866470717924987, "learning_rate": 3.6554372994336674e-06, "loss": 0.117, "step": 20535 }, { "epoch": 0.5991014644961783, "grad_norm": 0.7352142384835508, "learning_rate": 3.65498227358121e-06, "loss": 0.0957, "step": 20536 }, { "epoch": 0.5991306377268218, "grad_norm": 0.8191234449972371, "learning_rate": 3.6545272597367507e-06, "loss": 0.1209, "step": 20537 }, { "epoch": 0.5991598109574654, "grad_norm": 0.9268517631101401, "learning_rate": 3.654072257904352e-06, "loss": 0.146, "step": 20538 }, { "epoch": 0.599188984188109, "grad_norm": 0.8001956900343967, "learning_rate": 3.6536172680880732e-06, "loss": 0.1335, "step": 20539 }, { "epoch": 0.5992181574187525, "grad_norm": 0.7775463995139918, "learning_rate": 3.653162290291977e-06, "loss": 0.1082, "step": 20540 }, { "epoch": 0.5992473306493961, "grad_norm": 0.7418387145631792, "learning_rate": 3.652707324520127e-06, "loss": 0.0965, "step": 20541 }, { "epoch": 0.5992765038800397, "grad_norm": 0.8001930971560922, "learning_rate": 3.6522523707765856e-06, "loss": 0.1274, "step": 20542 }, { "epoch": 0.5993056771106833, "grad_norm": 0.7296196533427095, "learning_rate": 3.6517974290654136e-06, "loss": 0.122, "step": 20543 }, { "epoch": 0.5993348503413268, "grad_norm": 0.8544467595532104, "learning_rate": 3.6513424993906717e-06, "loss": 0.1318, "step": 20544 }, { "epoch": 0.5993640235719704, "grad_norm": 0.9430329215346632, "learning_rate": 3.6508875817564214e-06, "loss": 0.146, "step": 20545 }, { "epoch": 0.5993931968026139, "grad_norm": 0.6930544020153473, "learning_rate": 3.6504326761667242e-06, "loss": 0.1126, "step": 20546 }, { "epoch": 0.5994223700332575, "grad_norm": 0.881379971953356, "learning_rate": 3.6499777826256434e-06, "loss": 0.1334, "step": 20547 }, { "epoch": 0.599451543263901, "grad_norm": 1.0946395253591987, "learning_rate": 3.649522901137238e-06, "loss": 0.1444, "step": 20548 }, { "epoch": 0.5994807164945446, "grad_norm": 0.9284954408650453, "learning_rate": 3.64906803170557e-06, "loss": 0.1341, "step": 20549 }, { "epoch": 0.5995098897251881, "grad_norm": 0.6966842165615734, "learning_rate": 3.6486131743347007e-06, "loss": 0.114, "step": 20550 }, { "epoch": 0.5995390629558317, "grad_norm": 0.8204866663581304, "learning_rate": 3.6481583290286894e-06, "loss": 0.1153, "step": 20551 }, { "epoch": 0.5995682361864753, "grad_norm": 1.21413720074367, "learning_rate": 3.647703495791597e-06, "loss": 0.1387, "step": 20552 }, { "epoch": 0.5995974094171188, "grad_norm": 0.8178389200924865, "learning_rate": 3.647248674627486e-06, "loss": 0.1109, "step": 20553 }, { "epoch": 0.5996265826477624, "grad_norm": 0.7348694448942786, "learning_rate": 3.6467938655404155e-06, "loss": 0.1168, "step": 20554 }, { "epoch": 0.599655755878406, "grad_norm": 0.7679002210842868, "learning_rate": 3.646339068534448e-06, "loss": 0.1152, "step": 20555 }, { "epoch": 0.5996849291090496, "grad_norm": 1.3187101764265343, "learning_rate": 3.645884283613641e-06, "loss": 0.122, "step": 20556 }, { "epoch": 0.5997141023396931, "grad_norm": 0.9132659570266782, "learning_rate": 3.6454295107820557e-06, "loss": 0.1006, "step": 20557 }, { "epoch": 0.5997432755703367, "grad_norm": 0.7024967893104602, "learning_rate": 3.6449747500437517e-06, "loss": 0.1206, "step": 20558 }, { "epoch": 0.5997724488009802, "grad_norm": 0.7835023542497151, "learning_rate": 3.64452000140279e-06, "loss": 0.1084, "step": 20559 }, { "epoch": 0.5998016220316238, "grad_norm": 0.891525483631285, "learning_rate": 3.6440652648632314e-06, "loss": 0.1001, "step": 20560 }, { "epoch": 0.5998307952622673, "grad_norm": 1.001478933931435, "learning_rate": 3.6436105404291334e-06, "loss": 0.1267, "step": 20561 }, { "epoch": 0.5998599684929109, "grad_norm": 0.8148258392322149, "learning_rate": 3.643155828104557e-06, "loss": 0.1179, "step": 20562 }, { "epoch": 0.5998891417235545, "grad_norm": 0.9157434882122585, "learning_rate": 3.642701127893562e-06, "loss": 0.1093, "step": 20563 }, { "epoch": 0.599918314954198, "grad_norm": 0.8773445146079626, "learning_rate": 3.6422464398002044e-06, "loss": 0.1163, "step": 20564 }, { "epoch": 0.5999474881848416, "grad_norm": 0.8994101692166602, "learning_rate": 3.6417917638285497e-06, "loss": 0.1216, "step": 20565 }, { "epoch": 0.5999766614154851, "grad_norm": 0.816099564080936, "learning_rate": 3.641337099982653e-06, "loss": 0.1395, "step": 20566 }, { "epoch": 0.6000058346461287, "grad_norm": 0.8183404442908826, "learning_rate": 3.6408824482665744e-06, "loss": 0.1194, "step": 20567 }, { "epoch": 0.6000350078767722, "grad_norm": 1.1391397282549864, "learning_rate": 3.640427808684374e-06, "loss": 0.1339, "step": 20568 }, { "epoch": 0.6000641811074159, "grad_norm": 0.829170338175808, "learning_rate": 3.639973181240108e-06, "loss": 0.1085, "step": 20569 }, { "epoch": 0.6000933543380594, "grad_norm": 0.7001107993042314, "learning_rate": 3.6395185659378357e-06, "loss": 0.1108, "step": 20570 }, { "epoch": 0.600122527568703, "grad_norm": 0.9894333483873435, "learning_rate": 3.6390639627816182e-06, "loss": 0.1252, "step": 20571 }, { "epoch": 0.6001517007993465, "grad_norm": 0.6801080708384538, "learning_rate": 3.638609371775512e-06, "loss": 0.1192, "step": 20572 }, { "epoch": 0.6001808740299901, "grad_norm": 0.8665406353231355, "learning_rate": 3.638154792923578e-06, "loss": 0.1328, "step": 20573 }, { "epoch": 0.6002100472606337, "grad_norm": 0.7497171878377388, "learning_rate": 3.6377002262298726e-06, "loss": 0.1121, "step": 20574 }, { "epoch": 0.6002392204912772, "grad_norm": 0.8700510828837275, "learning_rate": 3.637245671698454e-06, "loss": 0.1441, "step": 20575 }, { "epoch": 0.6002683937219208, "grad_norm": 0.8163394810681037, "learning_rate": 3.636791129333379e-06, "loss": 0.135, "step": 20576 }, { "epoch": 0.6002975669525643, "grad_norm": 0.8931486550626689, "learning_rate": 3.6363365991387102e-06, "loss": 0.1371, "step": 20577 }, { "epoch": 0.6003267401832079, "grad_norm": 0.8836632685717212, "learning_rate": 3.6358820811185015e-06, "loss": 0.1207, "step": 20578 }, { "epoch": 0.6003559134138514, "grad_norm": 0.6778682316577336, "learning_rate": 3.6354275752768114e-06, "loss": 0.1073, "step": 20579 }, { "epoch": 0.600385086644495, "grad_norm": 0.7449842815807373, "learning_rate": 3.6349730816176996e-06, "loss": 0.1326, "step": 20580 }, { "epoch": 0.6004142598751385, "grad_norm": 0.9601133893833005, "learning_rate": 3.6345186001452215e-06, "loss": 0.1359, "step": 20581 }, { "epoch": 0.6004434331057822, "grad_norm": 0.7274108088868823, "learning_rate": 3.634064130863434e-06, "loss": 0.1064, "step": 20582 }, { "epoch": 0.6004726063364257, "grad_norm": 0.7576625059967385, "learning_rate": 3.6336096737763964e-06, "loss": 0.1441, "step": 20583 }, { "epoch": 0.6005017795670693, "grad_norm": 0.9506553708409862, "learning_rate": 3.633155228888166e-06, "loss": 0.1605, "step": 20584 }, { "epoch": 0.6005309527977128, "grad_norm": 1.045133038241358, "learning_rate": 3.6327007962028003e-06, "loss": 0.1532, "step": 20585 }, { "epoch": 0.6005601260283564, "grad_norm": 0.8065056763539316, "learning_rate": 3.6322463757243554e-06, "loss": 0.1353, "step": 20586 }, { "epoch": 0.600589299259, "grad_norm": 0.7090430509265934, "learning_rate": 3.631791967456887e-06, "loss": 0.1425, "step": 20587 }, { "epoch": 0.6006184724896435, "grad_norm": 0.9132580952968826, "learning_rate": 3.631337571404453e-06, "loss": 0.1194, "step": 20588 }, { "epoch": 0.6006476457202871, "grad_norm": 0.9152231027887684, "learning_rate": 3.6308831875711115e-06, "loss": 0.1243, "step": 20589 }, { "epoch": 0.6006768189509306, "grad_norm": 1.219974233729985, "learning_rate": 3.6304288159609187e-06, "loss": 0.1263, "step": 20590 }, { "epoch": 0.6007059921815742, "grad_norm": 0.7423868034816976, "learning_rate": 3.6299744565779294e-06, "loss": 0.1174, "step": 20591 }, { "epoch": 0.6007351654122177, "grad_norm": 0.9292507963557238, "learning_rate": 3.6295201094262013e-06, "loss": 0.1253, "step": 20592 }, { "epoch": 0.6007643386428613, "grad_norm": 1.0896664292755547, "learning_rate": 3.6290657745097917e-06, "loss": 0.1371, "step": 20593 }, { "epoch": 0.6007935118735048, "grad_norm": 0.808847616529553, "learning_rate": 3.628611451832752e-06, "loss": 0.1339, "step": 20594 }, { "epoch": 0.6008226851041484, "grad_norm": 0.7392848472867324, "learning_rate": 3.6281571413991458e-06, "loss": 0.1236, "step": 20595 }, { "epoch": 0.600851858334792, "grad_norm": 0.9715254412626044, "learning_rate": 3.6277028432130235e-06, "loss": 0.111, "step": 20596 }, { "epoch": 0.6008810315654356, "grad_norm": 0.9161313724467532, "learning_rate": 3.6272485572784426e-06, "loss": 0.1095, "step": 20597 }, { "epoch": 0.6009102047960792, "grad_norm": 1.1380091667837064, "learning_rate": 3.6267942835994607e-06, "loss": 0.1086, "step": 20598 }, { "epoch": 0.6009393780267227, "grad_norm": 0.7336744295507781, "learning_rate": 3.6263400221801292e-06, "loss": 0.1217, "step": 20599 }, { "epoch": 0.6009685512573663, "grad_norm": 0.7615605502921807, "learning_rate": 3.625885773024506e-06, "loss": 0.1268, "step": 20600 }, { "epoch": 0.6009977244880098, "grad_norm": 0.7294107060867674, "learning_rate": 3.6254315361366477e-06, "loss": 0.1173, "step": 20601 }, { "epoch": 0.6010268977186534, "grad_norm": 0.6880247447368646, "learning_rate": 3.6249773115206085e-06, "loss": 0.1162, "step": 20602 }, { "epoch": 0.6010560709492969, "grad_norm": 0.8659064709018958, "learning_rate": 3.624523099180444e-06, "loss": 0.11, "step": 20603 }, { "epoch": 0.6010852441799405, "grad_norm": 0.9030016504160433, "learning_rate": 3.6240688991202085e-06, "loss": 0.1214, "step": 20604 }, { "epoch": 0.601114417410584, "grad_norm": 0.6965923949101781, "learning_rate": 3.623614711343957e-06, "loss": 0.1261, "step": 20605 }, { "epoch": 0.6011435906412276, "grad_norm": 0.8626239518223667, "learning_rate": 3.6231605358557442e-06, "loss": 0.1231, "step": 20606 }, { "epoch": 0.6011727638718711, "grad_norm": 1.1416879445210433, "learning_rate": 3.622706372659627e-06, "loss": 0.1196, "step": 20607 }, { "epoch": 0.6012019371025147, "grad_norm": 0.7884215630810347, "learning_rate": 3.622252221759658e-06, "loss": 0.1346, "step": 20608 }, { "epoch": 0.6012311103331583, "grad_norm": 0.794585350162656, "learning_rate": 3.621798083159892e-06, "loss": 0.1181, "step": 20609 }, { "epoch": 0.6012602835638019, "grad_norm": 0.9790855397077731, "learning_rate": 3.621343956864385e-06, "loss": 0.1111, "step": 20610 }, { "epoch": 0.6012894567944455, "grad_norm": 0.7805984597896618, "learning_rate": 3.6208898428771887e-06, "loss": 0.1342, "step": 20611 }, { "epoch": 0.601318630025089, "grad_norm": 1.0142421179477181, "learning_rate": 3.620435741202357e-06, "loss": 0.1415, "step": 20612 }, { "epoch": 0.6013478032557326, "grad_norm": 1.05410386310876, "learning_rate": 3.6199816518439477e-06, "loss": 0.1566, "step": 20613 }, { "epoch": 0.6013769764863761, "grad_norm": 1.1189031920338837, "learning_rate": 3.6195275748060125e-06, "loss": 0.1252, "step": 20614 }, { "epoch": 0.6014061497170197, "grad_norm": 1.0135170322610298, "learning_rate": 3.6190735100926066e-06, "loss": 0.1434, "step": 20615 }, { "epoch": 0.6014353229476632, "grad_norm": 1.0868049012424301, "learning_rate": 3.6186194577077817e-06, "loss": 0.1402, "step": 20616 }, { "epoch": 0.6014644961783068, "grad_norm": 0.8598214788839132, "learning_rate": 3.6181654176555927e-06, "loss": 0.1119, "step": 20617 }, { "epoch": 0.6014936694089503, "grad_norm": 0.7153551213611583, "learning_rate": 3.6177113899400916e-06, "loss": 0.1075, "step": 20618 }, { "epoch": 0.6015228426395939, "grad_norm": 0.7031827095581438, "learning_rate": 3.617257374565335e-06, "loss": 0.1181, "step": 20619 }, { "epoch": 0.6015520158702374, "grad_norm": 1.2676887347710046, "learning_rate": 3.6168033715353747e-06, "loss": 0.1282, "step": 20620 }, { "epoch": 0.601581189100881, "grad_norm": 0.9495079511916384, "learning_rate": 3.6163493808542628e-06, "loss": 0.1189, "step": 20621 }, { "epoch": 0.6016103623315245, "grad_norm": 0.7168617426368551, "learning_rate": 3.6158954025260532e-06, "loss": 0.1209, "step": 20622 }, { "epoch": 0.6016395355621682, "grad_norm": 0.6744299998705775, "learning_rate": 3.6154414365548008e-06, "loss": 0.1355, "step": 20623 }, { "epoch": 0.6016687087928118, "grad_norm": 1.3458883116510916, "learning_rate": 3.614987482944553e-06, "loss": 0.1359, "step": 20624 }, { "epoch": 0.6016978820234553, "grad_norm": 1.1403406960405245, "learning_rate": 3.61453354169937e-06, "loss": 0.13, "step": 20625 }, { "epoch": 0.6017270552540989, "grad_norm": 0.8954812012347784, "learning_rate": 3.614079612823299e-06, "loss": 0.1182, "step": 20626 }, { "epoch": 0.6017562284847424, "grad_norm": 0.755592231970031, "learning_rate": 3.613625696320394e-06, "loss": 0.14, "step": 20627 }, { "epoch": 0.601785401715386, "grad_norm": 0.6368978214706259, "learning_rate": 3.61317179219471e-06, "loss": 0.1177, "step": 20628 }, { "epoch": 0.6018145749460295, "grad_norm": 0.8581482083463947, "learning_rate": 3.6127179004502953e-06, "loss": 0.161, "step": 20629 }, { "epoch": 0.6018437481766731, "grad_norm": 1.0527574505587558, "learning_rate": 3.6122640210912042e-06, "loss": 0.141, "step": 20630 }, { "epoch": 0.6018729214073166, "grad_norm": 0.9176333557272351, "learning_rate": 3.6118101541214887e-06, "loss": 0.1406, "step": 20631 }, { "epoch": 0.6019020946379602, "grad_norm": 0.8515930027778424, "learning_rate": 3.611356299545201e-06, "loss": 0.109, "step": 20632 }, { "epoch": 0.6019312678686037, "grad_norm": 0.7663183498739945, "learning_rate": 3.6109024573663938e-06, "loss": 0.1287, "step": 20633 }, { "epoch": 0.6019604410992473, "grad_norm": 0.7971329176308535, "learning_rate": 3.6104486275891166e-06, "loss": 0.1398, "step": 20634 }, { "epoch": 0.6019896143298908, "grad_norm": 0.847177107999471, "learning_rate": 3.609994810217422e-06, "loss": 0.127, "step": 20635 }, { "epoch": 0.6020187875605345, "grad_norm": 0.8145235229946574, "learning_rate": 3.6095410052553613e-06, "loss": 0.1334, "step": 20636 }, { "epoch": 0.6020479607911781, "grad_norm": 0.9353061638939351, "learning_rate": 3.609087212706989e-06, "loss": 0.142, "step": 20637 }, { "epoch": 0.6020771340218216, "grad_norm": 0.8241346742955289, "learning_rate": 3.6086334325763528e-06, "loss": 0.1181, "step": 20638 }, { "epoch": 0.6021063072524652, "grad_norm": 1.517520638230361, "learning_rate": 3.608179664867505e-06, "loss": 0.1112, "step": 20639 }, { "epoch": 0.6021354804831087, "grad_norm": 0.8251323222826907, "learning_rate": 3.607725909584498e-06, "loss": 0.1191, "step": 20640 }, { "epoch": 0.6021646537137523, "grad_norm": 0.8794529914077927, "learning_rate": 3.6072721667313806e-06, "loss": 0.1348, "step": 20641 }, { "epoch": 0.6021938269443958, "grad_norm": 0.8467028458750899, "learning_rate": 3.606818436312204e-06, "loss": 0.1301, "step": 20642 }, { "epoch": 0.6022230001750394, "grad_norm": 0.8023789917132966, "learning_rate": 3.606364718331021e-06, "loss": 0.1255, "step": 20643 }, { "epoch": 0.6022521734056829, "grad_norm": 0.9670951006226213, "learning_rate": 3.6059110127918807e-06, "loss": 0.1549, "step": 20644 }, { "epoch": 0.6022813466363265, "grad_norm": 0.8424118195840142, "learning_rate": 3.605457319698835e-06, "loss": 0.1594, "step": 20645 }, { "epoch": 0.60231051986697, "grad_norm": 0.830922376239889, "learning_rate": 3.605003639055933e-06, "loss": 0.1099, "step": 20646 }, { "epoch": 0.6023396930976136, "grad_norm": 0.7870205949639671, "learning_rate": 3.604549970867225e-06, "loss": 0.1034, "step": 20647 }, { "epoch": 0.6023688663282571, "grad_norm": 0.9793539706991589, "learning_rate": 3.604096315136761e-06, "loss": 0.1329, "step": 20648 }, { "epoch": 0.6023980395589007, "grad_norm": 0.7572672185471363, "learning_rate": 3.6036426718685925e-06, "loss": 0.1444, "step": 20649 }, { "epoch": 0.6024272127895444, "grad_norm": 0.7233964453635535, "learning_rate": 3.6031890410667704e-06, "loss": 0.116, "step": 20650 }, { "epoch": 0.6024563860201879, "grad_norm": 1.1149264391767628, "learning_rate": 3.6027354227353417e-06, "loss": 0.1433, "step": 20651 }, { "epoch": 0.6024855592508315, "grad_norm": 1.0212343659979026, "learning_rate": 3.602281816878358e-06, "loss": 0.1209, "step": 20652 }, { "epoch": 0.602514732481475, "grad_norm": 1.2270577799485092, "learning_rate": 3.6018282234998693e-06, "loss": 0.1236, "step": 20653 }, { "epoch": 0.6025439057121186, "grad_norm": 0.8697683972920066, "learning_rate": 3.601374642603921e-06, "loss": 0.1274, "step": 20654 }, { "epoch": 0.6025730789427621, "grad_norm": 0.932449916295231, "learning_rate": 3.60092107419457e-06, "loss": 0.1233, "step": 20655 }, { "epoch": 0.6026022521734057, "grad_norm": 1.1722804030568308, "learning_rate": 3.6004675182758598e-06, "loss": 0.1262, "step": 20656 }, { "epoch": 0.6026314254040492, "grad_norm": 1.1566340777759807, "learning_rate": 3.600013974851842e-06, "loss": 0.109, "step": 20657 }, { "epoch": 0.6026605986346928, "grad_norm": 0.8125755871923773, "learning_rate": 3.5995604439265664e-06, "loss": 0.1371, "step": 20658 }, { "epoch": 0.6026897718653363, "grad_norm": 1.351873782568918, "learning_rate": 3.599106925504079e-06, "loss": 0.1079, "step": 20659 }, { "epoch": 0.6027189450959799, "grad_norm": 1.0619854616517461, "learning_rate": 3.5986534195884305e-06, "loss": 0.1273, "step": 20660 }, { "epoch": 0.6027481183266234, "grad_norm": 1.3283689024383327, "learning_rate": 3.59819992618367e-06, "loss": 0.1256, "step": 20661 }, { "epoch": 0.602777291557267, "grad_norm": 0.8898156437098708, "learning_rate": 3.597746445293846e-06, "loss": 0.1245, "step": 20662 }, { "epoch": 0.6028064647879107, "grad_norm": 1.1695808649292871, "learning_rate": 3.597292976923008e-06, "loss": 0.1288, "step": 20663 }, { "epoch": 0.6028356380185542, "grad_norm": 0.8282942467785808, "learning_rate": 3.5968395210752027e-06, "loss": 0.1078, "step": 20664 }, { "epoch": 0.6028648112491978, "grad_norm": 0.9183036143669286, "learning_rate": 3.5963860777544796e-06, "loss": 0.1576, "step": 20665 }, { "epoch": 0.6028939844798413, "grad_norm": 1.0451332960441726, "learning_rate": 3.5959326469648847e-06, "loss": 0.1096, "step": 20666 }, { "epoch": 0.6029231577104849, "grad_norm": 0.7834724585353079, "learning_rate": 3.5954792287104707e-06, "loss": 0.1352, "step": 20667 }, { "epoch": 0.6029523309411284, "grad_norm": 0.7969454085436715, "learning_rate": 3.5950258229952817e-06, "loss": 0.121, "step": 20668 }, { "epoch": 0.602981504171772, "grad_norm": 1.2529372948763293, "learning_rate": 3.5945724298233665e-06, "loss": 0.1425, "step": 20669 }, { "epoch": 0.6030106774024155, "grad_norm": 1.192656878933679, "learning_rate": 3.5941190491987745e-06, "loss": 0.1222, "step": 20670 }, { "epoch": 0.6030398506330591, "grad_norm": 0.6172278253839465, "learning_rate": 3.5936656811255484e-06, "loss": 0.0992, "step": 20671 }, { "epoch": 0.6030690238637026, "grad_norm": 0.9249651754572804, "learning_rate": 3.593212325607742e-06, "loss": 0.1312, "step": 20672 }, { "epoch": 0.6030981970943462, "grad_norm": 1.2429406023273408, "learning_rate": 3.5927589826494005e-06, "loss": 0.1326, "step": 20673 }, { "epoch": 0.6031273703249898, "grad_norm": 1.107180675293284, "learning_rate": 3.5923056522545703e-06, "loss": 0.1209, "step": 20674 }, { "epoch": 0.6031565435556333, "grad_norm": 0.9657972443333933, "learning_rate": 3.5918523344272997e-06, "loss": 0.1687, "step": 20675 }, { "epoch": 0.6031857167862769, "grad_norm": 1.3548453125253543, "learning_rate": 3.591399029171635e-06, "loss": 0.1263, "step": 20676 }, { "epoch": 0.6032148900169205, "grad_norm": 1.0409323225286617, "learning_rate": 3.5909457364916223e-06, "loss": 0.1313, "step": 20677 }, { "epoch": 0.6032440632475641, "grad_norm": 0.8592680051229966, "learning_rate": 3.59049245639131e-06, "loss": 0.1442, "step": 20678 }, { "epoch": 0.6032732364782076, "grad_norm": 1.1466818044190623, "learning_rate": 3.5900391888747455e-06, "loss": 0.1239, "step": 20679 }, { "epoch": 0.6033024097088512, "grad_norm": 1.1531244828518774, "learning_rate": 3.5895859339459753e-06, "loss": 0.1162, "step": 20680 }, { "epoch": 0.6033315829394947, "grad_norm": 0.9372895602273493, "learning_rate": 3.589132691609044e-06, "loss": 0.1061, "step": 20681 }, { "epoch": 0.6033607561701383, "grad_norm": 0.8773942916931872, "learning_rate": 3.588679461868e-06, "loss": 0.1228, "step": 20682 }, { "epoch": 0.6033899294007818, "grad_norm": 0.8371494965905516, "learning_rate": 3.5882262447268865e-06, "loss": 0.1364, "step": 20683 }, { "epoch": 0.6034191026314254, "grad_norm": 1.1130096353059873, "learning_rate": 3.587773040189754e-06, "loss": 0.1158, "step": 20684 }, { "epoch": 0.603448275862069, "grad_norm": 0.8665302231987151, "learning_rate": 3.5873198482606477e-06, "loss": 0.1057, "step": 20685 }, { "epoch": 0.6034774490927125, "grad_norm": 0.7844004690780885, "learning_rate": 3.586866668943611e-06, "loss": 0.1166, "step": 20686 }, { "epoch": 0.6035066223233561, "grad_norm": 1.2263462758872938, "learning_rate": 3.5864135022426916e-06, "loss": 0.1347, "step": 20687 }, { "epoch": 0.6035357955539996, "grad_norm": 1.0266241126485882, "learning_rate": 3.585960348161936e-06, "loss": 0.1182, "step": 20688 }, { "epoch": 0.6035649687846432, "grad_norm": 0.8003543521389126, "learning_rate": 3.585507206705386e-06, "loss": 0.1274, "step": 20689 }, { "epoch": 0.6035941420152868, "grad_norm": 0.948863701856177, "learning_rate": 3.5850540778770924e-06, "loss": 0.1154, "step": 20690 }, { "epoch": 0.6036233152459304, "grad_norm": 0.7486068958601275, "learning_rate": 3.5846009616810983e-06, "loss": 0.131, "step": 20691 }, { "epoch": 0.6036524884765739, "grad_norm": 0.7833005345713987, "learning_rate": 3.5841478581214483e-06, "loss": 0.1153, "step": 20692 }, { "epoch": 0.6036816617072175, "grad_norm": 0.6709724665651245, "learning_rate": 3.583694767202189e-06, "loss": 0.097, "step": 20693 }, { "epoch": 0.603710834937861, "grad_norm": 0.8574087692321917, "learning_rate": 3.583241688927364e-06, "loss": 0.1284, "step": 20694 }, { "epoch": 0.6037400081685046, "grad_norm": 1.0161858743358603, "learning_rate": 3.582788623301018e-06, "loss": 0.132, "step": 20695 }, { "epoch": 0.6037691813991481, "grad_norm": 1.0026121346936068, "learning_rate": 3.582335570327198e-06, "loss": 0.1242, "step": 20696 }, { "epoch": 0.6037983546297917, "grad_norm": 0.7721155276758169, "learning_rate": 3.581882530009948e-06, "loss": 0.14, "step": 20697 }, { "epoch": 0.6038275278604353, "grad_norm": 0.7625042643757383, "learning_rate": 3.581429502353312e-06, "loss": 0.1484, "step": 20698 }, { "epoch": 0.6038567010910788, "grad_norm": 0.8184795323443229, "learning_rate": 3.580976487361334e-06, "loss": 0.1267, "step": 20699 }, { "epoch": 0.6038858743217224, "grad_norm": 0.7152635396360302, "learning_rate": 3.580523485038061e-06, "loss": 0.1052, "step": 20700 }, { "epoch": 0.6039150475523659, "grad_norm": 0.7071465680886245, "learning_rate": 3.580070495387532e-06, "loss": 0.1208, "step": 20701 }, { "epoch": 0.6039442207830095, "grad_norm": 1.0458616600763395, "learning_rate": 3.579617518413798e-06, "loss": 0.1493, "step": 20702 }, { "epoch": 0.603973394013653, "grad_norm": 0.7926599590148983, "learning_rate": 3.579164554120898e-06, "loss": 0.1176, "step": 20703 }, { "epoch": 0.6040025672442967, "grad_norm": 0.7180877823910206, "learning_rate": 3.578711602512878e-06, "loss": 0.1346, "step": 20704 }, { "epoch": 0.6040317404749402, "grad_norm": 0.731005048302328, "learning_rate": 3.5782586635937834e-06, "loss": 0.142, "step": 20705 }, { "epoch": 0.6040609137055838, "grad_norm": 0.9452837656329977, "learning_rate": 3.577805737367654e-06, "loss": 0.1396, "step": 20706 }, { "epoch": 0.6040900869362273, "grad_norm": 0.6820508516041044, "learning_rate": 3.5773528238385346e-06, "loss": 0.102, "step": 20707 }, { "epoch": 0.6041192601668709, "grad_norm": 0.80347364286241, "learning_rate": 3.5768999230104704e-06, "loss": 0.1087, "step": 20708 }, { "epoch": 0.6041484333975145, "grad_norm": 0.9430438813368315, "learning_rate": 3.5764470348875045e-06, "loss": 0.1391, "step": 20709 }, { "epoch": 0.604177606628158, "grad_norm": 0.926409377536277, "learning_rate": 3.57599415947368e-06, "loss": 0.1539, "step": 20710 }, { "epoch": 0.6042067798588016, "grad_norm": 0.7496252817836285, "learning_rate": 3.5755412967730397e-06, "loss": 0.1193, "step": 20711 }, { "epoch": 0.6042359530894451, "grad_norm": 1.0412874650331714, "learning_rate": 3.5750884467896262e-06, "loss": 0.1699, "step": 20712 }, { "epoch": 0.6042651263200887, "grad_norm": 1.0497410722901828, "learning_rate": 3.5746356095274817e-06, "loss": 0.1343, "step": 20713 }, { "epoch": 0.6042942995507322, "grad_norm": 0.9605813323763668, "learning_rate": 3.5741827849906514e-06, "loss": 0.11, "step": 20714 }, { "epoch": 0.6043234727813758, "grad_norm": 1.075380391109725, "learning_rate": 3.5737299731831776e-06, "loss": 0.1755, "step": 20715 }, { "epoch": 0.6043526460120193, "grad_norm": 0.815662823665873, "learning_rate": 3.5732771741091014e-06, "loss": 0.1267, "step": 20716 }, { "epoch": 0.604381819242663, "grad_norm": 1.0608945222955084, "learning_rate": 3.572824387772466e-06, "loss": 0.1316, "step": 20717 }, { "epoch": 0.6044109924733065, "grad_norm": 1.3495815301820906, "learning_rate": 3.5723716141773145e-06, "loss": 0.1109, "step": 20718 }, { "epoch": 0.6044401657039501, "grad_norm": 0.7443500837028708, "learning_rate": 3.5719188533276854e-06, "loss": 0.1258, "step": 20719 }, { "epoch": 0.6044693389345936, "grad_norm": 1.032691015903389, "learning_rate": 3.571466105227627e-06, "loss": 0.1204, "step": 20720 }, { "epoch": 0.6044985121652372, "grad_norm": 0.6986654255922213, "learning_rate": 3.5710133698811776e-06, "loss": 0.1076, "step": 20721 }, { "epoch": 0.6045276853958808, "grad_norm": 0.8930889899932719, "learning_rate": 3.570560647292379e-06, "loss": 0.1329, "step": 20722 }, { "epoch": 0.6045568586265243, "grad_norm": 0.8177811896482285, "learning_rate": 3.570107937465276e-06, "loss": 0.1199, "step": 20723 }, { "epoch": 0.6045860318571679, "grad_norm": 0.9547362573044593, "learning_rate": 3.5696552404039053e-06, "loss": 0.1166, "step": 20724 }, { "epoch": 0.6046152050878114, "grad_norm": 0.6963679729761292, "learning_rate": 3.569202556112311e-06, "loss": 0.1024, "step": 20725 }, { "epoch": 0.604644378318455, "grad_norm": 1.3464318442173022, "learning_rate": 3.5687498845945357e-06, "loss": 0.178, "step": 20726 }, { "epoch": 0.6046735515490985, "grad_norm": 0.740158816183777, "learning_rate": 3.5682972258546213e-06, "loss": 0.1164, "step": 20727 }, { "epoch": 0.6047027247797421, "grad_norm": 0.957911953830772, "learning_rate": 3.5678445798966055e-06, "loss": 0.1188, "step": 20728 }, { "epoch": 0.6047318980103856, "grad_norm": 1.2051437086304853, "learning_rate": 3.567391946724532e-06, "loss": 0.1162, "step": 20729 }, { "epoch": 0.6047610712410292, "grad_norm": 0.9796458679730979, "learning_rate": 3.5669393263424417e-06, "loss": 0.1317, "step": 20730 }, { "epoch": 0.6047902444716728, "grad_norm": 0.8184650639055017, "learning_rate": 3.566486718754372e-06, "loss": 0.1133, "step": 20731 }, { "epoch": 0.6048194177023164, "grad_norm": 0.6924939754626013, "learning_rate": 3.5660341239643703e-06, "loss": 0.1043, "step": 20732 }, { "epoch": 0.60484859093296, "grad_norm": 0.7927609320462828, "learning_rate": 3.5655815419764724e-06, "loss": 0.1324, "step": 20733 }, { "epoch": 0.6048777641636035, "grad_norm": 0.8222092239598024, "learning_rate": 3.56512897279472e-06, "loss": 0.1478, "step": 20734 }, { "epoch": 0.6049069373942471, "grad_norm": 0.8377589663943582, "learning_rate": 3.564676416423154e-06, "loss": 0.1253, "step": 20735 }, { "epoch": 0.6049361106248906, "grad_norm": 0.877026665903272, "learning_rate": 3.564223872865814e-06, "loss": 0.1367, "step": 20736 }, { "epoch": 0.6049652838555342, "grad_norm": 0.774475699720562, "learning_rate": 3.5637713421267395e-06, "loss": 0.1366, "step": 20737 }, { "epoch": 0.6049944570861777, "grad_norm": 0.9174580895118168, "learning_rate": 3.5633188242099726e-06, "loss": 0.1591, "step": 20738 }, { "epoch": 0.6050236303168213, "grad_norm": 0.9322027571763905, "learning_rate": 3.5628663191195525e-06, "loss": 0.1127, "step": 20739 }, { "epoch": 0.6050528035474648, "grad_norm": 0.6558769605593633, "learning_rate": 3.5624138268595186e-06, "loss": 0.1198, "step": 20740 }, { "epoch": 0.6050819767781084, "grad_norm": 0.8040920378513927, "learning_rate": 3.561961347433911e-06, "loss": 0.1484, "step": 20741 }, { "epoch": 0.6051111500087519, "grad_norm": 0.8918910970278644, "learning_rate": 3.5615088808467692e-06, "loss": 0.1291, "step": 20742 }, { "epoch": 0.6051403232393955, "grad_norm": 0.861320704932758, "learning_rate": 3.5610564271021315e-06, "loss": 0.1029, "step": 20743 }, { "epoch": 0.605169496470039, "grad_norm": 0.8195312237738975, "learning_rate": 3.5606039862040398e-06, "loss": 0.1459, "step": 20744 }, { "epoch": 0.6051986697006827, "grad_norm": 0.691934009545335, "learning_rate": 3.5601515581565326e-06, "loss": 0.1178, "step": 20745 }, { "epoch": 0.6052278429313263, "grad_norm": 0.8664220212506307, "learning_rate": 3.5596991429636474e-06, "loss": 0.1155, "step": 20746 }, { "epoch": 0.6052570161619698, "grad_norm": 0.769249132744937, "learning_rate": 3.559246740629425e-06, "loss": 0.1319, "step": 20747 }, { "epoch": 0.6052861893926134, "grad_norm": 0.7613625846478836, "learning_rate": 3.558794351157905e-06, "loss": 0.1316, "step": 20748 }, { "epoch": 0.6053153626232569, "grad_norm": 0.8746338487270799, "learning_rate": 3.558341974553122e-06, "loss": 0.1208, "step": 20749 }, { "epoch": 0.6053445358539005, "grad_norm": 0.7576457626868381, "learning_rate": 3.5578896108191195e-06, "loss": 0.138, "step": 20750 }, { "epoch": 0.605373709084544, "grad_norm": 0.827542553441471, "learning_rate": 3.5574372599599337e-06, "loss": 0.1222, "step": 20751 }, { "epoch": 0.6054028823151876, "grad_norm": 0.923101206094693, "learning_rate": 3.5569849219796044e-06, "loss": 0.1272, "step": 20752 }, { "epoch": 0.6054320555458311, "grad_norm": 0.8517156793731273, "learning_rate": 3.5565325968821694e-06, "loss": 0.1253, "step": 20753 }, { "epoch": 0.6054612287764747, "grad_norm": 0.7968882326285671, "learning_rate": 3.556080284671667e-06, "loss": 0.1288, "step": 20754 }, { "epoch": 0.6054904020071182, "grad_norm": 0.8297991682726783, "learning_rate": 3.555627985352133e-06, "loss": 0.1359, "step": 20755 }, { "epoch": 0.6055195752377618, "grad_norm": 0.9329933523109677, "learning_rate": 3.5551756989276087e-06, "loss": 0.1321, "step": 20756 }, { "epoch": 0.6055487484684053, "grad_norm": 0.7371539989534648, "learning_rate": 3.5547234254021325e-06, "loss": 0.1034, "step": 20757 }, { "epoch": 0.605577921699049, "grad_norm": 0.808178212639286, "learning_rate": 3.554271164779739e-06, "loss": 0.117, "step": 20758 }, { "epoch": 0.6056070949296926, "grad_norm": 0.6366159947828574, "learning_rate": 3.5538189170644678e-06, "loss": 0.1154, "step": 20759 }, { "epoch": 0.6056362681603361, "grad_norm": 1.2799814059715604, "learning_rate": 3.553366682260356e-06, "loss": 0.1327, "step": 20760 }, { "epoch": 0.6056654413909797, "grad_norm": 0.8034490254175609, "learning_rate": 3.5529144603714395e-06, "loss": 0.1227, "step": 20761 }, { "epoch": 0.6056946146216232, "grad_norm": 1.1062572770653447, "learning_rate": 3.55246225140176e-06, "loss": 0.1252, "step": 20762 }, { "epoch": 0.6057237878522668, "grad_norm": 1.0330797722000389, "learning_rate": 3.5520100553553504e-06, "loss": 0.1564, "step": 20763 }, { "epoch": 0.6057529610829103, "grad_norm": 0.8166753943911534, "learning_rate": 3.55155787223625e-06, "loss": 0.1281, "step": 20764 }, { "epoch": 0.6057821343135539, "grad_norm": 0.8070122984817666, "learning_rate": 3.551105702048495e-06, "loss": 0.1292, "step": 20765 }, { "epoch": 0.6058113075441974, "grad_norm": 1.1250501921628877, "learning_rate": 3.5506535447961227e-06, "loss": 0.1332, "step": 20766 }, { "epoch": 0.605840480774841, "grad_norm": 1.0708299412142712, "learning_rate": 3.5502014004831674e-06, "loss": 0.1257, "step": 20767 }, { "epoch": 0.6058696540054845, "grad_norm": 0.9902104897837148, "learning_rate": 3.5497492691136705e-06, "loss": 0.1424, "step": 20768 }, { "epoch": 0.6058988272361281, "grad_norm": 0.7946503233411324, "learning_rate": 3.5492971506916647e-06, "loss": 0.1489, "step": 20769 }, { "epoch": 0.6059280004667716, "grad_norm": 1.1545968112596225, "learning_rate": 3.5488450452211887e-06, "loss": 0.1281, "step": 20770 }, { "epoch": 0.6059571736974152, "grad_norm": 0.998017859659975, "learning_rate": 3.5483929527062764e-06, "loss": 0.1209, "step": 20771 }, { "epoch": 0.6059863469280589, "grad_norm": 0.7082787407444177, "learning_rate": 3.547940873150966e-06, "loss": 0.1006, "step": 20772 }, { "epoch": 0.6060155201587024, "grad_norm": 0.8808205347203875, "learning_rate": 3.547488806559292e-06, "loss": 0.1373, "step": 20773 }, { "epoch": 0.606044693389346, "grad_norm": 1.0237477128960173, "learning_rate": 3.5470367529352917e-06, "loss": 0.1378, "step": 20774 }, { "epoch": 0.6060738666199895, "grad_norm": 0.6965407799322243, "learning_rate": 3.5465847122830014e-06, "loss": 0.0925, "step": 20775 }, { "epoch": 0.6061030398506331, "grad_norm": 0.5832944443817607, "learning_rate": 3.5461326846064555e-06, "loss": 0.1142, "step": 20776 }, { "epoch": 0.6061322130812766, "grad_norm": 1.0577146828928368, "learning_rate": 3.545680669909689e-06, "loss": 0.1118, "step": 20777 }, { "epoch": 0.6061613863119202, "grad_norm": 1.1101115172841742, "learning_rate": 3.5452286681967397e-06, "loss": 0.1411, "step": 20778 }, { "epoch": 0.6061905595425637, "grad_norm": 0.7237761676523827, "learning_rate": 3.54477667947164e-06, "loss": 0.1271, "step": 20779 }, { "epoch": 0.6062197327732073, "grad_norm": 1.1404929236029993, "learning_rate": 3.5443247037384273e-06, "loss": 0.1319, "step": 20780 }, { "epoch": 0.6062489060038508, "grad_norm": 1.585405341464, "learning_rate": 3.543872741001136e-06, "loss": 0.1384, "step": 20781 }, { "epoch": 0.6062780792344944, "grad_norm": 1.1413178559235155, "learning_rate": 3.543420791263801e-06, "loss": 0.1094, "step": 20782 }, { "epoch": 0.606307252465138, "grad_norm": 0.805556354799846, "learning_rate": 3.5429688545304596e-06, "loss": 0.1302, "step": 20783 }, { "epoch": 0.6063364256957815, "grad_norm": 0.8586873033020317, "learning_rate": 3.5425169308051423e-06, "loss": 0.1168, "step": 20784 }, { "epoch": 0.6063655989264252, "grad_norm": 1.1159385045306318, "learning_rate": 3.5420650200918854e-06, "loss": 0.1182, "step": 20785 }, { "epoch": 0.6063947721570687, "grad_norm": 1.1160109972322403, "learning_rate": 3.5416131223947246e-06, "loss": 0.1018, "step": 20786 }, { "epoch": 0.6064239453877123, "grad_norm": 0.8517934459793963, "learning_rate": 3.5411612377176952e-06, "loss": 0.1398, "step": 20787 }, { "epoch": 0.6064531186183558, "grad_norm": 1.007116534568464, "learning_rate": 3.540709366064829e-06, "loss": 0.1161, "step": 20788 }, { "epoch": 0.6064822918489994, "grad_norm": 1.106611411847511, "learning_rate": 3.5402575074401614e-06, "loss": 0.121, "step": 20789 }, { "epoch": 0.6065114650796429, "grad_norm": 1.1247601950427408, "learning_rate": 3.5398056618477267e-06, "loss": 0.117, "step": 20790 }, { "epoch": 0.6065406383102865, "grad_norm": 0.8765751259200038, "learning_rate": 3.539353829291555e-06, "loss": 0.1522, "step": 20791 }, { "epoch": 0.60656981154093, "grad_norm": 0.848885329789495, "learning_rate": 3.5389020097756875e-06, "loss": 0.1423, "step": 20792 }, { "epoch": 0.6065989847715736, "grad_norm": 1.3298916316619849, "learning_rate": 3.5384502033041534e-06, "loss": 0.1238, "step": 20793 }, { "epoch": 0.6066281580022171, "grad_norm": 0.9793552854737595, "learning_rate": 3.537998409880986e-06, "loss": 0.1404, "step": 20794 }, { "epoch": 0.6066573312328607, "grad_norm": 0.9251143141179438, "learning_rate": 3.537546629510222e-06, "loss": 0.1465, "step": 20795 }, { "epoch": 0.6066865044635043, "grad_norm": 0.7619181328684479, "learning_rate": 3.5370948621958905e-06, "loss": 0.1183, "step": 20796 }, { "epoch": 0.6067156776941478, "grad_norm": 0.9043037680986421, "learning_rate": 3.536643107942026e-06, "loss": 0.1291, "step": 20797 }, { "epoch": 0.6067448509247914, "grad_norm": 0.7967554360175508, "learning_rate": 3.5361913667526637e-06, "loss": 0.1225, "step": 20798 }, { "epoch": 0.606774024155435, "grad_norm": 1.128864099514495, "learning_rate": 3.5357396386318356e-06, "loss": 0.1066, "step": 20799 }, { "epoch": 0.6068031973860786, "grad_norm": 0.9666030322325735, "learning_rate": 3.535287923583576e-06, "loss": 0.1013, "step": 20800 }, { "epoch": 0.6068323706167221, "grad_norm": 0.6596696692633949, "learning_rate": 3.5348362216119136e-06, "loss": 0.1278, "step": 20801 }, { "epoch": 0.6068615438473657, "grad_norm": 0.9430316041094062, "learning_rate": 3.534384532720885e-06, "loss": 0.1392, "step": 20802 }, { "epoch": 0.6068907170780092, "grad_norm": 0.8621980563535409, "learning_rate": 3.53393285691452e-06, "loss": 0.1262, "step": 20803 }, { "epoch": 0.6069198903086528, "grad_norm": 0.7425572221417459, "learning_rate": 3.5334811941968533e-06, "loss": 0.1064, "step": 20804 }, { "epoch": 0.6069490635392963, "grad_norm": 0.7104699576240053, "learning_rate": 3.5330295445719174e-06, "loss": 0.126, "step": 20805 }, { "epoch": 0.6069782367699399, "grad_norm": 0.7137393887368758, "learning_rate": 3.5325779080437427e-06, "loss": 0.0955, "step": 20806 }, { "epoch": 0.6070074100005834, "grad_norm": 0.836191153618072, "learning_rate": 3.532126284616362e-06, "loss": 0.1368, "step": 20807 }, { "epoch": 0.607036583231227, "grad_norm": 0.8013247250862227, "learning_rate": 3.531674674293809e-06, "loss": 0.1177, "step": 20808 }, { "epoch": 0.6070657564618706, "grad_norm": 0.8837431030362821, "learning_rate": 3.5312230770801115e-06, "loss": 0.1246, "step": 20809 }, { "epoch": 0.6070949296925141, "grad_norm": 0.7869883262216052, "learning_rate": 3.530771492979305e-06, "loss": 0.1373, "step": 20810 }, { "epoch": 0.6071241029231577, "grad_norm": 0.8483986506473256, "learning_rate": 3.5303199219954188e-06, "loss": 0.1183, "step": 20811 }, { "epoch": 0.6071532761538013, "grad_norm": 0.6440159824701732, "learning_rate": 3.5298683641324864e-06, "loss": 0.1147, "step": 20812 }, { "epoch": 0.6071824493844449, "grad_norm": 0.8186681196906564, "learning_rate": 3.5294168193945392e-06, "loss": 0.1363, "step": 20813 }, { "epoch": 0.6072116226150884, "grad_norm": 0.7340619231161224, "learning_rate": 3.528965287785607e-06, "loss": 0.0933, "step": 20814 }, { "epoch": 0.607240795845732, "grad_norm": 0.7199545601433122, "learning_rate": 3.5285137693097198e-06, "loss": 0.1366, "step": 20815 }, { "epoch": 0.6072699690763755, "grad_norm": 0.6875301094139717, "learning_rate": 3.5280622639709117e-06, "loss": 0.1199, "step": 20816 }, { "epoch": 0.6072991423070191, "grad_norm": 0.8598583189444979, "learning_rate": 3.5276107717732133e-06, "loss": 0.1261, "step": 20817 }, { "epoch": 0.6073283155376626, "grad_norm": 0.749982161543977, "learning_rate": 3.527159292720654e-06, "loss": 0.1302, "step": 20818 }, { "epoch": 0.6073574887683062, "grad_norm": 0.9103036841326585, "learning_rate": 3.526707826817264e-06, "loss": 0.1311, "step": 20819 }, { "epoch": 0.6073866619989498, "grad_norm": 0.746632233792318, "learning_rate": 3.5262563740670765e-06, "loss": 0.1189, "step": 20820 }, { "epoch": 0.6074158352295933, "grad_norm": 0.90928843892397, "learning_rate": 3.525804934474117e-06, "loss": 0.1469, "step": 20821 }, { "epoch": 0.6074450084602369, "grad_norm": 0.742644949864802, "learning_rate": 3.5253535080424224e-06, "loss": 0.1174, "step": 20822 }, { "epoch": 0.6074741816908804, "grad_norm": 0.7847711759910857, "learning_rate": 3.5249020947760182e-06, "loss": 0.1507, "step": 20823 }, { "epoch": 0.607503354921524, "grad_norm": 0.7114351635012685, "learning_rate": 3.524450694678936e-06, "loss": 0.1354, "step": 20824 }, { "epoch": 0.6075325281521675, "grad_norm": 0.9068679796758986, "learning_rate": 3.523999307755207e-06, "loss": 0.1387, "step": 20825 }, { "epoch": 0.6075617013828112, "grad_norm": 0.7231457922906789, "learning_rate": 3.523547934008859e-06, "loss": 0.1443, "step": 20826 }, { "epoch": 0.6075908746134547, "grad_norm": 0.8640421220288766, "learning_rate": 3.5230965734439214e-06, "loss": 0.1104, "step": 20827 }, { "epoch": 0.6076200478440983, "grad_norm": 0.7545538548010573, "learning_rate": 3.522645226064426e-06, "loss": 0.1171, "step": 20828 }, { "epoch": 0.6076492210747418, "grad_norm": 0.8720536517182633, "learning_rate": 3.5221938918744013e-06, "loss": 0.1275, "step": 20829 }, { "epoch": 0.6076783943053854, "grad_norm": 0.7062483667290207, "learning_rate": 3.5217425708778774e-06, "loss": 0.0852, "step": 20830 }, { "epoch": 0.607707567536029, "grad_norm": 0.9420998347925931, "learning_rate": 3.5212912630788827e-06, "loss": 0.1322, "step": 20831 }, { "epoch": 0.6077367407666725, "grad_norm": 0.7892344052736006, "learning_rate": 3.5208399684814463e-06, "loss": 0.124, "step": 20832 }, { "epoch": 0.607765913997316, "grad_norm": 0.8214551947674077, "learning_rate": 3.5203886870895965e-06, "loss": 0.1176, "step": 20833 }, { "epoch": 0.6077950872279596, "grad_norm": 0.8876504109379849, "learning_rate": 3.519937418907364e-06, "loss": 0.135, "step": 20834 }, { "epoch": 0.6078242604586032, "grad_norm": 0.939069544891645, "learning_rate": 3.5194861639387783e-06, "loss": 0.1228, "step": 20835 }, { "epoch": 0.6078534336892467, "grad_norm": 0.6827677451020275, "learning_rate": 3.5190349221878655e-06, "loss": 0.1229, "step": 20836 }, { "epoch": 0.6078826069198903, "grad_norm": 0.8175353329013016, "learning_rate": 3.518583693658656e-06, "loss": 0.1354, "step": 20837 }, { "epoch": 0.6079117801505338, "grad_norm": 0.8746298979621124, "learning_rate": 3.518132478355178e-06, "loss": 0.1394, "step": 20838 }, { "epoch": 0.6079409533811775, "grad_norm": 0.7487423416580942, "learning_rate": 3.5176812762814572e-06, "loss": 0.1097, "step": 20839 }, { "epoch": 0.607970126611821, "grad_norm": 0.7421659489455351, "learning_rate": 3.5172300874415256e-06, "loss": 0.1359, "step": 20840 }, { "epoch": 0.6079992998424646, "grad_norm": 0.928068727451947, "learning_rate": 3.51677891183941e-06, "loss": 0.1221, "step": 20841 }, { "epoch": 0.6080284730731081, "grad_norm": 0.8362138577853714, "learning_rate": 3.516327749479139e-06, "loss": 0.1323, "step": 20842 }, { "epoch": 0.6080576463037517, "grad_norm": 0.8282463313254467, "learning_rate": 3.5158766003647382e-06, "loss": 0.1245, "step": 20843 }, { "epoch": 0.6080868195343953, "grad_norm": 0.7602479263944608, "learning_rate": 3.515425464500237e-06, "loss": 0.144, "step": 20844 }, { "epoch": 0.6081159927650388, "grad_norm": 1.075393671920079, "learning_rate": 3.5149743418896622e-06, "loss": 0.1132, "step": 20845 }, { "epoch": 0.6081451659956824, "grad_norm": 0.8795152460951055, "learning_rate": 3.5145232325370426e-06, "loss": 0.1463, "step": 20846 }, { "epoch": 0.6081743392263259, "grad_norm": 0.8786622884723323, "learning_rate": 3.5140721364464068e-06, "loss": 0.1128, "step": 20847 }, { "epoch": 0.6082035124569695, "grad_norm": 0.7586587947296383, "learning_rate": 3.5136210536217787e-06, "loss": 0.1132, "step": 20848 }, { "epoch": 0.608232685687613, "grad_norm": 0.9380004096608355, "learning_rate": 3.5131699840671867e-06, "loss": 0.1291, "step": 20849 }, { "epoch": 0.6082618589182566, "grad_norm": 0.7444580516213217, "learning_rate": 3.51271892778666e-06, "loss": 0.1279, "step": 20850 }, { "epoch": 0.6082910321489001, "grad_norm": 0.8131382801497327, "learning_rate": 3.5122678847842197e-06, "loss": 0.1145, "step": 20851 }, { "epoch": 0.6083202053795437, "grad_norm": 0.7383238104561087, "learning_rate": 3.5118168550639e-06, "loss": 0.1259, "step": 20852 }, { "epoch": 0.6083493786101873, "grad_norm": 0.8365176261664504, "learning_rate": 3.5113658386297227e-06, "loss": 0.142, "step": 20853 }, { "epoch": 0.6083785518408309, "grad_norm": 0.748472365231926, "learning_rate": 3.5109148354857165e-06, "loss": 0.1447, "step": 20854 }, { "epoch": 0.6084077250714744, "grad_norm": 0.7115854890993898, "learning_rate": 3.510463845635908e-06, "loss": 0.1057, "step": 20855 }, { "epoch": 0.608436898302118, "grad_norm": 0.8930583837912623, "learning_rate": 3.5100128690843215e-06, "loss": 0.126, "step": 20856 }, { "epoch": 0.6084660715327616, "grad_norm": 0.7522305806269778, "learning_rate": 3.509561905834984e-06, "loss": 0.1225, "step": 20857 }, { "epoch": 0.6084952447634051, "grad_norm": 0.7656887352671015, "learning_rate": 3.5091109558919223e-06, "loss": 0.1097, "step": 20858 }, { "epoch": 0.6085244179940487, "grad_norm": 0.8535693426859395, "learning_rate": 3.5086600192591623e-06, "loss": 0.1293, "step": 20859 }, { "epoch": 0.6085535912246922, "grad_norm": 0.8635018599260486, "learning_rate": 3.5082090959407307e-06, "loss": 0.1157, "step": 20860 }, { "epoch": 0.6085827644553358, "grad_norm": 0.7333457926948656, "learning_rate": 3.5077581859406508e-06, "loss": 0.1188, "step": 20861 }, { "epoch": 0.6086119376859793, "grad_norm": 0.8042113886541649, "learning_rate": 3.507307289262949e-06, "loss": 0.1416, "step": 20862 }, { "epoch": 0.6086411109166229, "grad_norm": 0.7903422032931964, "learning_rate": 3.5068564059116522e-06, "loss": 0.1144, "step": 20863 }, { "epoch": 0.6086702841472664, "grad_norm": 0.7913155719387774, "learning_rate": 3.5064055358907854e-06, "loss": 0.0999, "step": 20864 }, { "epoch": 0.60869945737791, "grad_norm": 1.0452286762992344, "learning_rate": 3.5059546792043742e-06, "loss": 0.1217, "step": 20865 }, { "epoch": 0.6087286306085536, "grad_norm": 0.8504592104242178, "learning_rate": 3.505503835856442e-06, "loss": 0.1313, "step": 20866 }, { "epoch": 0.6087578038391972, "grad_norm": 0.9016377475640407, "learning_rate": 3.5050530058510146e-06, "loss": 0.1391, "step": 20867 }, { "epoch": 0.6087869770698408, "grad_norm": 1.3474493280915316, "learning_rate": 3.5046021891921156e-06, "loss": 0.1419, "step": 20868 }, { "epoch": 0.6088161503004843, "grad_norm": 1.0027879261808696, "learning_rate": 3.504151385883774e-06, "loss": 0.1173, "step": 20869 }, { "epoch": 0.6088453235311279, "grad_norm": 0.8442227729097376, "learning_rate": 3.5037005959300106e-06, "loss": 0.1265, "step": 20870 }, { "epoch": 0.6088744967617714, "grad_norm": 1.5925643195253445, "learning_rate": 3.503249819334851e-06, "loss": 0.1343, "step": 20871 }, { "epoch": 0.608903669992415, "grad_norm": 1.3341999200780448, "learning_rate": 3.5027990561023204e-06, "loss": 0.1328, "step": 20872 }, { "epoch": 0.6089328432230585, "grad_norm": 0.8395569746982348, "learning_rate": 3.502348306236442e-06, "loss": 0.1348, "step": 20873 }, { "epoch": 0.6089620164537021, "grad_norm": 1.1910055822634305, "learning_rate": 3.5018975697412392e-06, "loss": 0.1515, "step": 20874 }, { "epoch": 0.6089911896843456, "grad_norm": 1.4586076054610828, "learning_rate": 3.5014468466207387e-06, "loss": 0.1205, "step": 20875 }, { "epoch": 0.6090203629149892, "grad_norm": 0.8727719842501428, "learning_rate": 3.5009961368789623e-06, "loss": 0.114, "step": 20876 }, { "epoch": 0.6090495361456327, "grad_norm": 0.8727754863794717, "learning_rate": 3.5005454405199358e-06, "loss": 0.1166, "step": 20877 }, { "epoch": 0.6090787093762763, "grad_norm": 0.8788419478958428, "learning_rate": 3.5000947575476806e-06, "loss": 0.1029, "step": 20878 }, { "epoch": 0.6091078826069198, "grad_norm": 1.1375574796335826, "learning_rate": 3.4996440879662218e-06, "loss": 0.1335, "step": 20879 }, { "epoch": 0.6091370558375635, "grad_norm": 1.170482436033285, "learning_rate": 3.4991934317795806e-06, "loss": 0.1295, "step": 20880 }, { "epoch": 0.609166229068207, "grad_norm": 0.7969557444024356, "learning_rate": 3.4987427889917835e-06, "loss": 0.1288, "step": 20881 }, { "epoch": 0.6091954022988506, "grad_norm": 0.9061287474697424, "learning_rate": 3.4982921596068543e-06, "loss": 0.119, "step": 20882 }, { "epoch": 0.6092245755294942, "grad_norm": 0.841542942014196, "learning_rate": 3.4978415436288117e-06, "loss": 0.1126, "step": 20883 }, { "epoch": 0.6092537487601377, "grad_norm": 0.8243967094503337, "learning_rate": 3.4973909410616825e-06, "loss": 0.1144, "step": 20884 }, { "epoch": 0.6092829219907813, "grad_norm": 0.7724631396965665, "learning_rate": 3.4969403519094884e-06, "loss": 0.1247, "step": 20885 }, { "epoch": 0.6093120952214248, "grad_norm": 0.8084179720613038, "learning_rate": 3.4964897761762494e-06, "loss": 0.1458, "step": 20886 }, { "epoch": 0.6093412684520684, "grad_norm": 0.7925687170356461, "learning_rate": 3.4960392138659937e-06, "loss": 0.1159, "step": 20887 }, { "epoch": 0.6093704416827119, "grad_norm": 0.9906277379096061, "learning_rate": 3.49558866498274e-06, "loss": 0.127, "step": 20888 }, { "epoch": 0.6093996149133555, "grad_norm": 0.8723356293846019, "learning_rate": 3.495138129530511e-06, "loss": 0.0994, "step": 20889 }, { "epoch": 0.609428788143999, "grad_norm": 0.8502232317404022, "learning_rate": 3.4946876075133314e-06, "loss": 0.1389, "step": 20890 }, { "epoch": 0.6094579613746426, "grad_norm": 0.7269993560244731, "learning_rate": 3.4942370989352197e-06, "loss": 0.13, "step": 20891 }, { "epoch": 0.6094871346052861, "grad_norm": 0.7966318687625942, "learning_rate": 3.493786603800199e-06, "loss": 0.1203, "step": 20892 }, { "epoch": 0.6095163078359298, "grad_norm": 0.8267682902728425, "learning_rate": 3.493336122112293e-06, "loss": 0.1314, "step": 20893 }, { "epoch": 0.6095454810665734, "grad_norm": 0.8030176606329594, "learning_rate": 3.492885653875523e-06, "loss": 0.1412, "step": 20894 }, { "epoch": 0.6095746542972169, "grad_norm": 0.9935218848228359, "learning_rate": 3.4924351990939102e-06, "loss": 0.1076, "step": 20895 }, { "epoch": 0.6096038275278605, "grad_norm": 0.8916645742970674, "learning_rate": 3.4919847577714753e-06, "loss": 0.0969, "step": 20896 }, { "epoch": 0.609633000758504, "grad_norm": 0.8236760374407862, "learning_rate": 3.4915343299122408e-06, "loss": 0.1284, "step": 20897 }, { "epoch": 0.6096621739891476, "grad_norm": 0.919804114272897, "learning_rate": 3.491083915520227e-06, "loss": 0.1516, "step": 20898 }, { "epoch": 0.6096913472197911, "grad_norm": 0.9565564281413478, "learning_rate": 3.490633514599457e-06, "loss": 0.1143, "step": 20899 }, { "epoch": 0.6097205204504347, "grad_norm": 0.7096049569596213, "learning_rate": 3.49018312715395e-06, "loss": 0.1382, "step": 20900 }, { "epoch": 0.6097496936810782, "grad_norm": 0.9871400070174613, "learning_rate": 3.489732753187728e-06, "loss": 0.1388, "step": 20901 }, { "epoch": 0.6097788669117218, "grad_norm": 1.131942789357873, "learning_rate": 3.4892823927048113e-06, "loss": 0.1249, "step": 20902 }, { "epoch": 0.6098080401423653, "grad_norm": 0.7074766836221436, "learning_rate": 3.4888320457092207e-06, "loss": 0.1315, "step": 20903 }, { "epoch": 0.6098372133730089, "grad_norm": 1.6197528286078506, "learning_rate": 3.4883817122049757e-06, "loss": 0.1334, "step": 20904 }, { "epoch": 0.6098663866036524, "grad_norm": 1.0630169660883857, "learning_rate": 3.4879313921960988e-06, "loss": 0.146, "step": 20905 }, { "epoch": 0.609895559834296, "grad_norm": 1.3073245857038782, "learning_rate": 3.48748108568661e-06, "loss": 0.1199, "step": 20906 }, { "epoch": 0.6099247330649397, "grad_norm": 0.7861543012982191, "learning_rate": 3.4870307926805293e-06, "loss": 0.1196, "step": 20907 }, { "epoch": 0.6099539062955832, "grad_norm": 1.178482787113756, "learning_rate": 3.486580513181876e-06, "loss": 0.1264, "step": 20908 }, { "epoch": 0.6099830795262268, "grad_norm": 1.3058702213214082, "learning_rate": 3.4861302471946703e-06, "loss": 0.1353, "step": 20909 }, { "epoch": 0.6100122527568703, "grad_norm": 0.8620394268813247, "learning_rate": 3.4856799947229316e-06, "loss": 0.1239, "step": 20910 }, { "epoch": 0.6100414259875139, "grad_norm": 0.794154012305866, "learning_rate": 3.4852297557706803e-06, "loss": 0.1024, "step": 20911 }, { "epoch": 0.6100705992181574, "grad_norm": 0.9086135890438978, "learning_rate": 3.4847795303419385e-06, "loss": 0.142, "step": 20912 }, { "epoch": 0.610099772448801, "grad_norm": 1.1621642144116857, "learning_rate": 3.4843293184407223e-06, "loss": 0.1233, "step": 20913 }, { "epoch": 0.6101289456794445, "grad_norm": 0.8985573165668729, "learning_rate": 3.4838791200710515e-06, "loss": 0.1321, "step": 20914 }, { "epoch": 0.6101581189100881, "grad_norm": 0.8056488716815887, "learning_rate": 3.4834289352369477e-06, "loss": 0.1333, "step": 20915 }, { "epoch": 0.6101872921407316, "grad_norm": 0.8015228922543629, "learning_rate": 3.4829787639424238e-06, "loss": 0.1212, "step": 20916 }, { "epoch": 0.6102164653713752, "grad_norm": 1.038916445368362, "learning_rate": 3.482528606191508e-06, "loss": 0.1395, "step": 20917 }, { "epoch": 0.6102456386020187, "grad_norm": 0.9483160722802674, "learning_rate": 3.482078461988213e-06, "loss": 0.1042, "step": 20918 }, { "epoch": 0.6102748118326623, "grad_norm": 0.7376460552091912, "learning_rate": 3.481628331336559e-06, "loss": 0.1246, "step": 20919 }, { "epoch": 0.610303985063306, "grad_norm": 0.8470370483299596, "learning_rate": 3.481178214240566e-06, "loss": 0.1087, "step": 20920 }, { "epoch": 0.6103331582939495, "grad_norm": 0.9497577318465791, "learning_rate": 3.48072811070425e-06, "loss": 0.1286, "step": 20921 }, { "epoch": 0.6103623315245931, "grad_norm": 0.8930144418107888, "learning_rate": 3.48027802073163e-06, "loss": 0.1097, "step": 20922 }, { "epoch": 0.6103915047552366, "grad_norm": 0.842364621518752, "learning_rate": 3.479827944326726e-06, "loss": 0.1272, "step": 20923 }, { "epoch": 0.6104206779858802, "grad_norm": 0.747239643573268, "learning_rate": 3.4793778814935553e-06, "loss": 0.1231, "step": 20924 }, { "epoch": 0.6104498512165237, "grad_norm": 0.8841884267214734, "learning_rate": 3.478927832236137e-06, "loss": 0.1132, "step": 20925 }, { "epoch": 0.6104790244471673, "grad_norm": 0.8602181308889852, "learning_rate": 3.478477796558487e-06, "loss": 0.1351, "step": 20926 }, { "epoch": 0.6105081976778108, "grad_norm": 0.9963921043130824, "learning_rate": 3.4780277744646236e-06, "loss": 0.1272, "step": 20927 }, { "epoch": 0.6105373709084544, "grad_norm": 0.8268281964396148, "learning_rate": 3.477577765958564e-06, "loss": 0.1381, "step": 20928 }, { "epoch": 0.6105665441390979, "grad_norm": 0.7129514428666248, "learning_rate": 3.4771277710443284e-06, "loss": 0.1109, "step": 20929 }, { "epoch": 0.6105957173697415, "grad_norm": 0.8057698220128542, "learning_rate": 3.4766777897259317e-06, "loss": 0.1395, "step": 20930 }, { "epoch": 0.610624890600385, "grad_norm": 1.0915883936241786, "learning_rate": 3.4762278220073927e-06, "loss": 0.1416, "step": 20931 }, { "epoch": 0.6106540638310286, "grad_norm": 0.8465931890384214, "learning_rate": 3.475777867892728e-06, "loss": 0.1237, "step": 20932 }, { "epoch": 0.6106832370616722, "grad_norm": 0.9360312890669172, "learning_rate": 3.475327927385954e-06, "loss": 0.1195, "step": 20933 }, { "epoch": 0.6107124102923158, "grad_norm": 0.7891785178727896, "learning_rate": 3.4748780004910875e-06, "loss": 0.1175, "step": 20934 }, { "epoch": 0.6107415835229594, "grad_norm": 0.7896108617273909, "learning_rate": 3.474428087212147e-06, "loss": 0.1024, "step": 20935 }, { "epoch": 0.6107707567536029, "grad_norm": 0.9645337464520171, "learning_rate": 3.473978187553149e-06, "loss": 0.1134, "step": 20936 }, { "epoch": 0.6107999299842465, "grad_norm": 1.036323117317088, "learning_rate": 3.4735283015181092e-06, "loss": 0.148, "step": 20937 }, { "epoch": 0.61082910321489, "grad_norm": 1.020705629172368, "learning_rate": 3.473078429111044e-06, "loss": 0.142, "step": 20938 }, { "epoch": 0.6108582764455336, "grad_norm": 0.9300278752439212, "learning_rate": 3.4726285703359698e-06, "loss": 0.1102, "step": 20939 }, { "epoch": 0.6108874496761771, "grad_norm": 0.7062035862868438, "learning_rate": 3.4721787251969023e-06, "loss": 0.1332, "step": 20940 }, { "epoch": 0.6109166229068207, "grad_norm": 0.9170001169958084, "learning_rate": 3.47172889369786e-06, "loss": 0.1292, "step": 20941 }, { "epoch": 0.6109457961374642, "grad_norm": 1.1138343930381036, "learning_rate": 3.471279075842857e-06, "loss": 0.1292, "step": 20942 }, { "epoch": 0.6109749693681078, "grad_norm": 0.8864499926165303, "learning_rate": 3.4708292716359094e-06, "loss": 0.1507, "step": 20943 }, { "epoch": 0.6110041425987514, "grad_norm": 0.80922507010581, "learning_rate": 3.4703794810810334e-06, "loss": 0.1303, "step": 20944 }, { "epoch": 0.6110333158293949, "grad_norm": 0.7618441864480844, "learning_rate": 3.4699297041822444e-06, "loss": 0.1246, "step": 20945 }, { "epoch": 0.6110624890600385, "grad_norm": 1.1477619314815288, "learning_rate": 3.469479940943555e-06, "loss": 0.1192, "step": 20946 }, { "epoch": 0.6110916622906821, "grad_norm": 0.8051245272157639, "learning_rate": 3.4690301913689863e-06, "loss": 0.106, "step": 20947 }, { "epoch": 0.6111208355213257, "grad_norm": 0.9034847528658548, "learning_rate": 3.4685804554625495e-06, "loss": 0.1174, "step": 20948 }, { "epoch": 0.6111500087519692, "grad_norm": 0.921643093258146, "learning_rate": 3.468130733228261e-06, "loss": 0.134, "step": 20949 }, { "epoch": 0.6111791819826128, "grad_norm": 1.070957243678224, "learning_rate": 3.4676810246701365e-06, "loss": 0.1194, "step": 20950 }, { "epoch": 0.6112083552132563, "grad_norm": 1.166313172374092, "learning_rate": 3.467231329792189e-06, "loss": 0.156, "step": 20951 }, { "epoch": 0.6112375284438999, "grad_norm": 0.7777529780148577, "learning_rate": 3.4667816485984334e-06, "loss": 0.135, "step": 20952 }, { "epoch": 0.6112667016745434, "grad_norm": 0.7953839416045292, "learning_rate": 3.4663319810928865e-06, "loss": 0.1207, "step": 20953 }, { "epoch": 0.611295874905187, "grad_norm": 0.9927925875520035, "learning_rate": 3.465882327279561e-06, "loss": 0.1361, "step": 20954 }, { "epoch": 0.6113250481358306, "grad_norm": 0.9540477595798074, "learning_rate": 3.465432687162473e-06, "loss": 0.1155, "step": 20955 }, { "epoch": 0.6113542213664741, "grad_norm": 1.0290227431209418, "learning_rate": 3.464983060745635e-06, "loss": 0.1434, "step": 20956 }, { "epoch": 0.6113833945971177, "grad_norm": 0.8991033544133925, "learning_rate": 3.4645334480330616e-06, "loss": 0.1339, "step": 20957 }, { "epoch": 0.6114125678277612, "grad_norm": 0.7888454291008832, "learning_rate": 3.464083849028766e-06, "loss": 0.1575, "step": 20958 }, { "epoch": 0.6114417410584048, "grad_norm": 0.9488993324605315, "learning_rate": 3.463634263736765e-06, "loss": 0.1171, "step": 20959 }, { "epoch": 0.6114709142890483, "grad_norm": 0.8432693116187181, "learning_rate": 3.46318469216107e-06, "loss": 0.1466, "step": 20960 }, { "epoch": 0.611500087519692, "grad_norm": 0.6769327721821427, "learning_rate": 3.4627351343056947e-06, "loss": 0.1115, "step": 20961 }, { "epoch": 0.6115292607503355, "grad_norm": 0.8731764480185785, "learning_rate": 3.4622855901746543e-06, "loss": 0.1363, "step": 20962 }, { "epoch": 0.6115584339809791, "grad_norm": 0.953362235882418, "learning_rate": 3.46183605977196e-06, "loss": 0.1498, "step": 20963 }, { "epoch": 0.6115876072116226, "grad_norm": 1.0649145980754482, "learning_rate": 3.4613865431016253e-06, "loss": 0.1363, "step": 20964 }, { "epoch": 0.6116167804422662, "grad_norm": 0.9672501895694245, "learning_rate": 3.460937040167665e-06, "loss": 0.125, "step": 20965 }, { "epoch": 0.6116459536729097, "grad_norm": 0.9080402169163565, "learning_rate": 3.4604875509740922e-06, "loss": 0.1217, "step": 20966 }, { "epoch": 0.6116751269035533, "grad_norm": 1.1121807349407231, "learning_rate": 3.460038075524919e-06, "loss": 0.1251, "step": 20967 }, { "epoch": 0.6117043001341969, "grad_norm": 1.0001426277027219, "learning_rate": 3.4595886138241575e-06, "loss": 0.1473, "step": 20968 }, { "epoch": 0.6117334733648404, "grad_norm": 0.8792572688182623, "learning_rate": 3.459139165875821e-06, "loss": 0.1309, "step": 20969 }, { "epoch": 0.611762646595484, "grad_norm": 0.7173610154422235, "learning_rate": 3.4586897316839217e-06, "loss": 0.1387, "step": 20970 }, { "epoch": 0.6117918198261275, "grad_norm": 1.098478615007473, "learning_rate": 3.458240311252473e-06, "loss": 0.1293, "step": 20971 }, { "epoch": 0.6118209930567711, "grad_norm": 0.7366822015674209, "learning_rate": 3.4577909045854884e-06, "loss": 0.1167, "step": 20972 }, { "epoch": 0.6118501662874146, "grad_norm": 0.9817394618978629, "learning_rate": 3.4573415116869774e-06, "loss": 0.1205, "step": 20973 }, { "epoch": 0.6118793395180583, "grad_norm": 0.8977344426497099, "learning_rate": 3.456892132560953e-06, "loss": 0.1575, "step": 20974 }, { "epoch": 0.6119085127487018, "grad_norm": 0.8035672913976277, "learning_rate": 3.456442767211428e-06, "loss": 0.1278, "step": 20975 }, { "epoch": 0.6119376859793454, "grad_norm": 0.6974203347194811, "learning_rate": 3.45599341564241e-06, "loss": 0.1244, "step": 20976 }, { "epoch": 0.611966859209989, "grad_norm": 0.8611962303220687, "learning_rate": 3.4555440778579185e-06, "loss": 0.1056, "step": 20977 }, { "epoch": 0.6119960324406325, "grad_norm": 0.7342171979455759, "learning_rate": 3.455094753861959e-06, "loss": 0.119, "step": 20978 }, { "epoch": 0.612025205671276, "grad_norm": 0.6687892399548173, "learning_rate": 3.4546454436585454e-06, "loss": 0.1088, "step": 20979 }, { "epoch": 0.6120543789019196, "grad_norm": 0.9301980438692424, "learning_rate": 3.4541961472516882e-06, "loss": 0.1277, "step": 20980 }, { "epoch": 0.6120835521325632, "grad_norm": 0.7632162987039959, "learning_rate": 3.4537468646453987e-06, "loss": 0.1122, "step": 20981 }, { "epoch": 0.6121127253632067, "grad_norm": 0.7659636098045769, "learning_rate": 3.4532975958436866e-06, "loss": 0.1149, "step": 20982 }, { "epoch": 0.6121418985938503, "grad_norm": 0.8543931345825652, "learning_rate": 3.4528483408505653e-06, "loss": 0.1089, "step": 20983 }, { "epoch": 0.6121710718244938, "grad_norm": 1.0042344270139514, "learning_rate": 3.452399099670045e-06, "loss": 0.1227, "step": 20984 }, { "epoch": 0.6122002450551374, "grad_norm": 0.8658228461930776, "learning_rate": 3.451949872306137e-06, "loss": 0.1161, "step": 20985 }, { "epoch": 0.6122294182857809, "grad_norm": 0.8556424588367897, "learning_rate": 3.4515006587628497e-06, "loss": 0.1236, "step": 20986 }, { "epoch": 0.6122585915164245, "grad_norm": 0.8755333393449263, "learning_rate": 3.4510514590441957e-06, "loss": 0.1323, "step": 20987 }, { "epoch": 0.6122877647470681, "grad_norm": 1.079093149162543, "learning_rate": 3.4506022731541826e-06, "loss": 0.1398, "step": 20988 }, { "epoch": 0.6123169379777117, "grad_norm": 1.16111636361247, "learning_rate": 3.450153101096825e-06, "loss": 0.1099, "step": 20989 }, { "epoch": 0.6123461112083552, "grad_norm": 0.779681191738122, "learning_rate": 3.4497039428761293e-06, "loss": 0.1142, "step": 20990 }, { "epoch": 0.6123752844389988, "grad_norm": 0.9628872504343245, "learning_rate": 3.4492547984961067e-06, "loss": 0.1303, "step": 20991 }, { "epoch": 0.6124044576696424, "grad_norm": 1.051397096908327, "learning_rate": 3.4488056679607685e-06, "loss": 0.1168, "step": 20992 }, { "epoch": 0.6124336309002859, "grad_norm": 0.9578684732032886, "learning_rate": 3.4483565512741214e-06, "loss": 0.1236, "step": 20993 }, { "epoch": 0.6124628041309295, "grad_norm": 0.869847829921037, "learning_rate": 3.4479074484401763e-06, "loss": 0.1325, "step": 20994 }, { "epoch": 0.612491977361573, "grad_norm": 1.402606131990117, "learning_rate": 3.4474583594629436e-06, "loss": 0.1406, "step": 20995 }, { "epoch": 0.6125211505922166, "grad_norm": 1.0505145326623786, "learning_rate": 3.447009284346432e-06, "loss": 0.1255, "step": 20996 }, { "epoch": 0.6125503238228601, "grad_norm": 0.8443454167133138, "learning_rate": 3.4465602230946517e-06, "loss": 0.1312, "step": 20997 }, { "epoch": 0.6125794970535037, "grad_norm": 1.530233319482489, "learning_rate": 3.44611117571161e-06, "loss": 0.1327, "step": 20998 }, { "epoch": 0.6126086702841472, "grad_norm": 1.204036597985255, "learning_rate": 3.445662142201317e-06, "loss": 0.1275, "step": 20999 }, { "epoch": 0.6126378435147908, "grad_norm": 0.8904733020868539, "learning_rate": 3.4452131225677798e-06, "loss": 0.1215, "step": 21000 }, { "epoch": 0.6126670167454343, "grad_norm": 1.2092441567946983, "learning_rate": 3.4447641168150103e-06, "loss": 0.1449, "step": 21001 }, { "epoch": 0.612696189976078, "grad_norm": 1.298882785905577, "learning_rate": 3.4443151249470163e-06, "loss": 0.1352, "step": 21002 }, { "epoch": 0.6127253632067216, "grad_norm": 1.1317444753596781, "learning_rate": 3.443866146967804e-06, "loss": 0.1317, "step": 21003 }, { "epoch": 0.6127545364373651, "grad_norm": 0.770033170431705, "learning_rate": 3.4434171828813833e-06, "loss": 0.1353, "step": 21004 }, { "epoch": 0.6127837096680087, "grad_norm": 0.7785240795060671, "learning_rate": 3.4429682326917645e-06, "loss": 0.1136, "step": 21005 }, { "epoch": 0.6128128828986522, "grad_norm": 1.0235229812936215, "learning_rate": 3.44251929640295e-06, "loss": 0.1378, "step": 21006 }, { "epoch": 0.6128420561292958, "grad_norm": 0.9164818800568367, "learning_rate": 3.4420703740189544e-06, "loss": 0.1312, "step": 21007 }, { "epoch": 0.6128712293599393, "grad_norm": 0.9996494893264714, "learning_rate": 3.441621465543781e-06, "loss": 0.1621, "step": 21008 }, { "epoch": 0.6129004025905829, "grad_norm": 0.7748913711001848, "learning_rate": 3.4411725709814397e-06, "loss": 0.1247, "step": 21009 }, { "epoch": 0.6129295758212264, "grad_norm": 0.9984979162441634, "learning_rate": 3.4407236903359385e-06, "loss": 0.1258, "step": 21010 }, { "epoch": 0.61295874905187, "grad_norm": 0.9676661351595874, "learning_rate": 3.4402748236112827e-06, "loss": 0.1379, "step": 21011 }, { "epoch": 0.6129879222825135, "grad_norm": 0.8025287907469718, "learning_rate": 3.43982597081148e-06, "loss": 0.1277, "step": 21012 }, { "epoch": 0.6130170955131571, "grad_norm": 0.9375310565927569, "learning_rate": 3.43937713194054e-06, "loss": 0.1508, "step": 21013 }, { "epoch": 0.6130462687438006, "grad_norm": 0.7262399909554743, "learning_rate": 3.4389283070024684e-06, "loss": 0.1002, "step": 21014 }, { "epoch": 0.6130754419744443, "grad_norm": 0.7272098888733515, "learning_rate": 3.4384794960012734e-06, "loss": 0.1143, "step": 21015 }, { "epoch": 0.6131046152050879, "grad_norm": 0.8919304754962747, "learning_rate": 3.438030698940959e-06, "loss": 0.1176, "step": 21016 }, { "epoch": 0.6131337884357314, "grad_norm": 0.813250615691482, "learning_rate": 3.437581915825534e-06, "loss": 0.1408, "step": 21017 }, { "epoch": 0.613162961666375, "grad_norm": 0.7351002698829738, "learning_rate": 3.4371331466590038e-06, "loss": 0.1049, "step": 21018 }, { "epoch": 0.6131921348970185, "grad_norm": 0.876096414795378, "learning_rate": 3.4366843914453774e-06, "loss": 0.1352, "step": 21019 }, { "epoch": 0.6132213081276621, "grad_norm": 0.8369573559671274, "learning_rate": 3.436235650188659e-06, "loss": 0.1604, "step": 21020 }, { "epoch": 0.6132504813583056, "grad_norm": 0.710377594669661, "learning_rate": 3.4357869228928553e-06, "loss": 0.105, "step": 21021 }, { "epoch": 0.6132796545889492, "grad_norm": 0.7724533961278669, "learning_rate": 3.4353382095619737e-06, "loss": 0.1229, "step": 21022 }, { "epoch": 0.6133088278195927, "grad_norm": 0.9649071735315539, "learning_rate": 3.4348895102000173e-06, "loss": 0.1367, "step": 21023 }, { "epoch": 0.6133380010502363, "grad_norm": 0.9883881679299172, "learning_rate": 3.4344408248109933e-06, "loss": 0.12, "step": 21024 }, { "epoch": 0.6133671742808798, "grad_norm": 0.7613585357562115, "learning_rate": 3.4339921533989083e-06, "loss": 0.1059, "step": 21025 }, { "epoch": 0.6133963475115234, "grad_norm": 0.6464953118660361, "learning_rate": 3.4335434959677683e-06, "loss": 0.1124, "step": 21026 }, { "epoch": 0.6134255207421669, "grad_norm": 0.767443712319199, "learning_rate": 3.433094852521579e-06, "loss": 0.1146, "step": 21027 }, { "epoch": 0.6134546939728105, "grad_norm": 1.2296710589919633, "learning_rate": 3.4326462230643436e-06, "loss": 0.1302, "step": 21028 }, { "epoch": 0.6134838672034542, "grad_norm": 0.7219022973250488, "learning_rate": 3.4321976076000685e-06, "loss": 0.1252, "step": 21029 }, { "epoch": 0.6135130404340977, "grad_norm": 0.837349692783926, "learning_rate": 3.431749006132758e-06, "loss": 0.111, "step": 21030 }, { "epoch": 0.6135422136647413, "grad_norm": 0.7466805912168603, "learning_rate": 3.431300418666419e-06, "loss": 0.1386, "step": 21031 }, { "epoch": 0.6135713868953848, "grad_norm": 0.8062874915350087, "learning_rate": 3.4308518452050567e-06, "loss": 0.1016, "step": 21032 }, { "epoch": 0.6136005601260284, "grad_norm": 0.8428077027826459, "learning_rate": 3.4304032857526724e-06, "loss": 0.1344, "step": 21033 }, { "epoch": 0.6136297333566719, "grad_norm": 1.1384267038198153, "learning_rate": 3.4299547403132738e-06, "loss": 0.137, "step": 21034 }, { "epoch": 0.6136589065873155, "grad_norm": 0.7902375493699365, "learning_rate": 3.4295062088908652e-06, "loss": 0.1286, "step": 21035 }, { "epoch": 0.613688079817959, "grad_norm": 0.9446412109261622, "learning_rate": 3.4290576914894473e-06, "loss": 0.1219, "step": 21036 }, { "epoch": 0.6137172530486026, "grad_norm": 0.9453470235633491, "learning_rate": 3.4286091881130306e-06, "loss": 0.1283, "step": 21037 }, { "epoch": 0.6137464262792461, "grad_norm": 0.9393398829366166, "learning_rate": 3.4281606987656145e-06, "loss": 0.1274, "step": 21038 }, { "epoch": 0.6137755995098897, "grad_norm": 0.8114576726343591, "learning_rate": 3.427712223451205e-06, "loss": 0.0999, "step": 21039 }, { "epoch": 0.6138047727405332, "grad_norm": 0.8286629047477375, "learning_rate": 3.427263762173806e-06, "loss": 0.1214, "step": 21040 }, { "epoch": 0.6138339459711768, "grad_norm": 0.9669587631486285, "learning_rate": 3.4268153149374196e-06, "loss": 0.1456, "step": 21041 }, { "epoch": 0.6138631192018205, "grad_norm": 1.2406251188809492, "learning_rate": 3.42636688174605e-06, "loss": 0.1197, "step": 21042 }, { "epoch": 0.613892292432464, "grad_norm": 0.8051232750190032, "learning_rate": 3.425918462603702e-06, "loss": 0.1032, "step": 21043 }, { "epoch": 0.6139214656631076, "grad_norm": 0.7013276307148666, "learning_rate": 3.425470057514378e-06, "loss": 0.1232, "step": 21044 }, { "epoch": 0.6139506388937511, "grad_norm": 0.8151813991754139, "learning_rate": 3.4250216664820823e-06, "loss": 0.1196, "step": 21045 }, { "epoch": 0.6139798121243947, "grad_norm": 1.1766845593849624, "learning_rate": 3.424573289510817e-06, "loss": 0.1222, "step": 21046 }, { "epoch": 0.6140089853550382, "grad_norm": 0.8595289919133213, "learning_rate": 3.4241249266045846e-06, "loss": 0.1063, "step": 21047 }, { "epoch": 0.6140381585856818, "grad_norm": 0.977795390304621, "learning_rate": 3.4236765777673877e-06, "loss": 0.1065, "step": 21048 }, { "epoch": 0.6140673318163253, "grad_norm": 0.9509711323827088, "learning_rate": 3.4232282430032325e-06, "loss": 0.1311, "step": 21049 }, { "epoch": 0.6140965050469689, "grad_norm": 0.8516215564018293, "learning_rate": 3.4227799223161172e-06, "loss": 0.1038, "step": 21050 }, { "epoch": 0.6141256782776124, "grad_norm": 1.0373232414161326, "learning_rate": 3.4223316157100472e-06, "loss": 0.1203, "step": 21051 }, { "epoch": 0.614154851508256, "grad_norm": 1.1369227402149664, "learning_rate": 3.4218833231890247e-06, "loss": 0.1134, "step": 21052 }, { "epoch": 0.6141840247388995, "grad_norm": 0.9984749318565033, "learning_rate": 3.4214350447570497e-06, "loss": 0.1372, "step": 21053 }, { "epoch": 0.6142131979695431, "grad_norm": 0.8746272182503635, "learning_rate": 3.420986780418125e-06, "loss": 0.1223, "step": 21054 }, { "epoch": 0.6142423712001867, "grad_norm": 0.9208209942617852, "learning_rate": 3.420538530176255e-06, "loss": 0.1359, "step": 21055 }, { "epoch": 0.6142715444308303, "grad_norm": 0.8486047135830159, "learning_rate": 3.4200902940354393e-06, "loss": 0.1173, "step": 21056 }, { "epoch": 0.6143007176614739, "grad_norm": 0.9784875212204225, "learning_rate": 3.4196420719996815e-06, "loss": 0.1354, "step": 21057 }, { "epoch": 0.6143298908921174, "grad_norm": 0.9037462086589715, "learning_rate": 3.4191938640729804e-06, "loss": 0.13, "step": 21058 }, { "epoch": 0.614359064122761, "grad_norm": 0.836418206364951, "learning_rate": 3.4187456702593393e-06, "loss": 0.1164, "step": 21059 }, { "epoch": 0.6143882373534045, "grad_norm": 0.7692119225633084, "learning_rate": 3.4182974905627597e-06, "loss": 0.0974, "step": 21060 }, { "epoch": 0.6144174105840481, "grad_norm": 0.8262778150302602, "learning_rate": 3.4178493249872426e-06, "loss": 0.1179, "step": 21061 }, { "epoch": 0.6144465838146916, "grad_norm": 1.0896507636235115, "learning_rate": 3.4174011735367898e-06, "loss": 0.1482, "step": 21062 }, { "epoch": 0.6144757570453352, "grad_norm": 0.849059463558778, "learning_rate": 3.416953036215401e-06, "loss": 0.1557, "step": 21063 }, { "epoch": 0.6145049302759787, "grad_norm": 0.7333087844744282, "learning_rate": 3.416504913027077e-06, "loss": 0.11, "step": 21064 }, { "epoch": 0.6145341035066223, "grad_norm": 0.7265565962359889, "learning_rate": 3.416056803975818e-06, "loss": 0.1361, "step": 21065 }, { "epoch": 0.6145632767372659, "grad_norm": 1.0893694892523167, "learning_rate": 3.4156087090656274e-06, "loss": 0.1254, "step": 21066 }, { "epoch": 0.6145924499679094, "grad_norm": 0.8376286525754213, "learning_rate": 3.415160628300505e-06, "loss": 0.1272, "step": 21067 }, { "epoch": 0.614621623198553, "grad_norm": 0.7529455272292838, "learning_rate": 3.414712561684449e-06, "loss": 0.1017, "step": 21068 }, { "epoch": 0.6146507964291966, "grad_norm": 0.8563188556651878, "learning_rate": 3.414264509221461e-06, "loss": 0.1184, "step": 21069 }, { "epoch": 0.6146799696598402, "grad_norm": 0.9759476152761085, "learning_rate": 3.4138164709155415e-06, "loss": 0.1409, "step": 21070 }, { "epoch": 0.6147091428904837, "grad_norm": 0.9775050495130322, "learning_rate": 3.4133684467706872e-06, "loss": 0.1374, "step": 21071 }, { "epoch": 0.6147383161211273, "grad_norm": 0.6244597394948174, "learning_rate": 3.412920436790903e-06, "loss": 0.1086, "step": 21072 }, { "epoch": 0.6147674893517708, "grad_norm": 0.7340720042898693, "learning_rate": 3.4124724409801864e-06, "loss": 0.133, "step": 21073 }, { "epoch": 0.6147966625824144, "grad_norm": 0.9434362957191156, "learning_rate": 3.4120244593425363e-06, "loss": 0.1398, "step": 21074 }, { "epoch": 0.6148258358130579, "grad_norm": 0.7282879258179913, "learning_rate": 3.411576491881954e-06, "loss": 0.1129, "step": 21075 }, { "epoch": 0.6148550090437015, "grad_norm": 0.89164869118127, "learning_rate": 3.4111285386024363e-06, "loss": 0.1272, "step": 21076 }, { "epoch": 0.614884182274345, "grad_norm": 0.9496976075366732, "learning_rate": 3.4106805995079824e-06, "loss": 0.1528, "step": 21077 }, { "epoch": 0.6149133555049886, "grad_norm": 0.9619568063580806, "learning_rate": 3.4102326746025938e-06, "loss": 0.1261, "step": 21078 }, { "epoch": 0.6149425287356322, "grad_norm": 0.7389963679323991, "learning_rate": 3.40978476389027e-06, "loss": 0.1285, "step": 21079 }, { "epoch": 0.6149717019662757, "grad_norm": 0.7873182570850406, "learning_rate": 3.4093368673750066e-06, "loss": 0.1217, "step": 21080 }, { "epoch": 0.6150008751969193, "grad_norm": 1.124881653368094, "learning_rate": 3.408888985060804e-06, "loss": 0.1507, "step": 21081 }, { "epoch": 0.6150300484275628, "grad_norm": 0.8711768419178351, "learning_rate": 3.4084411169516618e-06, "loss": 0.1153, "step": 21082 }, { "epoch": 0.6150592216582065, "grad_norm": 0.5681554111961512, "learning_rate": 3.4079932630515746e-06, "loss": 0.1141, "step": 21083 }, { "epoch": 0.61508839488885, "grad_norm": 0.8012527760871635, "learning_rate": 3.4075454233645466e-06, "loss": 0.1251, "step": 21084 }, { "epoch": 0.6151175681194936, "grad_norm": 1.1940778013542062, "learning_rate": 3.407097597894572e-06, "loss": 0.1176, "step": 21085 }, { "epoch": 0.6151467413501371, "grad_norm": 0.783546930966101, "learning_rate": 3.4066497866456493e-06, "loss": 0.1315, "step": 21086 }, { "epoch": 0.6151759145807807, "grad_norm": 0.7274787811137401, "learning_rate": 3.406201989621778e-06, "loss": 0.1268, "step": 21087 }, { "epoch": 0.6152050878114242, "grad_norm": 0.8389753465093771, "learning_rate": 3.405754206826954e-06, "loss": 0.1186, "step": 21088 }, { "epoch": 0.6152342610420678, "grad_norm": 0.8868556445561018, "learning_rate": 3.4053064382651748e-06, "loss": 0.1124, "step": 21089 }, { "epoch": 0.6152634342727114, "grad_norm": 0.7182195739752382, "learning_rate": 3.4048586839404394e-06, "loss": 0.1229, "step": 21090 }, { "epoch": 0.6152926075033549, "grad_norm": 0.8918735417370072, "learning_rate": 3.4044109438567463e-06, "loss": 0.113, "step": 21091 }, { "epoch": 0.6153217807339985, "grad_norm": 0.8436804640959246, "learning_rate": 3.4039632180180915e-06, "loss": 0.1199, "step": 21092 }, { "epoch": 0.615350953964642, "grad_norm": 0.6505094891322994, "learning_rate": 3.403515506428471e-06, "loss": 0.111, "step": 21093 }, { "epoch": 0.6153801271952856, "grad_norm": 0.7803460636998947, "learning_rate": 3.4030678090918833e-06, "loss": 0.1418, "step": 21094 }, { "epoch": 0.6154093004259291, "grad_norm": 0.9766751223909023, "learning_rate": 3.4026201260123237e-06, "loss": 0.1441, "step": 21095 }, { "epoch": 0.6154384736565728, "grad_norm": 0.9481376533175228, "learning_rate": 3.402172457193792e-06, "loss": 0.1285, "step": 21096 }, { "epoch": 0.6154676468872163, "grad_norm": 0.8126063740133177, "learning_rate": 3.401724802640283e-06, "loss": 0.1139, "step": 21097 }, { "epoch": 0.6154968201178599, "grad_norm": 0.8163672154408407, "learning_rate": 3.401277162355793e-06, "loss": 0.1264, "step": 21098 }, { "epoch": 0.6155259933485034, "grad_norm": 0.8898152787830411, "learning_rate": 3.400829536344319e-06, "loss": 0.1047, "step": 21099 }, { "epoch": 0.615555166579147, "grad_norm": 0.9128706990403679, "learning_rate": 3.400381924609858e-06, "loss": 0.0934, "step": 21100 }, { "epoch": 0.6155843398097905, "grad_norm": 0.8037016179255396, "learning_rate": 3.3999343271564033e-06, "loss": 0.1174, "step": 21101 }, { "epoch": 0.6156135130404341, "grad_norm": 0.8151627688975245, "learning_rate": 3.3994867439879543e-06, "loss": 0.1284, "step": 21102 }, { "epoch": 0.6156426862710777, "grad_norm": 0.8541182688864145, "learning_rate": 3.399039175108505e-06, "loss": 0.1376, "step": 21103 }, { "epoch": 0.6156718595017212, "grad_norm": 0.9659765166684128, "learning_rate": 3.3985916205220527e-06, "loss": 0.1396, "step": 21104 }, { "epoch": 0.6157010327323648, "grad_norm": 0.6885270996459721, "learning_rate": 3.3981440802325922e-06, "loss": 0.1396, "step": 21105 }, { "epoch": 0.6157302059630083, "grad_norm": 0.9091368934832649, "learning_rate": 3.397696554244118e-06, "loss": 0.1489, "step": 21106 }, { "epoch": 0.6157593791936519, "grad_norm": 0.8662934895472221, "learning_rate": 3.3972490425606258e-06, "loss": 0.1273, "step": 21107 }, { "epoch": 0.6157885524242954, "grad_norm": 0.8242258428719526, "learning_rate": 3.3968015451861124e-06, "loss": 0.1416, "step": 21108 }, { "epoch": 0.615817725654939, "grad_norm": 1.005713719858816, "learning_rate": 3.3963540621245734e-06, "loss": 0.1153, "step": 21109 }, { "epoch": 0.6158468988855826, "grad_norm": 1.002304810971801, "learning_rate": 3.395906593380001e-06, "loss": 0.1185, "step": 21110 }, { "epoch": 0.6158760721162262, "grad_norm": 0.9061916118785468, "learning_rate": 3.395459138956392e-06, "loss": 0.1136, "step": 21111 }, { "epoch": 0.6159052453468697, "grad_norm": 0.946255304795538, "learning_rate": 3.395011698857742e-06, "loss": 0.1315, "step": 21112 }, { "epoch": 0.6159344185775133, "grad_norm": 1.3922510314359713, "learning_rate": 3.39456427308804e-06, "loss": 0.1151, "step": 21113 }, { "epoch": 0.6159635918081569, "grad_norm": 0.7777176414925873, "learning_rate": 3.39411686165129e-06, "loss": 0.1091, "step": 21114 }, { "epoch": 0.6159927650388004, "grad_norm": 0.7181694510957686, "learning_rate": 3.393669464551479e-06, "loss": 0.1183, "step": 21115 }, { "epoch": 0.616021938269444, "grad_norm": 1.2838727664589396, "learning_rate": 3.393222081792603e-06, "loss": 0.1194, "step": 21116 }, { "epoch": 0.6160511115000875, "grad_norm": 1.0335190932026679, "learning_rate": 3.3927747133786593e-06, "loss": 0.1491, "step": 21117 }, { "epoch": 0.6160802847307311, "grad_norm": 0.921466228499897, "learning_rate": 3.3923273593136376e-06, "loss": 0.1104, "step": 21118 }, { "epoch": 0.6161094579613746, "grad_norm": 0.973417624582613, "learning_rate": 3.3918800196015324e-06, "loss": 0.1212, "step": 21119 }, { "epoch": 0.6161386311920182, "grad_norm": 1.1702767967094085, "learning_rate": 3.3914326942463393e-06, "loss": 0.1286, "step": 21120 }, { "epoch": 0.6161678044226617, "grad_norm": 0.9303754380158115, "learning_rate": 3.390985383252051e-06, "loss": 0.1277, "step": 21121 }, { "epoch": 0.6161969776533053, "grad_norm": 0.8328432298832673, "learning_rate": 3.3905380866226622e-06, "loss": 0.1362, "step": 21122 }, { "epoch": 0.6162261508839489, "grad_norm": 1.0653924386615603, "learning_rate": 3.3900908043621642e-06, "loss": 0.1226, "step": 21123 }, { "epoch": 0.6162553241145925, "grad_norm": 0.8004490174977535, "learning_rate": 3.3896435364745516e-06, "loss": 0.1521, "step": 21124 }, { "epoch": 0.616284497345236, "grad_norm": 0.7318667905252606, "learning_rate": 3.389196282963816e-06, "loss": 0.1108, "step": 21125 }, { "epoch": 0.6163136705758796, "grad_norm": 0.9010306237909933, "learning_rate": 3.388749043833952e-06, "loss": 0.1253, "step": 21126 }, { "epoch": 0.6163428438065232, "grad_norm": 0.802820715916072, "learning_rate": 3.3883018190889526e-06, "loss": 0.131, "step": 21127 }, { "epoch": 0.6163720170371667, "grad_norm": 0.8216216258840674, "learning_rate": 3.3878546087328096e-06, "loss": 0.1068, "step": 21128 }, { "epoch": 0.6164011902678103, "grad_norm": 0.8973045211638617, "learning_rate": 3.3874074127695156e-06, "loss": 0.1166, "step": 21129 }, { "epoch": 0.6164303634984538, "grad_norm": 2.5582603756955216, "learning_rate": 3.386960231203064e-06, "loss": 0.1276, "step": 21130 }, { "epoch": 0.6164595367290974, "grad_norm": 0.9130987991297592, "learning_rate": 3.3865130640374444e-06, "loss": 0.1256, "step": 21131 }, { "epoch": 0.6164887099597409, "grad_norm": 1.0208648611113489, "learning_rate": 3.3860659112766526e-06, "loss": 0.1274, "step": 21132 }, { "epoch": 0.6165178831903845, "grad_norm": 0.6996033325044707, "learning_rate": 3.3856187729246785e-06, "loss": 0.1358, "step": 21133 }, { "epoch": 0.616547056421028, "grad_norm": 0.9373999646860356, "learning_rate": 3.3851716489855146e-06, "loss": 0.1511, "step": 21134 }, { "epoch": 0.6165762296516716, "grad_norm": 0.9384190485217382, "learning_rate": 3.3847245394631544e-06, "loss": 0.1321, "step": 21135 }, { "epoch": 0.6166054028823151, "grad_norm": 0.90609768373347, "learning_rate": 3.384277444361586e-06, "loss": 0.1466, "step": 21136 }, { "epoch": 0.6166345761129588, "grad_norm": 0.7934721185413713, "learning_rate": 3.3838303636848022e-06, "loss": 0.1396, "step": 21137 }, { "epoch": 0.6166637493436024, "grad_norm": 1.1830060333479426, "learning_rate": 3.383383297436796e-06, "loss": 0.1404, "step": 21138 }, { "epoch": 0.6166929225742459, "grad_norm": 0.9254301458145205, "learning_rate": 3.38293624562156e-06, "loss": 0.1173, "step": 21139 }, { "epoch": 0.6167220958048895, "grad_norm": 0.7713612792198895, "learning_rate": 3.3824892082430803e-06, "loss": 0.1207, "step": 21140 }, { "epoch": 0.616751269035533, "grad_norm": 1.2303376297944446, "learning_rate": 3.382042185305352e-06, "loss": 0.13, "step": 21141 }, { "epoch": 0.6167804422661766, "grad_norm": 0.9861930183687242, "learning_rate": 3.3815951768123654e-06, "loss": 0.1178, "step": 21142 }, { "epoch": 0.6168096154968201, "grad_norm": 0.7440257645961308, "learning_rate": 3.381148182768108e-06, "loss": 0.1159, "step": 21143 }, { "epoch": 0.6168387887274637, "grad_norm": 1.0968413448562162, "learning_rate": 3.3807012031765758e-06, "loss": 0.1283, "step": 21144 }, { "epoch": 0.6168679619581072, "grad_norm": 1.0142731549523618, "learning_rate": 3.3802542380417556e-06, "loss": 0.1115, "step": 21145 }, { "epoch": 0.6168971351887508, "grad_norm": 0.7268069163428662, "learning_rate": 3.379807287367639e-06, "loss": 0.1229, "step": 21146 }, { "epoch": 0.6169263084193943, "grad_norm": 0.8990398893279822, "learning_rate": 3.3793603511582178e-06, "loss": 0.1052, "step": 21147 }, { "epoch": 0.6169554816500379, "grad_norm": 1.3020606188837562, "learning_rate": 3.378913429417479e-06, "loss": 0.1506, "step": 21148 }, { "epoch": 0.6169846548806814, "grad_norm": 0.9374825750232973, "learning_rate": 3.378466522149413e-06, "loss": 0.1533, "step": 21149 }, { "epoch": 0.6170138281113251, "grad_norm": 0.9108943977469091, "learning_rate": 3.3780196293580125e-06, "loss": 0.1158, "step": 21150 }, { "epoch": 0.6170430013419687, "grad_norm": 0.9500285737204432, "learning_rate": 3.3775727510472644e-06, "loss": 0.1336, "step": 21151 }, { "epoch": 0.6170721745726122, "grad_norm": 0.941945039929362, "learning_rate": 3.3771258872211614e-06, "loss": 0.1101, "step": 21152 }, { "epoch": 0.6171013478032558, "grad_norm": 0.7556585325761117, "learning_rate": 3.37667903788369e-06, "loss": 0.1, "step": 21153 }, { "epoch": 0.6171305210338993, "grad_norm": 0.8734207602001116, "learning_rate": 3.3762322030388407e-06, "loss": 0.1027, "step": 21154 }, { "epoch": 0.6171596942645429, "grad_norm": 0.8427695756956373, "learning_rate": 3.375785382690601e-06, "loss": 0.1034, "step": 21155 }, { "epoch": 0.6171888674951864, "grad_norm": 0.8173671443059403, "learning_rate": 3.3753385768429624e-06, "loss": 0.11, "step": 21156 }, { "epoch": 0.61721804072583, "grad_norm": 0.7632379967922096, "learning_rate": 3.3748917854999153e-06, "loss": 0.0903, "step": 21157 }, { "epoch": 0.6172472139564735, "grad_norm": 0.899417514099561, "learning_rate": 3.3744450086654444e-06, "loss": 0.1207, "step": 21158 }, { "epoch": 0.6172763871871171, "grad_norm": 1.0236612305183193, "learning_rate": 3.3739982463435417e-06, "loss": 0.1486, "step": 21159 }, { "epoch": 0.6173055604177606, "grad_norm": 0.802210837903345, "learning_rate": 3.3735514985381944e-06, "loss": 0.1263, "step": 21160 }, { "epoch": 0.6173347336484042, "grad_norm": 0.8378422940751181, "learning_rate": 3.3731047652533892e-06, "loss": 0.1133, "step": 21161 }, { "epoch": 0.6173639068790477, "grad_norm": 1.1110821027982585, "learning_rate": 3.372658046493118e-06, "loss": 0.1414, "step": 21162 }, { "epoch": 0.6173930801096913, "grad_norm": 1.1105431698320545, "learning_rate": 3.3722113422613668e-06, "loss": 0.1099, "step": 21163 }, { "epoch": 0.617422253340335, "grad_norm": 0.7429347405761328, "learning_rate": 3.371764652562124e-06, "loss": 0.1043, "step": 21164 }, { "epoch": 0.6174514265709785, "grad_norm": 0.9633949073256, "learning_rate": 3.3713179773993787e-06, "loss": 0.1377, "step": 21165 }, { "epoch": 0.6174805998016221, "grad_norm": 1.0267365838698848, "learning_rate": 3.3708713167771166e-06, "loss": 0.1389, "step": 21166 }, { "epoch": 0.6175097730322656, "grad_norm": 1.0615631732340258, "learning_rate": 3.3704246706993255e-06, "loss": 0.1292, "step": 21167 }, { "epoch": 0.6175389462629092, "grad_norm": 0.8260204382415454, "learning_rate": 3.369978039169995e-06, "loss": 0.1252, "step": 21168 }, { "epoch": 0.6175681194935527, "grad_norm": 0.7363162209643128, "learning_rate": 3.3695314221931124e-06, "loss": 0.1297, "step": 21169 }, { "epoch": 0.6175972927241963, "grad_norm": 1.1806912401812193, "learning_rate": 3.369084819772663e-06, "loss": 0.134, "step": 21170 }, { "epoch": 0.6176264659548398, "grad_norm": 1.1202515002285367, "learning_rate": 3.3686382319126353e-06, "loss": 0.1309, "step": 21171 }, { "epoch": 0.6176556391854834, "grad_norm": 0.8047278167034158, "learning_rate": 3.368191658617017e-06, "loss": 0.1262, "step": 21172 }, { "epoch": 0.6176848124161269, "grad_norm": 0.6263439729541208, "learning_rate": 3.367745099889791e-06, "loss": 0.1286, "step": 21173 }, { "epoch": 0.6177139856467705, "grad_norm": 0.726227833659534, "learning_rate": 3.36729855573495e-06, "loss": 0.1359, "step": 21174 }, { "epoch": 0.617743158877414, "grad_norm": 1.057684417738265, "learning_rate": 3.3668520261564764e-06, "loss": 0.1254, "step": 21175 }, { "epoch": 0.6177723321080576, "grad_norm": 1.0064517708224447, "learning_rate": 3.3664055111583586e-06, "loss": 0.1468, "step": 21176 }, { "epoch": 0.6178015053387013, "grad_norm": 0.7996593234194277, "learning_rate": 3.3659590107445833e-06, "loss": 0.1249, "step": 21177 }, { "epoch": 0.6178306785693448, "grad_norm": 0.7087731280318946, "learning_rate": 3.3655125249191344e-06, "loss": 0.1063, "step": 21178 }, { "epoch": 0.6178598517999884, "grad_norm": 0.784415737054412, "learning_rate": 3.365066053685999e-06, "loss": 0.1065, "step": 21179 }, { "epoch": 0.6178890250306319, "grad_norm": 0.9828445544343942, "learning_rate": 3.3646195970491645e-06, "loss": 0.1356, "step": 21180 }, { "epoch": 0.6179181982612755, "grad_norm": 0.8968393461343391, "learning_rate": 3.364173155012616e-06, "loss": 0.1352, "step": 21181 }, { "epoch": 0.617947371491919, "grad_norm": 0.7622712764123849, "learning_rate": 3.3637267275803397e-06, "loss": 0.1169, "step": 21182 }, { "epoch": 0.6179765447225626, "grad_norm": 0.9958838711416718, "learning_rate": 3.36328031475632e-06, "loss": 0.1516, "step": 21183 }, { "epoch": 0.6180057179532061, "grad_norm": 0.7650714280782808, "learning_rate": 3.3628339165445427e-06, "loss": 0.1392, "step": 21184 }, { "epoch": 0.6180348911838497, "grad_norm": 0.9511637695257763, "learning_rate": 3.3623875329489923e-06, "loss": 0.1399, "step": 21185 }, { "epoch": 0.6180640644144932, "grad_norm": 0.7707891357080175, "learning_rate": 3.3619411639736566e-06, "loss": 0.128, "step": 21186 }, { "epoch": 0.6180932376451368, "grad_norm": 0.8522396463338711, "learning_rate": 3.3614948096225193e-06, "loss": 0.1276, "step": 21187 }, { "epoch": 0.6181224108757803, "grad_norm": 0.7604940444213334, "learning_rate": 3.3610484698995647e-06, "loss": 0.1423, "step": 21188 }, { "epoch": 0.6181515841064239, "grad_norm": 0.7199763847857643, "learning_rate": 3.3606021448087778e-06, "loss": 0.1142, "step": 21189 }, { "epoch": 0.6181807573370675, "grad_norm": 0.6883281593330257, "learning_rate": 3.360155834354145e-06, "loss": 0.1298, "step": 21190 }, { "epoch": 0.6182099305677111, "grad_norm": 1.2775797964422437, "learning_rate": 3.359709538539647e-06, "loss": 0.1405, "step": 21191 }, { "epoch": 0.6182391037983547, "grad_norm": 0.8332967776041809, "learning_rate": 3.359263257369272e-06, "loss": 0.1392, "step": 21192 }, { "epoch": 0.6182682770289982, "grad_norm": 0.9422662887694544, "learning_rate": 3.3588169908470024e-06, "loss": 0.129, "step": 21193 }, { "epoch": 0.6182974502596418, "grad_norm": 0.8398392910025081, "learning_rate": 3.358370738976825e-06, "loss": 0.122, "step": 21194 }, { "epoch": 0.6183266234902853, "grad_norm": 0.8505175982716272, "learning_rate": 3.35792450176272e-06, "loss": 0.1229, "step": 21195 }, { "epoch": 0.6183557967209289, "grad_norm": 0.7003612590512537, "learning_rate": 3.3574782792086735e-06, "loss": 0.0949, "step": 21196 }, { "epoch": 0.6183849699515724, "grad_norm": 0.7088503878253166, "learning_rate": 3.357032071318667e-06, "loss": 0.1279, "step": 21197 }, { "epoch": 0.618414143182216, "grad_norm": 1.0418224776350187, "learning_rate": 3.3565858780966875e-06, "loss": 0.1052, "step": 21198 }, { "epoch": 0.6184433164128595, "grad_norm": 0.9797337619283316, "learning_rate": 3.356139699546718e-06, "loss": 0.1059, "step": 21199 }, { "epoch": 0.6184724896435031, "grad_norm": 0.714229762414593, "learning_rate": 3.35569353567274e-06, "loss": 0.108, "step": 21200 }, { "epoch": 0.6185016628741467, "grad_norm": 1.134266107694089, "learning_rate": 3.3552473864787373e-06, "loss": 0.1575, "step": 21201 }, { "epoch": 0.6185308361047902, "grad_norm": 1.2277527972648197, "learning_rate": 3.3548012519686944e-06, "loss": 0.1414, "step": 21202 }, { "epoch": 0.6185600093354338, "grad_norm": 0.7349958918407727, "learning_rate": 3.35435513214659e-06, "loss": 0.1154, "step": 21203 }, { "epoch": 0.6185891825660774, "grad_norm": 0.8126375420422207, "learning_rate": 3.3539090270164134e-06, "loss": 0.1494, "step": 21204 }, { "epoch": 0.618618355796721, "grad_norm": 1.1103551371168165, "learning_rate": 3.3534629365821424e-06, "loss": 0.1406, "step": 21205 }, { "epoch": 0.6186475290273645, "grad_norm": 0.8389841037147578, "learning_rate": 3.353016860847762e-06, "loss": 0.1172, "step": 21206 }, { "epoch": 0.6186767022580081, "grad_norm": 0.800865656585191, "learning_rate": 3.352570799817255e-06, "loss": 0.1133, "step": 21207 }, { "epoch": 0.6187058754886516, "grad_norm": 0.6766690715697238, "learning_rate": 3.352124753494601e-06, "loss": 0.1163, "step": 21208 }, { "epoch": 0.6187350487192952, "grad_norm": 0.7791316632308496, "learning_rate": 3.351678721883783e-06, "loss": 0.1307, "step": 21209 }, { "epoch": 0.6187642219499387, "grad_norm": 0.9211891434207212, "learning_rate": 3.351232704988785e-06, "loss": 0.1284, "step": 21210 }, { "epoch": 0.6187933951805823, "grad_norm": 0.6421911306092272, "learning_rate": 3.3507867028135883e-06, "loss": 0.1202, "step": 21211 }, { "epoch": 0.6188225684112258, "grad_norm": 0.755354661394962, "learning_rate": 3.3503407153621747e-06, "loss": 0.1116, "step": 21212 }, { "epoch": 0.6188517416418694, "grad_norm": 0.9795982324837306, "learning_rate": 3.349894742638524e-06, "loss": 0.1379, "step": 21213 }, { "epoch": 0.618880914872513, "grad_norm": 0.7906302537901844, "learning_rate": 3.34944878464662e-06, "loss": 0.1303, "step": 21214 }, { "epoch": 0.6189100881031565, "grad_norm": 0.7370506327497786, "learning_rate": 3.349002841390442e-06, "loss": 0.1066, "step": 21215 }, { "epoch": 0.6189392613338001, "grad_norm": 0.7863086011781343, "learning_rate": 3.3485569128739724e-06, "loss": 0.134, "step": 21216 }, { "epoch": 0.6189684345644436, "grad_norm": 0.7531898753356019, "learning_rate": 3.348110999101195e-06, "loss": 0.133, "step": 21217 }, { "epoch": 0.6189976077950873, "grad_norm": 0.8702064344304873, "learning_rate": 3.347665100076086e-06, "loss": 0.0964, "step": 21218 }, { "epoch": 0.6190267810257308, "grad_norm": 0.7362051539785243, "learning_rate": 3.3472192158026296e-06, "loss": 0.1498, "step": 21219 }, { "epoch": 0.6190559542563744, "grad_norm": 0.7447196134231505, "learning_rate": 3.3467733462848063e-06, "loss": 0.1188, "step": 21220 }, { "epoch": 0.6190851274870179, "grad_norm": 0.7591857929875219, "learning_rate": 3.3463274915265935e-06, "loss": 0.1132, "step": 21221 }, { "epoch": 0.6191143007176615, "grad_norm": 0.6873927769309279, "learning_rate": 3.3458816515319753e-06, "loss": 0.124, "step": 21222 }, { "epoch": 0.619143473948305, "grad_norm": 0.9018772086631418, "learning_rate": 3.345435826304931e-06, "loss": 0.1254, "step": 21223 }, { "epoch": 0.6191726471789486, "grad_norm": 0.8982515765955048, "learning_rate": 3.3449900158494407e-06, "loss": 0.1257, "step": 21224 }, { "epoch": 0.6192018204095922, "grad_norm": 0.8706524419812917, "learning_rate": 3.3445442201694843e-06, "loss": 0.1203, "step": 21225 }, { "epoch": 0.6192309936402357, "grad_norm": 0.7358970491922977, "learning_rate": 3.3440984392690425e-06, "loss": 0.1138, "step": 21226 }, { "epoch": 0.6192601668708793, "grad_norm": 1.0222237823949332, "learning_rate": 3.3436526731520924e-06, "loss": 0.1222, "step": 21227 }, { "epoch": 0.6192893401015228, "grad_norm": 0.8041844508813587, "learning_rate": 3.3432069218226173e-06, "loss": 0.1106, "step": 21228 }, { "epoch": 0.6193185133321664, "grad_norm": 0.9842227525447131, "learning_rate": 3.3427611852845964e-06, "loss": 0.1641, "step": 21229 }, { "epoch": 0.6193476865628099, "grad_norm": 0.8740283958603253, "learning_rate": 3.3423154635420075e-06, "loss": 0.1113, "step": 21230 }, { "epoch": 0.6193768597934536, "grad_norm": 0.9697856100686789, "learning_rate": 3.341869756598829e-06, "loss": 0.1402, "step": 21231 }, { "epoch": 0.6194060330240971, "grad_norm": 0.9156001557062314, "learning_rate": 3.3414240644590435e-06, "loss": 0.1236, "step": 21232 }, { "epoch": 0.6194352062547407, "grad_norm": 0.8168858686619196, "learning_rate": 3.340978387126625e-06, "loss": 0.1204, "step": 21233 }, { "epoch": 0.6194643794853842, "grad_norm": 0.8154387843613505, "learning_rate": 3.3405327246055584e-06, "loss": 0.1083, "step": 21234 }, { "epoch": 0.6194935527160278, "grad_norm": 0.9627968238583299, "learning_rate": 3.3400870768998185e-06, "loss": 0.135, "step": 21235 }, { "epoch": 0.6195227259466713, "grad_norm": 0.8056921129051398, "learning_rate": 3.3396414440133846e-06, "loss": 0.1201, "step": 21236 }, { "epoch": 0.6195518991773149, "grad_norm": 0.7033696684907796, "learning_rate": 3.3391958259502364e-06, "loss": 0.1071, "step": 21237 }, { "epoch": 0.6195810724079585, "grad_norm": 1.1705868548999783, "learning_rate": 3.338750222714351e-06, "loss": 0.131, "step": 21238 }, { "epoch": 0.619610245638602, "grad_norm": 0.8836863283166284, "learning_rate": 3.3383046343097057e-06, "loss": 0.1141, "step": 21239 }, { "epoch": 0.6196394188692456, "grad_norm": 0.866811109363582, "learning_rate": 3.3378590607402805e-06, "loss": 0.122, "step": 21240 }, { "epoch": 0.6196685920998891, "grad_norm": 1.1313283525712725, "learning_rate": 3.337413502010054e-06, "loss": 0.1204, "step": 21241 }, { "epoch": 0.6196977653305327, "grad_norm": 1.2772787645472965, "learning_rate": 3.336967958123003e-06, "loss": 0.1095, "step": 21242 }, { "epoch": 0.6197269385611762, "grad_norm": 0.8394017627605113, "learning_rate": 3.3365224290831046e-06, "loss": 0.1003, "step": 21243 }, { "epoch": 0.6197561117918198, "grad_norm": 0.8194341602790984, "learning_rate": 3.336076914894336e-06, "loss": 0.1161, "step": 21244 }, { "epoch": 0.6197852850224634, "grad_norm": 0.9484147882504607, "learning_rate": 3.335631415560675e-06, "loss": 0.1518, "step": 21245 }, { "epoch": 0.619814458253107, "grad_norm": 0.6960903804747017, "learning_rate": 3.3351859310861002e-06, "loss": 0.1106, "step": 21246 }, { "epoch": 0.6198436314837505, "grad_norm": 0.7394339730995685, "learning_rate": 3.3347404614745893e-06, "loss": 0.1057, "step": 21247 }, { "epoch": 0.6198728047143941, "grad_norm": 0.9504018079113686, "learning_rate": 3.3342950067301173e-06, "loss": 0.1243, "step": 21248 }, { "epoch": 0.6199019779450377, "grad_norm": 0.9583044532792427, "learning_rate": 3.3338495668566614e-06, "loss": 0.1422, "step": 21249 }, { "epoch": 0.6199311511756812, "grad_norm": 0.8205100638906588, "learning_rate": 3.3334041418581996e-06, "loss": 0.1055, "step": 21250 }, { "epoch": 0.6199603244063248, "grad_norm": 0.9523776220684347, "learning_rate": 3.332958731738706e-06, "loss": 0.1338, "step": 21251 }, { "epoch": 0.6199894976369683, "grad_norm": 0.7827706205399767, "learning_rate": 3.33251333650216e-06, "loss": 0.1141, "step": 21252 }, { "epoch": 0.6200186708676119, "grad_norm": 1.0623234776400148, "learning_rate": 3.332067956152537e-06, "loss": 0.1217, "step": 21253 }, { "epoch": 0.6200478440982554, "grad_norm": 0.8327085585583657, "learning_rate": 3.3316225906938136e-06, "loss": 0.1231, "step": 21254 }, { "epoch": 0.620077017328899, "grad_norm": 1.0833418036181328, "learning_rate": 3.3311772401299645e-06, "loss": 0.1515, "step": 21255 }, { "epoch": 0.6201061905595425, "grad_norm": 0.9818978491360845, "learning_rate": 3.3307319044649663e-06, "loss": 0.1241, "step": 21256 }, { "epoch": 0.6201353637901861, "grad_norm": 1.0891961690935306, "learning_rate": 3.3302865837027954e-06, "loss": 0.1163, "step": 21257 }, { "epoch": 0.6201645370208296, "grad_norm": 0.8342598441064556, "learning_rate": 3.3298412778474277e-06, "loss": 0.1102, "step": 21258 }, { "epoch": 0.6201937102514733, "grad_norm": 0.9643015121715223, "learning_rate": 3.329395986902839e-06, "loss": 0.1105, "step": 21259 }, { "epoch": 0.6202228834821168, "grad_norm": 1.1013596414539768, "learning_rate": 3.3289507108730033e-06, "loss": 0.1168, "step": 21260 }, { "epoch": 0.6202520567127604, "grad_norm": 0.9029074386464252, "learning_rate": 3.3285054497618974e-06, "loss": 0.139, "step": 21261 }, { "epoch": 0.620281229943404, "grad_norm": 0.7658185436620013, "learning_rate": 3.3280602035734944e-06, "loss": 0.106, "step": 21262 }, { "epoch": 0.6203104031740475, "grad_norm": 0.9713398445467478, "learning_rate": 3.327614972311771e-06, "loss": 0.1107, "step": 21263 }, { "epoch": 0.6203395764046911, "grad_norm": 0.8215817782423971, "learning_rate": 3.3271697559807042e-06, "loss": 0.1202, "step": 21264 }, { "epoch": 0.6203687496353346, "grad_norm": 0.8677790075313323, "learning_rate": 3.3267245545842653e-06, "loss": 0.1597, "step": 21265 }, { "epoch": 0.6203979228659782, "grad_norm": 0.8700063377829749, "learning_rate": 3.3262793681264293e-06, "loss": 0.1151, "step": 21266 }, { "epoch": 0.6204270960966217, "grad_norm": 0.7790501776389437, "learning_rate": 3.3258341966111728e-06, "loss": 0.1317, "step": 21267 }, { "epoch": 0.6204562693272653, "grad_norm": 0.94202255518554, "learning_rate": 3.325389040042466e-06, "loss": 0.1339, "step": 21268 }, { "epoch": 0.6204854425579088, "grad_norm": 0.7582691685828977, "learning_rate": 3.3249438984242893e-06, "loss": 0.1148, "step": 21269 }, { "epoch": 0.6205146157885524, "grad_norm": 0.8749013701005308, "learning_rate": 3.3244987717606127e-06, "loss": 0.1152, "step": 21270 }, { "epoch": 0.6205437890191959, "grad_norm": 0.7694512091733933, "learning_rate": 3.324053660055411e-06, "loss": 0.1208, "step": 21271 }, { "epoch": 0.6205729622498396, "grad_norm": 0.8421991445483307, "learning_rate": 3.3236085633126586e-06, "loss": 0.1371, "step": 21272 }, { "epoch": 0.6206021354804832, "grad_norm": 0.7464571484475591, "learning_rate": 3.323163481536328e-06, "loss": 0.1303, "step": 21273 }, { "epoch": 0.6206313087111267, "grad_norm": 0.8140424366889636, "learning_rate": 3.3227184147303928e-06, "loss": 0.1206, "step": 21274 }, { "epoch": 0.6206604819417703, "grad_norm": 0.9722149153286191, "learning_rate": 3.322273362898828e-06, "loss": 0.1178, "step": 21275 }, { "epoch": 0.6206896551724138, "grad_norm": 0.7490105420493265, "learning_rate": 3.3218283260456065e-06, "loss": 0.1167, "step": 21276 }, { "epoch": 0.6207188284030574, "grad_norm": 0.907949776745723, "learning_rate": 3.321383304174702e-06, "loss": 0.1364, "step": 21277 }, { "epoch": 0.6207480016337009, "grad_norm": 0.9494358160912708, "learning_rate": 3.320938297290085e-06, "loss": 0.1187, "step": 21278 }, { "epoch": 0.6207771748643445, "grad_norm": 1.0470637100780809, "learning_rate": 3.3204933053957312e-06, "loss": 0.1292, "step": 21279 }, { "epoch": 0.620806348094988, "grad_norm": 0.8368251737513249, "learning_rate": 3.32004832849561e-06, "loss": 0.1249, "step": 21280 }, { "epoch": 0.6208355213256316, "grad_norm": 1.0894381658191175, "learning_rate": 3.319603366593699e-06, "loss": 0.1316, "step": 21281 }, { "epoch": 0.6208646945562751, "grad_norm": 1.053502910510657, "learning_rate": 3.3191584196939664e-06, "loss": 0.149, "step": 21282 }, { "epoch": 0.6208938677869187, "grad_norm": 0.7907878805400254, "learning_rate": 3.318713487800387e-06, "loss": 0.139, "step": 21283 }, { "epoch": 0.6209230410175622, "grad_norm": 0.806438614256576, "learning_rate": 3.318268570916933e-06, "loss": 0.1359, "step": 21284 }, { "epoch": 0.6209522142482058, "grad_norm": 0.8779660143369934, "learning_rate": 3.317823669047574e-06, "loss": 0.114, "step": 21285 }, { "epoch": 0.6209813874788495, "grad_norm": 0.780236913047683, "learning_rate": 3.3173787821962835e-06, "loss": 0.1013, "step": 21286 }, { "epoch": 0.621010560709493, "grad_norm": 0.7286366102868524, "learning_rate": 3.3169339103670346e-06, "loss": 0.1441, "step": 21287 }, { "epoch": 0.6210397339401366, "grad_norm": 0.8642237540419939, "learning_rate": 3.3164890535637973e-06, "loss": 0.1417, "step": 21288 }, { "epoch": 0.6210689071707801, "grad_norm": 1.0161236822646509, "learning_rate": 3.3160442117905457e-06, "loss": 0.1071, "step": 21289 }, { "epoch": 0.6210980804014237, "grad_norm": 0.7800651069117587, "learning_rate": 3.315599385051248e-06, "loss": 0.1363, "step": 21290 }, { "epoch": 0.6211272536320672, "grad_norm": 0.789159023649561, "learning_rate": 3.315154573349877e-06, "loss": 0.1266, "step": 21291 }, { "epoch": 0.6211564268627108, "grad_norm": 0.7680218715126719, "learning_rate": 3.3147097766904023e-06, "loss": 0.1096, "step": 21292 }, { "epoch": 0.6211856000933543, "grad_norm": 0.8466641849015817, "learning_rate": 3.314264995076798e-06, "loss": 0.1409, "step": 21293 }, { "epoch": 0.6212147733239979, "grad_norm": 0.6803673970957486, "learning_rate": 3.313820228513034e-06, "loss": 0.1142, "step": 21294 }, { "epoch": 0.6212439465546414, "grad_norm": 1.0595812562162283, "learning_rate": 3.313375477003079e-06, "loss": 0.1339, "step": 21295 }, { "epoch": 0.621273119785285, "grad_norm": 0.8386605136996219, "learning_rate": 3.3129307405509058e-06, "loss": 0.1323, "step": 21296 }, { "epoch": 0.6213022930159285, "grad_norm": 0.8007409591229493, "learning_rate": 3.312486019160486e-06, "loss": 0.1453, "step": 21297 }, { "epoch": 0.6213314662465721, "grad_norm": 0.8800377461827983, "learning_rate": 3.3120413128357837e-06, "loss": 0.1407, "step": 21298 }, { "epoch": 0.6213606394772158, "grad_norm": 0.7508978002471144, "learning_rate": 3.311596621580777e-06, "loss": 0.1032, "step": 21299 }, { "epoch": 0.6213898127078593, "grad_norm": 0.6572251173652115, "learning_rate": 3.311151945399432e-06, "loss": 0.1106, "step": 21300 }, { "epoch": 0.6214189859385029, "grad_norm": 0.8964986549543154, "learning_rate": 3.3107072842957188e-06, "loss": 0.129, "step": 21301 }, { "epoch": 0.6214481591691464, "grad_norm": 1.0203596500690153, "learning_rate": 3.310262638273609e-06, "loss": 0.1428, "step": 21302 }, { "epoch": 0.62147733239979, "grad_norm": 0.9062871790989347, "learning_rate": 3.3098180073370702e-06, "loss": 0.1242, "step": 21303 }, { "epoch": 0.6215065056304335, "grad_norm": 0.892411289294688, "learning_rate": 3.309373391490072e-06, "loss": 0.1277, "step": 21304 }, { "epoch": 0.6215356788610771, "grad_norm": 0.8217260832629453, "learning_rate": 3.3089287907365848e-06, "loss": 0.1387, "step": 21305 }, { "epoch": 0.6215648520917206, "grad_norm": 0.9979695852189724, "learning_rate": 3.3084842050805778e-06, "loss": 0.1206, "step": 21306 }, { "epoch": 0.6215940253223642, "grad_norm": 0.9104752937405064, "learning_rate": 3.3080396345260213e-06, "loss": 0.0993, "step": 21307 }, { "epoch": 0.6216231985530077, "grad_norm": 0.7880754227526585, "learning_rate": 3.3075950790768817e-06, "loss": 0.1326, "step": 21308 }, { "epoch": 0.6216523717836513, "grad_norm": 0.8896441449057885, "learning_rate": 3.3071505387371294e-06, "loss": 0.1217, "step": 21309 }, { "epoch": 0.6216815450142948, "grad_norm": 0.9020034823921245, "learning_rate": 3.306706013510732e-06, "loss": 0.1301, "step": 21310 }, { "epoch": 0.6217107182449384, "grad_norm": 0.7829725279898587, "learning_rate": 3.306261503401661e-06, "loss": 0.1202, "step": 21311 }, { "epoch": 0.621739891475582, "grad_norm": 1.0006266652654363, "learning_rate": 3.3058170084138824e-06, "loss": 0.1325, "step": 21312 }, { "epoch": 0.6217690647062256, "grad_norm": 0.8483983545262735, "learning_rate": 3.305372528551365e-06, "loss": 0.1383, "step": 21313 }, { "epoch": 0.6217982379368692, "grad_norm": 0.7990302783664022, "learning_rate": 3.304928063818078e-06, "loss": 0.1056, "step": 21314 }, { "epoch": 0.6218274111675127, "grad_norm": 1.3404300346733664, "learning_rate": 3.304483614217987e-06, "loss": 0.1365, "step": 21315 }, { "epoch": 0.6218565843981563, "grad_norm": 0.8902200862550608, "learning_rate": 3.304039179755061e-06, "loss": 0.1272, "step": 21316 }, { "epoch": 0.6218857576287998, "grad_norm": 0.9414809876283852, "learning_rate": 3.3035947604332697e-06, "loss": 0.1177, "step": 21317 }, { "epoch": 0.6219149308594434, "grad_norm": 0.8641232348744955, "learning_rate": 3.3031503562565793e-06, "loss": 0.121, "step": 21318 }, { "epoch": 0.6219441040900869, "grad_norm": 0.8492757587837996, "learning_rate": 3.302705967228958e-06, "loss": 0.1243, "step": 21319 }, { "epoch": 0.6219732773207305, "grad_norm": 0.675696365679319, "learning_rate": 3.3022615933543724e-06, "loss": 0.1139, "step": 21320 }, { "epoch": 0.622002450551374, "grad_norm": 0.9470120382955522, "learning_rate": 3.3018172346367896e-06, "loss": 0.1551, "step": 21321 }, { "epoch": 0.6220316237820176, "grad_norm": 1.0350572461555476, "learning_rate": 3.3013728910801758e-06, "loss": 0.1141, "step": 21322 }, { "epoch": 0.6220607970126611, "grad_norm": 0.686469596550152, "learning_rate": 3.3009285626885002e-06, "loss": 0.1136, "step": 21323 }, { "epoch": 0.6220899702433047, "grad_norm": 0.7361842123082664, "learning_rate": 3.3004842494657304e-06, "loss": 0.1491, "step": 21324 }, { "epoch": 0.6221191434739483, "grad_norm": 0.9547261361657784, "learning_rate": 3.30003995141583e-06, "loss": 0.122, "step": 21325 }, { "epoch": 0.6221483167045919, "grad_norm": 0.7418964773658194, "learning_rate": 3.299595668542768e-06, "loss": 0.1406, "step": 21326 }, { "epoch": 0.6221774899352355, "grad_norm": 0.7739919750428765, "learning_rate": 3.29915140085051e-06, "loss": 0.1337, "step": 21327 }, { "epoch": 0.622206663165879, "grad_norm": 0.7858944115116288, "learning_rate": 3.2987071483430195e-06, "loss": 0.1083, "step": 21328 }, { "epoch": 0.6222358363965226, "grad_norm": 1.0061417646637059, "learning_rate": 3.298262911024269e-06, "loss": 0.1297, "step": 21329 }, { "epoch": 0.6222650096271661, "grad_norm": 0.6176665245333957, "learning_rate": 3.2978186888982188e-06, "loss": 0.1171, "step": 21330 }, { "epoch": 0.6222941828578097, "grad_norm": 0.7673620279695847, "learning_rate": 3.297374481968838e-06, "loss": 0.1376, "step": 21331 }, { "epoch": 0.6223233560884532, "grad_norm": 1.0690542554056024, "learning_rate": 3.2969302902400925e-06, "loss": 0.1146, "step": 21332 }, { "epoch": 0.6223525293190968, "grad_norm": 0.7159491735092279, "learning_rate": 3.2964861137159453e-06, "loss": 0.1087, "step": 21333 }, { "epoch": 0.6223817025497403, "grad_norm": 0.8175363148488521, "learning_rate": 3.296041952400363e-06, "loss": 0.112, "step": 21334 }, { "epoch": 0.6224108757803839, "grad_norm": 0.8295698694515653, "learning_rate": 3.2955978062973117e-06, "loss": 0.1317, "step": 21335 }, { "epoch": 0.6224400490110275, "grad_norm": 0.9168800836975299, "learning_rate": 3.295153675410756e-06, "loss": 0.1245, "step": 21336 }, { "epoch": 0.622469222241671, "grad_norm": 0.9645563310931667, "learning_rate": 3.294709559744663e-06, "loss": 0.1397, "step": 21337 }, { "epoch": 0.6224983954723146, "grad_norm": 0.8367384801046545, "learning_rate": 3.2942654593029957e-06, "loss": 0.1266, "step": 21338 }, { "epoch": 0.6225275687029581, "grad_norm": 0.7889492779134213, "learning_rate": 3.2938213740897173e-06, "loss": 0.1085, "step": 21339 }, { "epoch": 0.6225567419336018, "grad_norm": 0.8859241154679921, "learning_rate": 3.2933773041087945e-06, "loss": 0.1594, "step": 21340 }, { "epoch": 0.6225859151642453, "grad_norm": 0.8246678630982585, "learning_rate": 3.292933249364194e-06, "loss": 0.1378, "step": 21341 }, { "epoch": 0.6226150883948889, "grad_norm": 0.9921870326117046, "learning_rate": 3.2924892098598765e-06, "loss": 0.1151, "step": 21342 }, { "epoch": 0.6226442616255324, "grad_norm": 0.8903747923174103, "learning_rate": 3.292045185599808e-06, "loss": 0.1186, "step": 21343 }, { "epoch": 0.622673434856176, "grad_norm": 1.0347288106601755, "learning_rate": 3.291601176587953e-06, "loss": 0.1235, "step": 21344 }, { "epoch": 0.6227026080868195, "grad_norm": 0.9944202953824731, "learning_rate": 3.291157182828274e-06, "loss": 0.1479, "step": 21345 }, { "epoch": 0.6227317813174631, "grad_norm": 0.8423356634400826, "learning_rate": 3.290713204324735e-06, "loss": 0.1128, "step": 21346 }, { "epoch": 0.6227609545481066, "grad_norm": 0.7917008469805683, "learning_rate": 3.290269241081301e-06, "loss": 0.1097, "step": 21347 }, { "epoch": 0.6227901277787502, "grad_norm": 0.7328504510975807, "learning_rate": 3.2898252931019353e-06, "loss": 0.1379, "step": 21348 }, { "epoch": 0.6228193010093938, "grad_norm": 1.1886418462196109, "learning_rate": 3.289381360390602e-06, "loss": 0.1268, "step": 21349 }, { "epoch": 0.6228484742400373, "grad_norm": 0.9171882169636048, "learning_rate": 3.2889374429512625e-06, "loss": 0.1191, "step": 21350 }, { "epoch": 0.6228776474706809, "grad_norm": 0.8192435164398073, "learning_rate": 3.2884935407878815e-06, "loss": 0.128, "step": 21351 }, { "epoch": 0.6229068207013244, "grad_norm": 0.9002291269705648, "learning_rate": 3.2880496539044204e-06, "loss": 0.1404, "step": 21352 }, { "epoch": 0.6229359939319681, "grad_norm": 1.4768307983113431, "learning_rate": 3.287605782304844e-06, "loss": 0.1265, "step": 21353 }, { "epoch": 0.6229651671626116, "grad_norm": 0.9693148724283404, "learning_rate": 3.2871619259931155e-06, "loss": 0.1306, "step": 21354 }, { "epoch": 0.6229943403932552, "grad_norm": 1.0424101658228286, "learning_rate": 3.286718084973196e-06, "loss": 0.1393, "step": 21355 }, { "epoch": 0.6230235136238987, "grad_norm": 1.0124755595158514, "learning_rate": 3.286274259249048e-06, "loss": 0.1319, "step": 21356 }, { "epoch": 0.6230526868545423, "grad_norm": 0.9645705464887633, "learning_rate": 3.285830448824635e-06, "loss": 0.1424, "step": 21357 }, { "epoch": 0.6230818600851858, "grad_norm": 0.7622431640789379, "learning_rate": 3.285386653703916e-06, "loss": 0.1367, "step": 21358 }, { "epoch": 0.6231110333158294, "grad_norm": 0.92898231544446, "learning_rate": 3.2849428738908585e-06, "loss": 0.1443, "step": 21359 }, { "epoch": 0.623140206546473, "grad_norm": 1.1094987025385317, "learning_rate": 3.2844991093894205e-06, "loss": 0.1361, "step": 21360 }, { "epoch": 0.6231693797771165, "grad_norm": 0.9244821197402684, "learning_rate": 3.284055360203565e-06, "loss": 0.1164, "step": 21361 }, { "epoch": 0.6231985530077601, "grad_norm": 1.5233771887704943, "learning_rate": 3.2836116263372553e-06, "loss": 0.1237, "step": 21362 }, { "epoch": 0.6232277262384036, "grad_norm": 1.3060899002913862, "learning_rate": 3.283167907794449e-06, "loss": 0.1436, "step": 21363 }, { "epoch": 0.6232568994690472, "grad_norm": 0.8891857246693189, "learning_rate": 3.2827242045791097e-06, "loss": 0.1126, "step": 21364 }, { "epoch": 0.6232860726996907, "grad_norm": 0.7134619818092386, "learning_rate": 3.2822805166951993e-06, "loss": 0.1269, "step": 21365 }, { "epoch": 0.6233152459303343, "grad_norm": 0.7131956097189778, "learning_rate": 3.2818368441466785e-06, "loss": 0.1174, "step": 21366 }, { "epoch": 0.6233444191609779, "grad_norm": 0.9251043737066043, "learning_rate": 3.2813931869375093e-06, "loss": 0.1017, "step": 21367 }, { "epoch": 0.6233735923916215, "grad_norm": 0.9510310397422824, "learning_rate": 3.2809495450716504e-06, "loss": 0.1194, "step": 21368 }, { "epoch": 0.623402765622265, "grad_norm": 0.8411471375005438, "learning_rate": 3.280505918553064e-06, "loss": 0.1269, "step": 21369 }, { "epoch": 0.6234319388529086, "grad_norm": 0.7882271282585804, "learning_rate": 3.2800623073857086e-06, "loss": 0.1124, "step": 21370 }, { "epoch": 0.6234611120835521, "grad_norm": 0.9041004644139871, "learning_rate": 3.279618711573549e-06, "loss": 0.1071, "step": 21371 }, { "epoch": 0.6234902853141957, "grad_norm": 1.0976159418705274, "learning_rate": 3.2791751311205412e-06, "loss": 0.117, "step": 21372 }, { "epoch": 0.6235194585448393, "grad_norm": 0.7036863548417275, "learning_rate": 3.2787315660306473e-06, "loss": 0.1277, "step": 21373 }, { "epoch": 0.6235486317754828, "grad_norm": 0.8272929067017146, "learning_rate": 3.278288016307828e-06, "loss": 0.1192, "step": 21374 }, { "epoch": 0.6235778050061264, "grad_norm": 0.9351901240229592, "learning_rate": 3.277844481956042e-06, "loss": 0.1257, "step": 21375 }, { "epoch": 0.6236069782367699, "grad_norm": 0.9171908978621737, "learning_rate": 3.277400962979247e-06, "loss": 0.134, "step": 21376 }, { "epoch": 0.6236361514674135, "grad_norm": 0.7627911834806919, "learning_rate": 3.2769574593814067e-06, "loss": 0.134, "step": 21377 }, { "epoch": 0.623665324698057, "grad_norm": 0.8982023656165522, "learning_rate": 3.2765139711664795e-06, "loss": 0.1599, "step": 21378 }, { "epoch": 0.6236944979287006, "grad_norm": 0.7722640945898418, "learning_rate": 3.2760704983384237e-06, "loss": 0.1149, "step": 21379 }, { "epoch": 0.6237236711593442, "grad_norm": 1.1665461068577712, "learning_rate": 3.2756270409011993e-06, "loss": 0.1295, "step": 21380 }, { "epoch": 0.6237528443899878, "grad_norm": 0.9510608179510497, "learning_rate": 3.2751835988587644e-06, "loss": 0.14, "step": 21381 }, { "epoch": 0.6237820176206313, "grad_norm": 0.8044782996137715, "learning_rate": 3.274740172215078e-06, "loss": 0.1097, "step": 21382 }, { "epoch": 0.6238111908512749, "grad_norm": 0.8821719331786081, "learning_rate": 3.2742967609741e-06, "loss": 0.123, "step": 21383 }, { "epoch": 0.6238403640819185, "grad_norm": 0.8969669929447068, "learning_rate": 3.2738533651397895e-06, "loss": 0.1282, "step": 21384 }, { "epoch": 0.623869537312562, "grad_norm": 0.8431979072420812, "learning_rate": 3.2734099847161038e-06, "loss": 0.1016, "step": 21385 }, { "epoch": 0.6238987105432056, "grad_norm": 1.2478708829528349, "learning_rate": 3.272966619707001e-06, "loss": 0.1379, "step": 21386 }, { "epoch": 0.6239278837738491, "grad_norm": 0.8291457332907493, "learning_rate": 3.272523270116441e-06, "loss": 0.1407, "step": 21387 }, { "epoch": 0.6239570570044927, "grad_norm": 0.9483068769052317, "learning_rate": 3.272079935948378e-06, "loss": 0.1219, "step": 21388 }, { "epoch": 0.6239862302351362, "grad_norm": 0.7799120919355934, "learning_rate": 3.271636617206776e-06, "loss": 0.1205, "step": 21389 }, { "epoch": 0.6240154034657798, "grad_norm": 0.8938662660427801, "learning_rate": 3.271193313895588e-06, "loss": 0.1285, "step": 21390 }, { "epoch": 0.6240445766964233, "grad_norm": 0.7676948460797017, "learning_rate": 3.270750026018774e-06, "loss": 0.1091, "step": 21391 }, { "epoch": 0.6240737499270669, "grad_norm": 0.6694981629040268, "learning_rate": 3.270306753580292e-06, "loss": 0.1397, "step": 21392 }, { "epoch": 0.6241029231577104, "grad_norm": 0.8658009481411322, "learning_rate": 3.269863496584097e-06, "loss": 0.111, "step": 21393 }, { "epoch": 0.6241320963883541, "grad_norm": 1.0818055981683743, "learning_rate": 3.2694202550341467e-06, "loss": 0.1224, "step": 21394 }, { "epoch": 0.6241612696189976, "grad_norm": 0.8759576032731533, "learning_rate": 3.2689770289344006e-06, "loss": 0.1412, "step": 21395 }, { "epoch": 0.6241904428496412, "grad_norm": 0.9859698274761876, "learning_rate": 3.2685338182888143e-06, "loss": 0.1089, "step": 21396 }, { "epoch": 0.6242196160802848, "grad_norm": 0.7141952478940872, "learning_rate": 3.268090623101346e-06, "loss": 0.112, "step": 21397 }, { "epoch": 0.6242487893109283, "grad_norm": 0.9453485591901521, "learning_rate": 3.2676474433759498e-06, "loss": 0.1081, "step": 21398 }, { "epoch": 0.6242779625415719, "grad_norm": 0.921193296818913, "learning_rate": 3.2672042791165837e-06, "loss": 0.1242, "step": 21399 }, { "epoch": 0.6243071357722154, "grad_norm": 0.8739465438971922, "learning_rate": 3.266761130327203e-06, "loss": 0.1157, "step": 21400 }, { "epoch": 0.624336309002859, "grad_norm": 0.9048152619749588, "learning_rate": 3.2663179970117678e-06, "loss": 0.1399, "step": 21401 }, { "epoch": 0.6243654822335025, "grad_norm": 1.023687456115142, "learning_rate": 3.26587487917423e-06, "loss": 0.1455, "step": 21402 }, { "epoch": 0.6243946554641461, "grad_norm": 0.9609077409680912, "learning_rate": 3.2654317768185474e-06, "loss": 0.1206, "step": 21403 }, { "epoch": 0.6244238286947896, "grad_norm": 0.7574570783561895, "learning_rate": 3.264988689948677e-06, "loss": 0.1148, "step": 21404 }, { "epoch": 0.6244530019254332, "grad_norm": 0.8387826309541686, "learning_rate": 3.264545618568572e-06, "loss": 0.1156, "step": 21405 }, { "epoch": 0.6244821751560767, "grad_norm": 0.8790705048861258, "learning_rate": 3.264102562682189e-06, "loss": 0.1127, "step": 21406 }, { "epoch": 0.6245113483867204, "grad_norm": 0.8097963171134626, "learning_rate": 3.2636595222934843e-06, "loss": 0.1122, "step": 21407 }, { "epoch": 0.624540521617364, "grad_norm": 0.7712599598560937, "learning_rate": 3.2632164974064136e-06, "loss": 0.1315, "step": 21408 }, { "epoch": 0.6245696948480075, "grad_norm": 0.7878562866613266, "learning_rate": 3.262773488024932e-06, "loss": 0.1347, "step": 21409 }, { "epoch": 0.6245988680786511, "grad_norm": 0.7682216592352683, "learning_rate": 3.262330494152993e-06, "loss": 0.1227, "step": 21410 }, { "epoch": 0.6246280413092946, "grad_norm": 0.8909676263064586, "learning_rate": 3.2618875157945527e-06, "loss": 0.1085, "step": 21411 }, { "epoch": 0.6246572145399382, "grad_norm": 0.893901912628838, "learning_rate": 3.2614445529535643e-06, "loss": 0.1297, "step": 21412 }, { "epoch": 0.6246863877705817, "grad_norm": 0.8418011611485549, "learning_rate": 3.2610016056339855e-06, "loss": 0.1211, "step": 21413 }, { "epoch": 0.6247155610012253, "grad_norm": 0.8337426922324811, "learning_rate": 3.2605586738397697e-06, "loss": 0.1148, "step": 21414 }, { "epoch": 0.6247447342318688, "grad_norm": 0.8394602552448956, "learning_rate": 3.26011575757487e-06, "loss": 0.1356, "step": 21415 }, { "epoch": 0.6247739074625124, "grad_norm": 0.7867755359073282, "learning_rate": 3.2596728568432417e-06, "loss": 0.1688, "step": 21416 }, { "epoch": 0.6248030806931559, "grad_norm": 0.7488016430772473, "learning_rate": 3.2592299716488396e-06, "loss": 0.1353, "step": 21417 }, { "epoch": 0.6248322539237995, "grad_norm": 0.8252871185907982, "learning_rate": 3.2587871019956137e-06, "loss": 0.1291, "step": 21418 }, { "epoch": 0.624861427154443, "grad_norm": 0.6719926057896809, "learning_rate": 3.258344247887524e-06, "loss": 0.1394, "step": 21419 }, { "epoch": 0.6248906003850866, "grad_norm": 0.6376576206577068, "learning_rate": 3.25790140932852e-06, "loss": 0.1238, "step": 21420 }, { "epoch": 0.6249197736157303, "grad_norm": 0.8584609128994657, "learning_rate": 3.257458586322556e-06, "loss": 0.1254, "step": 21421 }, { "epoch": 0.6249489468463738, "grad_norm": 0.6927924290628801, "learning_rate": 3.257015778873587e-06, "loss": 0.1244, "step": 21422 }, { "epoch": 0.6249781200770174, "grad_norm": 0.6575024255520201, "learning_rate": 3.2565729869855643e-06, "loss": 0.1217, "step": 21423 }, { "epoch": 0.6250072933076609, "grad_norm": 0.7516460135581234, "learning_rate": 3.2561302106624405e-06, "loss": 0.1145, "step": 21424 }, { "epoch": 0.6250364665383045, "grad_norm": 0.9149866479396442, "learning_rate": 3.2556874499081715e-06, "loss": 0.1167, "step": 21425 }, { "epoch": 0.625065639768948, "grad_norm": 0.6900964557226459, "learning_rate": 3.255244704726708e-06, "loss": 0.1362, "step": 21426 }, { "epoch": 0.6250948129995916, "grad_norm": 0.7534044221475009, "learning_rate": 3.254801975122004e-06, "loss": 0.1245, "step": 21427 }, { "epoch": 0.6251239862302351, "grad_norm": 1.0243240109542218, "learning_rate": 3.2543592610980107e-06, "loss": 0.1232, "step": 21428 }, { "epoch": 0.6251531594608787, "grad_norm": 0.8209915087198103, "learning_rate": 3.2539165626586812e-06, "loss": 0.1122, "step": 21429 }, { "epoch": 0.6251823326915222, "grad_norm": 0.7725251185715415, "learning_rate": 3.253473879807967e-06, "loss": 0.1144, "step": 21430 }, { "epoch": 0.6252115059221658, "grad_norm": 0.787825801021517, "learning_rate": 3.2530312125498224e-06, "loss": 0.1143, "step": 21431 }, { "epoch": 0.6252406791528093, "grad_norm": 0.9066304345049514, "learning_rate": 3.252588560888198e-06, "loss": 0.1308, "step": 21432 }, { "epoch": 0.6252698523834529, "grad_norm": 0.8198773791558235, "learning_rate": 3.252145924827045e-06, "loss": 0.13, "step": 21433 }, { "epoch": 0.6252990256140966, "grad_norm": 1.1783804499968054, "learning_rate": 3.251703304370317e-06, "loss": 0.1235, "step": 21434 }, { "epoch": 0.6253281988447401, "grad_norm": 0.8029233077293314, "learning_rate": 3.251260699521964e-06, "loss": 0.1258, "step": 21435 }, { "epoch": 0.6253573720753837, "grad_norm": 0.847215641910633, "learning_rate": 3.2508181102859373e-06, "loss": 0.1132, "step": 21436 }, { "epoch": 0.6253865453060272, "grad_norm": 1.0726819381486028, "learning_rate": 3.2503755366661893e-06, "loss": 0.144, "step": 21437 }, { "epoch": 0.6254157185366708, "grad_norm": 0.9165456177087047, "learning_rate": 3.2499329786666704e-06, "loss": 0.1252, "step": 21438 }, { "epoch": 0.6254448917673143, "grad_norm": 1.0593579415371146, "learning_rate": 3.2494904362913336e-06, "loss": 0.1244, "step": 21439 }, { "epoch": 0.6254740649979579, "grad_norm": 0.7798101061649765, "learning_rate": 3.2490479095441274e-06, "loss": 0.1164, "step": 21440 }, { "epoch": 0.6255032382286014, "grad_norm": 1.165482104967654, "learning_rate": 3.248605398429004e-06, "loss": 0.1128, "step": 21441 }, { "epoch": 0.625532411459245, "grad_norm": 1.002490336377689, "learning_rate": 3.248162902949912e-06, "loss": 0.1132, "step": 21442 }, { "epoch": 0.6255615846898885, "grad_norm": 0.6985548173851968, "learning_rate": 3.247720423110804e-06, "loss": 0.1191, "step": 21443 }, { "epoch": 0.6255907579205321, "grad_norm": 0.9088624263014558, "learning_rate": 3.2472779589156313e-06, "loss": 0.1432, "step": 21444 }, { "epoch": 0.6256199311511756, "grad_norm": 0.91907924242179, "learning_rate": 3.2468355103683414e-06, "loss": 0.1445, "step": 21445 }, { "epoch": 0.6256491043818192, "grad_norm": 0.8977712600118576, "learning_rate": 3.246393077472886e-06, "loss": 0.1299, "step": 21446 }, { "epoch": 0.6256782776124628, "grad_norm": 0.8134447544544907, "learning_rate": 3.2459506602332124e-06, "loss": 0.1424, "step": 21447 }, { "epoch": 0.6257074508431064, "grad_norm": 0.8258625851606094, "learning_rate": 3.2455082586532748e-06, "loss": 0.1214, "step": 21448 }, { "epoch": 0.62573662407375, "grad_norm": 0.8219977428003091, "learning_rate": 3.245065872737021e-06, "loss": 0.1138, "step": 21449 }, { "epoch": 0.6257657973043935, "grad_norm": 0.8148850511239529, "learning_rate": 3.2446235024883998e-06, "loss": 0.1259, "step": 21450 }, { "epoch": 0.6257949705350371, "grad_norm": 0.9931304727284846, "learning_rate": 3.2441811479113606e-06, "loss": 0.1206, "step": 21451 }, { "epoch": 0.6258241437656806, "grad_norm": 1.130776706632609, "learning_rate": 3.243738809009853e-06, "loss": 0.1212, "step": 21452 }, { "epoch": 0.6258533169963242, "grad_norm": 0.7070222500696697, "learning_rate": 3.2432964857878255e-06, "loss": 0.111, "step": 21453 }, { "epoch": 0.6258824902269677, "grad_norm": 1.3226701988216725, "learning_rate": 3.242854178249228e-06, "loss": 0.1343, "step": 21454 }, { "epoch": 0.6259116634576113, "grad_norm": 1.1425727052245944, "learning_rate": 3.242411886398009e-06, "loss": 0.1107, "step": 21455 }, { "epoch": 0.6259408366882548, "grad_norm": 1.0726275174312958, "learning_rate": 3.241969610238117e-06, "loss": 0.1204, "step": 21456 }, { "epoch": 0.6259700099188984, "grad_norm": 0.8533176144163238, "learning_rate": 3.241527349773501e-06, "loss": 0.0913, "step": 21457 }, { "epoch": 0.625999183149542, "grad_norm": 0.8312751156994354, "learning_rate": 3.2410851050081093e-06, "loss": 0.1581, "step": 21458 }, { "epoch": 0.6260283563801855, "grad_norm": 1.3826534479492336, "learning_rate": 3.2406428759458886e-06, "loss": 0.1461, "step": 21459 }, { "epoch": 0.626057529610829, "grad_norm": 0.8075980524239934, "learning_rate": 3.240200662590789e-06, "loss": 0.119, "step": 21460 }, { "epoch": 0.6260867028414727, "grad_norm": 1.0854000076389174, "learning_rate": 3.2397584649467584e-06, "loss": 0.1475, "step": 21461 }, { "epoch": 0.6261158760721163, "grad_norm": 1.2238030639072361, "learning_rate": 3.239316283017744e-06, "loss": 0.108, "step": 21462 }, { "epoch": 0.6261450493027598, "grad_norm": 1.0729826646664034, "learning_rate": 3.2388741168076927e-06, "loss": 0.1129, "step": 21463 }, { "epoch": 0.6261742225334034, "grad_norm": 0.8640682667547323, "learning_rate": 3.2384319663205544e-06, "loss": 0.1131, "step": 21464 }, { "epoch": 0.6262033957640469, "grad_norm": 0.7957328888807539, "learning_rate": 3.237989831560271e-06, "loss": 0.1325, "step": 21465 }, { "epoch": 0.6262325689946905, "grad_norm": 0.9437612541507222, "learning_rate": 3.2375477125307976e-06, "loss": 0.1359, "step": 21466 }, { "epoch": 0.626261742225334, "grad_norm": 0.690983930350747, "learning_rate": 3.2371056092360764e-06, "loss": 0.1126, "step": 21467 }, { "epoch": 0.6262909154559776, "grad_norm": 0.8852358085702343, "learning_rate": 3.2366635216800556e-06, "loss": 0.1162, "step": 21468 }, { "epoch": 0.6263200886866211, "grad_norm": 0.6476452883828339, "learning_rate": 3.2362214498666826e-06, "loss": 0.1279, "step": 21469 }, { "epoch": 0.6263492619172647, "grad_norm": 0.7760825147597292, "learning_rate": 3.235779393799903e-06, "loss": 0.1059, "step": 21470 }, { "epoch": 0.6263784351479083, "grad_norm": 1.120546129680719, "learning_rate": 3.2353373534836618e-06, "loss": 0.112, "step": 21471 }, { "epoch": 0.6264076083785518, "grad_norm": 0.6623240211488992, "learning_rate": 3.23489532892191e-06, "loss": 0.1296, "step": 21472 }, { "epoch": 0.6264367816091954, "grad_norm": 0.9737138537624721, "learning_rate": 3.2344533201185903e-06, "loss": 0.1355, "step": 21473 }, { "epoch": 0.6264659548398389, "grad_norm": 1.0758797997196936, "learning_rate": 3.234011327077651e-06, "loss": 0.1303, "step": 21474 }, { "epoch": 0.6264951280704826, "grad_norm": 0.7697573308968146, "learning_rate": 3.233569349803036e-06, "loss": 0.0937, "step": 21475 }, { "epoch": 0.6265243013011261, "grad_norm": 0.7873452600584238, "learning_rate": 3.233127388298692e-06, "loss": 0.1574, "step": 21476 }, { "epoch": 0.6265534745317697, "grad_norm": 1.3188187568593865, "learning_rate": 3.232685442568564e-06, "loss": 0.1401, "step": 21477 }, { "epoch": 0.6265826477624132, "grad_norm": 0.8299559483035459, "learning_rate": 3.2322435126165998e-06, "loss": 0.1251, "step": 21478 }, { "epoch": 0.6266118209930568, "grad_norm": 0.7907233450056685, "learning_rate": 3.2318015984467444e-06, "loss": 0.1093, "step": 21479 }, { "epoch": 0.6266409942237003, "grad_norm": 0.7562805504276985, "learning_rate": 3.2313597000629405e-06, "loss": 0.1078, "step": 21480 }, { "epoch": 0.6266701674543439, "grad_norm": 0.7682667137237226, "learning_rate": 3.230917817469136e-06, "loss": 0.1096, "step": 21481 }, { "epoch": 0.6266993406849874, "grad_norm": 0.778745490208327, "learning_rate": 3.230475950669275e-06, "loss": 0.0987, "step": 21482 }, { "epoch": 0.626728513915631, "grad_norm": 0.7573155065175218, "learning_rate": 3.2300340996673007e-06, "loss": 0.123, "step": 21483 }, { "epoch": 0.6267576871462746, "grad_norm": 0.6423302620766055, "learning_rate": 3.2295922644671605e-06, "loss": 0.1363, "step": 21484 }, { "epoch": 0.6267868603769181, "grad_norm": 0.9531062125109175, "learning_rate": 3.2291504450727983e-06, "loss": 0.1193, "step": 21485 }, { "epoch": 0.6268160336075617, "grad_norm": 0.7858304417946237, "learning_rate": 3.228708641488158e-06, "loss": 0.1228, "step": 21486 }, { "epoch": 0.6268452068382052, "grad_norm": 0.8371163322809207, "learning_rate": 3.2282668537171845e-06, "loss": 0.1123, "step": 21487 }, { "epoch": 0.6268743800688488, "grad_norm": 0.8470091732039831, "learning_rate": 3.2278250817638213e-06, "loss": 0.1247, "step": 21488 }, { "epoch": 0.6269035532994924, "grad_norm": 0.6490071039160903, "learning_rate": 3.227383325632012e-06, "loss": 0.0973, "step": 21489 }, { "epoch": 0.626932726530136, "grad_norm": 0.7524538481288671, "learning_rate": 3.2269415853257015e-06, "loss": 0.1603, "step": 21490 }, { "epoch": 0.6269618997607795, "grad_norm": 0.8569837417623171, "learning_rate": 3.226499860848834e-06, "loss": 0.1109, "step": 21491 }, { "epoch": 0.6269910729914231, "grad_norm": 0.8558427233602092, "learning_rate": 3.226058152205352e-06, "loss": 0.1319, "step": 21492 }, { "epoch": 0.6270202462220666, "grad_norm": 0.7372753385136062, "learning_rate": 3.225616459399199e-06, "loss": 0.1222, "step": 21493 }, { "epoch": 0.6270494194527102, "grad_norm": 0.6613528140723616, "learning_rate": 3.22517478243432e-06, "loss": 0.1116, "step": 21494 }, { "epoch": 0.6270785926833538, "grad_norm": 0.8100790230582315, "learning_rate": 3.2247331213146537e-06, "loss": 0.1212, "step": 21495 }, { "epoch": 0.6271077659139973, "grad_norm": 1.0019922446123124, "learning_rate": 3.2242914760441492e-06, "loss": 0.0958, "step": 21496 }, { "epoch": 0.6271369391446409, "grad_norm": 0.8196795105674665, "learning_rate": 3.2238498466267452e-06, "loss": 0.1361, "step": 21497 }, { "epoch": 0.6271661123752844, "grad_norm": 1.0121264732408293, "learning_rate": 3.2234082330663862e-06, "loss": 0.1417, "step": 21498 }, { "epoch": 0.627195285605928, "grad_norm": 0.8623796532033311, "learning_rate": 3.2229666353670157e-06, "loss": 0.1257, "step": 21499 }, { "epoch": 0.6272244588365715, "grad_norm": 0.706500786192673, "learning_rate": 3.2225250535325734e-06, "loss": 0.0941, "step": 21500 }, { "epoch": 0.6272536320672151, "grad_norm": 1.0776435973948162, "learning_rate": 3.2220834875670025e-06, "loss": 0.1343, "step": 21501 }, { "epoch": 0.6272828052978587, "grad_norm": 0.9433531895740077, "learning_rate": 3.2216419374742463e-06, "loss": 0.1196, "step": 21502 }, { "epoch": 0.6273119785285023, "grad_norm": 0.8541372782299788, "learning_rate": 3.221200403258247e-06, "loss": 0.138, "step": 21503 }, { "epoch": 0.6273411517591458, "grad_norm": 1.0613124678525838, "learning_rate": 3.220758884922946e-06, "loss": 0.1404, "step": 21504 }, { "epoch": 0.6273703249897894, "grad_norm": 0.7149691283911175, "learning_rate": 3.2203173824722845e-06, "loss": 0.1287, "step": 21505 }, { "epoch": 0.627399498220433, "grad_norm": 0.8569712554103952, "learning_rate": 3.2198758959102044e-06, "loss": 0.1309, "step": 21506 }, { "epoch": 0.6274286714510765, "grad_norm": 0.8563520174119542, "learning_rate": 3.219434425240646e-06, "loss": 0.1076, "step": 21507 }, { "epoch": 0.6274578446817201, "grad_norm": 0.8683837521634081, "learning_rate": 3.218992970467554e-06, "loss": 0.1434, "step": 21508 }, { "epoch": 0.6274870179123636, "grad_norm": 0.7393628992403547, "learning_rate": 3.218551531594868e-06, "loss": 0.1175, "step": 21509 }, { "epoch": 0.6275161911430072, "grad_norm": 0.7744820819302393, "learning_rate": 3.218110108626528e-06, "loss": 0.1128, "step": 21510 }, { "epoch": 0.6275453643736507, "grad_norm": 0.764739802815339, "learning_rate": 3.2176687015664744e-06, "loss": 0.1169, "step": 21511 }, { "epoch": 0.6275745376042943, "grad_norm": 0.8077778593371385, "learning_rate": 3.217227310418651e-06, "loss": 0.125, "step": 21512 }, { "epoch": 0.6276037108349378, "grad_norm": 0.7929958180323915, "learning_rate": 3.2167859351869946e-06, "loss": 0.1407, "step": 21513 }, { "epoch": 0.6276328840655814, "grad_norm": 0.9001240930573178, "learning_rate": 3.2163445758754484e-06, "loss": 0.1162, "step": 21514 }, { "epoch": 0.6276620572962249, "grad_norm": 0.9234262503649429, "learning_rate": 3.2159032324879522e-06, "loss": 0.1074, "step": 21515 }, { "epoch": 0.6276912305268686, "grad_norm": 0.6901976147452621, "learning_rate": 3.2154619050284465e-06, "loss": 0.1425, "step": 21516 }, { "epoch": 0.6277204037575121, "grad_norm": 0.9370112500263487, "learning_rate": 3.2150205935008715e-06, "loss": 0.1164, "step": 21517 }, { "epoch": 0.6277495769881557, "grad_norm": 0.8671280063844882, "learning_rate": 3.2145792979091656e-06, "loss": 0.1213, "step": 21518 }, { "epoch": 0.6277787502187993, "grad_norm": 0.7151301844910141, "learning_rate": 3.2141380182572684e-06, "loss": 0.1236, "step": 21519 }, { "epoch": 0.6278079234494428, "grad_norm": 0.7907461814608361, "learning_rate": 3.2136967545491214e-06, "loss": 0.1054, "step": 21520 }, { "epoch": 0.6278370966800864, "grad_norm": 1.0311974990272104, "learning_rate": 3.213255506788665e-06, "loss": 0.13, "step": 21521 }, { "epoch": 0.6278662699107299, "grad_norm": 0.654783786397787, "learning_rate": 3.2128142749798357e-06, "loss": 0.1073, "step": 21522 }, { "epoch": 0.6278954431413735, "grad_norm": 0.7715701914786791, "learning_rate": 3.212373059126574e-06, "loss": 0.1276, "step": 21523 }, { "epoch": 0.627924616372017, "grad_norm": 0.8551968957402468, "learning_rate": 3.21193185923282e-06, "loss": 0.123, "step": 21524 }, { "epoch": 0.6279537896026606, "grad_norm": 0.9263490150613771, "learning_rate": 3.211490675302508e-06, "loss": 0.1269, "step": 21525 }, { "epoch": 0.6279829628333041, "grad_norm": 0.7033271582772593, "learning_rate": 3.211049507339583e-06, "loss": 0.1149, "step": 21526 }, { "epoch": 0.6280121360639477, "grad_norm": 0.7079676583610439, "learning_rate": 3.2106083553479803e-06, "loss": 0.1284, "step": 21527 }, { "epoch": 0.6280413092945912, "grad_norm": 0.7680249952309003, "learning_rate": 3.2101672193316396e-06, "loss": 0.1159, "step": 21528 }, { "epoch": 0.6280704825252349, "grad_norm": 0.7992028237089969, "learning_rate": 3.209726099294499e-06, "loss": 0.1269, "step": 21529 }, { "epoch": 0.6280996557558784, "grad_norm": 0.7976631278886198, "learning_rate": 3.2092849952404958e-06, "loss": 0.1264, "step": 21530 }, { "epoch": 0.628128828986522, "grad_norm": 0.8019705601305389, "learning_rate": 3.208843907173568e-06, "loss": 0.1292, "step": 21531 }, { "epoch": 0.6281580022171656, "grad_norm": 0.8028068650962843, "learning_rate": 3.2084028350976547e-06, "loss": 0.1165, "step": 21532 }, { "epoch": 0.6281871754478091, "grad_norm": 0.8200340992995329, "learning_rate": 3.207961779016693e-06, "loss": 0.1262, "step": 21533 }, { "epoch": 0.6282163486784527, "grad_norm": 0.9319842655998817, "learning_rate": 3.207520738934622e-06, "loss": 0.1202, "step": 21534 }, { "epoch": 0.6282455219090962, "grad_norm": 0.7868073065749692, "learning_rate": 3.207079714855377e-06, "loss": 0.1269, "step": 21535 }, { "epoch": 0.6282746951397398, "grad_norm": 0.7929034376400665, "learning_rate": 3.2066387067828964e-06, "loss": 0.1134, "step": 21536 }, { "epoch": 0.6283038683703833, "grad_norm": 0.9960031040117716, "learning_rate": 3.2061977147211167e-06, "loss": 0.132, "step": 21537 }, { "epoch": 0.6283330416010269, "grad_norm": 0.8034204287393533, "learning_rate": 3.205756738673976e-06, "loss": 0.1156, "step": 21538 }, { "epoch": 0.6283622148316704, "grad_norm": 0.8642989459802942, "learning_rate": 3.2053157786454115e-06, "loss": 0.1093, "step": 21539 }, { "epoch": 0.628391388062314, "grad_norm": 0.8764213622877319, "learning_rate": 3.2048748346393587e-06, "loss": 0.116, "step": 21540 }, { "epoch": 0.6284205612929575, "grad_norm": 0.9479980537473418, "learning_rate": 3.2044339066597554e-06, "loss": 0.1141, "step": 21541 }, { "epoch": 0.6284497345236011, "grad_norm": 0.8517522390729108, "learning_rate": 3.2039929947105373e-06, "loss": 0.12, "step": 21542 }, { "epoch": 0.6284789077542448, "grad_norm": 0.9185116737345116, "learning_rate": 3.2035520987956403e-06, "loss": 0.1127, "step": 21543 }, { "epoch": 0.6285080809848883, "grad_norm": 1.076201627421619, "learning_rate": 3.2031112189190016e-06, "loss": 0.1275, "step": 21544 }, { "epoch": 0.6285372542155319, "grad_norm": 0.9351418794293074, "learning_rate": 3.202670355084557e-06, "loss": 0.1495, "step": 21545 }, { "epoch": 0.6285664274461754, "grad_norm": 0.7919129578114034, "learning_rate": 3.202229507296242e-06, "loss": 0.1061, "step": 21546 }, { "epoch": 0.628595600676819, "grad_norm": 0.9598799706381894, "learning_rate": 3.2017886755579945e-06, "loss": 0.1157, "step": 21547 }, { "epoch": 0.6286247739074625, "grad_norm": 1.256863121251991, "learning_rate": 3.2013478598737473e-06, "loss": 0.1213, "step": 21548 }, { "epoch": 0.6286539471381061, "grad_norm": 1.0244973188794877, "learning_rate": 3.2009070602474364e-06, "loss": 0.1055, "step": 21549 }, { "epoch": 0.6286831203687496, "grad_norm": 0.8329326346392835, "learning_rate": 3.200466276682998e-06, "loss": 0.1197, "step": 21550 }, { "epoch": 0.6287122935993932, "grad_norm": 1.07833819645262, "learning_rate": 3.2000255091843685e-06, "loss": 0.144, "step": 21551 }, { "epoch": 0.6287414668300367, "grad_norm": 1.4706122642924964, "learning_rate": 3.1995847577554805e-06, "loss": 0.1278, "step": 21552 }, { "epoch": 0.6287706400606803, "grad_norm": 0.7771298614194058, "learning_rate": 3.1991440224002703e-06, "loss": 0.1046, "step": 21553 }, { "epoch": 0.6287998132913238, "grad_norm": 0.934078883731427, "learning_rate": 3.1987033031226734e-06, "loss": 0.1175, "step": 21554 }, { "epoch": 0.6288289865219674, "grad_norm": 0.8632368368291131, "learning_rate": 3.1982625999266192e-06, "loss": 0.1314, "step": 21555 }, { "epoch": 0.6288581597526111, "grad_norm": 0.9722694887346638, "learning_rate": 3.1978219128160506e-06, "loss": 0.14, "step": 21556 }, { "epoch": 0.6288873329832546, "grad_norm": 0.7143577061373547, "learning_rate": 3.197381241794897e-06, "loss": 0.1179, "step": 21557 }, { "epoch": 0.6289165062138982, "grad_norm": 0.7677500897266829, "learning_rate": 3.1969405868670923e-06, "loss": 0.1145, "step": 21558 }, { "epoch": 0.6289456794445417, "grad_norm": 1.192550759965728, "learning_rate": 3.1964999480365732e-06, "loss": 0.1412, "step": 21559 }, { "epoch": 0.6289748526751853, "grad_norm": 1.1865159356900845, "learning_rate": 3.1960593253072713e-06, "loss": 0.1232, "step": 21560 }, { "epoch": 0.6290040259058288, "grad_norm": 0.7064843681198242, "learning_rate": 3.1956187186831197e-06, "loss": 0.1118, "step": 21561 }, { "epoch": 0.6290331991364724, "grad_norm": 0.8028464328453727, "learning_rate": 3.1951781281680537e-06, "loss": 0.1262, "step": 21562 }, { "epoch": 0.6290623723671159, "grad_norm": 1.0914709799610633, "learning_rate": 3.1947375537660073e-06, "loss": 0.1282, "step": 21563 }, { "epoch": 0.6290915455977595, "grad_norm": 0.828817605881031, "learning_rate": 3.1942969954809142e-06, "loss": 0.1255, "step": 21564 }, { "epoch": 0.629120718828403, "grad_norm": 0.7945320374821138, "learning_rate": 3.193856453316706e-06, "loss": 0.1267, "step": 21565 }, { "epoch": 0.6291498920590466, "grad_norm": 0.8926668259099348, "learning_rate": 3.1934159272773153e-06, "loss": 0.1261, "step": 21566 }, { "epoch": 0.6291790652896901, "grad_norm": 1.019131136454555, "learning_rate": 3.192975417366675e-06, "loss": 0.1262, "step": 21567 }, { "epoch": 0.6292082385203337, "grad_norm": 1.4016807723883395, "learning_rate": 3.1925349235887206e-06, "loss": 0.1222, "step": 21568 }, { "epoch": 0.6292374117509772, "grad_norm": 0.7037588645852593, "learning_rate": 3.192094445947383e-06, "loss": 0.1351, "step": 21569 }, { "epoch": 0.6292665849816209, "grad_norm": 1.1536549875530948, "learning_rate": 3.1916539844465945e-06, "loss": 0.1325, "step": 21570 }, { "epoch": 0.6292957582122645, "grad_norm": 1.0094463591261138, "learning_rate": 3.1912135390902866e-06, "loss": 0.1162, "step": 21571 }, { "epoch": 0.629324931442908, "grad_norm": 0.7365809998674072, "learning_rate": 3.1907731098823934e-06, "loss": 0.1085, "step": 21572 }, { "epoch": 0.6293541046735516, "grad_norm": 1.0474714461770684, "learning_rate": 3.1903326968268445e-06, "loss": 0.129, "step": 21573 }, { "epoch": 0.6293832779041951, "grad_norm": 1.105126739367677, "learning_rate": 3.1898922999275746e-06, "loss": 0.1489, "step": 21574 }, { "epoch": 0.6294124511348387, "grad_norm": 1.2915971982128405, "learning_rate": 3.189451919188513e-06, "loss": 0.1301, "step": 21575 }, { "epoch": 0.6294416243654822, "grad_norm": 0.8876481332728403, "learning_rate": 3.1890115546135946e-06, "loss": 0.1258, "step": 21576 }, { "epoch": 0.6294707975961258, "grad_norm": 0.874967281714496, "learning_rate": 3.1885712062067474e-06, "loss": 0.1245, "step": 21577 }, { "epoch": 0.6294999708267693, "grad_norm": 1.031186192412197, "learning_rate": 3.1881308739719043e-06, "loss": 0.1414, "step": 21578 }, { "epoch": 0.6295291440574129, "grad_norm": 0.7004382066330048, "learning_rate": 3.1876905579129947e-06, "loss": 0.1136, "step": 21579 }, { "epoch": 0.6295583172880564, "grad_norm": 0.7906080386510678, "learning_rate": 3.187250258033952e-06, "loss": 0.1358, "step": 21580 }, { "epoch": 0.6295874905187, "grad_norm": 1.0132762957323436, "learning_rate": 3.186809974338708e-06, "loss": 0.1321, "step": 21581 }, { "epoch": 0.6296166637493436, "grad_norm": 0.8342937522349811, "learning_rate": 3.18636970683119e-06, "loss": 0.1096, "step": 21582 }, { "epoch": 0.6296458369799872, "grad_norm": 0.7805721851565051, "learning_rate": 3.1859294555153307e-06, "loss": 0.1147, "step": 21583 }, { "epoch": 0.6296750102106308, "grad_norm": 0.8848720620201574, "learning_rate": 3.185489220395061e-06, "loss": 0.1242, "step": 21584 }, { "epoch": 0.6297041834412743, "grad_norm": 0.9549712490551664, "learning_rate": 3.1850490014743073e-06, "loss": 0.1267, "step": 21585 }, { "epoch": 0.6297333566719179, "grad_norm": 0.6993995909889085, "learning_rate": 3.1846087987570064e-06, "loss": 0.1316, "step": 21586 }, { "epoch": 0.6297625299025614, "grad_norm": 0.7656935525511819, "learning_rate": 3.184168612247083e-06, "loss": 0.1434, "step": 21587 }, { "epoch": 0.629791703133205, "grad_norm": 0.6832487924100321, "learning_rate": 3.1837284419484692e-06, "loss": 0.1163, "step": 21588 }, { "epoch": 0.6298208763638485, "grad_norm": 0.6859772217800502, "learning_rate": 3.183288287865095e-06, "loss": 0.1234, "step": 21589 }, { "epoch": 0.6298500495944921, "grad_norm": 0.7820156328027904, "learning_rate": 3.182848150000889e-06, "loss": 0.1293, "step": 21590 }, { "epoch": 0.6298792228251356, "grad_norm": 1.0464432483476764, "learning_rate": 3.182408028359779e-06, "loss": 0.1476, "step": 21591 }, { "epoch": 0.6299083960557792, "grad_norm": 0.9398890972408397, "learning_rate": 3.181967922945698e-06, "loss": 0.1092, "step": 21592 }, { "epoch": 0.6299375692864227, "grad_norm": 0.6819936556417671, "learning_rate": 3.181527833762573e-06, "loss": 0.1228, "step": 21593 }, { "epoch": 0.6299667425170663, "grad_norm": 0.7882889215378267, "learning_rate": 3.181087760814334e-06, "loss": 0.1217, "step": 21594 }, { "epoch": 0.6299959157477099, "grad_norm": 0.8198202330165746, "learning_rate": 3.1806477041049088e-06, "loss": 0.1269, "step": 21595 }, { "epoch": 0.6300250889783534, "grad_norm": 0.8829989907763003, "learning_rate": 3.1802076636382266e-06, "loss": 0.1294, "step": 21596 }, { "epoch": 0.6300542622089971, "grad_norm": 0.8119415114941211, "learning_rate": 3.1797676394182154e-06, "loss": 0.146, "step": 21597 }, { "epoch": 0.6300834354396406, "grad_norm": 1.0180215103513393, "learning_rate": 3.1793276314488044e-06, "loss": 0.1354, "step": 21598 }, { "epoch": 0.6301126086702842, "grad_norm": 0.8614817739939169, "learning_rate": 3.178887639733923e-06, "loss": 0.113, "step": 21599 }, { "epoch": 0.6301417819009277, "grad_norm": 0.9822827151036375, "learning_rate": 3.1784476642774965e-06, "loss": 0.1062, "step": 21600 }, { "epoch": 0.6301709551315713, "grad_norm": 0.883722596679789, "learning_rate": 3.178007705083455e-06, "loss": 0.1336, "step": 21601 }, { "epoch": 0.6302001283622148, "grad_norm": 0.8868749676557872, "learning_rate": 3.1775677621557266e-06, "loss": 0.1601, "step": 21602 }, { "epoch": 0.6302293015928584, "grad_norm": 0.8256364246893216, "learning_rate": 3.177127835498236e-06, "loss": 0.126, "step": 21603 }, { "epoch": 0.630258474823502, "grad_norm": 0.7191852828996356, "learning_rate": 3.176687925114914e-06, "loss": 0.0992, "step": 21604 }, { "epoch": 0.6302876480541455, "grad_norm": 0.9261084337875681, "learning_rate": 3.1762480310096875e-06, "loss": 0.1197, "step": 21605 }, { "epoch": 0.630316821284789, "grad_norm": 0.7821855119917328, "learning_rate": 3.1758081531864836e-06, "loss": 0.1343, "step": 21606 }, { "epoch": 0.6303459945154326, "grad_norm": 1.2769804075875613, "learning_rate": 3.1753682916492283e-06, "loss": 0.1321, "step": 21607 }, { "epoch": 0.6303751677460762, "grad_norm": 0.6846081119655252, "learning_rate": 3.1749284464018493e-06, "loss": 0.1102, "step": 21608 }, { "epoch": 0.6304043409767197, "grad_norm": 0.803259659986723, "learning_rate": 3.1744886174482727e-06, "loss": 0.1315, "step": 21609 }, { "epoch": 0.6304335142073634, "grad_norm": 1.0148603553708146, "learning_rate": 3.174048804792426e-06, "loss": 0.1175, "step": 21610 }, { "epoch": 0.6304626874380069, "grad_norm": 0.8276113582559743, "learning_rate": 3.1736090084382375e-06, "loss": 0.1352, "step": 21611 }, { "epoch": 0.6304918606686505, "grad_norm": 0.674217183269698, "learning_rate": 3.173169228389631e-06, "loss": 0.1189, "step": 21612 }, { "epoch": 0.630521033899294, "grad_norm": 0.8854670487468641, "learning_rate": 3.1727294646505326e-06, "loss": 0.111, "step": 21613 }, { "epoch": 0.6305502071299376, "grad_norm": 0.7736500206964132, "learning_rate": 3.172289717224871e-06, "loss": 0.1251, "step": 21614 }, { "epoch": 0.6305793803605811, "grad_norm": 0.8464978375916811, "learning_rate": 3.1718499861165675e-06, "loss": 0.1306, "step": 21615 }, { "epoch": 0.6306085535912247, "grad_norm": 0.8395150571330763, "learning_rate": 3.1714102713295538e-06, "loss": 0.1349, "step": 21616 }, { "epoch": 0.6306377268218682, "grad_norm": 0.8225095010010273, "learning_rate": 3.1709705728677516e-06, "loss": 0.119, "step": 21617 }, { "epoch": 0.6306669000525118, "grad_norm": 0.7485651100930605, "learning_rate": 3.1705308907350874e-06, "loss": 0.1363, "step": 21618 }, { "epoch": 0.6306960732831554, "grad_norm": 0.8285357155703458, "learning_rate": 3.1700912249354876e-06, "loss": 0.1408, "step": 21619 }, { "epoch": 0.6307252465137989, "grad_norm": 0.725291400812458, "learning_rate": 3.169651575472876e-06, "loss": 0.1147, "step": 21620 }, { "epoch": 0.6307544197444425, "grad_norm": 0.8214455353634862, "learning_rate": 3.169211942351177e-06, "loss": 0.1304, "step": 21621 }, { "epoch": 0.630783592975086, "grad_norm": 0.8523354910578501, "learning_rate": 3.1687723255743175e-06, "loss": 0.1285, "step": 21622 }, { "epoch": 0.6308127662057296, "grad_norm": 0.8011027639649022, "learning_rate": 3.1683327251462214e-06, "loss": 0.1254, "step": 21623 }, { "epoch": 0.6308419394363732, "grad_norm": 0.8352920934123474, "learning_rate": 3.1678931410708147e-06, "loss": 0.14, "step": 21624 }, { "epoch": 0.6308711126670168, "grad_norm": 0.7782148491210552, "learning_rate": 3.16745357335202e-06, "loss": 0.1106, "step": 21625 }, { "epoch": 0.6309002858976603, "grad_norm": 0.7553737254850275, "learning_rate": 3.1670140219937618e-06, "loss": 0.1091, "step": 21626 }, { "epoch": 0.6309294591283039, "grad_norm": 0.844354724598551, "learning_rate": 3.166574486999964e-06, "loss": 0.1399, "step": 21627 }, { "epoch": 0.6309586323589474, "grad_norm": 0.84286281446668, "learning_rate": 3.1661349683745527e-06, "loss": 0.1194, "step": 21628 }, { "epoch": 0.630987805589591, "grad_norm": 0.9342965999046021, "learning_rate": 3.1656954661214517e-06, "loss": 0.1265, "step": 21629 }, { "epoch": 0.6310169788202346, "grad_norm": 0.8805631626157451, "learning_rate": 3.1652559802445824e-06, "loss": 0.1242, "step": 21630 }, { "epoch": 0.6310461520508781, "grad_norm": 0.826035691178042, "learning_rate": 3.16481651074787e-06, "loss": 0.1262, "step": 21631 }, { "epoch": 0.6310753252815217, "grad_norm": 1.0506807534626028, "learning_rate": 3.1643770576352385e-06, "loss": 0.15, "step": 21632 }, { "epoch": 0.6311044985121652, "grad_norm": 0.9071116991791308, "learning_rate": 3.1639376209106087e-06, "loss": 0.132, "step": 21633 }, { "epoch": 0.6311336717428088, "grad_norm": 0.810589991856113, "learning_rate": 3.1634982005779057e-06, "loss": 0.1183, "step": 21634 }, { "epoch": 0.6311628449734523, "grad_norm": 1.1061353599375823, "learning_rate": 3.163058796641053e-06, "loss": 0.1338, "step": 21635 }, { "epoch": 0.6311920182040959, "grad_norm": 1.2644968792372722, "learning_rate": 3.162619409103974e-06, "loss": 0.1386, "step": 21636 }, { "epoch": 0.6312211914347395, "grad_norm": 1.2277733713954433, "learning_rate": 3.162180037970589e-06, "loss": 0.1226, "step": 21637 }, { "epoch": 0.6312503646653831, "grad_norm": 0.9519430141792353, "learning_rate": 3.1617406832448226e-06, "loss": 0.1272, "step": 21638 }, { "epoch": 0.6312795378960266, "grad_norm": 1.2693948166689224, "learning_rate": 3.1613013449305948e-06, "loss": 0.1311, "step": 21639 }, { "epoch": 0.6313087111266702, "grad_norm": 0.9420657904999812, "learning_rate": 3.160862023031831e-06, "loss": 0.1352, "step": 21640 }, { "epoch": 0.6313378843573137, "grad_norm": 1.052262365752245, "learning_rate": 3.1604227175524527e-06, "loss": 0.1177, "step": 21641 }, { "epoch": 0.6313670575879573, "grad_norm": 1.098416247575714, "learning_rate": 3.15998342849638e-06, "loss": 0.1403, "step": 21642 }, { "epoch": 0.6313962308186009, "grad_norm": 0.7520614242649607, "learning_rate": 3.1595441558675364e-06, "loss": 0.1178, "step": 21643 }, { "epoch": 0.6314254040492444, "grad_norm": 1.3690315430696907, "learning_rate": 3.1591048996698426e-06, "loss": 0.1332, "step": 21644 }, { "epoch": 0.631454577279888, "grad_norm": 0.8677210659976563, "learning_rate": 3.1586656599072205e-06, "loss": 0.1458, "step": 21645 }, { "epoch": 0.6314837505105315, "grad_norm": 0.8448804808239114, "learning_rate": 3.1582264365835946e-06, "loss": 0.1169, "step": 21646 }, { "epoch": 0.6315129237411751, "grad_norm": 0.7226770601195681, "learning_rate": 3.1577872297028813e-06, "loss": 0.1143, "step": 21647 }, { "epoch": 0.6315420969718186, "grad_norm": 0.7707979322774487, "learning_rate": 3.157348039269004e-06, "loss": 0.0992, "step": 21648 }, { "epoch": 0.6315712702024622, "grad_norm": 1.046312267048633, "learning_rate": 3.1569088652858847e-06, "loss": 0.1156, "step": 21649 }, { "epoch": 0.6316004434331057, "grad_norm": 0.9281426295578425, "learning_rate": 3.1564697077574403e-06, "loss": 0.1208, "step": 21650 }, { "epoch": 0.6316296166637494, "grad_norm": 0.9278221382126396, "learning_rate": 3.156030566687597e-06, "loss": 0.1546, "step": 21651 }, { "epoch": 0.631658789894393, "grad_norm": 0.7366072250886886, "learning_rate": 3.155591442080271e-06, "loss": 0.1143, "step": 21652 }, { "epoch": 0.6316879631250365, "grad_norm": 1.1135366008466816, "learning_rate": 3.1551523339393855e-06, "loss": 0.1464, "step": 21653 }, { "epoch": 0.63171713635568, "grad_norm": 0.7537104048847073, "learning_rate": 3.1547132422688593e-06, "loss": 0.1416, "step": 21654 }, { "epoch": 0.6317463095863236, "grad_norm": 0.8406120523779976, "learning_rate": 3.1542741670726123e-06, "loss": 0.1212, "step": 21655 }, { "epoch": 0.6317754828169672, "grad_norm": 0.9879725891146476, "learning_rate": 3.153835108354564e-06, "loss": 0.1427, "step": 21656 }, { "epoch": 0.6318046560476107, "grad_norm": 0.9870917487487902, "learning_rate": 3.153396066118636e-06, "loss": 0.1104, "step": 21657 }, { "epoch": 0.6318338292782543, "grad_norm": 0.57449428922917, "learning_rate": 3.152957040368747e-06, "loss": 0.1127, "step": 21658 }, { "epoch": 0.6318630025088978, "grad_norm": 0.6881469921397672, "learning_rate": 3.152518031108818e-06, "loss": 0.1076, "step": 21659 }, { "epoch": 0.6318921757395414, "grad_norm": 0.7112782570654465, "learning_rate": 3.1520790383427657e-06, "loss": 0.0951, "step": 21660 }, { "epoch": 0.6319213489701849, "grad_norm": 0.6982666678987339, "learning_rate": 3.1516400620745112e-06, "loss": 0.117, "step": 21661 }, { "epoch": 0.6319505222008285, "grad_norm": 0.6983628511843952, "learning_rate": 3.1512011023079714e-06, "loss": 0.1204, "step": 21662 }, { "epoch": 0.631979695431472, "grad_norm": 0.8568639745493805, "learning_rate": 3.1507621590470692e-06, "loss": 0.149, "step": 21663 }, { "epoch": 0.6320088686621157, "grad_norm": 0.8962051912890376, "learning_rate": 3.15032323229572e-06, "loss": 0.1431, "step": 21664 }, { "epoch": 0.6320380418927593, "grad_norm": 1.0384683097379803, "learning_rate": 3.149884322057843e-06, "loss": 0.1375, "step": 21665 }, { "epoch": 0.6320672151234028, "grad_norm": 0.7291252888006237, "learning_rate": 3.1494454283373583e-06, "loss": 0.1366, "step": 21666 }, { "epoch": 0.6320963883540464, "grad_norm": 0.9349662375087829, "learning_rate": 3.1490065511381816e-06, "loss": 0.1428, "step": 21667 }, { "epoch": 0.6321255615846899, "grad_norm": 0.9394634596186122, "learning_rate": 3.1485676904642326e-06, "loss": 0.1158, "step": 21668 }, { "epoch": 0.6321547348153335, "grad_norm": 0.812259875545716, "learning_rate": 3.1481288463194295e-06, "loss": 0.1306, "step": 21669 }, { "epoch": 0.632183908045977, "grad_norm": 0.7069932550567836, "learning_rate": 3.1476900187076896e-06, "loss": 0.1288, "step": 21670 }, { "epoch": 0.6322130812766206, "grad_norm": 0.9154211675415918, "learning_rate": 3.147251207632933e-06, "loss": 0.1164, "step": 21671 }, { "epoch": 0.6322422545072641, "grad_norm": 1.0751263803716586, "learning_rate": 3.146812413099074e-06, "loss": 0.1401, "step": 21672 }, { "epoch": 0.6322714277379077, "grad_norm": 0.8306769690576609, "learning_rate": 3.1463736351100315e-06, "loss": 0.1309, "step": 21673 }, { "epoch": 0.6323006009685512, "grad_norm": 0.8637797866408252, "learning_rate": 3.1459348736697214e-06, "loss": 0.1242, "step": 21674 }, { "epoch": 0.6323297741991948, "grad_norm": 0.899775583907776, "learning_rate": 3.1454961287820627e-06, "loss": 0.1073, "step": 21675 }, { "epoch": 0.6323589474298383, "grad_norm": 0.9252264441433307, "learning_rate": 3.1450574004509737e-06, "loss": 0.1477, "step": 21676 }, { "epoch": 0.6323881206604819, "grad_norm": 0.8818618303142279, "learning_rate": 3.144618688680368e-06, "loss": 0.1186, "step": 21677 }, { "epoch": 0.6324172938911256, "grad_norm": 1.1302293896714315, "learning_rate": 3.144179993474164e-06, "loss": 0.1442, "step": 21678 }, { "epoch": 0.6324464671217691, "grad_norm": 0.9210576401465935, "learning_rate": 3.143741314836279e-06, "loss": 0.133, "step": 21679 }, { "epoch": 0.6324756403524127, "grad_norm": 0.6783099486101454, "learning_rate": 3.143302652770625e-06, "loss": 0.1169, "step": 21680 }, { "epoch": 0.6325048135830562, "grad_norm": 0.9228908753654181, "learning_rate": 3.142864007281125e-06, "loss": 0.1219, "step": 21681 }, { "epoch": 0.6325339868136998, "grad_norm": 0.748219865458826, "learning_rate": 3.142425378371691e-06, "loss": 0.1161, "step": 21682 }, { "epoch": 0.6325631600443433, "grad_norm": 0.7959972009416684, "learning_rate": 3.1419867660462393e-06, "loss": 0.1249, "step": 21683 }, { "epoch": 0.6325923332749869, "grad_norm": 0.766086134365, "learning_rate": 3.1415481703086875e-06, "loss": 0.1207, "step": 21684 }, { "epoch": 0.6326215065056304, "grad_norm": 0.7530772631663772, "learning_rate": 3.1411095911629493e-06, "loss": 0.1139, "step": 21685 }, { "epoch": 0.632650679736274, "grad_norm": 0.9548755474435264, "learning_rate": 3.1406710286129395e-06, "loss": 0.1403, "step": 21686 }, { "epoch": 0.6326798529669175, "grad_norm": 0.8235797774988136, "learning_rate": 3.1402324826625758e-06, "loss": 0.1414, "step": 21687 }, { "epoch": 0.6327090261975611, "grad_norm": 0.749668243905599, "learning_rate": 3.139793953315773e-06, "loss": 0.1318, "step": 21688 }, { "epoch": 0.6327381994282046, "grad_norm": 0.9450579388649207, "learning_rate": 3.139355440576446e-06, "loss": 0.1006, "step": 21689 }, { "epoch": 0.6327673726588482, "grad_norm": 0.802373093106417, "learning_rate": 3.1389169444485092e-06, "loss": 0.1392, "step": 21690 }, { "epoch": 0.6327965458894919, "grad_norm": 0.9469888424479527, "learning_rate": 3.138478464935877e-06, "loss": 0.1293, "step": 21691 }, { "epoch": 0.6328257191201354, "grad_norm": 0.922208982657413, "learning_rate": 3.1380400020424638e-06, "loss": 0.1415, "step": 21692 }, { "epoch": 0.632854892350779, "grad_norm": 0.9266849914243759, "learning_rate": 3.1376015557721875e-06, "loss": 0.1035, "step": 21693 }, { "epoch": 0.6328840655814225, "grad_norm": 0.9004134124589869, "learning_rate": 3.1371631261289583e-06, "loss": 0.1177, "step": 21694 }, { "epoch": 0.6329132388120661, "grad_norm": 0.8702502407614358, "learning_rate": 3.136724713116692e-06, "loss": 0.1164, "step": 21695 }, { "epoch": 0.6329424120427096, "grad_norm": 0.7611662273728962, "learning_rate": 3.136286316739304e-06, "loss": 0.0976, "step": 21696 }, { "epoch": 0.6329715852733532, "grad_norm": 1.0615112949234136, "learning_rate": 3.1358479370007067e-06, "loss": 0.1348, "step": 21697 }, { "epoch": 0.6330007585039967, "grad_norm": 0.8625541826782752, "learning_rate": 3.135409573904812e-06, "loss": 0.1151, "step": 21698 }, { "epoch": 0.6330299317346403, "grad_norm": 0.9753872935193014, "learning_rate": 3.1349712274555364e-06, "loss": 0.123, "step": 21699 }, { "epoch": 0.6330591049652838, "grad_norm": 0.7421668580821029, "learning_rate": 3.1345328976567923e-06, "loss": 0.1111, "step": 21700 }, { "epoch": 0.6330882781959274, "grad_norm": 0.666921239172021, "learning_rate": 3.1340945845124948e-06, "loss": 0.1144, "step": 21701 }, { "epoch": 0.6331174514265709, "grad_norm": 0.8423616308448548, "learning_rate": 3.133656288026554e-06, "loss": 0.1502, "step": 21702 }, { "epoch": 0.6331466246572145, "grad_norm": 0.9447961324947602, "learning_rate": 3.133218008202885e-06, "loss": 0.096, "step": 21703 }, { "epoch": 0.633175797887858, "grad_norm": 0.8084249031859451, "learning_rate": 3.1327797450453984e-06, "loss": 0.1131, "step": 21704 }, { "epoch": 0.6332049711185017, "grad_norm": 0.7578063947746068, "learning_rate": 3.1323414985580092e-06, "loss": 0.1187, "step": 21705 }, { "epoch": 0.6332341443491453, "grad_norm": 0.9348425084795907, "learning_rate": 3.131903268744631e-06, "loss": 0.1348, "step": 21706 }, { "epoch": 0.6332633175797888, "grad_norm": 1.0434833243171249, "learning_rate": 3.131465055609173e-06, "loss": 0.12, "step": 21707 }, { "epoch": 0.6332924908104324, "grad_norm": 0.6079233290908971, "learning_rate": 3.1310268591555494e-06, "loss": 0.1126, "step": 21708 }, { "epoch": 0.6333216640410759, "grad_norm": 0.7797421497396152, "learning_rate": 3.130588679387672e-06, "loss": 0.1135, "step": 21709 }, { "epoch": 0.6333508372717195, "grad_norm": 0.8559849384990991, "learning_rate": 3.13015051630945e-06, "loss": 0.1189, "step": 21710 }, { "epoch": 0.633380010502363, "grad_norm": 0.7625466746703312, "learning_rate": 3.129712369924801e-06, "loss": 0.1053, "step": 21711 }, { "epoch": 0.6334091837330066, "grad_norm": 0.870000530641801, "learning_rate": 3.129274240237633e-06, "loss": 0.1402, "step": 21712 }, { "epoch": 0.6334383569636501, "grad_norm": 1.1223567464906534, "learning_rate": 3.1288361272518575e-06, "loss": 0.1295, "step": 21713 }, { "epoch": 0.6334675301942937, "grad_norm": 0.9488875518791894, "learning_rate": 3.128398030971387e-06, "loss": 0.1374, "step": 21714 }, { "epoch": 0.6334967034249372, "grad_norm": 0.7958988894901167, "learning_rate": 3.127959951400131e-06, "loss": 0.1418, "step": 21715 }, { "epoch": 0.6335258766555808, "grad_norm": 1.2673980555432538, "learning_rate": 3.127521888542001e-06, "loss": 0.1388, "step": 21716 }, { "epoch": 0.6335550498862244, "grad_norm": 0.996179579160665, "learning_rate": 3.1270838424009097e-06, "loss": 0.1238, "step": 21717 }, { "epoch": 0.633584223116868, "grad_norm": 0.8176115520522832, "learning_rate": 3.126645812980767e-06, "loss": 0.1467, "step": 21718 }, { "epoch": 0.6336133963475116, "grad_norm": 0.7650193098981782, "learning_rate": 3.126207800285484e-06, "loss": 0.1244, "step": 21719 }, { "epoch": 0.6336425695781551, "grad_norm": 0.9278519089992957, "learning_rate": 3.1257698043189693e-06, "loss": 0.1235, "step": 21720 }, { "epoch": 0.6336717428087987, "grad_norm": 0.7736597741982416, "learning_rate": 3.1253318250851345e-06, "loss": 0.1126, "step": 21721 }, { "epoch": 0.6337009160394422, "grad_norm": 0.6927540006471979, "learning_rate": 3.124893862587889e-06, "loss": 0.114, "step": 21722 }, { "epoch": 0.6337300892700858, "grad_norm": 0.9132711973936171, "learning_rate": 3.1244559168311452e-06, "loss": 0.1257, "step": 21723 }, { "epoch": 0.6337592625007293, "grad_norm": 0.8161342566372455, "learning_rate": 3.124017987818809e-06, "loss": 0.1343, "step": 21724 }, { "epoch": 0.6337884357313729, "grad_norm": 0.8222380140798606, "learning_rate": 3.123580075554794e-06, "loss": 0.1135, "step": 21725 }, { "epoch": 0.6338176089620164, "grad_norm": 0.9491268803419923, "learning_rate": 3.1231421800430084e-06, "loss": 0.1088, "step": 21726 }, { "epoch": 0.63384678219266, "grad_norm": 0.9556709148887018, "learning_rate": 3.122704301287361e-06, "loss": 0.146, "step": 21727 }, { "epoch": 0.6338759554233035, "grad_norm": 0.9430692757125659, "learning_rate": 3.12226643929176e-06, "loss": 0.1389, "step": 21728 }, { "epoch": 0.6339051286539471, "grad_norm": 0.7764755199732704, "learning_rate": 3.1218285940601166e-06, "loss": 0.1235, "step": 21729 }, { "epoch": 0.6339343018845907, "grad_norm": 0.7909181206676502, "learning_rate": 3.1213907655963406e-06, "loss": 0.1428, "step": 21730 }, { "epoch": 0.6339634751152342, "grad_norm": 0.7880675764901902, "learning_rate": 3.120952953904339e-06, "loss": 0.1104, "step": 21731 }, { "epoch": 0.6339926483458779, "grad_norm": 0.739293609115454, "learning_rate": 3.12051515898802e-06, "loss": 0.1136, "step": 21732 }, { "epoch": 0.6340218215765214, "grad_norm": 0.8153702795905208, "learning_rate": 3.1200773808512936e-06, "loss": 0.1182, "step": 21733 }, { "epoch": 0.634050994807165, "grad_norm": 0.7242207422869644, "learning_rate": 3.119639619498066e-06, "loss": 0.131, "step": 21734 }, { "epoch": 0.6340801680378085, "grad_norm": 0.8791651001527339, "learning_rate": 3.1192018749322482e-06, "loss": 0.1218, "step": 21735 }, { "epoch": 0.6341093412684521, "grad_norm": 0.7925481732877339, "learning_rate": 3.1187641471577478e-06, "loss": 0.1277, "step": 21736 }, { "epoch": 0.6341385144990956, "grad_norm": 1.0229415245648514, "learning_rate": 3.1183264361784716e-06, "loss": 0.127, "step": 21737 }, { "epoch": 0.6341676877297392, "grad_norm": 1.0213441175537599, "learning_rate": 3.117888741998328e-06, "loss": 0.1278, "step": 21738 }, { "epoch": 0.6341968609603827, "grad_norm": 0.8416204239503033, "learning_rate": 3.1174510646212247e-06, "loss": 0.1324, "step": 21739 }, { "epoch": 0.6342260341910263, "grad_norm": 0.9804147924450002, "learning_rate": 3.117013404051066e-06, "loss": 0.1569, "step": 21740 }, { "epoch": 0.6342552074216699, "grad_norm": 1.604150237655962, "learning_rate": 3.1165757602917653e-06, "loss": 0.1202, "step": 21741 }, { "epoch": 0.6342843806523134, "grad_norm": 0.8373751070357379, "learning_rate": 3.1161381333472253e-06, "loss": 0.1116, "step": 21742 }, { "epoch": 0.634313553882957, "grad_norm": 0.7782899909471713, "learning_rate": 3.1157005232213542e-06, "loss": 0.1354, "step": 21743 }, { "epoch": 0.6343427271136005, "grad_norm": 0.9136252138127734, "learning_rate": 3.115262929918061e-06, "loss": 0.1233, "step": 21744 }, { "epoch": 0.6343719003442441, "grad_norm": 0.8607119451216523, "learning_rate": 3.114825353441249e-06, "loss": 0.1379, "step": 21745 }, { "epoch": 0.6344010735748877, "grad_norm": 0.911061730616713, "learning_rate": 3.1143877937948247e-06, "loss": 0.1122, "step": 21746 }, { "epoch": 0.6344302468055313, "grad_norm": 0.8118839289481564, "learning_rate": 3.1139502509826975e-06, "loss": 0.1215, "step": 21747 }, { "epoch": 0.6344594200361748, "grad_norm": 0.6488405045595612, "learning_rate": 3.113512725008772e-06, "loss": 0.1115, "step": 21748 }, { "epoch": 0.6344885932668184, "grad_norm": 0.7618996093964154, "learning_rate": 3.1130752158769555e-06, "loss": 0.1318, "step": 21749 }, { "epoch": 0.6345177664974619, "grad_norm": 0.9880605615753427, "learning_rate": 3.112637723591152e-06, "loss": 0.1144, "step": 21750 }, { "epoch": 0.6345469397281055, "grad_norm": 0.7804695926800223, "learning_rate": 3.112200248155269e-06, "loss": 0.1663, "step": 21751 }, { "epoch": 0.634576112958749, "grad_norm": 0.9445166281720175, "learning_rate": 3.11176278957321e-06, "loss": 0.112, "step": 21752 }, { "epoch": 0.6346052861893926, "grad_norm": 0.7725115968509061, "learning_rate": 3.111325347848884e-06, "loss": 0.1182, "step": 21753 }, { "epoch": 0.6346344594200362, "grad_norm": 0.655354166208351, "learning_rate": 3.1108879229861934e-06, "loss": 0.119, "step": 21754 }, { "epoch": 0.6346636326506797, "grad_norm": 0.7494193134613015, "learning_rate": 3.110450514989045e-06, "loss": 0.1402, "step": 21755 }, { "epoch": 0.6346928058813233, "grad_norm": 0.9474238399449878, "learning_rate": 3.110013123861344e-06, "loss": 0.1192, "step": 21756 }, { "epoch": 0.6347219791119668, "grad_norm": 0.8790234496562304, "learning_rate": 3.1095757496069934e-06, "loss": 0.1343, "step": 21757 }, { "epoch": 0.6347511523426104, "grad_norm": 0.7351632736785756, "learning_rate": 3.1091383922298982e-06, "loss": 0.1185, "step": 21758 }, { "epoch": 0.634780325573254, "grad_norm": 0.8324877787027928, "learning_rate": 3.1087010517339656e-06, "loss": 0.1278, "step": 21759 }, { "epoch": 0.6348094988038976, "grad_norm": 0.8039823791184235, "learning_rate": 3.1082637281230977e-06, "loss": 0.1122, "step": 21760 }, { "epoch": 0.6348386720345411, "grad_norm": 0.8083964921544764, "learning_rate": 3.107826421401201e-06, "loss": 0.107, "step": 21761 }, { "epoch": 0.6348678452651847, "grad_norm": 0.7078659341223058, "learning_rate": 3.107389131572178e-06, "loss": 0.109, "step": 21762 }, { "epoch": 0.6348970184958282, "grad_norm": 1.2444803594802591, "learning_rate": 3.1069518586399323e-06, "loss": 0.1635, "step": 21763 }, { "epoch": 0.6349261917264718, "grad_norm": 0.9175885724502149, "learning_rate": 3.1065146026083675e-06, "loss": 0.1134, "step": 21764 }, { "epoch": 0.6349553649571154, "grad_norm": 0.8921175961086458, "learning_rate": 3.1060773634813895e-06, "loss": 0.1361, "step": 21765 }, { "epoch": 0.6349845381877589, "grad_norm": 0.8195935800664395, "learning_rate": 3.1056401412629023e-06, "loss": 0.1332, "step": 21766 }, { "epoch": 0.6350137114184025, "grad_norm": 0.8485398500224487, "learning_rate": 3.105202935956806e-06, "loss": 0.1453, "step": 21767 }, { "epoch": 0.635042884649046, "grad_norm": 0.7762656619937783, "learning_rate": 3.104765747567005e-06, "loss": 0.0851, "step": 21768 }, { "epoch": 0.6350720578796896, "grad_norm": 0.7682391691572218, "learning_rate": 3.104328576097405e-06, "loss": 0.121, "step": 21769 }, { "epoch": 0.6351012311103331, "grad_norm": 0.7209647293901071, "learning_rate": 3.1038914215519035e-06, "loss": 0.1106, "step": 21770 }, { "epoch": 0.6351304043409767, "grad_norm": 0.7839635461636388, "learning_rate": 3.1034542839344094e-06, "loss": 0.0989, "step": 21771 }, { "epoch": 0.6351595775716202, "grad_norm": 0.9488156725088999, "learning_rate": 3.1030171632488226e-06, "loss": 0.1179, "step": 21772 }, { "epoch": 0.6351887508022639, "grad_norm": 0.8923147396641986, "learning_rate": 3.102580059499045e-06, "loss": 0.1145, "step": 21773 }, { "epoch": 0.6352179240329074, "grad_norm": 0.8070798670150509, "learning_rate": 3.1021429726889808e-06, "loss": 0.1352, "step": 21774 }, { "epoch": 0.635247097263551, "grad_norm": 1.0570815815348948, "learning_rate": 3.1017059028225303e-06, "loss": 0.1068, "step": 21775 }, { "epoch": 0.6352762704941946, "grad_norm": 0.8505928246591681, "learning_rate": 3.1012688499035955e-06, "loss": 0.111, "step": 21776 }, { "epoch": 0.6353054437248381, "grad_norm": 0.9204157772627148, "learning_rate": 3.1008318139360795e-06, "loss": 0.1308, "step": 21777 }, { "epoch": 0.6353346169554817, "grad_norm": 0.933776237861263, "learning_rate": 3.100394794923884e-06, "loss": 0.0989, "step": 21778 }, { "epoch": 0.6353637901861252, "grad_norm": 1.0928731214810563, "learning_rate": 3.0999577928709114e-06, "loss": 0.1189, "step": 21779 }, { "epoch": 0.6353929634167688, "grad_norm": 1.0584957960818002, "learning_rate": 3.0995208077810613e-06, "loss": 0.1253, "step": 21780 }, { "epoch": 0.6354221366474123, "grad_norm": 1.1712067444457757, "learning_rate": 3.0990838396582357e-06, "loss": 0.1262, "step": 21781 }, { "epoch": 0.6354513098780559, "grad_norm": 0.6716273021083589, "learning_rate": 3.0986468885063344e-06, "loss": 0.0927, "step": 21782 }, { "epoch": 0.6354804831086994, "grad_norm": 1.0672201278160154, "learning_rate": 3.098209954329262e-06, "loss": 0.1352, "step": 21783 }, { "epoch": 0.635509656339343, "grad_norm": 0.9193766952718748, "learning_rate": 3.0977730371309154e-06, "loss": 0.1437, "step": 21784 }, { "epoch": 0.6355388295699865, "grad_norm": 0.8749013625383029, "learning_rate": 3.0973361369151977e-06, "loss": 0.1117, "step": 21785 }, { "epoch": 0.6355680028006302, "grad_norm": 1.0707215369033931, "learning_rate": 3.09689925368601e-06, "loss": 0.122, "step": 21786 }, { "epoch": 0.6355971760312737, "grad_norm": 0.6587241875285823, "learning_rate": 3.0964623874472503e-06, "loss": 0.1101, "step": 21787 }, { "epoch": 0.6356263492619173, "grad_norm": 0.9705001267168245, "learning_rate": 3.0960255382028193e-06, "loss": 0.114, "step": 21788 }, { "epoch": 0.6356555224925609, "grad_norm": 1.2154443540220972, "learning_rate": 3.095588705956618e-06, "loss": 0.1406, "step": 21789 }, { "epoch": 0.6356846957232044, "grad_norm": 0.8496619936358655, "learning_rate": 3.0951518907125468e-06, "loss": 0.1327, "step": 21790 }, { "epoch": 0.635713868953848, "grad_norm": 0.741635813731282, "learning_rate": 3.094715092474505e-06, "loss": 0.1229, "step": 21791 }, { "epoch": 0.6357430421844915, "grad_norm": 0.8686250528099158, "learning_rate": 3.094278311246392e-06, "loss": 0.1253, "step": 21792 }, { "epoch": 0.6357722154151351, "grad_norm": 0.7785649933729758, "learning_rate": 3.093841547032107e-06, "loss": 0.1382, "step": 21793 }, { "epoch": 0.6358013886457786, "grad_norm": 0.9243323103759059, "learning_rate": 3.093404799835548e-06, "loss": 0.1135, "step": 21794 }, { "epoch": 0.6358305618764222, "grad_norm": 0.9318614880552503, "learning_rate": 3.092968069660618e-06, "loss": 0.1303, "step": 21795 }, { "epoch": 0.6358597351070657, "grad_norm": 0.8252247957126727, "learning_rate": 3.0925313565112135e-06, "loss": 0.11, "step": 21796 }, { "epoch": 0.6358889083377093, "grad_norm": 0.7766490834908291, "learning_rate": 3.092094660391234e-06, "loss": 0.1203, "step": 21797 }, { "epoch": 0.6359180815683528, "grad_norm": 0.8685011647973678, "learning_rate": 3.0916579813045764e-06, "loss": 0.1222, "step": 21798 }, { "epoch": 0.6359472547989964, "grad_norm": 0.8992556388119634, "learning_rate": 3.0912213192551434e-06, "loss": 0.0998, "step": 21799 }, { "epoch": 0.63597642802964, "grad_norm": 0.798910637193303, "learning_rate": 3.090784674246826e-06, "loss": 0.1154, "step": 21800 }, { "epoch": 0.6360056012602836, "grad_norm": 0.97939781384001, "learning_rate": 3.0903480462835323e-06, "loss": 0.1308, "step": 21801 }, { "epoch": 0.6360347744909272, "grad_norm": 1.0612139597400803, "learning_rate": 3.089911435369153e-06, "loss": 0.1358, "step": 21802 }, { "epoch": 0.6360639477215707, "grad_norm": 0.9971849600337275, "learning_rate": 3.0894748415075887e-06, "loss": 0.1333, "step": 21803 }, { "epoch": 0.6360931209522143, "grad_norm": 0.7554802910512478, "learning_rate": 3.0890382647027382e-06, "loss": 0.121, "step": 21804 }, { "epoch": 0.6361222941828578, "grad_norm": 1.1523452522450974, "learning_rate": 3.0886017049584963e-06, "loss": 0.1412, "step": 21805 }, { "epoch": 0.6361514674135014, "grad_norm": 1.0359600180640038, "learning_rate": 3.088165162278762e-06, "loss": 0.1324, "step": 21806 }, { "epoch": 0.6361806406441449, "grad_norm": 1.3569448829642885, "learning_rate": 3.087728636667433e-06, "loss": 0.1022, "step": 21807 }, { "epoch": 0.6362098138747885, "grad_norm": 1.2866344165629882, "learning_rate": 3.0872921281284063e-06, "loss": 0.1332, "step": 21808 }, { "epoch": 0.636238987105432, "grad_norm": 1.2706445821451313, "learning_rate": 3.08685563666558e-06, "loss": 0.1463, "step": 21809 }, { "epoch": 0.6362681603360756, "grad_norm": 1.354870983290892, "learning_rate": 3.086419162282849e-06, "loss": 0.1174, "step": 21810 }, { "epoch": 0.6362973335667191, "grad_norm": 1.383365738194712, "learning_rate": 3.0859827049841105e-06, "loss": 0.1216, "step": 21811 }, { "epoch": 0.6363265067973627, "grad_norm": 0.6837405199969548, "learning_rate": 3.0855462647732615e-06, "loss": 0.1301, "step": 21812 }, { "epoch": 0.6363556800280064, "grad_norm": 1.1635242670466868, "learning_rate": 3.085109841654199e-06, "loss": 0.1313, "step": 21813 }, { "epoch": 0.6363848532586499, "grad_norm": 1.405525454712235, "learning_rate": 3.084673435630819e-06, "loss": 0.1001, "step": 21814 }, { "epoch": 0.6364140264892935, "grad_norm": 1.165711951011714, "learning_rate": 3.084237046707017e-06, "loss": 0.1139, "step": 21815 }, { "epoch": 0.636443199719937, "grad_norm": 0.880725538768999, "learning_rate": 3.08380067488669e-06, "loss": 0.1213, "step": 21816 }, { "epoch": 0.6364723729505806, "grad_norm": 2.381796792180809, "learning_rate": 3.083364320173732e-06, "loss": 0.1301, "step": 21817 }, { "epoch": 0.6365015461812241, "grad_norm": 0.9165355178052541, "learning_rate": 3.0829279825720393e-06, "loss": 0.1074, "step": 21818 }, { "epoch": 0.6365307194118677, "grad_norm": 1.0651285245372524, "learning_rate": 3.082491662085508e-06, "loss": 0.1172, "step": 21819 }, { "epoch": 0.6365598926425112, "grad_norm": 0.8388221543037918, "learning_rate": 3.0820553587180346e-06, "loss": 0.1116, "step": 21820 }, { "epoch": 0.6365890658731548, "grad_norm": 0.7066232406302658, "learning_rate": 3.081619072473514e-06, "loss": 0.1284, "step": 21821 }, { "epoch": 0.6366182391037983, "grad_norm": 0.9950246610089574, "learning_rate": 3.0811828033558388e-06, "loss": 0.1267, "step": 21822 }, { "epoch": 0.6366474123344419, "grad_norm": 1.1898016694790707, "learning_rate": 3.080746551368906e-06, "loss": 0.1262, "step": 21823 }, { "epoch": 0.6366765855650854, "grad_norm": 0.8613786808996372, "learning_rate": 3.080310316516608e-06, "loss": 0.1154, "step": 21824 }, { "epoch": 0.636705758795729, "grad_norm": 0.8929237471297329, "learning_rate": 3.079874098802843e-06, "loss": 0.1239, "step": 21825 }, { "epoch": 0.6367349320263725, "grad_norm": 1.5433127283925474, "learning_rate": 3.0794378982315044e-06, "loss": 0.1454, "step": 21826 }, { "epoch": 0.6367641052570162, "grad_norm": 1.2118272981373643, "learning_rate": 3.0790017148064844e-06, "loss": 0.1218, "step": 21827 }, { "epoch": 0.6367932784876598, "grad_norm": 0.9964554054028634, "learning_rate": 3.0785655485316788e-06, "loss": 0.1148, "step": 21828 }, { "epoch": 0.6368224517183033, "grad_norm": 0.6779271565125494, "learning_rate": 3.0781293994109828e-06, "loss": 0.1307, "step": 21829 }, { "epoch": 0.6368516249489469, "grad_norm": 1.0674790488557546, "learning_rate": 3.077693267448285e-06, "loss": 0.1347, "step": 21830 }, { "epoch": 0.6368807981795904, "grad_norm": 1.316009182413062, "learning_rate": 3.077257152647486e-06, "loss": 0.13, "step": 21831 }, { "epoch": 0.636909971410234, "grad_norm": 0.9490288674966368, "learning_rate": 3.0768210550124757e-06, "loss": 0.0948, "step": 21832 }, { "epoch": 0.6369391446408775, "grad_norm": 0.6827399227485108, "learning_rate": 3.0763849745471475e-06, "loss": 0.1195, "step": 21833 }, { "epoch": 0.6369683178715211, "grad_norm": 1.268438017932574, "learning_rate": 3.075948911255396e-06, "loss": 0.1251, "step": 21834 }, { "epoch": 0.6369974911021646, "grad_norm": 1.5663374850285545, "learning_rate": 3.0755128651411115e-06, "loss": 0.1025, "step": 21835 }, { "epoch": 0.6370266643328082, "grad_norm": 1.061104852661182, "learning_rate": 3.0750768362081895e-06, "loss": 0.1092, "step": 21836 }, { "epoch": 0.6370558375634517, "grad_norm": 0.7752633644749488, "learning_rate": 3.074640824460522e-06, "loss": 0.1347, "step": 21837 }, { "epoch": 0.6370850107940953, "grad_norm": 1.2016071339398173, "learning_rate": 3.074204829902001e-06, "loss": 0.1365, "step": 21838 }, { "epoch": 0.6371141840247388, "grad_norm": 1.3821317503312849, "learning_rate": 3.073768852536522e-06, "loss": 0.1328, "step": 21839 }, { "epoch": 0.6371433572553825, "grad_norm": 0.9094382064689873, "learning_rate": 3.073332892367973e-06, "loss": 0.1144, "step": 21840 }, { "epoch": 0.6371725304860261, "grad_norm": 0.66381617402986, "learning_rate": 3.072896949400247e-06, "loss": 0.1253, "step": 21841 }, { "epoch": 0.6372017037166696, "grad_norm": 0.9821706677181078, "learning_rate": 3.0724610236372377e-06, "loss": 0.1125, "step": 21842 }, { "epoch": 0.6372308769473132, "grad_norm": 1.0959836495574657, "learning_rate": 3.072025115082838e-06, "loss": 0.0997, "step": 21843 }, { "epoch": 0.6372600501779567, "grad_norm": 1.1337204186278373, "learning_rate": 3.071589223740936e-06, "loss": 0.1119, "step": 21844 }, { "epoch": 0.6372892234086003, "grad_norm": 0.6897983155072406, "learning_rate": 3.0711533496154258e-06, "loss": 0.1149, "step": 21845 }, { "epoch": 0.6373183966392438, "grad_norm": 0.9365897347237178, "learning_rate": 3.070717492710199e-06, "loss": 0.1232, "step": 21846 }, { "epoch": 0.6373475698698874, "grad_norm": 0.9274447875877025, "learning_rate": 3.0702816530291425e-06, "loss": 0.1391, "step": 21847 }, { "epoch": 0.6373767431005309, "grad_norm": 1.0215428332432723, "learning_rate": 3.0698458305761538e-06, "loss": 0.1493, "step": 21848 }, { "epoch": 0.6374059163311745, "grad_norm": 0.9081846317617027, "learning_rate": 3.0694100253551195e-06, "loss": 0.1112, "step": 21849 }, { "epoch": 0.637435089561818, "grad_norm": 0.7751254122582037, "learning_rate": 3.068974237369932e-06, "loss": 0.1116, "step": 21850 }, { "epoch": 0.6374642627924616, "grad_norm": 0.8434729375194193, "learning_rate": 3.068538466624482e-06, "loss": 0.1376, "step": 21851 }, { "epoch": 0.6374934360231052, "grad_norm": 0.9442999885080988, "learning_rate": 3.068102713122659e-06, "loss": 0.1707, "step": 21852 }, { "epoch": 0.6375226092537487, "grad_norm": 0.9222775526189096, "learning_rate": 3.067666976868353e-06, "loss": 0.133, "step": 21853 }, { "epoch": 0.6375517824843924, "grad_norm": 0.7792483602620585, "learning_rate": 3.067231257865456e-06, "loss": 0.1364, "step": 21854 }, { "epoch": 0.6375809557150359, "grad_norm": 0.8573625377828165, "learning_rate": 3.0667955561178566e-06, "loss": 0.1257, "step": 21855 }, { "epoch": 0.6376101289456795, "grad_norm": 0.7886138594932548, "learning_rate": 3.066359871629446e-06, "loss": 0.1099, "step": 21856 }, { "epoch": 0.637639302176323, "grad_norm": 0.9117843252800424, "learning_rate": 3.0659242044041117e-06, "loss": 0.1359, "step": 21857 }, { "epoch": 0.6376684754069666, "grad_norm": 0.7730494799951317, "learning_rate": 3.0654885544457446e-06, "loss": 0.1193, "step": 21858 }, { "epoch": 0.6376976486376101, "grad_norm": 0.859282447127082, "learning_rate": 3.0650529217582333e-06, "loss": 0.1319, "step": 21859 }, { "epoch": 0.6377268218682537, "grad_norm": 1.0796400353288, "learning_rate": 3.0646173063454676e-06, "loss": 0.1279, "step": 21860 }, { "epoch": 0.6377559950988972, "grad_norm": 0.9053880327450825, "learning_rate": 3.0641817082113385e-06, "loss": 0.119, "step": 21861 }, { "epoch": 0.6377851683295408, "grad_norm": 0.727253148668691, "learning_rate": 3.0637461273597312e-06, "loss": 0.1181, "step": 21862 }, { "epoch": 0.6378143415601844, "grad_norm": 1.0231326515798984, "learning_rate": 3.063310563794537e-06, "loss": 0.1216, "step": 21863 }, { "epoch": 0.6378435147908279, "grad_norm": 0.8431087631210024, "learning_rate": 3.062875017519645e-06, "loss": 0.1049, "step": 21864 }, { "epoch": 0.6378726880214715, "grad_norm": 0.7231253542040589, "learning_rate": 3.0624394885389397e-06, "loss": 0.1382, "step": 21865 }, { "epoch": 0.637901861252115, "grad_norm": 0.8593676211645107, "learning_rate": 3.062003976856313e-06, "loss": 0.1327, "step": 21866 }, { "epoch": 0.6379310344827587, "grad_norm": 0.7948219579548143, "learning_rate": 3.0615684824756525e-06, "loss": 0.1246, "step": 21867 }, { "epoch": 0.6379602077134022, "grad_norm": 0.7230630063087355, "learning_rate": 3.061133005400846e-06, "loss": 0.1372, "step": 21868 }, { "epoch": 0.6379893809440458, "grad_norm": 0.8481834099556024, "learning_rate": 3.0606975456357817e-06, "loss": 0.1446, "step": 21869 }, { "epoch": 0.6380185541746893, "grad_norm": 0.8388905248271551, "learning_rate": 3.060262103184346e-06, "loss": 0.1242, "step": 21870 }, { "epoch": 0.6380477274053329, "grad_norm": 0.8039098227458443, "learning_rate": 3.0598266780504267e-06, "loss": 0.129, "step": 21871 }, { "epoch": 0.6380769006359764, "grad_norm": 0.6854274554351321, "learning_rate": 3.059391270237912e-06, "loss": 0.1023, "step": 21872 }, { "epoch": 0.63810607386662, "grad_norm": 0.8483208695899778, "learning_rate": 3.05895587975069e-06, "loss": 0.1288, "step": 21873 }, { "epoch": 0.6381352470972635, "grad_norm": 0.8825059011319244, "learning_rate": 3.0585205065926453e-06, "loss": 0.1131, "step": 21874 }, { "epoch": 0.6381644203279071, "grad_norm": 0.704871957973163, "learning_rate": 3.058085150767667e-06, "loss": 0.101, "step": 21875 }, { "epoch": 0.6381935935585507, "grad_norm": 0.7805779065666156, "learning_rate": 3.0576498122796403e-06, "loss": 0.1156, "step": 21876 }, { "epoch": 0.6382227667891942, "grad_norm": 0.9444300393589251, "learning_rate": 3.057214491132451e-06, "loss": 0.1326, "step": 21877 }, { "epoch": 0.6382519400198378, "grad_norm": 0.9479615537361952, "learning_rate": 3.056779187329989e-06, "loss": 0.1374, "step": 21878 }, { "epoch": 0.6382811132504813, "grad_norm": 0.8509617843592863, "learning_rate": 3.0563439008761377e-06, "loss": 0.1115, "step": 21879 }, { "epoch": 0.6383102864811249, "grad_norm": 0.7367877561446459, "learning_rate": 3.055908631774784e-06, "loss": 0.1287, "step": 21880 }, { "epoch": 0.6383394597117685, "grad_norm": 0.8504320472884936, "learning_rate": 3.0554733800298154e-06, "loss": 0.1279, "step": 21881 }, { "epoch": 0.6383686329424121, "grad_norm": 0.9429986359842464, "learning_rate": 3.0550381456451144e-06, "loss": 0.1256, "step": 21882 }, { "epoch": 0.6383978061730556, "grad_norm": 1.0032892736514707, "learning_rate": 3.054602928624568e-06, "loss": 0.1217, "step": 21883 }, { "epoch": 0.6384269794036992, "grad_norm": 0.9133680212190357, "learning_rate": 3.0541677289720632e-06, "loss": 0.1269, "step": 21884 }, { "epoch": 0.6384561526343427, "grad_norm": 0.9797065792975945, "learning_rate": 3.053732546691485e-06, "loss": 0.1046, "step": 21885 }, { "epoch": 0.6384853258649863, "grad_norm": 0.9860794712482452, "learning_rate": 3.0532973817867185e-06, "loss": 0.1342, "step": 21886 }, { "epoch": 0.6385144990956299, "grad_norm": 1.0183469029431744, "learning_rate": 3.0528622342616472e-06, "loss": 0.1214, "step": 21887 }, { "epoch": 0.6385436723262734, "grad_norm": 0.9259410328115509, "learning_rate": 3.052427104120157e-06, "loss": 0.1191, "step": 21888 }, { "epoch": 0.638572845556917, "grad_norm": 1.2581005181482376, "learning_rate": 3.0519919913661317e-06, "loss": 0.0981, "step": 21889 }, { "epoch": 0.6386020187875605, "grad_norm": 1.561430521600238, "learning_rate": 3.051556896003458e-06, "loss": 0.1356, "step": 21890 }, { "epoch": 0.6386311920182041, "grad_norm": 0.8987681531126557, "learning_rate": 3.05112181803602e-06, "loss": 0.1138, "step": 21891 }, { "epoch": 0.6386603652488476, "grad_norm": 0.9575434341929723, "learning_rate": 3.0506867574677007e-06, "loss": 0.1122, "step": 21892 }, { "epoch": 0.6386895384794912, "grad_norm": 0.9803084522916741, "learning_rate": 3.0502517143023846e-06, "loss": 0.1244, "step": 21893 }, { "epoch": 0.6387187117101348, "grad_norm": 1.1634107455634812, "learning_rate": 3.049816688543956e-06, "loss": 0.1319, "step": 21894 }, { "epoch": 0.6387478849407784, "grad_norm": 1.0048455952281852, "learning_rate": 3.0493816801962974e-06, "loss": 0.1404, "step": 21895 }, { "epoch": 0.6387770581714219, "grad_norm": 0.7597447083970655, "learning_rate": 3.0489466892632934e-06, "loss": 0.1034, "step": 21896 }, { "epoch": 0.6388062314020655, "grad_norm": 0.9691091562542609, "learning_rate": 3.0485117157488287e-06, "loss": 0.1387, "step": 21897 }, { "epoch": 0.638835404632709, "grad_norm": 0.8071088564651172, "learning_rate": 3.048076759656785e-06, "loss": 0.1211, "step": 21898 }, { "epoch": 0.6388645778633526, "grad_norm": 0.6358678297759078, "learning_rate": 3.0476418209910475e-06, "loss": 0.12, "step": 21899 }, { "epoch": 0.6388937510939962, "grad_norm": 0.7951613576571183, "learning_rate": 3.047206899755496e-06, "loss": 0.126, "step": 21900 }, { "epoch": 0.6389229243246397, "grad_norm": 1.0271889369572134, "learning_rate": 3.046771995954015e-06, "loss": 0.1086, "step": 21901 }, { "epoch": 0.6389520975552833, "grad_norm": 0.9344279582335326, "learning_rate": 3.046337109590488e-06, "loss": 0.1276, "step": 21902 }, { "epoch": 0.6389812707859268, "grad_norm": 0.7668315638447574, "learning_rate": 3.0459022406687977e-06, "loss": 0.1029, "step": 21903 }, { "epoch": 0.6390104440165704, "grad_norm": 0.8563832055480559, "learning_rate": 3.045467389192824e-06, "loss": 0.1381, "step": 21904 }, { "epoch": 0.6390396172472139, "grad_norm": 1.3599042135465556, "learning_rate": 3.0450325551664522e-06, "loss": 0.1243, "step": 21905 }, { "epoch": 0.6390687904778575, "grad_norm": 0.7885305964212832, "learning_rate": 3.044597738593564e-06, "loss": 0.1308, "step": 21906 }, { "epoch": 0.639097963708501, "grad_norm": 0.6982586480666967, "learning_rate": 3.044162939478037e-06, "loss": 0.1, "step": 21907 }, { "epoch": 0.6391271369391447, "grad_norm": 0.8285675420662989, "learning_rate": 3.0437281578237587e-06, "loss": 0.1265, "step": 21908 }, { "epoch": 0.6391563101697882, "grad_norm": 0.8283163405615398, "learning_rate": 3.0432933936346083e-06, "loss": 0.1257, "step": 21909 }, { "epoch": 0.6391854834004318, "grad_norm": 0.9649752723675649, "learning_rate": 3.042858646914467e-06, "loss": 0.1107, "step": 21910 }, { "epoch": 0.6392146566310754, "grad_norm": 0.9493165956215821, "learning_rate": 3.0424239176672177e-06, "loss": 0.0975, "step": 21911 }, { "epoch": 0.6392438298617189, "grad_norm": 1.0680619133120668, "learning_rate": 3.0419892058967393e-06, "loss": 0.1373, "step": 21912 }, { "epoch": 0.6392730030923625, "grad_norm": 1.3516981134621209, "learning_rate": 3.0415545116069127e-06, "loss": 0.109, "step": 21913 }, { "epoch": 0.639302176323006, "grad_norm": 1.0398911799811374, "learning_rate": 3.041119834801621e-06, "loss": 0.1146, "step": 21914 }, { "epoch": 0.6393313495536496, "grad_norm": 0.7618778724016294, "learning_rate": 3.040685175484744e-06, "loss": 0.1078, "step": 21915 }, { "epoch": 0.6393605227842931, "grad_norm": 0.8823225671438082, "learning_rate": 3.040250533660163e-06, "loss": 0.14, "step": 21916 }, { "epoch": 0.6393896960149367, "grad_norm": 0.9549327538169706, "learning_rate": 3.039815909331756e-06, "loss": 0.1263, "step": 21917 }, { "epoch": 0.6394188692455802, "grad_norm": 0.7160000367478118, "learning_rate": 3.0393813025034046e-06, "loss": 0.1138, "step": 21918 }, { "epoch": 0.6394480424762238, "grad_norm": 0.8889924544531044, "learning_rate": 3.0389467131789884e-06, "loss": 0.1198, "step": 21919 }, { "epoch": 0.6394772157068673, "grad_norm": 0.7604025105980199, "learning_rate": 3.0385121413623888e-06, "loss": 0.1263, "step": 21920 }, { "epoch": 0.639506388937511, "grad_norm": 0.8818718817076348, "learning_rate": 3.038077587057485e-06, "loss": 0.1126, "step": 21921 }, { "epoch": 0.6395355621681545, "grad_norm": 0.8709218495331252, "learning_rate": 3.0376430502681554e-06, "loss": 0.1251, "step": 21922 }, { "epoch": 0.6395647353987981, "grad_norm": 0.7977261760500468, "learning_rate": 3.03720853099828e-06, "loss": 0.1317, "step": 21923 }, { "epoch": 0.6395939086294417, "grad_norm": 0.9860642992008797, "learning_rate": 3.03677402925174e-06, "loss": 0.1247, "step": 21924 }, { "epoch": 0.6396230818600852, "grad_norm": 0.7759127192684441, "learning_rate": 3.0363395450324103e-06, "loss": 0.1148, "step": 21925 }, { "epoch": 0.6396522550907288, "grad_norm": 0.634868363926555, "learning_rate": 3.0359050783441736e-06, "loss": 0.1183, "step": 21926 }, { "epoch": 0.6396814283213723, "grad_norm": 0.7773985173968639, "learning_rate": 3.035470629190907e-06, "loss": 0.124, "step": 21927 }, { "epoch": 0.6397106015520159, "grad_norm": 0.8659888848175732, "learning_rate": 3.0350361975764907e-06, "loss": 0.127, "step": 21928 }, { "epoch": 0.6397397747826594, "grad_norm": 0.9017771768607322, "learning_rate": 3.0346017835048015e-06, "loss": 0.1269, "step": 21929 }, { "epoch": 0.639768948013303, "grad_norm": 0.7096728122358629, "learning_rate": 3.0341673869797183e-06, "loss": 0.111, "step": 21930 }, { "epoch": 0.6397981212439465, "grad_norm": 0.8243088859971814, "learning_rate": 3.0337330080051188e-06, "loss": 0.1141, "step": 21931 }, { "epoch": 0.6398272944745901, "grad_norm": 1.0075431142357507, "learning_rate": 3.0332986465848824e-06, "loss": 0.1346, "step": 21932 }, { "epoch": 0.6398564677052336, "grad_norm": 0.9271585951905548, "learning_rate": 3.0328643027228864e-06, "loss": 0.1114, "step": 21933 }, { "epoch": 0.6398856409358772, "grad_norm": 0.8825953290479391, "learning_rate": 3.032429976423008e-06, "loss": 0.1403, "step": 21934 }, { "epoch": 0.6399148141665209, "grad_norm": 0.6456542010582347, "learning_rate": 3.0319956676891253e-06, "loss": 0.1308, "step": 21935 }, { "epoch": 0.6399439873971644, "grad_norm": 0.9258455589709558, "learning_rate": 3.0315613765251164e-06, "loss": 0.1298, "step": 21936 }, { "epoch": 0.639973160627808, "grad_norm": 1.014659021103718, "learning_rate": 3.0311271029348545e-06, "loss": 0.1211, "step": 21937 }, { "epoch": 0.6400023338584515, "grad_norm": 1.040044223233855, "learning_rate": 3.0306928469222225e-06, "loss": 0.1402, "step": 21938 }, { "epoch": 0.6400315070890951, "grad_norm": 1.1177879221621851, "learning_rate": 3.0302586084910934e-06, "loss": 0.1388, "step": 21939 }, { "epoch": 0.6400606803197386, "grad_norm": 0.9252474808014111, "learning_rate": 3.0298243876453458e-06, "loss": 0.1178, "step": 21940 }, { "epoch": 0.6400898535503822, "grad_norm": 1.1589570442812709, "learning_rate": 3.0293901843888573e-06, "loss": 0.1315, "step": 21941 }, { "epoch": 0.6401190267810257, "grad_norm": 1.0330798901038698, "learning_rate": 3.0289559987255015e-06, "loss": 0.1342, "step": 21942 }, { "epoch": 0.6401482000116693, "grad_norm": 0.83232699843636, "learning_rate": 3.028521830659154e-06, "loss": 0.1269, "step": 21943 }, { "epoch": 0.6401773732423128, "grad_norm": 1.0162619330003844, "learning_rate": 3.028087680193695e-06, "loss": 0.1193, "step": 21944 }, { "epoch": 0.6402065464729564, "grad_norm": 0.9821735402285827, "learning_rate": 3.0276535473329983e-06, "loss": 0.1513, "step": 21945 }, { "epoch": 0.6402357197035999, "grad_norm": 0.7782382406666812, "learning_rate": 3.02721943208094e-06, "loss": 0.1204, "step": 21946 }, { "epoch": 0.6402648929342435, "grad_norm": 0.942174926867864, "learning_rate": 3.0267853344413956e-06, "loss": 0.1221, "step": 21947 }, { "epoch": 0.6402940661648872, "grad_norm": 1.2193659694124632, "learning_rate": 3.0263512544182407e-06, "loss": 0.121, "step": 21948 }, { "epoch": 0.6403232393955307, "grad_norm": 1.1613143866439641, "learning_rate": 3.025917192015349e-06, "loss": 0.1257, "step": 21949 }, { "epoch": 0.6403524126261743, "grad_norm": 1.198356382509822, "learning_rate": 3.025483147236599e-06, "loss": 0.1395, "step": 21950 }, { "epoch": 0.6403815858568178, "grad_norm": 0.8952116743310828, "learning_rate": 3.0250491200858643e-06, "loss": 0.1222, "step": 21951 }, { "epoch": 0.6404107590874614, "grad_norm": 1.2709399242472916, "learning_rate": 3.0246151105670197e-06, "loss": 0.136, "step": 21952 }, { "epoch": 0.6404399323181049, "grad_norm": 0.9343221816026714, "learning_rate": 3.0241811186839394e-06, "loss": 0.1505, "step": 21953 }, { "epoch": 0.6404691055487485, "grad_norm": 0.7738212314549767, "learning_rate": 3.0237471444404993e-06, "loss": 0.1052, "step": 21954 }, { "epoch": 0.640498278779392, "grad_norm": 1.096858823539028, "learning_rate": 3.023313187840571e-06, "loss": 0.1326, "step": 21955 }, { "epoch": 0.6405274520100356, "grad_norm": 1.0254494717705362, "learning_rate": 3.0228792488880315e-06, "loss": 0.1104, "step": 21956 }, { "epoch": 0.6405566252406791, "grad_norm": 0.7765213523737811, "learning_rate": 3.0224453275867544e-06, "loss": 0.1379, "step": 21957 }, { "epoch": 0.6405857984713227, "grad_norm": 1.0059879994998377, "learning_rate": 3.022011423940614e-06, "loss": 0.1173, "step": 21958 }, { "epoch": 0.6406149717019662, "grad_norm": 0.914641722131277, "learning_rate": 3.0215775379534827e-06, "loss": 0.1253, "step": 21959 }, { "epoch": 0.6406441449326098, "grad_norm": 0.9022035160712973, "learning_rate": 3.0211436696292346e-06, "loss": 0.1441, "step": 21960 }, { "epoch": 0.6406733181632533, "grad_norm": 0.8967770234631273, "learning_rate": 3.020709818971743e-06, "loss": 0.1271, "step": 21961 }, { "epoch": 0.640702491393897, "grad_norm": 0.9269267973477245, "learning_rate": 3.0202759859848818e-06, "loss": 0.1061, "step": 21962 }, { "epoch": 0.6407316646245406, "grad_norm": 0.8430561957964302, "learning_rate": 3.0198421706725257e-06, "loss": 0.1451, "step": 21963 }, { "epoch": 0.6407608378551841, "grad_norm": 0.8483089362372715, "learning_rate": 3.0194083730385443e-06, "loss": 0.1137, "step": 21964 }, { "epoch": 0.6407900110858277, "grad_norm": 0.7416063276970117, "learning_rate": 3.0189745930868127e-06, "loss": 0.1284, "step": 21965 }, { "epoch": 0.6408191843164712, "grad_norm": 0.6940232346523145, "learning_rate": 3.018540830821204e-06, "loss": 0.1348, "step": 21966 }, { "epoch": 0.6408483575471148, "grad_norm": 0.9563816919176291, "learning_rate": 3.0181070862455862e-06, "loss": 0.1409, "step": 21967 }, { "epoch": 0.6408775307777583, "grad_norm": 0.8849799514833532, "learning_rate": 3.0176733593638387e-06, "loss": 0.1225, "step": 21968 }, { "epoch": 0.6409067040084019, "grad_norm": 0.7482586046657074, "learning_rate": 3.0172396501798295e-06, "loss": 0.143, "step": 21969 }, { "epoch": 0.6409358772390454, "grad_norm": 0.7317275594846018, "learning_rate": 3.0168059586974307e-06, "loss": 0.1223, "step": 21970 }, { "epoch": 0.640965050469689, "grad_norm": 0.8917560050040593, "learning_rate": 3.0163722849205163e-06, "loss": 0.1132, "step": 21971 }, { "epoch": 0.6409942237003325, "grad_norm": 0.6154021358023682, "learning_rate": 3.0159386288529556e-06, "loss": 0.1044, "step": 21972 }, { "epoch": 0.6410233969309761, "grad_norm": 0.8310882793958907, "learning_rate": 3.01550499049862e-06, "loss": 0.1223, "step": 21973 }, { "epoch": 0.6410525701616197, "grad_norm": 0.8564255076719426, "learning_rate": 3.0150713698613833e-06, "loss": 0.1274, "step": 21974 }, { "epoch": 0.6410817433922633, "grad_norm": 0.8576802317833004, "learning_rate": 3.0146377669451154e-06, "loss": 0.112, "step": 21975 }, { "epoch": 0.6411109166229069, "grad_norm": 0.8924822883242373, "learning_rate": 3.0142041817536883e-06, "loss": 0.124, "step": 21976 }, { "epoch": 0.6411400898535504, "grad_norm": 0.8209841294782584, "learning_rate": 3.0137706142909717e-06, "loss": 0.1289, "step": 21977 }, { "epoch": 0.641169263084194, "grad_norm": 0.832261433301835, "learning_rate": 3.0133370645608372e-06, "loss": 0.1327, "step": 21978 }, { "epoch": 0.6411984363148375, "grad_norm": 0.9162160733691365, "learning_rate": 3.0129035325671534e-06, "loss": 0.1273, "step": 21979 }, { "epoch": 0.6412276095454811, "grad_norm": 1.0494374263771356, "learning_rate": 3.0124700183137938e-06, "loss": 0.1108, "step": 21980 }, { "epoch": 0.6412567827761246, "grad_norm": 0.8841927875348656, "learning_rate": 3.0120365218046287e-06, "loss": 0.1239, "step": 21981 }, { "epoch": 0.6412859560067682, "grad_norm": 0.8271655609304868, "learning_rate": 3.0116030430435254e-06, "loss": 0.113, "step": 21982 }, { "epoch": 0.6413151292374117, "grad_norm": 0.8725414580239986, "learning_rate": 3.0111695820343557e-06, "loss": 0.1194, "step": 21983 }, { "epoch": 0.6413443024680553, "grad_norm": 0.878717120453251, "learning_rate": 3.010736138780991e-06, "loss": 0.1217, "step": 21984 }, { "epoch": 0.6413734756986988, "grad_norm": 0.8967277763892505, "learning_rate": 3.010302713287296e-06, "loss": 0.1266, "step": 21985 }, { "epoch": 0.6414026489293424, "grad_norm": 0.9566597113111973, "learning_rate": 3.009869305557145e-06, "loss": 0.1211, "step": 21986 }, { "epoch": 0.641431822159986, "grad_norm": 0.6656255080399474, "learning_rate": 3.0094359155944053e-06, "loss": 0.1171, "step": 21987 }, { "epoch": 0.6414609953906295, "grad_norm": 0.926675776362127, "learning_rate": 3.009002543402948e-06, "loss": 0.1176, "step": 21988 }, { "epoch": 0.6414901686212732, "grad_norm": 0.9455297630585128, "learning_rate": 3.0085691889866396e-06, "loss": 0.1191, "step": 21989 }, { "epoch": 0.6415193418519167, "grad_norm": 0.8037505377903367, "learning_rate": 3.00813585234935e-06, "loss": 0.1352, "step": 21990 }, { "epoch": 0.6415485150825603, "grad_norm": 0.8411793116770895, "learning_rate": 3.0077025334949465e-06, "loss": 0.1368, "step": 21991 }, { "epoch": 0.6415776883132038, "grad_norm": 0.9225695404193294, "learning_rate": 3.007269232427301e-06, "loss": 0.1198, "step": 21992 }, { "epoch": 0.6416068615438474, "grad_norm": 0.9568972889835847, "learning_rate": 3.0068359491502806e-06, "loss": 0.1257, "step": 21993 }, { "epoch": 0.6416360347744909, "grad_norm": 0.8408048220332507, "learning_rate": 3.0064026836677527e-06, "loss": 0.1071, "step": 21994 }, { "epoch": 0.6416652080051345, "grad_norm": 1.0444506090823575, "learning_rate": 3.005969435983585e-06, "loss": 0.1565, "step": 21995 }, { "epoch": 0.641694381235778, "grad_norm": 0.8235011669755461, "learning_rate": 3.005536206101648e-06, "loss": 0.1205, "step": 21996 }, { "epoch": 0.6417235544664216, "grad_norm": 1.0100537331249817, "learning_rate": 3.0051029940258035e-06, "loss": 0.1314, "step": 21997 }, { "epoch": 0.6417527276970652, "grad_norm": 0.7175819177898534, "learning_rate": 3.004669799759927e-06, "loss": 0.1185, "step": 21998 }, { "epoch": 0.6417819009277087, "grad_norm": 0.6697634596920586, "learning_rate": 3.004236623307881e-06, "loss": 0.1176, "step": 21999 }, { "epoch": 0.6418110741583523, "grad_norm": 0.7314890809794784, "learning_rate": 3.003803464673534e-06, "loss": 0.1134, "step": 22000 }, { "epoch": 0.6418402473889958, "grad_norm": 0.8877970432339143, "learning_rate": 3.0033703238607544e-06, "loss": 0.1142, "step": 22001 }, { "epoch": 0.6418694206196394, "grad_norm": 1.0484756317865096, "learning_rate": 3.0029372008734065e-06, "loss": 0.1278, "step": 22002 }, { "epoch": 0.641898593850283, "grad_norm": 0.784807631667673, "learning_rate": 3.0025040957153576e-06, "loss": 0.1305, "step": 22003 }, { "epoch": 0.6419277670809266, "grad_norm": 0.7995695426138639, "learning_rate": 3.002071008390477e-06, "loss": 0.1022, "step": 22004 }, { "epoch": 0.6419569403115701, "grad_norm": 1.0498805837174303, "learning_rate": 3.0016379389026283e-06, "loss": 0.1178, "step": 22005 }, { "epoch": 0.6419861135422137, "grad_norm": 0.8907687355688043, "learning_rate": 3.001204887255681e-06, "loss": 0.132, "step": 22006 }, { "epoch": 0.6420152867728572, "grad_norm": 1.2388437603140898, "learning_rate": 3.000771853453498e-06, "loss": 0.1146, "step": 22007 }, { "epoch": 0.6420444600035008, "grad_norm": 0.7294235891008237, "learning_rate": 3.0003388374999464e-06, "loss": 0.1552, "step": 22008 }, { "epoch": 0.6420736332341443, "grad_norm": 0.9111170835689067, "learning_rate": 2.999905839398891e-06, "loss": 0.1453, "step": 22009 }, { "epoch": 0.6421028064647879, "grad_norm": 1.0006834918474947, "learning_rate": 2.9994728591542012e-06, "loss": 0.1386, "step": 22010 }, { "epoch": 0.6421319796954315, "grad_norm": 0.7411976676137246, "learning_rate": 2.99903989676974e-06, "loss": 0.1287, "step": 22011 }, { "epoch": 0.642161152926075, "grad_norm": 0.8374476498672586, "learning_rate": 2.998606952249372e-06, "loss": 0.1239, "step": 22012 }, { "epoch": 0.6421903261567186, "grad_norm": 1.1581749601311697, "learning_rate": 2.998174025596964e-06, "loss": 0.1205, "step": 22013 }, { "epoch": 0.6422194993873621, "grad_norm": 0.8413115633375319, "learning_rate": 2.9977411168163807e-06, "loss": 0.1247, "step": 22014 }, { "epoch": 0.6422486726180057, "grad_norm": 0.7172542444129659, "learning_rate": 2.997308225911485e-06, "loss": 0.1048, "step": 22015 }, { "epoch": 0.6422778458486493, "grad_norm": 0.7459797529607547, "learning_rate": 2.9968753528861443e-06, "loss": 0.143, "step": 22016 }, { "epoch": 0.6423070190792929, "grad_norm": 0.8838932432440447, "learning_rate": 2.9964424977442223e-06, "loss": 0.1219, "step": 22017 }, { "epoch": 0.6423361923099364, "grad_norm": 0.7892937388748272, "learning_rate": 2.9960096604895843e-06, "loss": 0.1356, "step": 22018 }, { "epoch": 0.64236536554058, "grad_norm": 0.723426527644377, "learning_rate": 2.9955768411260935e-06, "loss": 0.1041, "step": 22019 }, { "epoch": 0.6423945387712235, "grad_norm": 0.817449795560833, "learning_rate": 2.9951440396576128e-06, "loss": 0.1075, "step": 22020 }, { "epoch": 0.6424237120018671, "grad_norm": 0.7913939349359839, "learning_rate": 2.9947112560880076e-06, "loss": 0.1152, "step": 22021 }, { "epoch": 0.6424528852325107, "grad_norm": 0.766103026246493, "learning_rate": 2.9942784904211418e-06, "loss": 0.1364, "step": 22022 }, { "epoch": 0.6424820584631542, "grad_norm": 0.87075082492271, "learning_rate": 2.9938457426608802e-06, "loss": 0.1159, "step": 22023 }, { "epoch": 0.6425112316937978, "grad_norm": 0.9102059288066796, "learning_rate": 2.993413012811084e-06, "loss": 0.1258, "step": 22024 }, { "epoch": 0.6425404049244413, "grad_norm": 0.8912854960681408, "learning_rate": 2.9929803008756174e-06, "loss": 0.1038, "step": 22025 }, { "epoch": 0.6425695781550849, "grad_norm": 0.7777757062121337, "learning_rate": 2.992547606858345e-06, "loss": 0.1203, "step": 22026 }, { "epoch": 0.6425987513857284, "grad_norm": 0.9571044758716966, "learning_rate": 2.992114930763125e-06, "loss": 0.1407, "step": 22027 }, { "epoch": 0.642627924616372, "grad_norm": 1.0391279665164588, "learning_rate": 2.9916822725938253e-06, "loss": 0.1132, "step": 22028 }, { "epoch": 0.6426570978470155, "grad_norm": 0.7927344573522009, "learning_rate": 2.9912496323543074e-06, "loss": 0.0956, "step": 22029 }, { "epoch": 0.6426862710776592, "grad_norm": 0.6825566380151522, "learning_rate": 2.990817010048433e-06, "loss": 0.1274, "step": 22030 }, { "epoch": 0.6427154443083027, "grad_norm": 0.8293530431236341, "learning_rate": 2.9903844056800657e-06, "loss": 0.1324, "step": 22031 }, { "epoch": 0.6427446175389463, "grad_norm": 0.902414512926688, "learning_rate": 2.989951819253063e-06, "loss": 0.1078, "step": 22032 }, { "epoch": 0.6427737907695898, "grad_norm": 0.9077883812298531, "learning_rate": 2.9895192507712943e-06, "loss": 0.1289, "step": 22033 }, { "epoch": 0.6428029640002334, "grad_norm": 1.0576657118963588, "learning_rate": 2.989086700238617e-06, "loss": 0.1355, "step": 22034 }, { "epoch": 0.642832137230877, "grad_norm": 0.7742967496213374, "learning_rate": 2.988654167658893e-06, "loss": 0.117, "step": 22035 }, { "epoch": 0.6428613104615205, "grad_norm": 0.9320383661908841, "learning_rate": 2.9882216530359855e-06, "loss": 0.1129, "step": 22036 }, { "epoch": 0.6428904836921641, "grad_norm": 0.8551385599946922, "learning_rate": 2.9877891563737538e-06, "loss": 0.1171, "step": 22037 }, { "epoch": 0.6429196569228076, "grad_norm": 1.1244173168804574, "learning_rate": 2.98735667767606e-06, "loss": 0.1221, "step": 22038 }, { "epoch": 0.6429488301534512, "grad_norm": 0.8901108181760674, "learning_rate": 2.986924216946765e-06, "loss": 0.1115, "step": 22039 }, { "epoch": 0.6429780033840947, "grad_norm": 0.8029337076108818, "learning_rate": 2.986491774189731e-06, "loss": 0.1458, "step": 22040 }, { "epoch": 0.6430071766147383, "grad_norm": 0.8066120730382462, "learning_rate": 2.9860593494088187e-06, "loss": 0.12, "step": 22041 }, { "epoch": 0.6430363498453818, "grad_norm": 0.864314297791898, "learning_rate": 2.9856269426078867e-06, "loss": 0.1217, "step": 22042 }, { "epoch": 0.6430655230760255, "grad_norm": 0.7196071010194764, "learning_rate": 2.985194553790796e-06, "loss": 0.1192, "step": 22043 }, { "epoch": 0.643094696306669, "grad_norm": 0.8675494861147474, "learning_rate": 2.984762182961407e-06, "loss": 0.1164, "step": 22044 }, { "epoch": 0.6431238695373126, "grad_norm": 0.8661346968630518, "learning_rate": 2.9843298301235812e-06, "loss": 0.1364, "step": 22045 }, { "epoch": 0.6431530427679562, "grad_norm": 0.8267836367556943, "learning_rate": 2.983897495281177e-06, "loss": 0.1115, "step": 22046 }, { "epoch": 0.6431822159985997, "grad_norm": 0.8494980209202818, "learning_rate": 2.9834651784380554e-06, "loss": 0.1035, "step": 22047 }, { "epoch": 0.6432113892292433, "grad_norm": 0.8089928985302235, "learning_rate": 2.9830328795980756e-06, "loss": 0.1218, "step": 22048 }, { "epoch": 0.6432405624598868, "grad_norm": 0.9807952922080896, "learning_rate": 2.9826005987650964e-06, "loss": 0.1248, "step": 22049 }, { "epoch": 0.6432697356905304, "grad_norm": 0.8681631326657169, "learning_rate": 2.9821683359429755e-06, "loss": 0.1278, "step": 22050 }, { "epoch": 0.6432989089211739, "grad_norm": 2.583564011172082, "learning_rate": 2.981736091135575e-06, "loss": 0.1268, "step": 22051 }, { "epoch": 0.6433280821518175, "grad_norm": 1.0576873002634501, "learning_rate": 2.981303864346754e-06, "loss": 0.1167, "step": 22052 }, { "epoch": 0.643357255382461, "grad_norm": 0.9968347441174502, "learning_rate": 2.9808716555803704e-06, "loss": 0.1242, "step": 22053 }, { "epoch": 0.6433864286131046, "grad_norm": 0.8397921202388221, "learning_rate": 2.980439464840282e-06, "loss": 0.1306, "step": 22054 }, { "epoch": 0.6434156018437481, "grad_norm": 0.775762972254896, "learning_rate": 2.9800072921303474e-06, "loss": 0.1297, "step": 22055 }, { "epoch": 0.6434447750743917, "grad_norm": 0.8310130153168136, "learning_rate": 2.9795751374544244e-06, "loss": 0.1066, "step": 22056 }, { "epoch": 0.6434739483050353, "grad_norm": 0.7542953577879931, "learning_rate": 2.9791430008163743e-06, "loss": 0.1146, "step": 22057 }, { "epoch": 0.6435031215356789, "grad_norm": 0.7323427741304133, "learning_rate": 2.9787108822200535e-06, "loss": 0.1128, "step": 22058 }, { "epoch": 0.6435322947663225, "grad_norm": 0.907603976780851, "learning_rate": 2.978278781669318e-06, "loss": 0.1282, "step": 22059 }, { "epoch": 0.643561467996966, "grad_norm": 0.84944528132297, "learning_rate": 2.977846699168028e-06, "loss": 0.1365, "step": 22060 }, { "epoch": 0.6435906412276096, "grad_norm": 0.7779429623159441, "learning_rate": 2.9774146347200394e-06, "loss": 0.1582, "step": 22061 }, { "epoch": 0.6436198144582531, "grad_norm": 0.7783991942694073, "learning_rate": 2.9769825883292082e-06, "loss": 0.154, "step": 22062 }, { "epoch": 0.6436489876888967, "grad_norm": 1.0587142119934603, "learning_rate": 2.976550559999396e-06, "loss": 0.1439, "step": 22063 }, { "epoch": 0.6436781609195402, "grad_norm": 0.8804615046154666, "learning_rate": 2.976118549734457e-06, "loss": 0.1222, "step": 22064 }, { "epoch": 0.6437073341501838, "grad_norm": 0.8756130173308975, "learning_rate": 2.9756865575382475e-06, "loss": 0.1248, "step": 22065 }, { "epoch": 0.6437365073808273, "grad_norm": 0.7862666622013987, "learning_rate": 2.9752545834146275e-06, "loss": 0.1159, "step": 22066 }, { "epoch": 0.6437656806114709, "grad_norm": 0.9494680000328745, "learning_rate": 2.974822627367449e-06, "loss": 0.1288, "step": 22067 }, { "epoch": 0.6437948538421144, "grad_norm": 0.798034359516251, "learning_rate": 2.97439068940057e-06, "loss": 0.1132, "step": 22068 }, { "epoch": 0.643824027072758, "grad_norm": 1.1078607766031694, "learning_rate": 2.9739587695178485e-06, "loss": 0.1537, "step": 22069 }, { "epoch": 0.6438532003034017, "grad_norm": 1.2731222312197774, "learning_rate": 2.97352686772314e-06, "loss": 0.1083, "step": 22070 }, { "epoch": 0.6438823735340452, "grad_norm": 0.9063292670247907, "learning_rate": 2.9730949840203e-06, "loss": 0.1162, "step": 22071 }, { "epoch": 0.6439115467646888, "grad_norm": 0.939624863022762, "learning_rate": 2.9726631184131833e-06, "loss": 0.1585, "step": 22072 }, { "epoch": 0.6439407199953323, "grad_norm": 0.9894101783704818, "learning_rate": 2.9722312709056466e-06, "loss": 0.1278, "step": 22073 }, { "epoch": 0.6439698932259759, "grad_norm": 1.1466272745788815, "learning_rate": 2.971799441501544e-06, "loss": 0.1211, "step": 22074 }, { "epoch": 0.6439990664566194, "grad_norm": 0.7980771666472901, "learning_rate": 2.9713676302047335e-06, "loss": 0.0974, "step": 22075 }, { "epoch": 0.644028239687263, "grad_norm": 0.9849768463337183, "learning_rate": 2.9709358370190677e-06, "loss": 0.1329, "step": 22076 }, { "epoch": 0.6440574129179065, "grad_norm": 1.0127582034249096, "learning_rate": 2.970504061948403e-06, "loss": 0.1245, "step": 22077 }, { "epoch": 0.6440865861485501, "grad_norm": 1.1530073418603166, "learning_rate": 2.9700723049965928e-06, "loss": 0.1236, "step": 22078 }, { "epoch": 0.6441157593791936, "grad_norm": 0.9272447726282984, "learning_rate": 2.969640566167493e-06, "loss": 0.122, "step": 22079 }, { "epoch": 0.6441449326098372, "grad_norm": 0.917588325968273, "learning_rate": 2.969208845464956e-06, "loss": 0.126, "step": 22080 }, { "epoch": 0.6441741058404807, "grad_norm": 1.0576892000786284, "learning_rate": 2.968777142892839e-06, "loss": 0.1437, "step": 22081 }, { "epoch": 0.6442032790711243, "grad_norm": 0.6943798440503597, "learning_rate": 2.9683454584549943e-06, "loss": 0.1172, "step": 22082 }, { "epoch": 0.6442324523017678, "grad_norm": 0.985432734009756, "learning_rate": 2.967913792155278e-06, "loss": 0.1285, "step": 22083 }, { "epoch": 0.6442616255324115, "grad_norm": 0.9401943586052562, "learning_rate": 2.967482143997541e-06, "loss": 0.1253, "step": 22084 }, { "epoch": 0.6442907987630551, "grad_norm": 0.9388888997823897, "learning_rate": 2.9670505139856375e-06, "loss": 0.1139, "step": 22085 }, { "epoch": 0.6443199719936986, "grad_norm": 0.9569262205041986, "learning_rate": 2.9666189021234214e-06, "loss": 0.1053, "step": 22086 }, { "epoch": 0.6443491452243422, "grad_norm": 0.8517320319007174, "learning_rate": 2.9661873084147473e-06, "loss": 0.1084, "step": 22087 }, { "epoch": 0.6443783184549857, "grad_norm": 1.0762450535352732, "learning_rate": 2.9657557328634688e-06, "loss": 0.165, "step": 22088 }, { "epoch": 0.6444074916856293, "grad_norm": 0.9851269425213238, "learning_rate": 2.9653241754734363e-06, "loss": 0.1348, "step": 22089 }, { "epoch": 0.6444366649162728, "grad_norm": 0.8501366656248518, "learning_rate": 2.964892636248503e-06, "loss": 0.1143, "step": 22090 }, { "epoch": 0.6444658381469164, "grad_norm": 1.1994242588566304, "learning_rate": 2.964461115192524e-06, "loss": 0.1307, "step": 22091 }, { "epoch": 0.6444950113775599, "grad_norm": 0.8736033208421453, "learning_rate": 2.9640296123093476e-06, "loss": 0.1183, "step": 22092 }, { "epoch": 0.6445241846082035, "grad_norm": 0.8446397914758896, "learning_rate": 2.963598127602831e-06, "loss": 0.1307, "step": 22093 }, { "epoch": 0.644553357838847, "grad_norm": 0.7794112893337023, "learning_rate": 2.963166661076824e-06, "loss": 0.1301, "step": 22094 }, { "epoch": 0.6445825310694906, "grad_norm": 0.8520922326501634, "learning_rate": 2.9627352127351783e-06, "loss": 0.1395, "step": 22095 }, { "epoch": 0.6446117043001341, "grad_norm": 0.7382525198148784, "learning_rate": 2.962303782581748e-06, "loss": 0.1116, "step": 22096 }, { "epoch": 0.6446408775307778, "grad_norm": 1.0613390162714242, "learning_rate": 2.9618723706203812e-06, "loss": 0.1236, "step": 22097 }, { "epoch": 0.6446700507614214, "grad_norm": 0.9408842892032847, "learning_rate": 2.961440976854931e-06, "loss": 0.1264, "step": 22098 }, { "epoch": 0.6446992239920649, "grad_norm": 0.72467979429876, "learning_rate": 2.9610096012892496e-06, "loss": 0.1297, "step": 22099 }, { "epoch": 0.6447283972227085, "grad_norm": 0.737689601338275, "learning_rate": 2.960578243927188e-06, "loss": 0.1149, "step": 22100 }, { "epoch": 0.644757570453352, "grad_norm": 0.9520034245445577, "learning_rate": 2.960146904772598e-06, "loss": 0.1228, "step": 22101 }, { "epoch": 0.6447867436839956, "grad_norm": 1.136912996718269, "learning_rate": 2.959715583829328e-06, "loss": 0.1399, "step": 22102 }, { "epoch": 0.6448159169146391, "grad_norm": 0.8071727351381353, "learning_rate": 2.959284281101231e-06, "loss": 0.1399, "step": 22103 }, { "epoch": 0.6448450901452827, "grad_norm": 0.8089437736268954, "learning_rate": 2.958852996592155e-06, "loss": 0.1224, "step": 22104 }, { "epoch": 0.6448742633759262, "grad_norm": 0.947368275285795, "learning_rate": 2.958421730305955e-06, "loss": 0.1156, "step": 22105 }, { "epoch": 0.6449034366065698, "grad_norm": 1.1211907837726405, "learning_rate": 2.9579904822464767e-06, "loss": 0.1408, "step": 22106 }, { "epoch": 0.6449326098372133, "grad_norm": 0.792799923898483, "learning_rate": 2.9575592524175723e-06, "loss": 0.1132, "step": 22107 }, { "epoch": 0.6449617830678569, "grad_norm": 0.7318146421671569, "learning_rate": 2.9571280408230917e-06, "loss": 0.1305, "step": 22108 }, { "epoch": 0.6449909562985005, "grad_norm": 1.0511070170228909, "learning_rate": 2.9566968474668847e-06, "loss": 0.1203, "step": 22109 }, { "epoch": 0.645020129529144, "grad_norm": 0.8679755600181671, "learning_rate": 2.956265672352798e-06, "loss": 0.1153, "step": 22110 }, { "epoch": 0.6450493027597877, "grad_norm": 0.9430004558181917, "learning_rate": 2.955834515484685e-06, "loss": 0.1296, "step": 22111 }, { "epoch": 0.6450784759904312, "grad_norm": 0.9449628681644011, "learning_rate": 2.9554033768663937e-06, "loss": 0.1121, "step": 22112 }, { "epoch": 0.6451076492210748, "grad_norm": 0.7746785251412969, "learning_rate": 2.9549722565017737e-06, "loss": 0.1392, "step": 22113 }, { "epoch": 0.6451368224517183, "grad_norm": 0.8924165420964965, "learning_rate": 2.9545411543946723e-06, "loss": 0.1305, "step": 22114 }, { "epoch": 0.6451659956823619, "grad_norm": 0.7748486079407143, "learning_rate": 2.9541100705489393e-06, "loss": 0.1518, "step": 22115 }, { "epoch": 0.6451951689130054, "grad_norm": 0.6335176502798734, "learning_rate": 2.9536790049684224e-06, "loss": 0.1081, "step": 22116 }, { "epoch": 0.645224342143649, "grad_norm": 1.0450723518143625, "learning_rate": 2.9532479576569716e-06, "loss": 0.1275, "step": 22117 }, { "epoch": 0.6452535153742925, "grad_norm": 0.7215618244914422, "learning_rate": 2.9528169286184348e-06, "loss": 0.1052, "step": 22118 }, { "epoch": 0.6452826886049361, "grad_norm": 0.9781881820207516, "learning_rate": 2.9523859178566594e-06, "loss": 0.133, "step": 22119 }, { "epoch": 0.6453118618355796, "grad_norm": 0.707457863237948, "learning_rate": 2.951954925375494e-06, "loss": 0.1145, "step": 22120 }, { "epoch": 0.6453410350662232, "grad_norm": 0.7934944591494858, "learning_rate": 2.951523951178787e-06, "loss": 0.1044, "step": 22121 }, { "epoch": 0.6453702082968668, "grad_norm": 0.7912181486480262, "learning_rate": 2.9510929952703815e-06, "loss": 0.1289, "step": 22122 }, { "epoch": 0.6453993815275103, "grad_norm": 0.7793378273608388, "learning_rate": 2.950662057654132e-06, "loss": 0.1309, "step": 22123 }, { "epoch": 0.645428554758154, "grad_norm": 0.8729551155646463, "learning_rate": 2.950231138333882e-06, "loss": 0.1038, "step": 22124 }, { "epoch": 0.6454577279887975, "grad_norm": 0.813778001061841, "learning_rate": 2.949800237313478e-06, "loss": 0.126, "step": 22125 }, { "epoch": 0.6454869012194411, "grad_norm": 0.894484055265241, "learning_rate": 2.94936935459677e-06, "loss": 0.1372, "step": 22126 }, { "epoch": 0.6455160744500846, "grad_norm": 0.895704340897552, "learning_rate": 2.9489384901876016e-06, "loss": 0.1383, "step": 22127 }, { "epoch": 0.6455452476807282, "grad_norm": 0.8094463295701287, "learning_rate": 2.94850764408982e-06, "loss": 0.1336, "step": 22128 }, { "epoch": 0.6455744209113717, "grad_norm": 0.8192602504632791, "learning_rate": 2.9480768163072726e-06, "loss": 0.117, "step": 22129 }, { "epoch": 0.6456035941420153, "grad_norm": 0.6793074005980579, "learning_rate": 2.9476460068438064e-06, "loss": 0.1049, "step": 22130 }, { "epoch": 0.6456327673726588, "grad_norm": 0.7619057899262515, "learning_rate": 2.947215215703267e-06, "loss": 0.1251, "step": 22131 }, { "epoch": 0.6456619406033024, "grad_norm": 0.8574812210018996, "learning_rate": 2.9467844428894998e-06, "loss": 0.1229, "step": 22132 }, { "epoch": 0.645691113833946, "grad_norm": 0.7172180666131697, "learning_rate": 2.9463536884063505e-06, "loss": 0.1324, "step": 22133 }, { "epoch": 0.6457202870645895, "grad_norm": 0.7592250951735323, "learning_rate": 2.945922952257664e-06, "loss": 0.1339, "step": 22134 }, { "epoch": 0.6457494602952331, "grad_norm": 1.1939515370283684, "learning_rate": 2.9454922344472893e-06, "loss": 0.1114, "step": 22135 }, { "epoch": 0.6457786335258766, "grad_norm": 0.6845693570592758, "learning_rate": 2.945061534979069e-06, "loss": 0.0953, "step": 22136 }, { "epoch": 0.6458078067565202, "grad_norm": 0.7923648608066438, "learning_rate": 2.944630853856848e-06, "loss": 0.1186, "step": 22137 }, { "epoch": 0.6458369799871638, "grad_norm": 1.3492082174648148, "learning_rate": 2.944200191084473e-06, "loss": 0.1334, "step": 22138 }, { "epoch": 0.6458661532178074, "grad_norm": 1.0539879261958156, "learning_rate": 2.9437695466657877e-06, "loss": 0.1165, "step": 22139 }, { "epoch": 0.6458953264484509, "grad_norm": 0.6507382814474879, "learning_rate": 2.943338920604636e-06, "loss": 0.1012, "step": 22140 }, { "epoch": 0.6459244996790945, "grad_norm": 0.9651391579973144, "learning_rate": 2.9429083129048636e-06, "loss": 0.1133, "step": 22141 }, { "epoch": 0.645953672909738, "grad_norm": 1.1172918869556225, "learning_rate": 2.942477723570315e-06, "loss": 0.1211, "step": 22142 }, { "epoch": 0.6459828461403816, "grad_norm": 0.914783104135238, "learning_rate": 2.9420471526048356e-06, "loss": 0.1299, "step": 22143 }, { "epoch": 0.6460120193710251, "grad_norm": 1.1613740299798396, "learning_rate": 2.941616600012267e-06, "loss": 0.1403, "step": 22144 }, { "epoch": 0.6460411926016687, "grad_norm": 0.7755959362290784, "learning_rate": 2.941186065796453e-06, "loss": 0.139, "step": 22145 }, { "epoch": 0.6460703658323123, "grad_norm": 0.7695792323176357, "learning_rate": 2.9407555499612383e-06, "loss": 0.0885, "step": 22146 }, { "epoch": 0.6460995390629558, "grad_norm": 0.7572841970919096, "learning_rate": 2.9403250525104672e-06, "loss": 0.1054, "step": 22147 }, { "epoch": 0.6461287122935994, "grad_norm": 0.8386233756755662, "learning_rate": 2.939894573447983e-06, "loss": 0.1171, "step": 22148 }, { "epoch": 0.6461578855242429, "grad_norm": 1.1214203074447373, "learning_rate": 2.939464112777628e-06, "loss": 0.1308, "step": 22149 }, { "epoch": 0.6461870587548865, "grad_norm": 0.7332803395155896, "learning_rate": 2.9390336705032452e-06, "loss": 0.1036, "step": 22150 }, { "epoch": 0.6462162319855301, "grad_norm": 0.9084925279839042, "learning_rate": 2.9386032466286783e-06, "loss": 0.1378, "step": 22151 }, { "epoch": 0.6462454052161737, "grad_norm": 1.191419971198691, "learning_rate": 2.938172841157767e-06, "loss": 0.1477, "step": 22152 }, { "epoch": 0.6462745784468172, "grad_norm": 0.8901477574469311, "learning_rate": 2.9377424540943594e-06, "loss": 0.1216, "step": 22153 }, { "epoch": 0.6463037516774608, "grad_norm": 0.8752770002895012, "learning_rate": 2.937312085442294e-06, "loss": 0.1337, "step": 22154 }, { "epoch": 0.6463329249081043, "grad_norm": 0.7788200669650033, "learning_rate": 2.9368817352054137e-06, "loss": 0.1083, "step": 22155 }, { "epoch": 0.6463620981387479, "grad_norm": 0.9631934396815398, "learning_rate": 2.9364514033875614e-06, "loss": 0.1024, "step": 22156 }, { "epoch": 0.6463912713693915, "grad_norm": 0.7865690791547649, "learning_rate": 2.936021089992578e-06, "loss": 0.0926, "step": 22157 }, { "epoch": 0.646420444600035, "grad_norm": 0.8685141462870842, "learning_rate": 2.935590795024304e-06, "loss": 0.1306, "step": 22158 }, { "epoch": 0.6464496178306786, "grad_norm": 0.8094680277486507, "learning_rate": 2.935160518486584e-06, "loss": 0.1125, "step": 22159 }, { "epoch": 0.6464787910613221, "grad_norm": 0.9249522662181008, "learning_rate": 2.934730260383258e-06, "loss": 0.1466, "step": 22160 }, { "epoch": 0.6465079642919657, "grad_norm": 0.8095583761245791, "learning_rate": 2.9343000207181676e-06, "loss": 0.1269, "step": 22161 }, { "epoch": 0.6465371375226092, "grad_norm": 1.096617108135167, "learning_rate": 2.9338697994951532e-06, "loss": 0.1555, "step": 22162 }, { "epoch": 0.6465663107532528, "grad_norm": 0.8291858025448493, "learning_rate": 2.933439596718056e-06, "loss": 0.1289, "step": 22163 }, { "epoch": 0.6465954839838963, "grad_norm": 0.7494580678500257, "learning_rate": 2.933009412390715e-06, "loss": 0.1132, "step": 22164 }, { "epoch": 0.64662465721454, "grad_norm": 0.7315831730628344, "learning_rate": 2.9325792465169755e-06, "loss": 0.1325, "step": 22165 }, { "epoch": 0.6466538304451835, "grad_norm": 0.7560886606993555, "learning_rate": 2.932149099100673e-06, "loss": 0.1274, "step": 22166 }, { "epoch": 0.6466830036758271, "grad_norm": 0.8554363865744427, "learning_rate": 2.9317189701456505e-06, "loss": 0.0989, "step": 22167 }, { "epoch": 0.6467121769064706, "grad_norm": 0.8235054489768502, "learning_rate": 2.9312888596557476e-06, "loss": 0.1083, "step": 22168 }, { "epoch": 0.6467413501371142, "grad_norm": 0.7127127157064905, "learning_rate": 2.930858767634803e-06, "loss": 0.0995, "step": 22169 }, { "epoch": 0.6467705233677578, "grad_norm": 0.8658708281194661, "learning_rate": 2.930428694086657e-06, "loss": 0.1464, "step": 22170 }, { "epoch": 0.6467996965984013, "grad_norm": 0.8377800537767242, "learning_rate": 2.92999863901515e-06, "loss": 0.119, "step": 22171 }, { "epoch": 0.6468288698290449, "grad_norm": 0.6426346720352603, "learning_rate": 2.9295686024241222e-06, "loss": 0.1198, "step": 22172 }, { "epoch": 0.6468580430596884, "grad_norm": 0.6362227121973375, "learning_rate": 2.9291385843174114e-06, "loss": 0.121, "step": 22173 }, { "epoch": 0.646887216290332, "grad_norm": 0.8023373488833564, "learning_rate": 2.928708584698856e-06, "loss": 0.147, "step": 22174 }, { "epoch": 0.6469163895209755, "grad_norm": 0.9216522972434915, "learning_rate": 2.9282786035722965e-06, "loss": 0.1238, "step": 22175 }, { "epoch": 0.6469455627516191, "grad_norm": 0.6761602563199479, "learning_rate": 2.9278486409415694e-06, "loss": 0.1292, "step": 22176 }, { "epoch": 0.6469747359822626, "grad_norm": 0.998378761133215, "learning_rate": 2.9274186968105167e-06, "loss": 0.1237, "step": 22177 }, { "epoch": 0.6470039092129063, "grad_norm": 0.7092643212091149, "learning_rate": 2.9269887711829758e-06, "loss": 0.0997, "step": 22178 }, { "epoch": 0.6470330824435498, "grad_norm": 1.027844812565083, "learning_rate": 2.926558864062783e-06, "loss": 0.1045, "step": 22179 }, { "epoch": 0.6470622556741934, "grad_norm": 0.7285614319596585, "learning_rate": 2.926128975453778e-06, "loss": 0.1292, "step": 22180 }, { "epoch": 0.647091428904837, "grad_norm": 0.8914201346505192, "learning_rate": 2.9256991053597995e-06, "loss": 0.125, "step": 22181 }, { "epoch": 0.6471206021354805, "grad_norm": 0.9331847810236362, "learning_rate": 2.9252692537846807e-06, "loss": 0.1307, "step": 22182 }, { "epoch": 0.6471497753661241, "grad_norm": 0.7859581084706962, "learning_rate": 2.924839420732266e-06, "loss": 0.1094, "step": 22183 }, { "epoch": 0.6471789485967676, "grad_norm": 1.0851130472313715, "learning_rate": 2.9244096062063887e-06, "loss": 0.1319, "step": 22184 }, { "epoch": 0.6472081218274112, "grad_norm": 0.843565091800913, "learning_rate": 2.9239798102108876e-06, "loss": 0.1102, "step": 22185 }, { "epoch": 0.6472372950580547, "grad_norm": 0.6561390125954926, "learning_rate": 2.923550032749599e-06, "loss": 0.1146, "step": 22186 }, { "epoch": 0.6472664682886983, "grad_norm": 0.9459923822715989, "learning_rate": 2.9231202738263596e-06, "loss": 0.1154, "step": 22187 }, { "epoch": 0.6472956415193418, "grad_norm": 0.7443980986626273, "learning_rate": 2.922690533445005e-06, "loss": 0.1258, "step": 22188 }, { "epoch": 0.6473248147499854, "grad_norm": 0.9890264743048002, "learning_rate": 2.922260811609375e-06, "loss": 0.1093, "step": 22189 }, { "epoch": 0.6473539879806289, "grad_norm": 0.7692884800769954, "learning_rate": 2.9218311083233043e-06, "loss": 0.1207, "step": 22190 }, { "epoch": 0.6473831612112725, "grad_norm": 0.8194871660919252, "learning_rate": 2.921401423590631e-06, "loss": 0.1304, "step": 22191 }, { "epoch": 0.6474123344419161, "grad_norm": 0.9914336772632879, "learning_rate": 2.9209717574151876e-06, "loss": 0.1135, "step": 22192 }, { "epoch": 0.6474415076725597, "grad_norm": 0.8285517709083561, "learning_rate": 2.9205421098008125e-06, "loss": 0.1224, "step": 22193 }, { "epoch": 0.6474706809032033, "grad_norm": 0.7174786091560991, "learning_rate": 2.9201124807513404e-06, "loss": 0.1307, "step": 22194 }, { "epoch": 0.6474998541338468, "grad_norm": 0.8336481348727043, "learning_rate": 2.9196828702706093e-06, "loss": 0.1233, "step": 22195 }, { "epoch": 0.6475290273644904, "grad_norm": 0.7776439857809159, "learning_rate": 2.9192532783624503e-06, "loss": 0.1339, "step": 22196 }, { "epoch": 0.6475582005951339, "grad_norm": 0.8855835433072633, "learning_rate": 2.9188237050307043e-06, "loss": 0.1128, "step": 22197 }, { "epoch": 0.6475873738257775, "grad_norm": 0.8987721106669736, "learning_rate": 2.9183941502792024e-06, "loss": 0.1206, "step": 22198 }, { "epoch": 0.647616547056421, "grad_norm": 0.8379566254560279, "learning_rate": 2.9179646141117796e-06, "loss": 0.1129, "step": 22199 }, { "epoch": 0.6476457202870646, "grad_norm": 0.9057008007230078, "learning_rate": 2.917535096532271e-06, "loss": 0.1265, "step": 22200 }, { "epoch": 0.6476748935177081, "grad_norm": 0.8331903270729928, "learning_rate": 2.9171055975445146e-06, "loss": 0.1172, "step": 22201 }, { "epoch": 0.6477040667483517, "grad_norm": 0.8653612302633052, "learning_rate": 2.916676117152342e-06, "loss": 0.1283, "step": 22202 }, { "epoch": 0.6477332399789952, "grad_norm": 0.7786705846466023, "learning_rate": 2.9162466553595855e-06, "loss": 0.1117, "step": 22203 }, { "epoch": 0.6477624132096388, "grad_norm": 0.8807180720103558, "learning_rate": 2.9158172121700832e-06, "loss": 0.1131, "step": 22204 }, { "epoch": 0.6477915864402825, "grad_norm": 0.8487521008138452, "learning_rate": 2.9153877875876676e-06, "loss": 0.1184, "step": 22205 }, { "epoch": 0.647820759670926, "grad_norm": 1.059747550967466, "learning_rate": 2.9149583816161696e-06, "loss": 0.1237, "step": 22206 }, { "epoch": 0.6478499329015696, "grad_norm": 0.883438067013105, "learning_rate": 2.9145289942594264e-06, "loss": 0.1162, "step": 22207 }, { "epoch": 0.6478791061322131, "grad_norm": 0.8808293893054353, "learning_rate": 2.9140996255212717e-06, "loss": 0.1392, "step": 22208 }, { "epoch": 0.6479082793628567, "grad_norm": 0.8223902277286501, "learning_rate": 2.9136702754055378e-06, "loss": 0.1249, "step": 22209 }, { "epoch": 0.6479374525935002, "grad_norm": 0.7806718802515316, "learning_rate": 2.9132409439160563e-06, "loss": 0.1365, "step": 22210 }, { "epoch": 0.6479666258241438, "grad_norm": 0.8111719380116086, "learning_rate": 2.912811631056663e-06, "loss": 0.1052, "step": 22211 }, { "epoch": 0.6479957990547873, "grad_norm": 0.8420319153966669, "learning_rate": 2.9123823368311872e-06, "loss": 0.1345, "step": 22212 }, { "epoch": 0.6480249722854309, "grad_norm": 0.8771429884939764, "learning_rate": 2.9119530612434632e-06, "loss": 0.1652, "step": 22213 }, { "epoch": 0.6480541455160744, "grad_norm": 0.8131677053655297, "learning_rate": 2.9115238042973263e-06, "loss": 0.1281, "step": 22214 }, { "epoch": 0.648083318746718, "grad_norm": 0.7371308360770922, "learning_rate": 2.9110945659966063e-06, "loss": 0.1211, "step": 22215 }, { "epoch": 0.6481124919773615, "grad_norm": 0.9234163657485489, "learning_rate": 2.9106653463451327e-06, "loss": 0.1186, "step": 22216 }, { "epoch": 0.6481416652080051, "grad_norm": 1.0125152645211053, "learning_rate": 2.9102361453467434e-06, "loss": 0.143, "step": 22217 }, { "epoch": 0.6481708384386486, "grad_norm": 0.8084206160272337, "learning_rate": 2.909806963005264e-06, "loss": 0.1068, "step": 22218 }, { "epoch": 0.6482000116692923, "grad_norm": 0.6277330122588356, "learning_rate": 2.909377799324531e-06, "loss": 0.1231, "step": 22219 }, { "epoch": 0.6482291848999359, "grad_norm": 0.6551189963208536, "learning_rate": 2.9089486543083724e-06, "loss": 0.1126, "step": 22220 }, { "epoch": 0.6482583581305794, "grad_norm": 0.8168921311399865, "learning_rate": 2.9085195279606226e-06, "loss": 0.1136, "step": 22221 }, { "epoch": 0.648287531361223, "grad_norm": 0.6861027445830455, "learning_rate": 2.908090420285112e-06, "loss": 0.127, "step": 22222 }, { "epoch": 0.6483167045918665, "grad_norm": 0.7218587338935585, "learning_rate": 2.9076613312856662e-06, "loss": 0.1196, "step": 22223 }, { "epoch": 0.6483458778225101, "grad_norm": 0.7896318321198683, "learning_rate": 2.907232260966124e-06, "loss": 0.1199, "step": 22224 }, { "epoch": 0.6483750510531536, "grad_norm": 0.837601289541288, "learning_rate": 2.906803209330313e-06, "loss": 0.1209, "step": 22225 }, { "epoch": 0.6484042242837972, "grad_norm": 0.7454560796760552, "learning_rate": 2.906374176382062e-06, "loss": 0.1191, "step": 22226 }, { "epoch": 0.6484333975144407, "grad_norm": 0.85999224330815, "learning_rate": 2.9059451621252035e-06, "loss": 0.1383, "step": 22227 }, { "epoch": 0.6484625707450843, "grad_norm": 0.9845153676921726, "learning_rate": 2.9055161665635665e-06, "loss": 0.1326, "step": 22228 }, { "epoch": 0.6484917439757278, "grad_norm": 0.8082920375886358, "learning_rate": 2.9050871897009803e-06, "loss": 0.1037, "step": 22229 }, { "epoch": 0.6485209172063714, "grad_norm": 0.8762478996095815, "learning_rate": 2.9046582315412753e-06, "loss": 0.1245, "step": 22230 }, { "epoch": 0.648550090437015, "grad_norm": 0.8291544298446543, "learning_rate": 2.904229292088283e-06, "loss": 0.1234, "step": 22231 }, { "epoch": 0.6485792636676585, "grad_norm": 0.65625097007985, "learning_rate": 2.903800371345832e-06, "loss": 0.1023, "step": 22232 }, { "epoch": 0.6486084368983022, "grad_norm": 0.9834768611648416, "learning_rate": 2.9033714693177476e-06, "loss": 0.0977, "step": 22233 }, { "epoch": 0.6486376101289457, "grad_norm": 0.8343521171507311, "learning_rate": 2.9029425860078654e-06, "loss": 0.1226, "step": 22234 }, { "epoch": 0.6486667833595893, "grad_norm": 1.1112147369985677, "learning_rate": 2.9025137214200083e-06, "loss": 0.1363, "step": 22235 }, { "epoch": 0.6486959565902328, "grad_norm": 0.8137001270267556, "learning_rate": 2.9020848755580105e-06, "loss": 0.1277, "step": 22236 }, { "epoch": 0.6487251298208764, "grad_norm": 0.9032656744694125, "learning_rate": 2.9016560484256962e-06, "loss": 0.1174, "step": 22237 }, { "epoch": 0.6487543030515199, "grad_norm": 1.0388719305762362, "learning_rate": 2.9012272400268975e-06, "loss": 0.1263, "step": 22238 }, { "epoch": 0.6487834762821635, "grad_norm": 0.9447980528238217, "learning_rate": 2.9007984503654413e-06, "loss": 0.1392, "step": 22239 }, { "epoch": 0.648812649512807, "grad_norm": 0.8465938837144383, "learning_rate": 2.900369679445153e-06, "loss": 0.1165, "step": 22240 }, { "epoch": 0.6488418227434506, "grad_norm": 1.0450775181172034, "learning_rate": 2.899940927269863e-06, "loss": 0.1288, "step": 22241 }, { "epoch": 0.6488709959740941, "grad_norm": 0.9843836485440016, "learning_rate": 2.8995121938434013e-06, "loss": 0.134, "step": 22242 }, { "epoch": 0.6489001692047377, "grad_norm": 0.8441332050784572, "learning_rate": 2.8990834791695915e-06, "loss": 0.1363, "step": 22243 }, { "epoch": 0.6489293424353813, "grad_norm": 0.8157436532775706, "learning_rate": 2.898654783252265e-06, "loss": 0.1223, "step": 22244 }, { "epoch": 0.6489585156660248, "grad_norm": 1.0417248356619506, "learning_rate": 2.8982261060952464e-06, "loss": 0.1162, "step": 22245 }, { "epoch": 0.6489876888966685, "grad_norm": 0.8010471495555376, "learning_rate": 2.897797447702362e-06, "loss": 0.1068, "step": 22246 }, { "epoch": 0.649016862127312, "grad_norm": 0.8060256648823838, "learning_rate": 2.897368808077439e-06, "loss": 0.1405, "step": 22247 }, { "epoch": 0.6490460353579556, "grad_norm": 0.7801619476066202, "learning_rate": 2.8969401872243087e-06, "loss": 0.1295, "step": 22248 }, { "epoch": 0.6490752085885991, "grad_norm": 0.9191791421847849, "learning_rate": 2.8965115851467935e-06, "loss": 0.1606, "step": 22249 }, { "epoch": 0.6491043818192427, "grad_norm": 1.120677276714752, "learning_rate": 2.8960830018487183e-06, "loss": 0.161, "step": 22250 }, { "epoch": 0.6491335550498862, "grad_norm": 1.481076613089939, "learning_rate": 2.895654437333915e-06, "loss": 0.1237, "step": 22251 }, { "epoch": 0.6491627282805298, "grad_norm": 0.7821748784121355, "learning_rate": 2.895225891606206e-06, "loss": 0.1246, "step": 22252 }, { "epoch": 0.6491919015111733, "grad_norm": 0.7540817950701243, "learning_rate": 2.894797364669414e-06, "loss": 0.087, "step": 22253 }, { "epoch": 0.6492210747418169, "grad_norm": 0.7995361905697157, "learning_rate": 2.894368856527372e-06, "loss": 0.11, "step": 22254 }, { "epoch": 0.6492502479724604, "grad_norm": 0.9061402087324139, "learning_rate": 2.8939403671839027e-06, "loss": 0.1057, "step": 22255 }, { "epoch": 0.649279421203104, "grad_norm": 0.9513215363869687, "learning_rate": 2.893511896642829e-06, "loss": 0.122, "step": 22256 }, { "epoch": 0.6493085944337476, "grad_norm": 0.9415533149661969, "learning_rate": 2.8930834449079803e-06, "loss": 0.0895, "step": 22257 }, { "epoch": 0.6493377676643911, "grad_norm": 0.999627909313702, "learning_rate": 2.8926550119831798e-06, "loss": 0.1393, "step": 22258 }, { "epoch": 0.6493669408950347, "grad_norm": 0.8792576086743242, "learning_rate": 2.89222659787225e-06, "loss": 0.1368, "step": 22259 }, { "epoch": 0.6493961141256783, "grad_norm": 1.1220494267093675, "learning_rate": 2.891798202579018e-06, "loss": 0.1321, "step": 22260 }, { "epoch": 0.6494252873563219, "grad_norm": 1.2925017141032562, "learning_rate": 2.8913698261073097e-06, "loss": 0.1006, "step": 22261 }, { "epoch": 0.6494544605869654, "grad_norm": 1.1950285563697882, "learning_rate": 2.890941468460949e-06, "loss": 0.1339, "step": 22262 }, { "epoch": 0.649483633817609, "grad_norm": 0.9762538718653299, "learning_rate": 2.890513129643757e-06, "loss": 0.1421, "step": 22263 }, { "epoch": 0.6495128070482525, "grad_norm": 0.818201095500948, "learning_rate": 2.890084809659563e-06, "loss": 0.1297, "step": 22264 }, { "epoch": 0.6495419802788961, "grad_norm": 0.8716190450685624, "learning_rate": 2.8896565085121854e-06, "loss": 0.1294, "step": 22265 }, { "epoch": 0.6495711535095396, "grad_norm": 0.8326342416327545, "learning_rate": 2.8892282262054533e-06, "loss": 0.1141, "step": 22266 }, { "epoch": 0.6496003267401832, "grad_norm": 1.1768202664076068, "learning_rate": 2.8887999627431853e-06, "loss": 0.1527, "step": 22267 }, { "epoch": 0.6496294999708268, "grad_norm": 0.7728175670162283, "learning_rate": 2.8883717181292092e-06, "loss": 0.1076, "step": 22268 }, { "epoch": 0.6496586732014703, "grad_norm": 0.8384587145072449, "learning_rate": 2.8879434923673465e-06, "loss": 0.1268, "step": 22269 }, { "epoch": 0.6496878464321139, "grad_norm": 0.9827271946730417, "learning_rate": 2.887515285461418e-06, "loss": 0.1324, "step": 22270 }, { "epoch": 0.6497170196627574, "grad_norm": 0.7941963284896321, "learning_rate": 2.8870870974152485e-06, "loss": 0.1143, "step": 22271 }, { "epoch": 0.649746192893401, "grad_norm": 0.8922433027313872, "learning_rate": 2.8866589282326633e-06, "loss": 0.1311, "step": 22272 }, { "epoch": 0.6497753661240446, "grad_norm": 0.8916129357864748, "learning_rate": 2.886230777917481e-06, "loss": 0.1082, "step": 22273 }, { "epoch": 0.6498045393546882, "grad_norm": 0.8046660369678841, "learning_rate": 2.8858026464735275e-06, "loss": 0.1031, "step": 22274 }, { "epoch": 0.6498337125853317, "grad_norm": 0.7749585521185539, "learning_rate": 2.885374533904623e-06, "loss": 0.1054, "step": 22275 }, { "epoch": 0.6498628858159753, "grad_norm": 0.7019637526795982, "learning_rate": 2.8849464402145878e-06, "loss": 0.1207, "step": 22276 }, { "epoch": 0.6498920590466188, "grad_norm": 0.8911156397946876, "learning_rate": 2.8845183654072463e-06, "loss": 0.1156, "step": 22277 }, { "epoch": 0.6499212322772624, "grad_norm": 0.8485200099235773, "learning_rate": 2.8840903094864213e-06, "loss": 0.1494, "step": 22278 }, { "epoch": 0.649950405507906, "grad_norm": 0.7812087982819907, "learning_rate": 2.8836622724559332e-06, "loss": 0.1099, "step": 22279 }, { "epoch": 0.6499795787385495, "grad_norm": 0.9901185834870755, "learning_rate": 2.8832342543196013e-06, "loss": 0.1353, "step": 22280 }, { "epoch": 0.650008751969193, "grad_norm": 0.8845551968744959, "learning_rate": 2.882806255081251e-06, "loss": 0.1255, "step": 22281 }, { "epoch": 0.6500379251998366, "grad_norm": 0.7359573079446841, "learning_rate": 2.8823782747447002e-06, "loss": 0.0956, "step": 22282 }, { "epoch": 0.6500670984304802, "grad_norm": 1.1051803047028599, "learning_rate": 2.881950313313767e-06, "loss": 0.141, "step": 22283 }, { "epoch": 0.6500962716611237, "grad_norm": 1.2176424935196708, "learning_rate": 2.88152237079228e-06, "loss": 0.109, "step": 22284 }, { "epoch": 0.6501254448917673, "grad_norm": 0.8490213342693431, "learning_rate": 2.8810944471840553e-06, "loss": 0.1254, "step": 22285 }, { "epoch": 0.6501546181224108, "grad_norm": 0.7544935484334007, "learning_rate": 2.8806665424929115e-06, "loss": 0.1262, "step": 22286 }, { "epoch": 0.6501837913530545, "grad_norm": 1.1875890504706237, "learning_rate": 2.8802386567226724e-06, "loss": 0.1321, "step": 22287 }, { "epoch": 0.650212964583698, "grad_norm": 0.8672871483778205, "learning_rate": 2.8798107898771577e-06, "loss": 0.1198, "step": 22288 }, { "epoch": 0.6502421378143416, "grad_norm": 0.6797266463043273, "learning_rate": 2.879382941960183e-06, "loss": 0.1107, "step": 22289 }, { "epoch": 0.6502713110449851, "grad_norm": 0.7605994544537991, "learning_rate": 2.878955112975572e-06, "loss": 0.1037, "step": 22290 }, { "epoch": 0.6503004842756287, "grad_norm": 0.8885139352006814, "learning_rate": 2.8785273029271447e-06, "loss": 0.1225, "step": 22291 }, { "epoch": 0.6503296575062723, "grad_norm": 0.8725811308371497, "learning_rate": 2.87809951181872e-06, "loss": 0.1145, "step": 22292 }, { "epoch": 0.6503588307369158, "grad_norm": 0.946947772713514, "learning_rate": 2.8776717396541145e-06, "loss": 0.1336, "step": 22293 }, { "epoch": 0.6503880039675594, "grad_norm": 0.9082505042413088, "learning_rate": 2.8772439864371497e-06, "loss": 0.1463, "step": 22294 }, { "epoch": 0.6504171771982029, "grad_norm": 0.8276712450899966, "learning_rate": 2.8768162521716426e-06, "loss": 0.1227, "step": 22295 }, { "epoch": 0.6504463504288465, "grad_norm": 0.8256386614093127, "learning_rate": 2.876388536861415e-06, "loss": 0.0924, "step": 22296 }, { "epoch": 0.65047552365949, "grad_norm": 0.8900106813547162, "learning_rate": 2.875960840510282e-06, "loss": 0.1032, "step": 22297 }, { "epoch": 0.6505046968901336, "grad_norm": 1.0424582409899887, "learning_rate": 2.8755331631220654e-06, "loss": 0.1373, "step": 22298 }, { "epoch": 0.6505338701207771, "grad_norm": 0.9105610222122973, "learning_rate": 2.8751055047005817e-06, "loss": 0.1181, "step": 22299 }, { "epoch": 0.6505630433514208, "grad_norm": 0.8846220500941803, "learning_rate": 2.8746778652496467e-06, "loss": 0.1198, "step": 22300 }, { "epoch": 0.6505922165820643, "grad_norm": 1.130978155378496, "learning_rate": 2.8742502447730803e-06, "loss": 0.1324, "step": 22301 }, { "epoch": 0.6506213898127079, "grad_norm": 0.8745998816816701, "learning_rate": 2.8738226432747025e-06, "loss": 0.0988, "step": 22302 }, { "epoch": 0.6506505630433514, "grad_norm": 0.9903151874375687, "learning_rate": 2.873395060758326e-06, "loss": 0.1363, "step": 22303 }, { "epoch": 0.650679736273995, "grad_norm": 1.0525509613535189, "learning_rate": 2.872967497227773e-06, "loss": 0.1336, "step": 22304 }, { "epoch": 0.6507089095046386, "grad_norm": 0.932936869396191, "learning_rate": 2.872539952686859e-06, "loss": 0.1295, "step": 22305 }, { "epoch": 0.6507380827352821, "grad_norm": 0.9742411972903376, "learning_rate": 2.8721124271393973e-06, "loss": 0.103, "step": 22306 }, { "epoch": 0.6507672559659257, "grad_norm": 0.7703058739276191, "learning_rate": 2.8716849205892087e-06, "loss": 0.1104, "step": 22307 }, { "epoch": 0.6507964291965692, "grad_norm": 0.9885490723901301, "learning_rate": 2.8712574330401112e-06, "loss": 0.1156, "step": 22308 }, { "epoch": 0.6508256024272128, "grad_norm": 1.0284538432567505, "learning_rate": 2.8708299644959187e-06, "loss": 0.1092, "step": 22309 }, { "epoch": 0.6508547756578563, "grad_norm": 0.8335080222930304, "learning_rate": 2.8704025149604465e-06, "loss": 0.0943, "step": 22310 }, { "epoch": 0.6508839488884999, "grad_norm": 0.7431695653097307, "learning_rate": 2.8699750844375136e-06, "loss": 0.1068, "step": 22311 }, { "epoch": 0.6509131221191434, "grad_norm": 0.9368502487206658, "learning_rate": 2.8695476729309345e-06, "loss": 0.1296, "step": 22312 }, { "epoch": 0.650942295349787, "grad_norm": 0.668798187950919, "learning_rate": 2.869120280444522e-06, "loss": 0.1136, "step": 22313 }, { "epoch": 0.6509714685804306, "grad_norm": 1.0603467585020838, "learning_rate": 2.868692906982099e-06, "loss": 0.1578, "step": 22314 }, { "epoch": 0.6510006418110742, "grad_norm": 0.9109501761270273, "learning_rate": 2.868265552547477e-06, "loss": 0.1064, "step": 22315 }, { "epoch": 0.6510298150417178, "grad_norm": 0.8593863084108118, "learning_rate": 2.8678382171444686e-06, "loss": 0.1272, "step": 22316 }, { "epoch": 0.6510589882723613, "grad_norm": 1.026785609717716, "learning_rate": 2.8674109007768935e-06, "loss": 0.1226, "step": 22317 }, { "epoch": 0.6510881615030049, "grad_norm": 0.945703504374666, "learning_rate": 2.8669836034485655e-06, "loss": 0.1079, "step": 22318 }, { "epoch": 0.6511173347336484, "grad_norm": 0.7709154267247766, "learning_rate": 2.866556325163296e-06, "loss": 0.1226, "step": 22319 }, { "epoch": 0.651146507964292, "grad_norm": 0.7851121998357765, "learning_rate": 2.866129065924903e-06, "loss": 0.1164, "step": 22320 }, { "epoch": 0.6511756811949355, "grad_norm": 0.7446738456615879, "learning_rate": 2.8657018257372017e-06, "loss": 0.1214, "step": 22321 }, { "epoch": 0.6512048544255791, "grad_norm": 1.1456257114106072, "learning_rate": 2.8652746046040053e-06, "loss": 0.1531, "step": 22322 }, { "epoch": 0.6512340276562226, "grad_norm": 0.8452625852629456, "learning_rate": 2.8648474025291257e-06, "loss": 0.1209, "step": 22323 }, { "epoch": 0.6512632008868662, "grad_norm": 0.7001595159123495, "learning_rate": 2.8644202195163807e-06, "loss": 0.1125, "step": 22324 }, { "epoch": 0.6512923741175097, "grad_norm": 0.9826576706862223, "learning_rate": 2.86399305556958e-06, "loss": 0.123, "step": 22325 }, { "epoch": 0.6513215473481533, "grad_norm": 0.9083825273559737, "learning_rate": 2.8635659106925415e-06, "loss": 0.1181, "step": 22326 }, { "epoch": 0.651350720578797, "grad_norm": 0.8339662508236567, "learning_rate": 2.8631387848890744e-06, "loss": 0.1312, "step": 22327 }, { "epoch": 0.6513798938094405, "grad_norm": 1.1447321900342426, "learning_rate": 2.8627116781629966e-06, "loss": 0.1097, "step": 22328 }, { "epoch": 0.651409067040084, "grad_norm": 0.7953141510279437, "learning_rate": 2.8622845905181185e-06, "loss": 0.1135, "step": 22329 }, { "epoch": 0.6514382402707276, "grad_norm": 0.8991002693315734, "learning_rate": 2.8618575219582514e-06, "loss": 0.1194, "step": 22330 }, { "epoch": 0.6514674135013712, "grad_norm": 0.7034638652643109, "learning_rate": 2.8614304724872094e-06, "loss": 0.1043, "step": 22331 }, { "epoch": 0.6514965867320147, "grad_norm": 1.0133834699574427, "learning_rate": 2.8610034421088084e-06, "loss": 0.105, "step": 22332 }, { "epoch": 0.6515257599626583, "grad_norm": 0.815816599781631, "learning_rate": 2.8605764308268554e-06, "loss": 0.1279, "step": 22333 }, { "epoch": 0.6515549331933018, "grad_norm": 0.9157008933986431, "learning_rate": 2.860149438645168e-06, "loss": 0.1249, "step": 22334 }, { "epoch": 0.6515841064239454, "grad_norm": 0.8890247586150002, "learning_rate": 2.859722465567555e-06, "loss": 0.1009, "step": 22335 }, { "epoch": 0.6516132796545889, "grad_norm": 0.9735215141512003, "learning_rate": 2.8592955115978268e-06, "loss": 0.1254, "step": 22336 }, { "epoch": 0.6516424528852325, "grad_norm": 0.877890120340445, "learning_rate": 2.858868576739797e-06, "loss": 0.1311, "step": 22337 }, { "epoch": 0.651671626115876, "grad_norm": 0.7703131794966079, "learning_rate": 2.85844166099728e-06, "loss": 0.1164, "step": 22338 }, { "epoch": 0.6517007993465196, "grad_norm": 1.033143415958569, "learning_rate": 2.8580147643740847e-06, "loss": 0.146, "step": 22339 }, { "epoch": 0.6517299725771631, "grad_norm": 0.8663714746089348, "learning_rate": 2.8575878868740197e-06, "loss": 0.1347, "step": 22340 }, { "epoch": 0.6517591458078068, "grad_norm": 0.9090693280832018, "learning_rate": 2.857161028500901e-06, "loss": 0.1191, "step": 22341 }, { "epoch": 0.6517883190384504, "grad_norm": 0.6568736409347923, "learning_rate": 2.8567341892585373e-06, "loss": 0.1088, "step": 22342 }, { "epoch": 0.6518174922690939, "grad_norm": 0.9747536446220381, "learning_rate": 2.8563073691507346e-06, "loss": 0.1181, "step": 22343 }, { "epoch": 0.6518466654997375, "grad_norm": 0.8898675146297077, "learning_rate": 2.8558805681813123e-06, "loss": 0.1284, "step": 22344 }, { "epoch": 0.651875838730381, "grad_norm": 0.7639550982067947, "learning_rate": 2.8554537863540766e-06, "loss": 0.1247, "step": 22345 }, { "epoch": 0.6519050119610246, "grad_norm": 0.9532939328691233, "learning_rate": 2.855027023672835e-06, "loss": 0.1101, "step": 22346 }, { "epoch": 0.6519341851916681, "grad_norm": 0.8518130061041179, "learning_rate": 2.854600280141403e-06, "loss": 0.1437, "step": 22347 }, { "epoch": 0.6519633584223117, "grad_norm": 0.7728245534145937, "learning_rate": 2.8541735557635863e-06, "loss": 0.1003, "step": 22348 }, { "epoch": 0.6519925316529552, "grad_norm": 0.9433016687814527, "learning_rate": 2.853746850543195e-06, "loss": 0.1256, "step": 22349 }, { "epoch": 0.6520217048835988, "grad_norm": 0.8137539866414318, "learning_rate": 2.8533201644840392e-06, "loss": 0.0998, "step": 22350 }, { "epoch": 0.6520508781142423, "grad_norm": 0.5994125075179363, "learning_rate": 2.8528934975899303e-06, "loss": 0.1085, "step": 22351 }, { "epoch": 0.6520800513448859, "grad_norm": 0.8790534188690425, "learning_rate": 2.8524668498646755e-06, "loss": 0.141, "step": 22352 }, { "epoch": 0.6521092245755294, "grad_norm": 1.018077477680946, "learning_rate": 2.852040221312082e-06, "loss": 0.1224, "step": 22353 }, { "epoch": 0.6521383978061731, "grad_norm": 0.8866096984268983, "learning_rate": 2.851613611935963e-06, "loss": 0.1205, "step": 22354 }, { "epoch": 0.6521675710368167, "grad_norm": 0.697688642917243, "learning_rate": 2.8511870217401227e-06, "loss": 0.1191, "step": 22355 }, { "epoch": 0.6521967442674602, "grad_norm": 0.919552829781995, "learning_rate": 2.8507604507283736e-06, "loss": 0.1152, "step": 22356 }, { "epoch": 0.6522259174981038, "grad_norm": 0.9177034296130838, "learning_rate": 2.8503338989045202e-06, "loss": 0.092, "step": 22357 }, { "epoch": 0.6522550907287473, "grad_norm": 1.0123413429136259, "learning_rate": 2.8499073662723743e-06, "loss": 0.1375, "step": 22358 }, { "epoch": 0.6522842639593909, "grad_norm": 0.8239520774706534, "learning_rate": 2.8494808528357424e-06, "loss": 0.1055, "step": 22359 }, { "epoch": 0.6523134371900344, "grad_norm": 0.8823536883204104, "learning_rate": 2.8490543585984303e-06, "loss": 0.114, "step": 22360 }, { "epoch": 0.652342610420678, "grad_norm": 0.8723999640850891, "learning_rate": 2.8486278835642474e-06, "loss": 0.1116, "step": 22361 }, { "epoch": 0.6523717836513215, "grad_norm": 0.8112371910259645, "learning_rate": 2.848201427737003e-06, "loss": 0.1235, "step": 22362 }, { "epoch": 0.6524009568819651, "grad_norm": 0.8758281179562669, "learning_rate": 2.8477749911205007e-06, "loss": 0.1195, "step": 22363 }, { "epoch": 0.6524301301126086, "grad_norm": 0.7635111746276324, "learning_rate": 2.8473485737185513e-06, "loss": 0.118, "step": 22364 }, { "epoch": 0.6524593033432522, "grad_norm": 0.6954318845365827, "learning_rate": 2.8469221755349596e-06, "loss": 0.1204, "step": 22365 }, { "epoch": 0.6524884765738957, "grad_norm": 1.0995960484615341, "learning_rate": 2.8464957965735317e-06, "loss": 0.1341, "step": 22366 }, { "epoch": 0.6525176498045393, "grad_norm": 0.8960672492372462, "learning_rate": 2.846069436838075e-06, "loss": 0.1214, "step": 22367 }, { "epoch": 0.652546823035183, "grad_norm": 0.9486649441543449, "learning_rate": 2.8456430963323977e-06, "loss": 0.1361, "step": 22368 }, { "epoch": 0.6525759962658265, "grad_norm": 0.7028028926129187, "learning_rate": 2.8452167750603044e-06, "loss": 0.1102, "step": 22369 }, { "epoch": 0.6526051694964701, "grad_norm": 1.4042321279524002, "learning_rate": 2.8447904730256e-06, "loss": 0.1386, "step": 22370 }, { "epoch": 0.6526343427271136, "grad_norm": 1.2118493118809086, "learning_rate": 2.8443641902320935e-06, "loss": 0.1075, "step": 22371 }, { "epoch": 0.6526635159577572, "grad_norm": 0.8708631911218182, "learning_rate": 2.8439379266835888e-06, "loss": 0.1152, "step": 22372 }, { "epoch": 0.6526926891884007, "grad_norm": 0.8939611154119125, "learning_rate": 2.843511682383888e-06, "loss": 0.1383, "step": 22373 }, { "epoch": 0.6527218624190443, "grad_norm": 1.197413725856025, "learning_rate": 2.843085457336804e-06, "loss": 0.1162, "step": 22374 }, { "epoch": 0.6527510356496878, "grad_norm": 1.2039479656733154, "learning_rate": 2.842659251546137e-06, "loss": 0.1101, "step": 22375 }, { "epoch": 0.6527802088803314, "grad_norm": 0.7770492126751283, "learning_rate": 2.8422330650156926e-06, "loss": 0.1082, "step": 22376 }, { "epoch": 0.652809382110975, "grad_norm": 0.9534063290817145, "learning_rate": 2.8418068977492773e-06, "loss": 0.1278, "step": 22377 }, { "epoch": 0.6528385553416185, "grad_norm": 1.0784775794238626, "learning_rate": 2.841380749750696e-06, "loss": 0.118, "step": 22378 }, { "epoch": 0.652867728572262, "grad_norm": 1.0676492238026483, "learning_rate": 2.840954621023749e-06, "loss": 0.1132, "step": 22379 }, { "epoch": 0.6528969018029056, "grad_norm": 0.8335513307074479, "learning_rate": 2.840528511572245e-06, "loss": 0.139, "step": 22380 }, { "epoch": 0.6529260750335493, "grad_norm": 0.9354201849273656, "learning_rate": 2.840102421399987e-06, "loss": 0.1319, "step": 22381 }, { "epoch": 0.6529552482641928, "grad_norm": 1.068194468994753, "learning_rate": 2.8396763505107804e-06, "loss": 0.1268, "step": 22382 }, { "epoch": 0.6529844214948364, "grad_norm": 0.8156923346856937, "learning_rate": 2.8392502989084255e-06, "loss": 0.1394, "step": 22383 }, { "epoch": 0.6530135947254799, "grad_norm": 1.0070871330619462, "learning_rate": 2.8388242665967296e-06, "loss": 0.1086, "step": 22384 }, { "epoch": 0.6530427679561235, "grad_norm": 1.074138388207415, "learning_rate": 2.838398253579493e-06, "loss": 0.1111, "step": 22385 }, { "epoch": 0.653071941186767, "grad_norm": 0.756491479175004, "learning_rate": 2.8379722598605233e-06, "loss": 0.106, "step": 22386 }, { "epoch": 0.6531011144174106, "grad_norm": 0.7841927221775205, "learning_rate": 2.8375462854436187e-06, "loss": 0.1163, "step": 22387 }, { "epoch": 0.6531302876480541, "grad_norm": 0.78904380879566, "learning_rate": 2.837120330332587e-06, "loss": 0.1191, "step": 22388 }, { "epoch": 0.6531594608786977, "grad_norm": 1.0917631612426648, "learning_rate": 2.8366943945312274e-06, "loss": 0.1252, "step": 22389 }, { "epoch": 0.6531886341093412, "grad_norm": 0.7741510810631309, "learning_rate": 2.836268478043343e-06, "loss": 0.1294, "step": 22390 }, { "epoch": 0.6532178073399848, "grad_norm": 0.7935141619877567, "learning_rate": 2.835842580872737e-06, "loss": 0.1197, "step": 22391 }, { "epoch": 0.6532469805706284, "grad_norm": 0.9354382676729552, "learning_rate": 2.835416703023214e-06, "loss": 0.1092, "step": 22392 }, { "epoch": 0.6532761538012719, "grad_norm": 1.1299759732247512, "learning_rate": 2.8349908444985706e-06, "loss": 0.1147, "step": 22393 }, { "epoch": 0.6533053270319155, "grad_norm": 0.8956681349786876, "learning_rate": 2.834565005302615e-06, "loss": 0.1234, "step": 22394 }, { "epoch": 0.6533345002625591, "grad_norm": 0.8416045818463777, "learning_rate": 2.8341391854391466e-06, "loss": 0.1204, "step": 22395 }, { "epoch": 0.6533636734932027, "grad_norm": 0.873502656204317, "learning_rate": 2.8337133849119643e-06, "loss": 0.125, "step": 22396 }, { "epoch": 0.6533928467238462, "grad_norm": 0.8876938485626714, "learning_rate": 2.8332876037248714e-06, "loss": 0.1262, "step": 22397 }, { "epoch": 0.6534220199544898, "grad_norm": 0.8923825238580209, "learning_rate": 2.8328618418816715e-06, "loss": 0.1073, "step": 22398 }, { "epoch": 0.6534511931851333, "grad_norm": 0.7873329537573146, "learning_rate": 2.8324360993861644e-06, "loss": 0.1056, "step": 22399 }, { "epoch": 0.6534803664157769, "grad_norm": 0.7710899821948057, "learning_rate": 2.832010376242148e-06, "loss": 0.1298, "step": 22400 }, { "epoch": 0.6535095396464204, "grad_norm": 1.0411339849268668, "learning_rate": 2.831584672453427e-06, "loss": 0.108, "step": 22401 }, { "epoch": 0.653538712877064, "grad_norm": 0.9291247455436145, "learning_rate": 2.831158988023801e-06, "loss": 0.1155, "step": 22402 }, { "epoch": 0.6535678861077076, "grad_norm": 0.8797433662321947, "learning_rate": 2.8307333229570653e-06, "loss": 0.1177, "step": 22403 }, { "epoch": 0.6535970593383511, "grad_norm": 0.8237735555776691, "learning_rate": 2.8303076772570292e-06, "loss": 0.1317, "step": 22404 }, { "epoch": 0.6536262325689947, "grad_norm": 0.8579782609770397, "learning_rate": 2.8298820509274876e-06, "loss": 0.1307, "step": 22405 }, { "epoch": 0.6536554057996382, "grad_norm": 0.8855548023531, "learning_rate": 2.8294564439722395e-06, "loss": 0.107, "step": 22406 }, { "epoch": 0.6536845790302818, "grad_norm": 1.0135499412180398, "learning_rate": 2.8290308563950876e-06, "loss": 0.1073, "step": 22407 }, { "epoch": 0.6537137522609254, "grad_norm": 0.8882936866312848, "learning_rate": 2.8286052881998303e-06, "loss": 0.1173, "step": 22408 }, { "epoch": 0.653742925491569, "grad_norm": 0.8248935598199801, "learning_rate": 2.8281797393902643e-06, "loss": 0.1424, "step": 22409 }, { "epoch": 0.6537720987222125, "grad_norm": 0.9835032623361226, "learning_rate": 2.8277542099701916e-06, "loss": 0.1118, "step": 22410 }, { "epoch": 0.6538012719528561, "grad_norm": 1.080433309396559, "learning_rate": 2.827328699943413e-06, "loss": 0.1374, "step": 22411 }, { "epoch": 0.6538304451834996, "grad_norm": 1.0401450762281055, "learning_rate": 2.826903209313725e-06, "loss": 0.1389, "step": 22412 }, { "epoch": 0.6538596184141432, "grad_norm": 0.8951298171915354, "learning_rate": 2.826477738084924e-06, "loss": 0.131, "step": 22413 }, { "epoch": 0.6538887916447867, "grad_norm": 0.7924965449107844, "learning_rate": 2.8260522862608123e-06, "loss": 0.1204, "step": 22414 }, { "epoch": 0.6539179648754303, "grad_norm": 0.8684814252637882, "learning_rate": 2.825626853845186e-06, "loss": 0.1343, "step": 22415 }, { "epoch": 0.6539471381060739, "grad_norm": 1.14344951259655, "learning_rate": 2.8252014408418455e-06, "loss": 0.132, "step": 22416 }, { "epoch": 0.6539763113367174, "grad_norm": 1.650113471243881, "learning_rate": 2.8247760472545856e-06, "loss": 0.1187, "step": 22417 }, { "epoch": 0.654005484567361, "grad_norm": 0.9882441329202631, "learning_rate": 2.8243506730872072e-06, "loss": 0.109, "step": 22418 }, { "epoch": 0.6540346577980045, "grad_norm": 0.806671554677657, "learning_rate": 2.8239253183435078e-06, "loss": 0.1143, "step": 22419 }, { "epoch": 0.6540638310286481, "grad_norm": 0.8737769216924756, "learning_rate": 2.8234999830272793e-06, "loss": 0.1258, "step": 22420 }, { "epoch": 0.6540930042592916, "grad_norm": 1.027183654696904, "learning_rate": 2.823074667142327e-06, "loss": 0.1008, "step": 22421 }, { "epoch": 0.6541221774899353, "grad_norm": 1.0784696427280314, "learning_rate": 2.822649370692444e-06, "loss": 0.1229, "step": 22422 }, { "epoch": 0.6541513507205788, "grad_norm": 1.0854113484420793, "learning_rate": 2.822224093681426e-06, "loss": 0.1082, "step": 22423 }, { "epoch": 0.6541805239512224, "grad_norm": 0.8773620291506504, "learning_rate": 2.8217988361130745e-06, "loss": 0.1538, "step": 22424 }, { "epoch": 0.654209697181866, "grad_norm": 1.052376493376747, "learning_rate": 2.8213735979911815e-06, "loss": 0.1084, "step": 22425 }, { "epoch": 0.6542388704125095, "grad_norm": 1.00853733576621, "learning_rate": 2.8209483793195434e-06, "loss": 0.1258, "step": 22426 }, { "epoch": 0.654268043643153, "grad_norm": 0.9335007307215261, "learning_rate": 2.8205231801019584e-06, "loss": 0.116, "step": 22427 }, { "epoch": 0.6542972168737966, "grad_norm": 0.7882432973844449, "learning_rate": 2.820098000342224e-06, "loss": 0.1072, "step": 22428 }, { "epoch": 0.6543263901044402, "grad_norm": 0.9348378620044736, "learning_rate": 2.8196728400441343e-06, "loss": 0.1013, "step": 22429 }, { "epoch": 0.6543555633350837, "grad_norm": 1.0358406809412704, "learning_rate": 2.8192476992114825e-06, "loss": 0.1209, "step": 22430 }, { "epoch": 0.6543847365657273, "grad_norm": 0.9515981882527773, "learning_rate": 2.8188225778480694e-06, "loss": 0.1123, "step": 22431 }, { "epoch": 0.6544139097963708, "grad_norm": 0.854083675080113, "learning_rate": 2.818397475957685e-06, "loss": 0.1463, "step": 22432 }, { "epoch": 0.6544430830270144, "grad_norm": 1.0071188848555603, "learning_rate": 2.8179723935441273e-06, "loss": 0.1338, "step": 22433 }, { "epoch": 0.6544722562576579, "grad_norm": 0.9228795996135992, "learning_rate": 2.8175473306111932e-06, "loss": 0.1131, "step": 22434 }, { "epoch": 0.6545014294883016, "grad_norm": 1.0131591896620302, "learning_rate": 2.817122287162676e-06, "loss": 0.147, "step": 22435 }, { "epoch": 0.6545306027189451, "grad_norm": 0.9948899904967381, "learning_rate": 2.816697263202367e-06, "loss": 0.1174, "step": 22436 }, { "epoch": 0.6545597759495887, "grad_norm": 0.8279068293792077, "learning_rate": 2.8162722587340663e-06, "loss": 0.1318, "step": 22437 }, { "epoch": 0.6545889491802322, "grad_norm": 0.9452393538791887, "learning_rate": 2.815847273761564e-06, "loss": 0.1519, "step": 22438 }, { "epoch": 0.6546181224108758, "grad_norm": 0.8785567153294748, "learning_rate": 2.8154223082886568e-06, "loss": 0.1204, "step": 22439 }, { "epoch": 0.6546472956415194, "grad_norm": 0.8682213748099582, "learning_rate": 2.8149973623191363e-06, "loss": 0.1223, "step": 22440 }, { "epoch": 0.6546764688721629, "grad_norm": 0.794535610763149, "learning_rate": 2.8145724358567994e-06, "loss": 0.1071, "step": 22441 }, { "epoch": 0.6547056421028065, "grad_norm": 0.8324289979702626, "learning_rate": 2.8141475289054387e-06, "loss": 0.1357, "step": 22442 }, { "epoch": 0.65473481533345, "grad_norm": 0.946816262503394, "learning_rate": 2.8137226414688447e-06, "loss": 0.1198, "step": 22443 }, { "epoch": 0.6547639885640936, "grad_norm": 0.9347654787220115, "learning_rate": 2.8132977735508125e-06, "loss": 0.1156, "step": 22444 }, { "epoch": 0.6547931617947371, "grad_norm": 1.054201416734486, "learning_rate": 2.812872925155139e-06, "loss": 0.1205, "step": 22445 }, { "epoch": 0.6548223350253807, "grad_norm": 0.8743155275048713, "learning_rate": 2.812448096285613e-06, "loss": 0.1164, "step": 22446 }, { "epoch": 0.6548515082560242, "grad_norm": 0.989058469367441, "learning_rate": 2.812023286946026e-06, "loss": 0.1224, "step": 22447 }, { "epoch": 0.6548806814866678, "grad_norm": 0.8370971723206385, "learning_rate": 2.8115984971401753e-06, "loss": 0.1297, "step": 22448 }, { "epoch": 0.6549098547173114, "grad_norm": 1.2293559844157345, "learning_rate": 2.8111737268718507e-06, "loss": 0.1396, "step": 22449 }, { "epoch": 0.654939027947955, "grad_norm": 0.7159436549623293, "learning_rate": 2.8107489761448416e-06, "loss": 0.1094, "step": 22450 }, { "epoch": 0.6549682011785986, "grad_norm": 1.059710812761776, "learning_rate": 2.8103242449629455e-06, "loss": 0.1245, "step": 22451 }, { "epoch": 0.6549973744092421, "grad_norm": 1.0762803144274677, "learning_rate": 2.8098995333299522e-06, "loss": 0.132, "step": 22452 }, { "epoch": 0.6550265476398857, "grad_norm": 0.9713609543462169, "learning_rate": 2.8094748412496507e-06, "loss": 0.1087, "step": 22453 }, { "epoch": 0.6550557208705292, "grad_norm": 0.9352136934925658, "learning_rate": 2.8090501687258378e-06, "loss": 0.135, "step": 22454 }, { "epoch": 0.6550848941011728, "grad_norm": 1.009512518613979, "learning_rate": 2.8086255157623017e-06, "loss": 0.1451, "step": 22455 }, { "epoch": 0.6551140673318163, "grad_norm": 1.3412461828688422, "learning_rate": 2.8082008823628313e-06, "loss": 0.1209, "step": 22456 }, { "epoch": 0.6551432405624599, "grad_norm": 1.0134594124748277, "learning_rate": 2.807776268531221e-06, "loss": 0.1165, "step": 22457 }, { "epoch": 0.6551724137931034, "grad_norm": 0.9105265471124363, "learning_rate": 2.8073516742712626e-06, "loss": 0.1167, "step": 22458 }, { "epoch": 0.655201587023747, "grad_norm": 0.9796945647382419, "learning_rate": 2.8069270995867447e-06, "loss": 0.1277, "step": 22459 }, { "epoch": 0.6552307602543905, "grad_norm": 1.2798380530827744, "learning_rate": 2.8065025444814566e-06, "loss": 0.1201, "step": 22460 }, { "epoch": 0.6552599334850341, "grad_norm": 0.8342012696107435, "learning_rate": 2.8060780089591915e-06, "loss": 0.1418, "step": 22461 }, { "epoch": 0.6552891067156777, "grad_norm": 0.77476631915906, "learning_rate": 2.8056534930237367e-06, "loss": 0.1372, "step": 22462 }, { "epoch": 0.6553182799463213, "grad_norm": 0.7909487902491348, "learning_rate": 2.8052289966788838e-06, "loss": 0.1076, "step": 22463 }, { "epoch": 0.6553474531769649, "grad_norm": 0.6720644960359576, "learning_rate": 2.804804519928424e-06, "loss": 0.0971, "step": 22464 }, { "epoch": 0.6553766264076084, "grad_norm": 0.793948908039111, "learning_rate": 2.8043800627761453e-06, "loss": 0.111, "step": 22465 }, { "epoch": 0.655405799638252, "grad_norm": 1.0834419645421325, "learning_rate": 2.803955625225836e-06, "loss": 0.1469, "step": 22466 }, { "epoch": 0.6554349728688955, "grad_norm": 0.8105128864027233, "learning_rate": 2.803531207281288e-06, "loss": 0.1049, "step": 22467 }, { "epoch": 0.6554641460995391, "grad_norm": 0.8490429674101937, "learning_rate": 2.8031068089462874e-06, "loss": 0.1274, "step": 22468 }, { "epoch": 0.6554933193301826, "grad_norm": 0.7610165214629974, "learning_rate": 2.802682430224627e-06, "loss": 0.1364, "step": 22469 }, { "epoch": 0.6555224925608262, "grad_norm": 1.0817781727835747, "learning_rate": 2.802258071120091e-06, "loss": 0.1397, "step": 22470 }, { "epoch": 0.6555516657914697, "grad_norm": 0.8705608230901531, "learning_rate": 2.801833731636472e-06, "loss": 0.1193, "step": 22471 }, { "epoch": 0.6555808390221133, "grad_norm": 0.808086343932562, "learning_rate": 2.801409411777557e-06, "loss": 0.1137, "step": 22472 }, { "epoch": 0.6556100122527568, "grad_norm": 0.8992175199035769, "learning_rate": 2.800985111547132e-06, "loss": 0.1204, "step": 22473 }, { "epoch": 0.6556391854834004, "grad_norm": 0.7156651496369794, "learning_rate": 2.800560830948987e-06, "loss": 0.11, "step": 22474 }, { "epoch": 0.6556683587140439, "grad_norm": 0.7464135277631306, "learning_rate": 2.8001365699869108e-06, "loss": 0.1284, "step": 22475 }, { "epoch": 0.6556975319446876, "grad_norm": 1.0967097120862654, "learning_rate": 2.7997123286646916e-06, "loss": 0.1497, "step": 22476 }, { "epoch": 0.6557267051753312, "grad_norm": 0.9632295410992674, "learning_rate": 2.7992881069861135e-06, "loss": 0.1275, "step": 22477 }, { "epoch": 0.6557558784059747, "grad_norm": 0.8300706982170465, "learning_rate": 2.798863904954967e-06, "loss": 0.1543, "step": 22478 }, { "epoch": 0.6557850516366183, "grad_norm": 1.1260893811778192, "learning_rate": 2.798439722575038e-06, "loss": 0.1329, "step": 22479 }, { "epoch": 0.6558142248672618, "grad_norm": 0.8562290905610475, "learning_rate": 2.79801555985011e-06, "loss": 0.1233, "step": 22480 }, { "epoch": 0.6558433980979054, "grad_norm": 0.8569677028540854, "learning_rate": 2.797591416783978e-06, "loss": 0.1196, "step": 22481 }, { "epoch": 0.6558725713285489, "grad_norm": 0.7703821745089594, "learning_rate": 2.7971672933804227e-06, "loss": 0.133, "step": 22482 }, { "epoch": 0.6559017445591925, "grad_norm": 0.7191233824796355, "learning_rate": 2.796743189643231e-06, "loss": 0.1125, "step": 22483 }, { "epoch": 0.655930917789836, "grad_norm": 0.9049161664824638, "learning_rate": 2.7963191055761916e-06, "loss": 0.1316, "step": 22484 }, { "epoch": 0.6559600910204796, "grad_norm": 0.7079508217902415, "learning_rate": 2.79589504118309e-06, "loss": 0.098, "step": 22485 }, { "epoch": 0.6559892642511231, "grad_norm": 0.8930720409388776, "learning_rate": 2.7954709964677083e-06, "loss": 0.129, "step": 22486 }, { "epoch": 0.6560184374817667, "grad_norm": 0.802453894170231, "learning_rate": 2.7950469714338356e-06, "loss": 0.1348, "step": 22487 }, { "epoch": 0.6560476107124102, "grad_norm": 0.9234038288708448, "learning_rate": 2.7946229660852598e-06, "loss": 0.1159, "step": 22488 }, { "epoch": 0.6560767839430538, "grad_norm": 0.7178968242083876, "learning_rate": 2.7941989804257628e-06, "loss": 0.1225, "step": 22489 }, { "epoch": 0.6561059571736975, "grad_norm": 0.8047241178207227, "learning_rate": 2.793775014459129e-06, "loss": 0.1288, "step": 22490 }, { "epoch": 0.656135130404341, "grad_norm": 0.8627289103067527, "learning_rate": 2.7933510681891477e-06, "loss": 0.1203, "step": 22491 }, { "epoch": 0.6561643036349846, "grad_norm": 0.8776410857739505, "learning_rate": 2.792927141619599e-06, "loss": 0.121, "step": 22492 }, { "epoch": 0.6561934768656281, "grad_norm": 0.9019677742950784, "learning_rate": 2.79250323475427e-06, "loss": 0.1078, "step": 22493 }, { "epoch": 0.6562226500962717, "grad_norm": 0.6099422452034862, "learning_rate": 2.7920793475969465e-06, "loss": 0.1132, "step": 22494 }, { "epoch": 0.6562518233269152, "grad_norm": 1.3063584220997537, "learning_rate": 2.7916554801514124e-06, "loss": 0.1269, "step": 22495 }, { "epoch": 0.6562809965575588, "grad_norm": 0.7329641995200221, "learning_rate": 2.7912316324214485e-06, "loss": 0.1181, "step": 22496 }, { "epoch": 0.6563101697882023, "grad_norm": 0.7523293310272482, "learning_rate": 2.790807804410843e-06, "loss": 0.1166, "step": 22497 }, { "epoch": 0.6563393430188459, "grad_norm": 0.8334991656815292, "learning_rate": 2.790383996123377e-06, "loss": 0.1153, "step": 22498 }, { "epoch": 0.6563685162494894, "grad_norm": 0.8202842930485915, "learning_rate": 2.7899602075628366e-06, "loss": 0.1288, "step": 22499 }, { "epoch": 0.656397689480133, "grad_norm": 1.188380710720048, "learning_rate": 2.789536438733002e-06, "loss": 0.0935, "step": 22500 }, { "epoch": 0.6564268627107765, "grad_norm": 0.810922022589193, "learning_rate": 2.7891126896376603e-06, "loss": 0.1157, "step": 22501 }, { "epoch": 0.6564560359414201, "grad_norm": 0.7876301060910118, "learning_rate": 2.7886889602805926e-06, "loss": 0.1291, "step": 22502 }, { "epoch": 0.6564852091720638, "grad_norm": 0.7881536119215248, "learning_rate": 2.7882652506655807e-06, "loss": 0.1145, "step": 22503 }, { "epoch": 0.6565143824027073, "grad_norm": 0.8918753539973705, "learning_rate": 2.787841560796408e-06, "loss": 0.1267, "step": 22504 }, { "epoch": 0.6565435556333509, "grad_norm": 1.1072156778997373, "learning_rate": 2.78741789067686e-06, "loss": 0.1108, "step": 22505 }, { "epoch": 0.6565727288639944, "grad_norm": 0.8877621015890508, "learning_rate": 2.7869942403107163e-06, "loss": 0.1276, "step": 22506 }, { "epoch": 0.656601902094638, "grad_norm": 0.8072881168843168, "learning_rate": 2.7865706097017585e-06, "loss": 0.1032, "step": 22507 }, { "epoch": 0.6566310753252815, "grad_norm": 0.9372479949765035, "learning_rate": 2.7861469988537714e-06, "loss": 0.1358, "step": 22508 }, { "epoch": 0.6566602485559251, "grad_norm": 0.8541840180347505, "learning_rate": 2.7857234077705355e-06, "loss": 0.131, "step": 22509 }, { "epoch": 0.6566894217865686, "grad_norm": 0.7326392057265572, "learning_rate": 2.7852998364558287e-06, "loss": 0.1091, "step": 22510 }, { "epoch": 0.6567185950172122, "grad_norm": 1.046935756928821, "learning_rate": 2.7848762849134405e-06, "loss": 0.1423, "step": 22511 }, { "epoch": 0.6567477682478557, "grad_norm": 0.9414506643276448, "learning_rate": 2.784452753147147e-06, "loss": 0.14, "step": 22512 }, { "epoch": 0.6567769414784993, "grad_norm": 0.7318518189924511, "learning_rate": 2.7840292411607296e-06, "loss": 0.1243, "step": 22513 }, { "epoch": 0.6568061147091429, "grad_norm": 0.818140835759817, "learning_rate": 2.7836057489579714e-06, "loss": 0.1148, "step": 22514 }, { "epoch": 0.6568352879397864, "grad_norm": 0.9527922379739329, "learning_rate": 2.783182276542652e-06, "loss": 0.1245, "step": 22515 }, { "epoch": 0.65686446117043, "grad_norm": 0.8002737478246307, "learning_rate": 2.7827588239185497e-06, "loss": 0.123, "step": 22516 }, { "epoch": 0.6568936344010736, "grad_norm": 0.8946948698519541, "learning_rate": 2.7823353910894486e-06, "loss": 0.1199, "step": 22517 }, { "epoch": 0.6569228076317172, "grad_norm": 0.7873289502771038, "learning_rate": 2.7819119780591284e-06, "loss": 0.0966, "step": 22518 }, { "epoch": 0.6569519808623607, "grad_norm": 0.8008015819465731, "learning_rate": 2.7814885848313692e-06, "loss": 0.1136, "step": 22519 }, { "epoch": 0.6569811540930043, "grad_norm": 1.103038646793943, "learning_rate": 2.7810652114099483e-06, "loss": 0.1178, "step": 22520 }, { "epoch": 0.6570103273236478, "grad_norm": 0.9026383226586517, "learning_rate": 2.7806418577986494e-06, "loss": 0.1279, "step": 22521 }, { "epoch": 0.6570395005542914, "grad_norm": 0.8547687783967982, "learning_rate": 2.7802185240012485e-06, "loss": 0.1179, "step": 22522 }, { "epoch": 0.6570686737849349, "grad_norm": 1.0004540488073015, "learning_rate": 2.7797952100215263e-06, "loss": 0.1589, "step": 22523 }, { "epoch": 0.6570978470155785, "grad_norm": 0.8353649868482187, "learning_rate": 2.779371915863265e-06, "loss": 0.1183, "step": 22524 }, { "epoch": 0.657127020246222, "grad_norm": 0.7423876304035788, "learning_rate": 2.7789486415302404e-06, "loss": 0.1019, "step": 22525 }, { "epoch": 0.6571561934768656, "grad_norm": 0.8084602701288659, "learning_rate": 2.778525387026231e-06, "loss": 0.111, "step": 22526 }, { "epoch": 0.6571853667075092, "grad_norm": 0.823783401402058, "learning_rate": 2.7781021523550177e-06, "loss": 0.1175, "step": 22527 }, { "epoch": 0.6572145399381527, "grad_norm": 0.8878706143548062, "learning_rate": 2.777678937520376e-06, "loss": 0.1321, "step": 22528 }, { "epoch": 0.6572437131687963, "grad_norm": 0.9299055328002069, "learning_rate": 2.7772557425260886e-06, "loss": 0.114, "step": 22529 }, { "epoch": 0.6572728863994399, "grad_norm": 0.8653418937087174, "learning_rate": 2.7768325673759296e-06, "loss": 0.1359, "step": 22530 }, { "epoch": 0.6573020596300835, "grad_norm": 0.7045029678143745, "learning_rate": 2.7764094120736805e-06, "loss": 0.1325, "step": 22531 }, { "epoch": 0.657331232860727, "grad_norm": 2.7500356859860577, "learning_rate": 2.775986276623117e-06, "loss": 0.1112, "step": 22532 }, { "epoch": 0.6573604060913706, "grad_norm": 0.7649597187975548, "learning_rate": 2.7755631610280154e-06, "loss": 0.1313, "step": 22533 }, { "epoch": 0.6573895793220141, "grad_norm": 0.7479922180573809, "learning_rate": 2.775140065292155e-06, "loss": 0.1211, "step": 22534 }, { "epoch": 0.6574187525526577, "grad_norm": 0.7180944342727001, "learning_rate": 2.7747169894193148e-06, "loss": 0.1061, "step": 22535 }, { "epoch": 0.6574479257833012, "grad_norm": 0.6721148205570775, "learning_rate": 2.77429393341327e-06, "loss": 0.0963, "step": 22536 }, { "epoch": 0.6574770990139448, "grad_norm": 0.7612768261335503, "learning_rate": 2.7738708972777963e-06, "loss": 0.1313, "step": 22537 }, { "epoch": 0.6575062722445884, "grad_norm": 0.9427584560829441, "learning_rate": 2.7734478810166734e-06, "loss": 0.1453, "step": 22538 }, { "epoch": 0.6575354454752319, "grad_norm": 0.7583288904935364, "learning_rate": 2.773024884633676e-06, "loss": 0.1043, "step": 22539 }, { "epoch": 0.6575646187058755, "grad_norm": 0.9393309194213926, "learning_rate": 2.772601908132577e-06, "loss": 0.1476, "step": 22540 }, { "epoch": 0.657593791936519, "grad_norm": 0.9863844353901349, "learning_rate": 2.7721789515171605e-06, "loss": 0.1364, "step": 22541 }, { "epoch": 0.6576229651671626, "grad_norm": 1.2494265439675063, "learning_rate": 2.771756014791198e-06, "loss": 0.1439, "step": 22542 }, { "epoch": 0.6576521383978061, "grad_norm": 0.5955301698206025, "learning_rate": 2.7713330979584645e-06, "loss": 0.0968, "step": 22543 }, { "epoch": 0.6576813116284498, "grad_norm": 1.136010740585745, "learning_rate": 2.770910201022739e-06, "loss": 0.1314, "step": 22544 }, { "epoch": 0.6577104848590933, "grad_norm": 1.0458930875849077, "learning_rate": 2.770487323987795e-06, "loss": 0.1283, "step": 22545 }, { "epoch": 0.6577396580897369, "grad_norm": 1.3174283796579231, "learning_rate": 2.770064466857406e-06, "loss": 0.1136, "step": 22546 }, { "epoch": 0.6577688313203804, "grad_norm": 0.8495506250413206, "learning_rate": 2.769641629635349e-06, "loss": 0.1291, "step": 22547 }, { "epoch": 0.657798004551024, "grad_norm": 0.8198223648100067, "learning_rate": 2.769218812325401e-06, "loss": 0.1226, "step": 22548 }, { "epoch": 0.6578271777816675, "grad_norm": 0.7818460254895462, "learning_rate": 2.7687960149313354e-06, "loss": 0.128, "step": 22549 }, { "epoch": 0.6578563510123111, "grad_norm": 1.0414744926522435, "learning_rate": 2.7683732374569237e-06, "loss": 0.1172, "step": 22550 }, { "epoch": 0.6578855242429547, "grad_norm": 0.8192019799411547, "learning_rate": 2.7679504799059454e-06, "loss": 0.1012, "step": 22551 }, { "epoch": 0.6579146974735982, "grad_norm": 0.662398181258038, "learning_rate": 2.76752774228217e-06, "loss": 0.1066, "step": 22552 }, { "epoch": 0.6579438707042418, "grad_norm": 1.0746020643569365, "learning_rate": 2.767105024589375e-06, "loss": 0.1206, "step": 22553 }, { "epoch": 0.6579730439348853, "grad_norm": 0.9684590729721944, "learning_rate": 2.7666823268313342e-06, "loss": 0.1069, "step": 22554 }, { "epoch": 0.6580022171655289, "grad_norm": 1.0419397473849004, "learning_rate": 2.766259649011821e-06, "loss": 0.134, "step": 22555 }, { "epoch": 0.6580313903961724, "grad_norm": 0.8649779098765733, "learning_rate": 2.765836991134606e-06, "loss": 0.1424, "step": 22556 }, { "epoch": 0.6580605636268161, "grad_norm": 0.7707057213352337, "learning_rate": 2.765414353203467e-06, "loss": 0.086, "step": 22557 }, { "epoch": 0.6580897368574596, "grad_norm": 0.7544210102811724, "learning_rate": 2.7649917352221738e-06, "loss": 0.1119, "step": 22558 }, { "epoch": 0.6581189100881032, "grad_norm": 1.0636440454375342, "learning_rate": 2.764569137194503e-06, "loss": 0.1225, "step": 22559 }, { "epoch": 0.6581480833187467, "grad_norm": 0.7504238018266771, "learning_rate": 2.7641465591242224e-06, "loss": 0.0913, "step": 22560 }, { "epoch": 0.6581772565493903, "grad_norm": 0.8251551102768702, "learning_rate": 2.7637240010151103e-06, "loss": 0.1096, "step": 22561 }, { "epoch": 0.6582064297800339, "grad_norm": 0.8602708857579717, "learning_rate": 2.763301462870936e-06, "loss": 0.1281, "step": 22562 }, { "epoch": 0.6582356030106774, "grad_norm": 0.7887120908935958, "learning_rate": 2.7628789446954705e-06, "loss": 0.1217, "step": 22563 }, { "epoch": 0.658264776241321, "grad_norm": 0.9049183673836019, "learning_rate": 2.7624564464924874e-06, "loss": 0.1034, "step": 22564 }, { "epoch": 0.6582939494719645, "grad_norm": 0.889621781164339, "learning_rate": 2.7620339682657616e-06, "loss": 0.1307, "step": 22565 }, { "epoch": 0.6583231227026081, "grad_norm": 0.7334398805214061, "learning_rate": 2.761611510019062e-06, "loss": 0.1121, "step": 22566 }, { "epoch": 0.6583522959332516, "grad_norm": 0.7767238014675791, "learning_rate": 2.7611890717561584e-06, "loss": 0.142, "step": 22567 }, { "epoch": 0.6583814691638952, "grad_norm": 0.8985569388201397, "learning_rate": 2.7607666534808262e-06, "loss": 0.1152, "step": 22568 }, { "epoch": 0.6584106423945387, "grad_norm": 0.9604261215054245, "learning_rate": 2.760344255196835e-06, "loss": 0.1212, "step": 22569 }, { "epoch": 0.6584398156251823, "grad_norm": 1.0756881300950472, "learning_rate": 2.7599218769079518e-06, "loss": 0.1533, "step": 22570 }, { "epoch": 0.6584689888558259, "grad_norm": 0.6519123585154943, "learning_rate": 2.759499518617955e-06, "loss": 0.1068, "step": 22571 }, { "epoch": 0.6584981620864695, "grad_norm": 1.0883905142442654, "learning_rate": 2.759077180330612e-06, "loss": 0.1266, "step": 22572 }, { "epoch": 0.658527335317113, "grad_norm": 1.0998632526864929, "learning_rate": 2.758654862049691e-06, "loss": 0.1146, "step": 22573 }, { "epoch": 0.6585565085477566, "grad_norm": 0.9522597459448454, "learning_rate": 2.758232563778966e-06, "loss": 0.1441, "step": 22574 }, { "epoch": 0.6585856817784002, "grad_norm": 0.8295697964866922, "learning_rate": 2.7578102855222056e-06, "loss": 0.1267, "step": 22575 }, { "epoch": 0.6586148550090437, "grad_norm": 0.8743768041029046, "learning_rate": 2.757388027283178e-06, "loss": 0.1113, "step": 22576 }, { "epoch": 0.6586440282396873, "grad_norm": 1.2465440196922168, "learning_rate": 2.7569657890656543e-06, "loss": 0.131, "step": 22577 }, { "epoch": 0.6586732014703308, "grad_norm": 0.8671282291886849, "learning_rate": 2.7565435708734067e-06, "loss": 0.087, "step": 22578 }, { "epoch": 0.6587023747009744, "grad_norm": 1.0194614465372704, "learning_rate": 2.7561213727102026e-06, "loss": 0.114, "step": 22579 }, { "epoch": 0.6587315479316179, "grad_norm": 1.0730631218781865, "learning_rate": 2.7556991945798097e-06, "loss": 0.1359, "step": 22580 }, { "epoch": 0.6587607211622615, "grad_norm": 1.0049835996455878, "learning_rate": 2.755277036486e-06, "loss": 0.1217, "step": 22581 }, { "epoch": 0.658789894392905, "grad_norm": 0.9253939123222444, "learning_rate": 2.7548548984325392e-06, "loss": 0.1341, "step": 22582 }, { "epoch": 0.6588190676235486, "grad_norm": 0.8551010319335021, "learning_rate": 2.754432780423198e-06, "loss": 0.1269, "step": 22583 }, { "epoch": 0.6588482408541922, "grad_norm": 1.2062317792160597, "learning_rate": 2.7540106824617467e-06, "loss": 0.1574, "step": 22584 }, { "epoch": 0.6588774140848358, "grad_norm": 1.0425488044374756, "learning_rate": 2.753588604551952e-06, "loss": 0.1338, "step": 22585 }, { "epoch": 0.6589065873154794, "grad_norm": 0.6753586540234627, "learning_rate": 2.75316654669758e-06, "loss": 0.1133, "step": 22586 }, { "epoch": 0.6589357605461229, "grad_norm": 0.8399699469968134, "learning_rate": 2.752744508902403e-06, "loss": 0.1196, "step": 22587 }, { "epoch": 0.6589649337767665, "grad_norm": 1.1790994215607702, "learning_rate": 2.752322491170184e-06, "loss": 0.1364, "step": 22588 }, { "epoch": 0.65899410700741, "grad_norm": 0.8149822434798201, "learning_rate": 2.7519004935046955e-06, "loss": 0.1149, "step": 22589 }, { "epoch": 0.6590232802380536, "grad_norm": 1.0824127306644329, "learning_rate": 2.7514785159097006e-06, "loss": 0.1208, "step": 22590 }, { "epoch": 0.6590524534686971, "grad_norm": 0.8803409266018206, "learning_rate": 2.751056558388971e-06, "loss": 0.1272, "step": 22591 }, { "epoch": 0.6590816266993407, "grad_norm": 0.8494313396501025, "learning_rate": 2.7506346209462715e-06, "loss": 0.1227, "step": 22592 }, { "epoch": 0.6591107999299842, "grad_norm": 0.7793815655046239, "learning_rate": 2.7502127035853666e-06, "loss": 0.1403, "step": 22593 }, { "epoch": 0.6591399731606278, "grad_norm": 0.7266458890049697, "learning_rate": 2.7497908063100266e-06, "loss": 0.1105, "step": 22594 }, { "epoch": 0.6591691463912713, "grad_norm": 0.7358696537864176, "learning_rate": 2.7493689291240185e-06, "loss": 0.1348, "step": 22595 }, { "epoch": 0.6591983196219149, "grad_norm": 0.9577455005381101, "learning_rate": 2.7489470720311074e-06, "loss": 0.117, "step": 22596 }, { "epoch": 0.6592274928525584, "grad_norm": 0.8449180607899556, "learning_rate": 2.7485252350350576e-06, "loss": 0.1227, "step": 22597 }, { "epoch": 0.6592566660832021, "grad_norm": 0.8158164345241355, "learning_rate": 2.748103418139639e-06, "loss": 0.1136, "step": 22598 }, { "epoch": 0.6592858393138457, "grad_norm": 0.8134861595351345, "learning_rate": 2.747681621348615e-06, "loss": 0.1282, "step": 22599 }, { "epoch": 0.6593150125444892, "grad_norm": 1.0964996014100363, "learning_rate": 2.7472598446657484e-06, "loss": 0.1115, "step": 22600 }, { "epoch": 0.6593441857751328, "grad_norm": 0.828303490228993, "learning_rate": 2.746838088094812e-06, "loss": 0.116, "step": 22601 }, { "epoch": 0.6593733590057763, "grad_norm": 0.7322896981011644, "learning_rate": 2.746416351639567e-06, "loss": 0.1378, "step": 22602 }, { "epoch": 0.6594025322364199, "grad_norm": 0.7766145382743532, "learning_rate": 2.7459946353037775e-06, "loss": 0.1154, "step": 22603 }, { "epoch": 0.6594317054670634, "grad_norm": 0.751708619639476, "learning_rate": 2.7455729390912113e-06, "loss": 0.1155, "step": 22604 }, { "epoch": 0.659460878697707, "grad_norm": 0.6654296957356924, "learning_rate": 2.7451512630056323e-06, "loss": 0.1212, "step": 22605 }, { "epoch": 0.6594900519283505, "grad_norm": 0.7198116486010239, "learning_rate": 2.7447296070508017e-06, "loss": 0.1178, "step": 22606 }, { "epoch": 0.6595192251589941, "grad_norm": 1.5396030220829788, "learning_rate": 2.744307971230487e-06, "loss": 0.1119, "step": 22607 }, { "epoch": 0.6595483983896376, "grad_norm": 0.7797775196398998, "learning_rate": 2.7438863555484545e-06, "loss": 0.1219, "step": 22608 }, { "epoch": 0.6595775716202812, "grad_norm": 0.7060434245789803, "learning_rate": 2.7434647600084662e-06, "loss": 0.1189, "step": 22609 }, { "epoch": 0.6596067448509247, "grad_norm": 0.7733708686479651, "learning_rate": 2.7430431846142837e-06, "loss": 0.1101, "step": 22610 }, { "epoch": 0.6596359180815684, "grad_norm": 0.6482630170163397, "learning_rate": 2.742621629369675e-06, "loss": 0.1187, "step": 22611 }, { "epoch": 0.659665091312212, "grad_norm": 0.9201612115909791, "learning_rate": 2.742200094278399e-06, "loss": 0.125, "step": 22612 }, { "epoch": 0.6596942645428555, "grad_norm": 0.8470097144589287, "learning_rate": 2.741778579344222e-06, "loss": 0.1421, "step": 22613 }, { "epoch": 0.6597234377734991, "grad_norm": 0.7590184952460127, "learning_rate": 2.7413570845709086e-06, "loss": 0.1166, "step": 22614 }, { "epoch": 0.6597526110041426, "grad_norm": 0.8141803945777077, "learning_rate": 2.74093560996222e-06, "loss": 0.1323, "step": 22615 }, { "epoch": 0.6597817842347862, "grad_norm": 0.8725507264487203, "learning_rate": 2.740514155521917e-06, "loss": 0.1274, "step": 22616 }, { "epoch": 0.6598109574654297, "grad_norm": 0.9987485743600544, "learning_rate": 2.7400927212537643e-06, "loss": 0.139, "step": 22617 }, { "epoch": 0.6598401306960733, "grad_norm": 0.751603426942623, "learning_rate": 2.7396713071615262e-06, "loss": 0.1146, "step": 22618 }, { "epoch": 0.6598693039267168, "grad_norm": 0.7431142504141429, "learning_rate": 2.739249913248963e-06, "loss": 0.1342, "step": 22619 }, { "epoch": 0.6598984771573604, "grad_norm": 0.9901582677371695, "learning_rate": 2.7388285395198354e-06, "loss": 0.1384, "step": 22620 }, { "epoch": 0.6599276503880039, "grad_norm": 0.6486016174974297, "learning_rate": 2.738407185977908e-06, "loss": 0.1129, "step": 22621 }, { "epoch": 0.6599568236186475, "grad_norm": 0.7825818162792993, "learning_rate": 2.7379858526269422e-06, "loss": 0.105, "step": 22622 }, { "epoch": 0.659985996849291, "grad_norm": 0.7434131882275584, "learning_rate": 2.7375645394706963e-06, "loss": 0.119, "step": 22623 }, { "epoch": 0.6600151700799346, "grad_norm": 0.8849720606460794, "learning_rate": 2.7371432465129343e-06, "loss": 0.134, "step": 22624 }, { "epoch": 0.6600443433105783, "grad_norm": 0.749407026158925, "learning_rate": 2.736721973757419e-06, "loss": 0.0968, "step": 22625 }, { "epoch": 0.6600735165412218, "grad_norm": 0.7722278356763757, "learning_rate": 2.7363007212079097e-06, "loss": 0.1402, "step": 22626 }, { "epoch": 0.6601026897718654, "grad_norm": 0.9857979943000369, "learning_rate": 2.735879488868165e-06, "loss": 0.1364, "step": 22627 }, { "epoch": 0.6601318630025089, "grad_norm": 0.9618799872613633, "learning_rate": 2.7354582767419498e-06, "loss": 0.1221, "step": 22628 }, { "epoch": 0.6601610362331525, "grad_norm": 0.7715563866110873, "learning_rate": 2.7350370848330204e-06, "loss": 0.1237, "step": 22629 }, { "epoch": 0.660190209463796, "grad_norm": 0.9087764788045213, "learning_rate": 2.7346159131451396e-06, "loss": 0.1066, "step": 22630 }, { "epoch": 0.6602193826944396, "grad_norm": 1.2365590407456843, "learning_rate": 2.7341947616820686e-06, "loss": 0.1238, "step": 22631 }, { "epoch": 0.6602485559250831, "grad_norm": 0.8406230466167327, "learning_rate": 2.7337736304475665e-06, "loss": 0.1078, "step": 22632 }, { "epoch": 0.6602777291557267, "grad_norm": 0.6884332220566729, "learning_rate": 2.7333525194453904e-06, "loss": 0.1041, "step": 22633 }, { "epoch": 0.6603069023863702, "grad_norm": 0.8460422483031798, "learning_rate": 2.732931428679303e-06, "loss": 0.1381, "step": 22634 }, { "epoch": 0.6603360756170138, "grad_norm": 0.7864247637944488, "learning_rate": 2.7325103581530616e-06, "loss": 0.1057, "step": 22635 }, { "epoch": 0.6603652488476573, "grad_norm": 0.9469238268633753, "learning_rate": 2.732089307870428e-06, "loss": 0.1331, "step": 22636 }, { "epoch": 0.6603944220783009, "grad_norm": 0.8255544281769466, "learning_rate": 2.7316682778351576e-06, "loss": 0.1067, "step": 22637 }, { "epoch": 0.6604235953089446, "grad_norm": 0.7599272483817766, "learning_rate": 2.731247268051014e-06, "loss": 0.1138, "step": 22638 }, { "epoch": 0.6604527685395881, "grad_norm": 1.4022651437557578, "learning_rate": 2.730826278521753e-06, "loss": 0.1035, "step": 22639 }, { "epoch": 0.6604819417702317, "grad_norm": 0.8874944794084177, "learning_rate": 2.7304053092511307e-06, "loss": 0.098, "step": 22640 }, { "epoch": 0.6605111150008752, "grad_norm": 0.7912420615086282, "learning_rate": 2.7299843602429076e-06, "loss": 0.1167, "step": 22641 }, { "epoch": 0.6605402882315188, "grad_norm": 0.899065096134132, "learning_rate": 2.7295634315008456e-06, "loss": 0.1204, "step": 22642 }, { "epoch": 0.6605694614621623, "grad_norm": 1.2234695633380441, "learning_rate": 2.7291425230286962e-06, "loss": 0.1114, "step": 22643 }, { "epoch": 0.6605986346928059, "grad_norm": 0.7589885649474262, "learning_rate": 2.7287216348302225e-06, "loss": 0.1191, "step": 22644 }, { "epoch": 0.6606278079234494, "grad_norm": 1.024785208722725, "learning_rate": 2.7283007669091804e-06, "loss": 0.123, "step": 22645 }, { "epoch": 0.660656981154093, "grad_norm": 0.7557866587988469, "learning_rate": 2.727879919269324e-06, "loss": 0.1158, "step": 22646 }, { "epoch": 0.6606861543847365, "grad_norm": 0.8675824733934782, "learning_rate": 2.727459091914414e-06, "loss": 0.1318, "step": 22647 }, { "epoch": 0.6607153276153801, "grad_norm": 0.9281154785377084, "learning_rate": 2.727038284848208e-06, "loss": 0.1035, "step": 22648 }, { "epoch": 0.6607445008460237, "grad_norm": 0.973165458035548, "learning_rate": 2.726617498074462e-06, "loss": 0.1208, "step": 22649 }, { "epoch": 0.6607736740766672, "grad_norm": 0.7990752633850764, "learning_rate": 2.7261967315969307e-06, "loss": 0.1154, "step": 22650 }, { "epoch": 0.6608028473073108, "grad_norm": 0.9689587981984759, "learning_rate": 2.7257759854193735e-06, "loss": 0.1187, "step": 22651 }, { "epoch": 0.6608320205379544, "grad_norm": 1.0824105336467058, "learning_rate": 2.7253552595455458e-06, "loss": 0.1379, "step": 22652 }, { "epoch": 0.660861193768598, "grad_norm": 0.976149906546021, "learning_rate": 2.724934553979201e-06, "loss": 0.1237, "step": 22653 }, { "epoch": 0.6608903669992415, "grad_norm": 0.8239511261071738, "learning_rate": 2.724513868724098e-06, "loss": 0.1063, "step": 22654 }, { "epoch": 0.6609195402298851, "grad_norm": 0.7308597033836158, "learning_rate": 2.724093203783993e-06, "loss": 0.1142, "step": 22655 }, { "epoch": 0.6609487134605286, "grad_norm": 1.0008807120680314, "learning_rate": 2.7236725591626413e-06, "loss": 0.0989, "step": 22656 }, { "epoch": 0.6609778866911722, "grad_norm": 0.8664122835198478, "learning_rate": 2.7232519348637955e-06, "loss": 0.1116, "step": 22657 }, { "epoch": 0.6610070599218157, "grad_norm": 0.7248420383427195, "learning_rate": 2.7228313308912145e-06, "loss": 0.1204, "step": 22658 }, { "epoch": 0.6610362331524593, "grad_norm": 0.7085192623477988, "learning_rate": 2.7224107472486504e-06, "loss": 0.133, "step": 22659 }, { "epoch": 0.6610654063831028, "grad_norm": 0.9273281691664607, "learning_rate": 2.721990183939859e-06, "loss": 0.1119, "step": 22660 }, { "epoch": 0.6610945796137464, "grad_norm": 0.7399050138849338, "learning_rate": 2.7215696409685977e-06, "loss": 0.1035, "step": 22661 }, { "epoch": 0.66112375284439, "grad_norm": 0.7656988761837674, "learning_rate": 2.7211491183386185e-06, "loss": 0.1336, "step": 22662 }, { "epoch": 0.6611529260750335, "grad_norm": 1.0817766966059668, "learning_rate": 2.720728616053674e-06, "loss": 0.1288, "step": 22663 }, { "epoch": 0.6611820993056771, "grad_norm": 0.9326493487858707, "learning_rate": 2.7203081341175225e-06, "loss": 0.1007, "step": 22664 }, { "epoch": 0.6612112725363207, "grad_norm": 0.8270822126515731, "learning_rate": 2.7198876725339143e-06, "loss": 0.1072, "step": 22665 }, { "epoch": 0.6612404457669643, "grad_norm": 0.9552120809115704, "learning_rate": 2.719467231306605e-06, "loss": 0.143, "step": 22666 }, { "epoch": 0.6612696189976078, "grad_norm": 1.0792521373628325, "learning_rate": 2.7190468104393474e-06, "loss": 0.1209, "step": 22667 }, { "epoch": 0.6612987922282514, "grad_norm": 0.8389385598404163, "learning_rate": 2.7186264099358965e-06, "loss": 0.0945, "step": 22668 }, { "epoch": 0.6613279654588949, "grad_norm": 0.8101273566871517, "learning_rate": 2.7182060298000047e-06, "loss": 0.1457, "step": 22669 }, { "epoch": 0.6613571386895385, "grad_norm": 0.8277979793603644, "learning_rate": 2.7177856700354233e-06, "loss": 0.1285, "step": 22670 }, { "epoch": 0.661386311920182, "grad_norm": 1.0234917416377036, "learning_rate": 2.7173653306459056e-06, "loss": 0.1152, "step": 22671 }, { "epoch": 0.6614154851508256, "grad_norm": 0.6416059728778798, "learning_rate": 2.716945011635208e-06, "loss": 0.0961, "step": 22672 }, { "epoch": 0.6614446583814692, "grad_norm": 0.7594503786367398, "learning_rate": 2.716524713007078e-06, "loss": 0.1093, "step": 22673 }, { "epoch": 0.6614738316121127, "grad_norm": 0.7648313826222802, "learning_rate": 2.716104434765273e-06, "loss": 0.1218, "step": 22674 }, { "epoch": 0.6615030048427563, "grad_norm": 0.6918303444233517, "learning_rate": 2.715684176913542e-06, "loss": 0.1183, "step": 22675 }, { "epoch": 0.6615321780733998, "grad_norm": 0.8202650141884076, "learning_rate": 2.7152639394556345e-06, "loss": 0.1297, "step": 22676 }, { "epoch": 0.6615613513040434, "grad_norm": 0.8170915936510572, "learning_rate": 2.7148437223953063e-06, "loss": 0.1175, "step": 22677 }, { "epoch": 0.6615905245346869, "grad_norm": 1.0014751947777412, "learning_rate": 2.7144235257363095e-06, "loss": 0.1346, "step": 22678 }, { "epoch": 0.6616196977653306, "grad_norm": 1.1731135235685226, "learning_rate": 2.7140033494823937e-06, "loss": 0.1249, "step": 22679 }, { "epoch": 0.6616488709959741, "grad_norm": 0.813509199094576, "learning_rate": 2.713583193637308e-06, "loss": 0.1167, "step": 22680 }, { "epoch": 0.6616780442266177, "grad_norm": 0.9376931570758963, "learning_rate": 2.713163058204808e-06, "loss": 0.118, "step": 22681 }, { "epoch": 0.6617072174572612, "grad_norm": 0.9350101979027772, "learning_rate": 2.712742943188642e-06, "loss": 0.1092, "step": 22682 }, { "epoch": 0.6617363906879048, "grad_norm": 0.7997215927470465, "learning_rate": 2.7123228485925603e-06, "loss": 0.1516, "step": 22683 }, { "epoch": 0.6617655639185483, "grad_norm": 1.004204197024303, "learning_rate": 2.7119027744203125e-06, "loss": 0.1301, "step": 22684 }, { "epoch": 0.6617947371491919, "grad_norm": 0.8437709970552765, "learning_rate": 2.7114827206756534e-06, "loss": 0.1384, "step": 22685 }, { "epoch": 0.6618239103798355, "grad_norm": 0.7818249507810892, "learning_rate": 2.71106268736233e-06, "loss": 0.1193, "step": 22686 }, { "epoch": 0.661853083610479, "grad_norm": 0.8459262924633701, "learning_rate": 2.7106426744840903e-06, "loss": 0.1061, "step": 22687 }, { "epoch": 0.6618822568411226, "grad_norm": 0.7941108554831526, "learning_rate": 2.710222682044689e-06, "loss": 0.1364, "step": 22688 }, { "epoch": 0.6619114300717661, "grad_norm": 0.8446789782799237, "learning_rate": 2.70980271004787e-06, "loss": 0.1131, "step": 22689 }, { "epoch": 0.6619406033024097, "grad_norm": 0.7177839383770583, "learning_rate": 2.7093827584973864e-06, "loss": 0.1033, "step": 22690 }, { "epoch": 0.6619697765330532, "grad_norm": 0.9387945384857022, "learning_rate": 2.708962827396988e-06, "loss": 0.1453, "step": 22691 }, { "epoch": 0.6619989497636969, "grad_norm": 0.8395679812808373, "learning_rate": 2.7085429167504227e-06, "loss": 0.1007, "step": 22692 }, { "epoch": 0.6620281229943404, "grad_norm": 0.7686783234017451, "learning_rate": 2.708123026561438e-06, "loss": 0.1278, "step": 22693 }, { "epoch": 0.662057296224984, "grad_norm": 0.8732786275286675, "learning_rate": 2.7077031568337853e-06, "loss": 0.1374, "step": 22694 }, { "epoch": 0.6620864694556275, "grad_norm": 0.9427103113246084, "learning_rate": 2.7072833075712102e-06, "loss": 0.1275, "step": 22695 }, { "epoch": 0.6621156426862711, "grad_norm": 0.7449211040189512, "learning_rate": 2.7068634787774637e-06, "loss": 0.1084, "step": 22696 }, { "epoch": 0.6621448159169147, "grad_norm": 1.0419258901937143, "learning_rate": 2.7064436704562906e-06, "loss": 0.1332, "step": 22697 }, { "epoch": 0.6621739891475582, "grad_norm": 0.6996777628115296, "learning_rate": 2.706023882611443e-06, "loss": 0.1061, "step": 22698 }, { "epoch": 0.6622031623782018, "grad_norm": 1.0122235070797678, "learning_rate": 2.705604115246667e-06, "loss": 0.1055, "step": 22699 }, { "epoch": 0.6622323356088453, "grad_norm": 0.7493860508219115, "learning_rate": 2.7051843683657073e-06, "loss": 0.1146, "step": 22700 }, { "epoch": 0.6622615088394889, "grad_norm": 0.9118083070252305, "learning_rate": 2.704764641972314e-06, "loss": 0.1298, "step": 22701 }, { "epoch": 0.6622906820701324, "grad_norm": 0.7962610988844777, "learning_rate": 2.7043449360702356e-06, "loss": 0.0997, "step": 22702 }, { "epoch": 0.662319855300776, "grad_norm": 0.8788015974195829, "learning_rate": 2.703925250663216e-06, "loss": 0.1139, "step": 22703 }, { "epoch": 0.6623490285314195, "grad_norm": 0.8997812162323178, "learning_rate": 2.7035055857550056e-06, "loss": 0.1232, "step": 22704 }, { "epoch": 0.6623782017620631, "grad_norm": 0.7622724133070561, "learning_rate": 2.703085941349349e-06, "loss": 0.106, "step": 22705 }, { "epoch": 0.6624073749927067, "grad_norm": 0.715834222217085, "learning_rate": 2.702666317449991e-06, "loss": 0.1294, "step": 22706 }, { "epoch": 0.6624365482233503, "grad_norm": 0.863459359701019, "learning_rate": 2.70224671406068e-06, "loss": 0.1324, "step": 22707 }, { "epoch": 0.6624657214539938, "grad_norm": 1.007169623792825, "learning_rate": 2.701827131185163e-06, "loss": 0.1229, "step": 22708 }, { "epoch": 0.6624948946846374, "grad_norm": 0.9935501999166708, "learning_rate": 2.7014075688271857e-06, "loss": 0.1311, "step": 22709 }, { "epoch": 0.662524067915281, "grad_norm": 0.8176213043713191, "learning_rate": 2.70098802699049e-06, "loss": 0.1334, "step": 22710 }, { "epoch": 0.6625532411459245, "grad_norm": 0.8131923698562828, "learning_rate": 2.7005685056788266e-06, "loss": 0.1098, "step": 22711 }, { "epoch": 0.6625824143765681, "grad_norm": 0.6752129161337445, "learning_rate": 2.700149004895939e-06, "loss": 0.1169, "step": 22712 }, { "epoch": 0.6626115876072116, "grad_norm": 1.3347273643967854, "learning_rate": 2.69972952464557e-06, "loss": 0.1615, "step": 22713 }, { "epoch": 0.6626407608378552, "grad_norm": 0.9448109125956424, "learning_rate": 2.6993100649314663e-06, "loss": 0.1235, "step": 22714 }, { "epoch": 0.6626699340684987, "grad_norm": 0.7656346529520892, "learning_rate": 2.6988906257573757e-06, "loss": 0.1326, "step": 22715 }, { "epoch": 0.6626991072991423, "grad_norm": 0.9833753913203368, "learning_rate": 2.6984712071270396e-06, "loss": 0.1242, "step": 22716 }, { "epoch": 0.6627282805297858, "grad_norm": 1.089224248732522, "learning_rate": 2.6980518090442016e-06, "loss": 0.1582, "step": 22717 }, { "epoch": 0.6627574537604294, "grad_norm": 0.7869744316955992, "learning_rate": 2.697632431512609e-06, "loss": 0.119, "step": 22718 }, { "epoch": 0.662786626991073, "grad_norm": 0.8584606434500435, "learning_rate": 2.6972130745360033e-06, "loss": 0.1059, "step": 22719 }, { "epoch": 0.6628158002217166, "grad_norm": 0.8352241109709454, "learning_rate": 2.696793738118129e-06, "loss": 0.1352, "step": 22720 }, { "epoch": 0.6628449734523602, "grad_norm": 0.9355569458030719, "learning_rate": 2.6963744222627326e-06, "loss": 0.1433, "step": 22721 }, { "epoch": 0.6628741466830037, "grad_norm": 0.7610063806664835, "learning_rate": 2.6959551269735553e-06, "loss": 0.112, "step": 22722 }, { "epoch": 0.6629033199136473, "grad_norm": 0.8125572547701, "learning_rate": 2.6955358522543385e-06, "loss": 0.1512, "step": 22723 }, { "epoch": 0.6629324931442908, "grad_norm": 0.8369608260762295, "learning_rate": 2.6951165981088303e-06, "loss": 0.1122, "step": 22724 }, { "epoch": 0.6629616663749344, "grad_norm": 0.7809482849014473, "learning_rate": 2.6946973645407674e-06, "loss": 0.1174, "step": 22725 }, { "epoch": 0.6629908396055779, "grad_norm": 0.8024793983428733, "learning_rate": 2.6942781515538996e-06, "loss": 0.118, "step": 22726 }, { "epoch": 0.6630200128362215, "grad_norm": 0.7561825368964618, "learning_rate": 2.6938589591519624e-06, "loss": 0.109, "step": 22727 }, { "epoch": 0.663049186066865, "grad_norm": 0.8774031918796692, "learning_rate": 2.693439787338705e-06, "loss": 0.1175, "step": 22728 }, { "epoch": 0.6630783592975086, "grad_norm": 0.7858477310656956, "learning_rate": 2.693020636117867e-06, "loss": 0.1382, "step": 22729 }, { "epoch": 0.6631075325281521, "grad_norm": 0.8101829820640726, "learning_rate": 2.6926015054931876e-06, "loss": 0.1137, "step": 22730 }, { "epoch": 0.6631367057587957, "grad_norm": 0.949393329970762, "learning_rate": 2.6921823954684105e-06, "loss": 0.1251, "step": 22731 }, { "epoch": 0.6631658789894392, "grad_norm": 0.7565575831085826, "learning_rate": 2.691763306047281e-06, "loss": 0.0978, "step": 22732 }, { "epoch": 0.6631950522200829, "grad_norm": 0.8482425123804851, "learning_rate": 2.6913442372335353e-06, "loss": 0.1201, "step": 22733 }, { "epoch": 0.6632242254507265, "grad_norm": 1.0875602580466324, "learning_rate": 2.6909251890309185e-06, "loss": 0.1484, "step": 22734 }, { "epoch": 0.66325339868137, "grad_norm": 0.8051571625003154, "learning_rate": 2.6905061614431716e-06, "loss": 0.116, "step": 22735 }, { "epoch": 0.6632825719120136, "grad_norm": 0.897759965070597, "learning_rate": 2.6900871544740315e-06, "loss": 0.1321, "step": 22736 }, { "epoch": 0.6633117451426571, "grad_norm": 0.9536557279590591, "learning_rate": 2.6896681681272417e-06, "loss": 0.1246, "step": 22737 }, { "epoch": 0.6633409183733007, "grad_norm": 0.7645782420215563, "learning_rate": 2.6892492024065453e-06, "loss": 0.1268, "step": 22738 }, { "epoch": 0.6633700916039442, "grad_norm": 0.7491515181046098, "learning_rate": 2.688830257315681e-06, "loss": 0.1105, "step": 22739 }, { "epoch": 0.6633992648345878, "grad_norm": 0.921236417838504, "learning_rate": 2.688411332858386e-06, "loss": 0.1299, "step": 22740 }, { "epoch": 0.6634284380652313, "grad_norm": 0.9820780602449843, "learning_rate": 2.687992429038404e-06, "loss": 0.1477, "step": 22741 }, { "epoch": 0.6634576112958749, "grad_norm": 0.9562363784714465, "learning_rate": 2.687573545859475e-06, "loss": 0.1301, "step": 22742 }, { "epoch": 0.6634867845265184, "grad_norm": 0.7733876220060898, "learning_rate": 2.6871546833253347e-06, "loss": 0.1111, "step": 22743 }, { "epoch": 0.663515957757162, "grad_norm": 0.7413071223379776, "learning_rate": 2.686735841439725e-06, "loss": 0.12, "step": 22744 }, { "epoch": 0.6635451309878055, "grad_norm": 1.0183380551511207, "learning_rate": 2.6863170202063884e-06, "loss": 0.115, "step": 22745 }, { "epoch": 0.6635743042184491, "grad_norm": 0.786501491655702, "learning_rate": 2.68589821962906e-06, "loss": 0.098, "step": 22746 }, { "epoch": 0.6636034774490928, "grad_norm": 0.8241351984514618, "learning_rate": 2.6854794397114785e-06, "loss": 0.1419, "step": 22747 }, { "epoch": 0.6636326506797363, "grad_norm": 0.7353921423165194, "learning_rate": 2.685060680457386e-06, "loss": 0.1212, "step": 22748 }, { "epoch": 0.6636618239103799, "grad_norm": 1.0368551549822878, "learning_rate": 2.684641941870517e-06, "loss": 0.1134, "step": 22749 }, { "epoch": 0.6636909971410234, "grad_norm": 0.767425619343563, "learning_rate": 2.6842232239546118e-06, "loss": 0.1149, "step": 22750 }, { "epoch": 0.663720170371667, "grad_norm": 0.6842819277001925, "learning_rate": 2.6838045267134115e-06, "loss": 0.1067, "step": 22751 }, { "epoch": 0.6637493436023105, "grad_norm": 0.7447247423431499, "learning_rate": 2.683385850150651e-06, "loss": 0.1198, "step": 22752 }, { "epoch": 0.6637785168329541, "grad_norm": 1.1057954706132223, "learning_rate": 2.6829671942700665e-06, "loss": 0.1349, "step": 22753 }, { "epoch": 0.6638076900635976, "grad_norm": 0.7679389187018296, "learning_rate": 2.6825485590754e-06, "loss": 0.1342, "step": 22754 }, { "epoch": 0.6638368632942412, "grad_norm": 0.6422875569214838, "learning_rate": 2.682129944570385e-06, "loss": 0.1242, "step": 22755 }, { "epoch": 0.6638660365248847, "grad_norm": 0.787111958851915, "learning_rate": 2.6817113507587623e-06, "loss": 0.1137, "step": 22756 }, { "epoch": 0.6638952097555283, "grad_norm": 0.8947541133808778, "learning_rate": 2.6812927776442647e-06, "loss": 0.144, "step": 22757 }, { "epoch": 0.6639243829861718, "grad_norm": 0.7308854387359449, "learning_rate": 2.680874225230634e-06, "loss": 0.1208, "step": 22758 }, { "epoch": 0.6639535562168154, "grad_norm": 0.8570165983911302, "learning_rate": 2.680455693521605e-06, "loss": 0.1278, "step": 22759 }, { "epoch": 0.6639827294474591, "grad_norm": 0.6814366635582152, "learning_rate": 2.6800371825209114e-06, "loss": 0.1177, "step": 22760 }, { "epoch": 0.6640119026781026, "grad_norm": 0.757997692071325, "learning_rate": 2.6796186922322926e-06, "loss": 0.112, "step": 22761 }, { "epoch": 0.6640410759087462, "grad_norm": 0.9121638917545138, "learning_rate": 2.679200222659486e-06, "loss": 0.1344, "step": 22762 }, { "epoch": 0.6640702491393897, "grad_norm": 0.7417214753064422, "learning_rate": 2.6787817738062233e-06, "loss": 0.1396, "step": 22763 }, { "epoch": 0.6640994223700333, "grad_norm": 0.8072939739497127, "learning_rate": 2.678363345676245e-06, "loss": 0.1063, "step": 22764 }, { "epoch": 0.6641285956006768, "grad_norm": 0.6786321051846522, "learning_rate": 2.6779449382732846e-06, "loss": 0.0934, "step": 22765 }, { "epoch": 0.6641577688313204, "grad_norm": 0.7770781535074133, "learning_rate": 2.677526551601076e-06, "loss": 0.1086, "step": 22766 }, { "epoch": 0.6641869420619639, "grad_norm": 0.6871878643258786, "learning_rate": 2.6771081856633552e-06, "loss": 0.1175, "step": 22767 }, { "epoch": 0.6642161152926075, "grad_norm": 0.7890643089896222, "learning_rate": 2.6766898404638604e-06, "loss": 0.1386, "step": 22768 }, { "epoch": 0.664245288523251, "grad_norm": 0.7658669956462429, "learning_rate": 2.6762715160063236e-06, "loss": 0.1233, "step": 22769 }, { "epoch": 0.6642744617538946, "grad_norm": 0.9075248415891648, "learning_rate": 2.675853212294478e-06, "loss": 0.1158, "step": 22770 }, { "epoch": 0.6643036349845381, "grad_norm": 0.6410609193505, "learning_rate": 2.6754349293320625e-06, "loss": 0.105, "step": 22771 }, { "epoch": 0.6643328082151817, "grad_norm": 0.84713033760718, "learning_rate": 2.6750166671228094e-06, "loss": 0.1306, "step": 22772 }, { "epoch": 0.6643619814458253, "grad_norm": 0.7736255291218663, "learning_rate": 2.6745984256704498e-06, "loss": 0.1182, "step": 22773 }, { "epoch": 0.6643911546764689, "grad_norm": 0.9803888247539395, "learning_rate": 2.6741802049787202e-06, "loss": 0.1155, "step": 22774 }, { "epoch": 0.6644203279071125, "grad_norm": 1.0128989948567748, "learning_rate": 2.6737620050513567e-06, "loss": 0.1372, "step": 22775 }, { "epoch": 0.664449501137756, "grad_norm": 0.8742612390528713, "learning_rate": 2.6733438258920912e-06, "loss": 0.1428, "step": 22776 }, { "epoch": 0.6644786743683996, "grad_norm": 0.8847815637561429, "learning_rate": 2.6729256675046545e-06, "loss": 0.1233, "step": 22777 }, { "epoch": 0.6645078475990431, "grad_norm": 0.9185805914733591, "learning_rate": 2.6725075298927837e-06, "loss": 0.1385, "step": 22778 }, { "epoch": 0.6645370208296867, "grad_norm": 0.9081917259864982, "learning_rate": 2.672089413060208e-06, "loss": 0.1149, "step": 22779 }, { "epoch": 0.6645661940603302, "grad_norm": 0.9060151823771639, "learning_rate": 2.6716713170106622e-06, "loss": 0.12, "step": 22780 }, { "epoch": 0.6645953672909738, "grad_norm": 0.8265405242520284, "learning_rate": 2.6712532417478817e-06, "loss": 0.1203, "step": 22781 }, { "epoch": 0.6646245405216173, "grad_norm": 0.9303997829894811, "learning_rate": 2.6708351872755955e-06, "loss": 0.1205, "step": 22782 }, { "epoch": 0.6646537137522609, "grad_norm": 0.6978932775368064, "learning_rate": 2.6704171535975353e-06, "loss": 0.1163, "step": 22783 }, { "epoch": 0.6646828869829045, "grad_norm": 0.9416123650054097, "learning_rate": 2.669999140717436e-06, "loss": 0.1141, "step": 22784 }, { "epoch": 0.664712060213548, "grad_norm": 0.8858413083104199, "learning_rate": 2.6695811486390267e-06, "loss": 0.1127, "step": 22785 }, { "epoch": 0.6647412334441916, "grad_norm": 0.9055770930977479, "learning_rate": 2.6691631773660427e-06, "loss": 0.1231, "step": 22786 }, { "epoch": 0.6647704066748352, "grad_norm": 1.0147306886536345, "learning_rate": 2.6687452269022107e-06, "loss": 0.1046, "step": 22787 }, { "epoch": 0.6647995799054788, "grad_norm": 0.8475171238017849, "learning_rate": 2.6683272972512674e-06, "loss": 0.1185, "step": 22788 }, { "epoch": 0.6648287531361223, "grad_norm": 0.8877258272993748, "learning_rate": 2.6679093884169415e-06, "loss": 0.0949, "step": 22789 }, { "epoch": 0.6648579263667659, "grad_norm": 0.8681780252684836, "learning_rate": 2.6674915004029615e-06, "loss": 0.1303, "step": 22790 }, { "epoch": 0.6648870995974094, "grad_norm": 0.7965396402025787, "learning_rate": 2.6670736332130608e-06, "loss": 0.1159, "step": 22791 }, { "epoch": 0.664916272828053, "grad_norm": 0.9841908263698371, "learning_rate": 2.666655786850972e-06, "loss": 0.1137, "step": 22792 }, { "epoch": 0.6649454460586965, "grad_norm": 0.9079873660443103, "learning_rate": 2.666237961320421e-06, "loss": 0.1362, "step": 22793 }, { "epoch": 0.6649746192893401, "grad_norm": 1.3539266253683224, "learning_rate": 2.665820156625142e-06, "loss": 0.1111, "step": 22794 }, { "epoch": 0.6650037925199836, "grad_norm": 0.9942759204181132, "learning_rate": 2.6654023727688637e-06, "loss": 0.1299, "step": 22795 }, { "epoch": 0.6650329657506272, "grad_norm": 0.9141475331994009, "learning_rate": 2.6649846097553144e-06, "loss": 0.1284, "step": 22796 }, { "epoch": 0.6650621389812708, "grad_norm": 0.8335895823147706, "learning_rate": 2.664566867588224e-06, "loss": 0.1221, "step": 22797 }, { "epoch": 0.6650913122119143, "grad_norm": 0.9376403578406329, "learning_rate": 2.664149146271326e-06, "loss": 0.1081, "step": 22798 }, { "epoch": 0.6651204854425579, "grad_norm": 0.7709837010877325, "learning_rate": 2.6637314458083465e-06, "loss": 0.1039, "step": 22799 }, { "epoch": 0.6651496586732014, "grad_norm": 0.7341095668651686, "learning_rate": 2.6633137662030128e-06, "loss": 0.113, "step": 22800 }, { "epoch": 0.6651788319038451, "grad_norm": 0.8577788157092703, "learning_rate": 2.6628961074590575e-06, "loss": 0.1331, "step": 22801 }, { "epoch": 0.6652080051344886, "grad_norm": 1.0247188704207537, "learning_rate": 2.6624784695802087e-06, "loss": 0.1204, "step": 22802 }, { "epoch": 0.6652371783651322, "grad_norm": 0.7539953384403465, "learning_rate": 2.662060852570192e-06, "loss": 0.1102, "step": 22803 }, { "epoch": 0.6652663515957757, "grad_norm": 0.8177840501455511, "learning_rate": 2.6616432564327375e-06, "loss": 0.123, "step": 22804 }, { "epoch": 0.6652955248264193, "grad_norm": 0.6556082097341025, "learning_rate": 2.6612256811715758e-06, "loss": 0.1325, "step": 22805 }, { "epoch": 0.6653246980570628, "grad_norm": 0.9077159408637516, "learning_rate": 2.660808126790433e-06, "loss": 0.1088, "step": 22806 }, { "epoch": 0.6653538712877064, "grad_norm": 0.8344040674081844, "learning_rate": 2.6603905932930353e-06, "loss": 0.1328, "step": 22807 }, { "epoch": 0.66538304451835, "grad_norm": 0.8027804473541733, "learning_rate": 2.6599730806831114e-06, "loss": 0.1232, "step": 22808 }, { "epoch": 0.6654122177489935, "grad_norm": 0.7289277425195105, "learning_rate": 2.659555588964391e-06, "loss": 0.1079, "step": 22809 }, { "epoch": 0.6654413909796371, "grad_norm": 0.7811310114777291, "learning_rate": 2.6591381181405982e-06, "loss": 0.1022, "step": 22810 }, { "epoch": 0.6654705642102806, "grad_norm": 0.6699944434434246, "learning_rate": 2.6587206682154632e-06, "loss": 0.1115, "step": 22811 }, { "epoch": 0.6654997374409242, "grad_norm": 0.8527080409705557, "learning_rate": 2.658303239192711e-06, "loss": 0.1171, "step": 22812 }, { "epoch": 0.6655289106715677, "grad_norm": 0.8503458504227392, "learning_rate": 2.657885831076067e-06, "loss": 0.13, "step": 22813 }, { "epoch": 0.6655580839022114, "grad_norm": 0.8610452118826941, "learning_rate": 2.657468443869259e-06, "loss": 0.1096, "step": 22814 }, { "epoch": 0.6655872571328549, "grad_norm": 0.8117637844797436, "learning_rate": 2.657051077576015e-06, "loss": 0.1137, "step": 22815 }, { "epoch": 0.6656164303634985, "grad_norm": 1.0340139159103572, "learning_rate": 2.6566337322000604e-06, "loss": 0.1161, "step": 22816 }, { "epoch": 0.665645603594142, "grad_norm": 1.0225009069481235, "learning_rate": 2.656216407745118e-06, "loss": 0.1208, "step": 22817 }, { "epoch": 0.6656747768247856, "grad_norm": 0.883007858803241, "learning_rate": 2.655799104214918e-06, "loss": 0.1284, "step": 22818 }, { "epoch": 0.6657039500554292, "grad_norm": 1.3535224376299615, "learning_rate": 2.6553818216131837e-06, "loss": 0.1147, "step": 22819 }, { "epoch": 0.6657331232860727, "grad_norm": 1.1754917276501897, "learning_rate": 2.654964559943639e-06, "loss": 0.1333, "step": 22820 }, { "epoch": 0.6657622965167163, "grad_norm": 0.909508895822343, "learning_rate": 2.654547319210011e-06, "loss": 0.1188, "step": 22821 }, { "epoch": 0.6657914697473598, "grad_norm": 0.756381712481357, "learning_rate": 2.6541300994160267e-06, "loss": 0.1047, "step": 22822 }, { "epoch": 0.6658206429780034, "grad_norm": 0.8862876683421308, "learning_rate": 2.653712900565407e-06, "loss": 0.1345, "step": 22823 }, { "epoch": 0.6658498162086469, "grad_norm": 0.8249110733240348, "learning_rate": 2.6532957226618805e-06, "loss": 0.1335, "step": 22824 }, { "epoch": 0.6658789894392905, "grad_norm": 1.007246692300794, "learning_rate": 2.6528785657091696e-06, "loss": 0.1023, "step": 22825 }, { "epoch": 0.665908162669934, "grad_norm": 0.8183854563361862, "learning_rate": 2.652461429710996e-06, "loss": 0.1175, "step": 22826 }, { "epoch": 0.6659373359005776, "grad_norm": 0.9746215184008209, "learning_rate": 2.652044314671086e-06, "loss": 0.1166, "step": 22827 }, { "epoch": 0.6659665091312212, "grad_norm": 0.7990369652570128, "learning_rate": 2.6516272205931672e-06, "loss": 0.1196, "step": 22828 }, { "epoch": 0.6659956823618648, "grad_norm": 0.7056059919518601, "learning_rate": 2.6512101474809595e-06, "loss": 0.1273, "step": 22829 }, { "epoch": 0.6660248555925083, "grad_norm": 1.076309111226193, "learning_rate": 2.6507930953381844e-06, "loss": 0.1254, "step": 22830 }, { "epoch": 0.6660540288231519, "grad_norm": 0.7191027710196798, "learning_rate": 2.6503760641685698e-06, "loss": 0.0996, "step": 22831 }, { "epoch": 0.6660832020537955, "grad_norm": 0.8633287497024185, "learning_rate": 2.6499590539758354e-06, "loss": 0.1207, "step": 22832 }, { "epoch": 0.666112375284439, "grad_norm": 0.7809386224903961, "learning_rate": 2.6495420647637073e-06, "loss": 0.1511, "step": 22833 }, { "epoch": 0.6661415485150826, "grad_norm": 0.801690683183648, "learning_rate": 2.649125096535904e-06, "loss": 0.1244, "step": 22834 }, { "epoch": 0.6661707217457261, "grad_norm": 0.8307182356116696, "learning_rate": 2.648708149296153e-06, "loss": 0.1535, "step": 22835 }, { "epoch": 0.6661998949763697, "grad_norm": 0.7847049312689528, "learning_rate": 2.648291223048175e-06, "loss": 0.1113, "step": 22836 }, { "epoch": 0.6662290682070132, "grad_norm": 0.648994948062591, "learning_rate": 2.6478743177956888e-06, "loss": 0.1291, "step": 22837 }, { "epoch": 0.6662582414376568, "grad_norm": 1.0227885258841345, "learning_rate": 2.6474574335424193e-06, "loss": 0.1692, "step": 22838 }, { "epoch": 0.6662874146683003, "grad_norm": 0.8035949903899916, "learning_rate": 2.64704057029209e-06, "loss": 0.1081, "step": 22839 }, { "epoch": 0.6663165878989439, "grad_norm": 0.8940843548417005, "learning_rate": 2.6466237280484197e-06, "loss": 0.1176, "step": 22840 }, { "epoch": 0.6663457611295875, "grad_norm": 0.7227817489635756, "learning_rate": 2.646206906815132e-06, "loss": 0.1291, "step": 22841 }, { "epoch": 0.6663749343602311, "grad_norm": 1.041345619677843, "learning_rate": 2.6457901065959474e-06, "loss": 0.1174, "step": 22842 }, { "epoch": 0.6664041075908747, "grad_norm": 0.8271141276860493, "learning_rate": 2.645373327394585e-06, "loss": 0.1318, "step": 22843 }, { "epoch": 0.6664332808215182, "grad_norm": 0.7784161027827586, "learning_rate": 2.6449565692147673e-06, "loss": 0.1233, "step": 22844 }, { "epoch": 0.6664624540521618, "grad_norm": 0.913431319897687, "learning_rate": 2.6445398320602168e-06, "loss": 0.1175, "step": 22845 }, { "epoch": 0.6664916272828053, "grad_norm": 0.7544592426231707, "learning_rate": 2.644123115934653e-06, "loss": 0.1218, "step": 22846 }, { "epoch": 0.6665208005134489, "grad_norm": 0.7237926195654799, "learning_rate": 2.6437064208417934e-06, "loss": 0.1255, "step": 22847 }, { "epoch": 0.6665499737440924, "grad_norm": 0.7785536203346751, "learning_rate": 2.6432897467853626e-06, "loss": 0.1359, "step": 22848 }, { "epoch": 0.666579146974736, "grad_norm": 0.9023965626581943, "learning_rate": 2.642873093769078e-06, "loss": 0.1294, "step": 22849 }, { "epoch": 0.6666083202053795, "grad_norm": 1.1122519111675686, "learning_rate": 2.6424564617966574e-06, "loss": 0.1302, "step": 22850 }, { "epoch": 0.6666374934360231, "grad_norm": 0.7810632635125973, "learning_rate": 2.6420398508718237e-06, "loss": 0.1274, "step": 22851 }, { "epoch": 0.6666666666666666, "grad_norm": 1.1671355798287721, "learning_rate": 2.641623260998296e-06, "loss": 0.1267, "step": 22852 }, { "epoch": 0.6666958398973102, "grad_norm": 0.7620712314961029, "learning_rate": 2.641206692179794e-06, "loss": 0.1082, "step": 22853 }, { "epoch": 0.6667250131279537, "grad_norm": 0.9685592833678796, "learning_rate": 2.6407901444200335e-06, "loss": 0.1198, "step": 22854 }, { "epoch": 0.6667541863585974, "grad_norm": 1.3405049572231604, "learning_rate": 2.640373617722737e-06, "loss": 0.1386, "step": 22855 }, { "epoch": 0.666783359589241, "grad_norm": 0.7881531449010316, "learning_rate": 2.639957112091619e-06, "loss": 0.1266, "step": 22856 }, { "epoch": 0.6668125328198845, "grad_norm": 0.9545364113754767, "learning_rate": 2.6395406275304014e-06, "loss": 0.1009, "step": 22857 }, { "epoch": 0.6668417060505281, "grad_norm": 0.9211951782557177, "learning_rate": 2.6391241640428034e-06, "loss": 0.1227, "step": 22858 }, { "epoch": 0.6668708792811716, "grad_norm": 0.8779482312824567, "learning_rate": 2.6387077216325407e-06, "loss": 0.1228, "step": 22859 }, { "epoch": 0.6669000525118152, "grad_norm": 0.9842666461074883, "learning_rate": 2.6382913003033305e-06, "loss": 0.1368, "step": 22860 }, { "epoch": 0.6669292257424587, "grad_norm": 0.81804116491733, "learning_rate": 2.637874900058893e-06, "loss": 0.1422, "step": 22861 }, { "epoch": 0.6669583989731023, "grad_norm": 1.093203373968162, "learning_rate": 2.6374585209029435e-06, "loss": 0.1342, "step": 22862 }, { "epoch": 0.6669875722037458, "grad_norm": 0.8712050536001551, "learning_rate": 2.637042162839202e-06, "loss": 0.1317, "step": 22863 }, { "epoch": 0.6670167454343894, "grad_norm": 0.8753929827276967, "learning_rate": 2.6366258258713816e-06, "loss": 0.1203, "step": 22864 }, { "epoch": 0.6670459186650329, "grad_norm": 0.9179428116205727, "learning_rate": 2.636209510003204e-06, "loss": 0.1156, "step": 22865 }, { "epoch": 0.6670750918956765, "grad_norm": 0.9122813596479129, "learning_rate": 2.6357932152383837e-06, "loss": 0.1229, "step": 22866 }, { "epoch": 0.66710426512632, "grad_norm": 0.9132962926045334, "learning_rate": 2.635376941580635e-06, "loss": 0.1178, "step": 22867 }, { "epoch": 0.6671334383569637, "grad_norm": 0.746429558365203, "learning_rate": 2.6349606890336765e-06, "loss": 0.1243, "step": 22868 }, { "epoch": 0.6671626115876073, "grad_norm": 0.8487321079667792, "learning_rate": 2.634544457601227e-06, "loss": 0.1306, "step": 22869 }, { "epoch": 0.6671917848182508, "grad_norm": 1.071069474226585, "learning_rate": 2.6341282472869968e-06, "loss": 0.1121, "step": 22870 }, { "epoch": 0.6672209580488944, "grad_norm": 0.7754749506074043, "learning_rate": 2.6337120580947074e-06, "loss": 0.0795, "step": 22871 }, { "epoch": 0.6672501312795379, "grad_norm": 0.8771783417649982, "learning_rate": 2.6332958900280715e-06, "loss": 0.1316, "step": 22872 }, { "epoch": 0.6672793045101815, "grad_norm": 1.0235420465187148, "learning_rate": 2.6328797430908038e-06, "loss": 0.1148, "step": 22873 }, { "epoch": 0.667308477740825, "grad_norm": 0.7937782970107664, "learning_rate": 2.63246361728662e-06, "loss": 0.108, "step": 22874 }, { "epoch": 0.6673376509714686, "grad_norm": 0.8326839070925353, "learning_rate": 2.6320475126192378e-06, "loss": 0.0956, "step": 22875 }, { "epoch": 0.6673668242021121, "grad_norm": 0.7108209346505884, "learning_rate": 2.6316314290923705e-06, "loss": 0.1105, "step": 22876 }, { "epoch": 0.6673959974327557, "grad_norm": 0.7732239015891544, "learning_rate": 2.6312153667097297e-06, "loss": 0.1262, "step": 22877 }, { "epoch": 0.6674251706633992, "grad_norm": 0.8739867117762663, "learning_rate": 2.6307993254750354e-06, "loss": 0.1309, "step": 22878 }, { "epoch": 0.6674543438940428, "grad_norm": 0.7112937245112858, "learning_rate": 2.630383305391999e-06, "loss": 0.1312, "step": 22879 }, { "epoch": 0.6674835171246863, "grad_norm": 0.9659848731887509, "learning_rate": 2.629967306464333e-06, "loss": 0.1247, "step": 22880 }, { "epoch": 0.6675126903553299, "grad_norm": 0.9564188845262732, "learning_rate": 2.629551328695752e-06, "loss": 0.123, "step": 22881 }, { "epoch": 0.6675418635859736, "grad_norm": 0.9836315585119073, "learning_rate": 2.629135372089974e-06, "loss": 0.1247, "step": 22882 }, { "epoch": 0.6675710368166171, "grad_norm": 1.0055943275658235, "learning_rate": 2.628719436650709e-06, "loss": 0.1459, "step": 22883 }, { "epoch": 0.6676002100472607, "grad_norm": 0.9715136891055601, "learning_rate": 2.628303522381669e-06, "loss": 0.1388, "step": 22884 }, { "epoch": 0.6676293832779042, "grad_norm": 0.9111219117398697, "learning_rate": 2.6278876292865705e-06, "loss": 0.0897, "step": 22885 }, { "epoch": 0.6676585565085478, "grad_norm": 0.8552261691374892, "learning_rate": 2.627471757369123e-06, "loss": 0.1279, "step": 22886 }, { "epoch": 0.6676877297391913, "grad_norm": 0.7070872845443794, "learning_rate": 2.6270559066330425e-06, "loss": 0.1202, "step": 22887 }, { "epoch": 0.6677169029698349, "grad_norm": 0.9344461378859874, "learning_rate": 2.626640077082041e-06, "loss": 0.1433, "step": 22888 }, { "epoch": 0.6677460762004784, "grad_norm": 0.9546904343586825, "learning_rate": 2.626224268719831e-06, "loss": 0.126, "step": 22889 }, { "epoch": 0.667775249431122, "grad_norm": 0.8914692488127292, "learning_rate": 2.6258084815501217e-06, "loss": 0.133, "step": 22890 }, { "epoch": 0.6678044226617655, "grad_norm": 1.0945021448766752, "learning_rate": 2.62539271557663e-06, "loss": 0.1349, "step": 22891 }, { "epoch": 0.6678335958924091, "grad_norm": 0.8442179469964343, "learning_rate": 2.6249769708030626e-06, "loss": 0.1411, "step": 22892 }, { "epoch": 0.6678627691230526, "grad_norm": 0.6690842340004873, "learning_rate": 2.624561247233136e-06, "loss": 0.1161, "step": 22893 }, { "epoch": 0.6678919423536962, "grad_norm": 1.055252963845563, "learning_rate": 2.6241455448705585e-06, "loss": 0.1389, "step": 22894 }, { "epoch": 0.6679211155843399, "grad_norm": 0.9517407132556734, "learning_rate": 2.6237298637190433e-06, "loss": 0.1278, "step": 22895 }, { "epoch": 0.6679502888149834, "grad_norm": 0.714132413605472, "learning_rate": 2.6233142037823013e-06, "loss": 0.104, "step": 22896 }, { "epoch": 0.667979462045627, "grad_norm": 0.7384307375035443, "learning_rate": 2.6228985650640405e-06, "loss": 0.1249, "step": 22897 }, { "epoch": 0.6680086352762705, "grad_norm": 0.6353591443588497, "learning_rate": 2.6224829475679737e-06, "loss": 0.1235, "step": 22898 }, { "epoch": 0.6680378085069141, "grad_norm": 0.913948058624623, "learning_rate": 2.6220673512978135e-06, "loss": 0.1174, "step": 22899 }, { "epoch": 0.6680669817375576, "grad_norm": 0.8425906355515902, "learning_rate": 2.621651776257266e-06, "loss": 0.1102, "step": 22900 }, { "epoch": 0.6680961549682012, "grad_norm": 0.8877934953357128, "learning_rate": 2.6212362224500467e-06, "loss": 0.1241, "step": 22901 }, { "epoch": 0.6681253281988447, "grad_norm": 0.8366168848950594, "learning_rate": 2.6208206898798618e-06, "loss": 0.115, "step": 22902 }, { "epoch": 0.6681545014294883, "grad_norm": 0.9015235413550581, "learning_rate": 2.6204051785504197e-06, "loss": 0.1199, "step": 22903 }, { "epoch": 0.6681836746601318, "grad_norm": 0.7936000443709997, "learning_rate": 2.619989688465433e-06, "loss": 0.1208, "step": 22904 }, { "epoch": 0.6682128478907754, "grad_norm": 0.905018323752349, "learning_rate": 2.619574219628611e-06, "loss": 0.1142, "step": 22905 }, { "epoch": 0.668242021121419, "grad_norm": 0.9977333843447267, "learning_rate": 2.619158772043663e-06, "loss": 0.1455, "step": 22906 }, { "epoch": 0.6682711943520625, "grad_norm": 0.7992462283310806, "learning_rate": 2.6187433457142953e-06, "loss": 0.1124, "step": 22907 }, { "epoch": 0.6683003675827061, "grad_norm": 0.8310542157884723, "learning_rate": 2.6183279406442195e-06, "loss": 0.127, "step": 22908 }, { "epoch": 0.6683295408133497, "grad_norm": 1.1425389556738446, "learning_rate": 2.6179125568371444e-06, "loss": 0.1313, "step": 22909 }, { "epoch": 0.6683587140439933, "grad_norm": 1.0003638408942155, "learning_rate": 2.617497194296774e-06, "loss": 0.1308, "step": 22910 }, { "epoch": 0.6683878872746368, "grad_norm": 0.7607575092540827, "learning_rate": 2.6170818530268218e-06, "loss": 0.1416, "step": 22911 }, { "epoch": 0.6684170605052804, "grad_norm": 1.035812661802313, "learning_rate": 2.616666533030995e-06, "loss": 0.1503, "step": 22912 }, { "epoch": 0.6684462337359239, "grad_norm": 0.9809074352486825, "learning_rate": 2.6162512343129996e-06, "loss": 0.1125, "step": 22913 }, { "epoch": 0.6684754069665675, "grad_norm": 0.8755276446003115, "learning_rate": 2.6158359568765436e-06, "loss": 0.1218, "step": 22914 }, { "epoch": 0.668504580197211, "grad_norm": 0.931614951643964, "learning_rate": 2.6154207007253364e-06, "loss": 0.1165, "step": 22915 }, { "epoch": 0.6685337534278546, "grad_norm": 1.1166821915032044, "learning_rate": 2.6150054658630814e-06, "loss": 0.1401, "step": 22916 }, { "epoch": 0.6685629266584981, "grad_norm": 0.896098729123017, "learning_rate": 2.6145902522934886e-06, "loss": 0.1289, "step": 22917 }, { "epoch": 0.6685920998891417, "grad_norm": 0.7827358265099456, "learning_rate": 2.614175060020267e-06, "loss": 0.1031, "step": 22918 }, { "epoch": 0.6686212731197853, "grad_norm": 0.8556313979924545, "learning_rate": 2.6137598890471204e-06, "loss": 0.1316, "step": 22919 }, { "epoch": 0.6686504463504288, "grad_norm": 0.9517679559457114, "learning_rate": 2.6133447393777545e-06, "loss": 0.127, "step": 22920 }, { "epoch": 0.6686796195810724, "grad_norm": 0.9916652286106274, "learning_rate": 2.6129296110158784e-06, "loss": 0.0907, "step": 22921 }, { "epoch": 0.668708792811716, "grad_norm": 0.6660238125799548, "learning_rate": 2.6125145039651955e-06, "loss": 0.1211, "step": 22922 }, { "epoch": 0.6687379660423596, "grad_norm": 0.859101359514304, "learning_rate": 2.612099418229415e-06, "loss": 0.1568, "step": 22923 }, { "epoch": 0.6687671392730031, "grad_norm": 1.0702947707584312, "learning_rate": 2.6116843538122383e-06, "loss": 0.1346, "step": 22924 }, { "epoch": 0.6687963125036467, "grad_norm": 0.8805219805749532, "learning_rate": 2.611269310717376e-06, "loss": 0.1029, "step": 22925 }, { "epoch": 0.6688254857342902, "grad_norm": 0.8034430681476991, "learning_rate": 2.6108542889485304e-06, "loss": 0.1282, "step": 22926 }, { "epoch": 0.6688546589649338, "grad_norm": 0.8488474482376444, "learning_rate": 2.6104392885094067e-06, "loss": 0.1199, "step": 22927 }, { "epoch": 0.6688838321955773, "grad_norm": 1.282805480030664, "learning_rate": 2.610024309403709e-06, "loss": 0.118, "step": 22928 }, { "epoch": 0.6689130054262209, "grad_norm": 0.8617636404005417, "learning_rate": 2.6096093516351473e-06, "loss": 0.1236, "step": 22929 }, { "epoch": 0.6689421786568645, "grad_norm": 1.2089206394358851, "learning_rate": 2.60919441520742e-06, "loss": 0.1504, "step": 22930 }, { "epoch": 0.668971351887508, "grad_norm": 0.8843065407926914, "learning_rate": 2.6087795001242357e-06, "loss": 0.1226, "step": 22931 }, { "epoch": 0.6690005251181516, "grad_norm": 1.1493588882110541, "learning_rate": 2.6083646063892976e-06, "loss": 0.13, "step": 22932 }, { "epoch": 0.6690296983487951, "grad_norm": 1.1155950339039007, "learning_rate": 2.6079497340063077e-06, "loss": 0.1407, "step": 22933 }, { "epoch": 0.6690588715794387, "grad_norm": 0.9143638603560748, "learning_rate": 2.6075348829789716e-06, "loss": 0.1232, "step": 22934 }, { "epoch": 0.6690880448100822, "grad_norm": 0.8037598879231852, "learning_rate": 2.6071200533109943e-06, "loss": 0.1097, "step": 22935 }, { "epoch": 0.6691172180407259, "grad_norm": 1.1392089839495685, "learning_rate": 2.606705245006078e-06, "loss": 0.1231, "step": 22936 }, { "epoch": 0.6691463912713694, "grad_norm": 0.8531527364776602, "learning_rate": 2.6062904580679243e-06, "loss": 0.1279, "step": 22937 }, { "epoch": 0.669175564502013, "grad_norm": 0.815867906913901, "learning_rate": 2.6058756925002405e-06, "loss": 0.1155, "step": 22938 }, { "epoch": 0.6692047377326565, "grad_norm": 0.8291403675860054, "learning_rate": 2.605460948306726e-06, "loss": 0.1115, "step": 22939 }, { "epoch": 0.6692339109633001, "grad_norm": 0.7748396013848297, "learning_rate": 2.6050462254910825e-06, "loss": 0.1493, "step": 22940 }, { "epoch": 0.6692630841939436, "grad_norm": 0.8602687148445679, "learning_rate": 2.604631524057015e-06, "loss": 0.1066, "step": 22941 }, { "epoch": 0.6692922574245872, "grad_norm": 1.0815187177418915, "learning_rate": 2.6042168440082278e-06, "loss": 0.1051, "step": 22942 }, { "epoch": 0.6693214306552308, "grad_norm": 0.7993337586485786, "learning_rate": 2.6038021853484204e-06, "loss": 0.1247, "step": 22943 }, { "epoch": 0.6693506038858743, "grad_norm": 0.8172989834362377, "learning_rate": 2.6033875480812932e-06, "loss": 0.1323, "step": 22944 }, { "epoch": 0.6693797771165179, "grad_norm": 1.0694196145230908, "learning_rate": 2.602972932210551e-06, "loss": 0.1185, "step": 22945 }, { "epoch": 0.6694089503471614, "grad_norm": 0.8139093409115654, "learning_rate": 2.6025583377398933e-06, "loss": 0.1233, "step": 22946 }, { "epoch": 0.669438123577805, "grad_norm": 1.2906758880216815, "learning_rate": 2.602143764673022e-06, "loss": 0.1207, "step": 22947 }, { "epoch": 0.6694672968084485, "grad_norm": 0.8766576779177476, "learning_rate": 2.6017292130136406e-06, "loss": 0.1222, "step": 22948 }, { "epoch": 0.6694964700390922, "grad_norm": 0.7039154293282055, "learning_rate": 2.6013146827654485e-06, "loss": 0.0965, "step": 22949 }, { "epoch": 0.6695256432697357, "grad_norm": 0.7745158648055794, "learning_rate": 2.600900173932144e-06, "loss": 0.1098, "step": 22950 }, { "epoch": 0.6695548165003793, "grad_norm": 0.9171519033405305, "learning_rate": 2.600485686517432e-06, "loss": 0.1288, "step": 22951 }, { "epoch": 0.6695839897310228, "grad_norm": 1.0745458521225597, "learning_rate": 2.60007122052501e-06, "loss": 0.133, "step": 22952 }, { "epoch": 0.6696131629616664, "grad_norm": 1.0622810261729105, "learning_rate": 2.59965677595858e-06, "loss": 0.1201, "step": 22953 }, { "epoch": 0.66964233619231, "grad_norm": 0.6532650285431103, "learning_rate": 2.5992423528218404e-06, "loss": 0.1056, "step": 22954 }, { "epoch": 0.6696715094229535, "grad_norm": 1.0279845866111847, "learning_rate": 2.5988279511184934e-06, "loss": 0.1158, "step": 22955 }, { "epoch": 0.6697006826535971, "grad_norm": 0.8731483651185638, "learning_rate": 2.598413570852237e-06, "loss": 0.0986, "step": 22956 }, { "epoch": 0.6697298558842406, "grad_norm": 0.9696134873233425, "learning_rate": 2.597999212026769e-06, "loss": 0.1212, "step": 22957 }, { "epoch": 0.6697590291148842, "grad_norm": 1.1347937170465638, "learning_rate": 2.597584874645791e-06, "loss": 0.1624, "step": 22958 }, { "epoch": 0.6697882023455277, "grad_norm": 1.0999434347085677, "learning_rate": 2.5971705587130036e-06, "loss": 0.1261, "step": 22959 }, { "epoch": 0.6698173755761713, "grad_norm": 0.9410433692833908, "learning_rate": 2.5967562642321014e-06, "loss": 0.1308, "step": 22960 }, { "epoch": 0.6698465488068148, "grad_norm": 1.110225390909667, "learning_rate": 2.596341991206788e-06, "loss": 0.1168, "step": 22961 }, { "epoch": 0.6698757220374584, "grad_norm": 1.1251422889019116, "learning_rate": 2.5959277396407588e-06, "loss": 0.1421, "step": 22962 }, { "epoch": 0.669904895268102, "grad_norm": 0.9735252353434014, "learning_rate": 2.595513509537712e-06, "loss": 0.1162, "step": 22963 }, { "epoch": 0.6699340684987456, "grad_norm": 0.7471502557532341, "learning_rate": 2.595099300901346e-06, "loss": 0.0935, "step": 22964 }, { "epoch": 0.6699632417293891, "grad_norm": 0.7418263137633314, "learning_rate": 2.5946851137353614e-06, "loss": 0.1179, "step": 22965 }, { "epoch": 0.6699924149600327, "grad_norm": 1.192634862037079, "learning_rate": 2.594270948043454e-06, "loss": 0.1106, "step": 22966 }, { "epoch": 0.6700215881906763, "grad_norm": 0.7710972242554188, "learning_rate": 2.5938568038293193e-06, "loss": 0.1288, "step": 22967 }, { "epoch": 0.6700507614213198, "grad_norm": 0.8604964494284084, "learning_rate": 2.5934426810966585e-06, "loss": 0.117, "step": 22968 }, { "epoch": 0.6700799346519634, "grad_norm": 0.9719138250761021, "learning_rate": 2.593028579849167e-06, "loss": 0.1361, "step": 22969 }, { "epoch": 0.6701091078826069, "grad_norm": 0.9418603652946018, "learning_rate": 2.5926145000905402e-06, "loss": 0.1196, "step": 22970 }, { "epoch": 0.6701382811132505, "grad_norm": 0.8049131281734525, "learning_rate": 2.5922004418244758e-06, "loss": 0.1009, "step": 22971 }, { "epoch": 0.670167454343894, "grad_norm": 0.9613558638504005, "learning_rate": 2.591786405054673e-06, "loss": 0.1411, "step": 22972 }, { "epoch": 0.6701966275745376, "grad_norm": 1.01183092493497, "learning_rate": 2.5913723897848264e-06, "loss": 0.117, "step": 22973 }, { "epoch": 0.6702258008051811, "grad_norm": 0.9235575685507096, "learning_rate": 2.5909583960186306e-06, "loss": 0.0919, "step": 22974 }, { "epoch": 0.6702549740358247, "grad_norm": 0.8172537684550482, "learning_rate": 2.590544423759785e-06, "loss": 0.1164, "step": 22975 }, { "epoch": 0.6702841472664683, "grad_norm": 0.7142541635523699, "learning_rate": 2.5901304730119816e-06, "loss": 0.121, "step": 22976 }, { "epoch": 0.6703133204971119, "grad_norm": 0.905901228042321, "learning_rate": 2.5897165437789175e-06, "loss": 0.1393, "step": 22977 }, { "epoch": 0.6703424937277555, "grad_norm": 0.8181608904803088, "learning_rate": 2.5893026360642912e-06, "loss": 0.1244, "step": 22978 }, { "epoch": 0.670371666958399, "grad_norm": 0.9139770807479383, "learning_rate": 2.588888749871795e-06, "loss": 0.1453, "step": 22979 }, { "epoch": 0.6704008401890426, "grad_norm": 0.782312618193292, "learning_rate": 2.5884748852051236e-06, "loss": 0.1265, "step": 22980 }, { "epoch": 0.6704300134196861, "grad_norm": 0.6666164999326576, "learning_rate": 2.588061042067974e-06, "loss": 0.1319, "step": 22981 }, { "epoch": 0.6704591866503297, "grad_norm": 0.7876602526271521, "learning_rate": 2.5876472204640375e-06, "loss": 0.1064, "step": 22982 }, { "epoch": 0.6704883598809732, "grad_norm": 0.8579907867622241, "learning_rate": 2.587233420397013e-06, "loss": 0.1264, "step": 22983 }, { "epoch": 0.6705175331116168, "grad_norm": 0.9408215174558893, "learning_rate": 2.5868196418705906e-06, "loss": 0.1386, "step": 22984 }, { "epoch": 0.6705467063422603, "grad_norm": 0.8042278480049216, "learning_rate": 2.5864058848884678e-06, "loss": 0.1271, "step": 22985 }, { "epoch": 0.6705758795729039, "grad_norm": 0.8255218498327359, "learning_rate": 2.585992149454337e-06, "loss": 0.1257, "step": 22986 }, { "epoch": 0.6706050528035474, "grad_norm": 0.8001131882840679, "learning_rate": 2.585578435571891e-06, "loss": 0.1128, "step": 22987 }, { "epoch": 0.670634226034191, "grad_norm": 0.7617123776116494, "learning_rate": 2.5851647432448242e-06, "loss": 0.1418, "step": 22988 }, { "epoch": 0.6706633992648345, "grad_norm": 0.9987195121392268, "learning_rate": 2.5847510724768315e-06, "loss": 0.1189, "step": 22989 }, { "epoch": 0.6706925724954782, "grad_norm": 0.8461177176633972, "learning_rate": 2.5843374232716035e-06, "loss": 0.1251, "step": 22990 }, { "epoch": 0.6707217457261218, "grad_norm": 0.798303148708433, "learning_rate": 2.5839237956328356e-06, "loss": 0.1353, "step": 22991 }, { "epoch": 0.6707509189567653, "grad_norm": 0.8383877968105283, "learning_rate": 2.583510189564219e-06, "loss": 0.1175, "step": 22992 }, { "epoch": 0.6707800921874089, "grad_norm": 0.9793682524815928, "learning_rate": 2.583096605069445e-06, "loss": 0.1246, "step": 22993 }, { "epoch": 0.6708092654180524, "grad_norm": 0.9327028157121722, "learning_rate": 2.5826830421522075e-06, "loss": 0.1317, "step": 22994 }, { "epoch": 0.670838438648696, "grad_norm": 0.8195649016502793, "learning_rate": 2.5822695008162015e-06, "loss": 0.1238, "step": 22995 }, { "epoch": 0.6708676118793395, "grad_norm": 1.2466977300984077, "learning_rate": 2.581855981065115e-06, "loss": 0.1332, "step": 22996 }, { "epoch": 0.6708967851099831, "grad_norm": 0.9484199779616992, "learning_rate": 2.5814424829026395e-06, "loss": 0.1276, "step": 22997 }, { "epoch": 0.6709259583406266, "grad_norm": 0.9945373859264935, "learning_rate": 2.5810290063324705e-06, "loss": 0.1365, "step": 22998 }, { "epoch": 0.6709551315712702, "grad_norm": 0.8312290435145775, "learning_rate": 2.5806155513582963e-06, "loss": 0.1144, "step": 22999 }, { "epoch": 0.6709843048019137, "grad_norm": 0.912826024619701, "learning_rate": 2.580202117983808e-06, "loss": 0.1289, "step": 23000 }, { "epoch": 0.6710134780325573, "grad_norm": 0.8686912167702016, "learning_rate": 2.579788706212697e-06, "loss": 0.1068, "step": 23001 }, { "epoch": 0.6710426512632008, "grad_norm": 1.3427708204630477, "learning_rate": 2.5793753160486566e-06, "loss": 0.1316, "step": 23002 }, { "epoch": 0.6710718244938444, "grad_norm": 0.8592042705285448, "learning_rate": 2.5789619474953753e-06, "loss": 0.117, "step": 23003 }, { "epoch": 0.6711009977244881, "grad_norm": 1.082975401321015, "learning_rate": 2.578548600556542e-06, "loss": 0.1173, "step": 23004 }, { "epoch": 0.6711301709551316, "grad_norm": 1.2965524748252881, "learning_rate": 2.5781352752358492e-06, "loss": 0.1339, "step": 23005 }, { "epoch": 0.6711593441857752, "grad_norm": 1.1511374378045174, "learning_rate": 2.5777219715369876e-06, "loss": 0.1469, "step": 23006 }, { "epoch": 0.6711885174164187, "grad_norm": 1.0427632821341766, "learning_rate": 2.5773086894636446e-06, "loss": 0.1263, "step": 23007 }, { "epoch": 0.6712176906470623, "grad_norm": 0.929663944711284, "learning_rate": 2.5768954290195136e-06, "loss": 0.1361, "step": 23008 }, { "epoch": 0.6712468638777058, "grad_norm": 1.1165350096418751, "learning_rate": 2.5764821902082814e-06, "loss": 0.1264, "step": 23009 }, { "epoch": 0.6712760371083494, "grad_norm": 0.9749342362429257, "learning_rate": 2.576068973033635e-06, "loss": 0.1267, "step": 23010 }, { "epoch": 0.6713052103389929, "grad_norm": 0.8730546817614292, "learning_rate": 2.5756557774992676e-06, "loss": 0.1284, "step": 23011 }, { "epoch": 0.6713343835696365, "grad_norm": 0.9547620352732499, "learning_rate": 2.575242603608867e-06, "loss": 0.1297, "step": 23012 }, { "epoch": 0.67136355680028, "grad_norm": 1.1379441054263437, "learning_rate": 2.5748294513661233e-06, "loss": 0.1372, "step": 23013 }, { "epoch": 0.6713927300309236, "grad_norm": 0.9335225321098604, "learning_rate": 2.5744163207747202e-06, "loss": 0.1269, "step": 23014 }, { "epoch": 0.6714219032615671, "grad_norm": 1.6534952123573796, "learning_rate": 2.574003211838352e-06, "loss": 0.1327, "step": 23015 }, { "epoch": 0.6714510764922107, "grad_norm": 1.0425208606698102, "learning_rate": 2.573590124560703e-06, "loss": 0.1117, "step": 23016 }, { "epoch": 0.6714802497228544, "grad_norm": 0.8787349795954643, "learning_rate": 2.5731770589454584e-06, "loss": 0.0986, "step": 23017 }, { "epoch": 0.6715094229534979, "grad_norm": 0.9389100598612916, "learning_rate": 2.572764014996314e-06, "loss": 0.116, "step": 23018 }, { "epoch": 0.6715385961841415, "grad_norm": 0.8437284003147788, "learning_rate": 2.5723509927169526e-06, "loss": 0.1278, "step": 23019 }, { "epoch": 0.671567769414785, "grad_norm": 1.1954962997715544, "learning_rate": 2.5719379921110605e-06, "loss": 0.1513, "step": 23020 }, { "epoch": 0.6715969426454286, "grad_norm": 1.0029319571789357, "learning_rate": 2.5715250131823284e-06, "loss": 0.1262, "step": 23021 }, { "epoch": 0.6716261158760721, "grad_norm": 0.6272414702708596, "learning_rate": 2.5711120559344404e-06, "loss": 0.1005, "step": 23022 }, { "epoch": 0.6716552891067157, "grad_norm": 1.0685323592073221, "learning_rate": 2.570699120371083e-06, "loss": 0.1135, "step": 23023 }, { "epoch": 0.6716844623373592, "grad_norm": 0.848090512308395, "learning_rate": 2.5702862064959445e-06, "loss": 0.1227, "step": 23024 }, { "epoch": 0.6717136355680028, "grad_norm": 0.8136112669022519, "learning_rate": 2.569873314312712e-06, "loss": 0.1136, "step": 23025 }, { "epoch": 0.6717428087986463, "grad_norm": 0.7260969938467134, "learning_rate": 2.5694604438250697e-06, "loss": 0.1245, "step": 23026 }, { "epoch": 0.6717719820292899, "grad_norm": 0.9505974670743264, "learning_rate": 2.5690475950367035e-06, "loss": 0.1424, "step": 23027 }, { "epoch": 0.6718011552599334, "grad_norm": 0.8703509019340886, "learning_rate": 2.5686347679513013e-06, "loss": 0.1247, "step": 23028 }, { "epoch": 0.671830328490577, "grad_norm": 0.9355258205340619, "learning_rate": 2.5682219625725456e-06, "loss": 0.1345, "step": 23029 }, { "epoch": 0.6718595017212206, "grad_norm": 1.1087803126689815, "learning_rate": 2.5678091789041258e-06, "loss": 0.1325, "step": 23030 }, { "epoch": 0.6718886749518642, "grad_norm": 0.7716792466995319, "learning_rate": 2.5673964169497233e-06, "loss": 0.1189, "step": 23031 }, { "epoch": 0.6719178481825078, "grad_norm": 0.9071085343732047, "learning_rate": 2.5669836767130266e-06, "loss": 0.0846, "step": 23032 }, { "epoch": 0.6719470214131513, "grad_norm": 0.7954196170911573, "learning_rate": 2.5665709581977195e-06, "loss": 0.1092, "step": 23033 }, { "epoch": 0.6719761946437949, "grad_norm": 0.7621613749477639, "learning_rate": 2.566158261407483e-06, "loss": 0.122, "step": 23034 }, { "epoch": 0.6720053678744384, "grad_norm": 0.7136243545915051, "learning_rate": 2.565745586346005e-06, "loss": 0.105, "step": 23035 }, { "epoch": 0.672034541105082, "grad_norm": 0.8073084554454861, "learning_rate": 2.5653329330169713e-06, "loss": 0.1144, "step": 23036 }, { "epoch": 0.6720637143357255, "grad_norm": 1.0988537169193622, "learning_rate": 2.564920301424062e-06, "loss": 0.1342, "step": 23037 }, { "epoch": 0.6720928875663691, "grad_norm": 1.8690672441944067, "learning_rate": 2.5645076915709644e-06, "loss": 0.1218, "step": 23038 }, { "epoch": 0.6721220607970126, "grad_norm": 0.9031188759708206, "learning_rate": 2.5640951034613613e-06, "loss": 0.1322, "step": 23039 }, { "epoch": 0.6721512340276562, "grad_norm": 0.913525689768466, "learning_rate": 2.5636825370989336e-06, "loss": 0.1274, "step": 23040 }, { "epoch": 0.6721804072582998, "grad_norm": 0.8978105971050229, "learning_rate": 2.5632699924873667e-06, "loss": 0.1275, "step": 23041 }, { "epoch": 0.6722095804889433, "grad_norm": 0.8385083518811937, "learning_rate": 2.5628574696303452e-06, "loss": 0.1061, "step": 23042 }, { "epoch": 0.6722387537195869, "grad_norm": 0.7681185201675401, "learning_rate": 2.562444968531551e-06, "loss": 0.1372, "step": 23043 }, { "epoch": 0.6722679269502305, "grad_norm": 0.9136671236931833, "learning_rate": 2.5620324891946636e-06, "loss": 0.1218, "step": 23044 }, { "epoch": 0.6722971001808741, "grad_norm": 0.9791057087025754, "learning_rate": 2.5616200316233706e-06, "loss": 0.1517, "step": 23045 }, { "epoch": 0.6723262734115176, "grad_norm": 0.9491180681649855, "learning_rate": 2.5612075958213516e-06, "loss": 0.1194, "step": 23046 }, { "epoch": 0.6723554466421612, "grad_norm": 0.8517097631766665, "learning_rate": 2.560795181792285e-06, "loss": 0.1548, "step": 23047 }, { "epoch": 0.6723846198728047, "grad_norm": 0.816692508907859, "learning_rate": 2.5603827895398613e-06, "loss": 0.1373, "step": 23048 }, { "epoch": 0.6724137931034483, "grad_norm": 1.037973862130898, "learning_rate": 2.5599704190677567e-06, "loss": 0.1269, "step": 23049 }, { "epoch": 0.6724429663340918, "grad_norm": 0.8269344422895314, "learning_rate": 2.5595580703796526e-06, "loss": 0.1118, "step": 23050 }, { "epoch": 0.6724721395647354, "grad_norm": 0.7313766193411093, "learning_rate": 2.5591457434792332e-06, "loss": 0.1201, "step": 23051 }, { "epoch": 0.672501312795379, "grad_norm": 1.0086293225702918, "learning_rate": 2.5587334383701777e-06, "loss": 0.133, "step": 23052 }, { "epoch": 0.6725304860260225, "grad_norm": 0.8973611634538674, "learning_rate": 2.5583211550561654e-06, "loss": 0.1119, "step": 23053 }, { "epoch": 0.672559659256666, "grad_norm": 1.1141740133744749, "learning_rate": 2.5579088935408793e-06, "loss": 0.1111, "step": 23054 }, { "epoch": 0.6725888324873096, "grad_norm": 0.8410195825188868, "learning_rate": 2.557496653828001e-06, "loss": 0.117, "step": 23055 }, { "epoch": 0.6726180057179532, "grad_norm": 0.915494174963484, "learning_rate": 2.5570844359212098e-06, "loss": 0.1195, "step": 23056 }, { "epoch": 0.6726471789485967, "grad_norm": 0.7680153816881505, "learning_rate": 2.556672239824183e-06, "loss": 0.1282, "step": 23057 }, { "epoch": 0.6726763521792404, "grad_norm": 0.8294072737514747, "learning_rate": 2.556260065540606e-06, "loss": 0.1017, "step": 23058 }, { "epoch": 0.6727055254098839, "grad_norm": 1.1684656397570399, "learning_rate": 2.5558479130741537e-06, "loss": 0.1243, "step": 23059 }, { "epoch": 0.6727346986405275, "grad_norm": 0.7347464654050591, "learning_rate": 2.555435782428509e-06, "loss": 0.1146, "step": 23060 }, { "epoch": 0.672763871871171, "grad_norm": 0.8680076540846722, "learning_rate": 2.555023673607349e-06, "loss": 0.1244, "step": 23061 }, { "epoch": 0.6727930451018146, "grad_norm": 0.8891559959126526, "learning_rate": 2.5546115866143555e-06, "loss": 0.1412, "step": 23062 }, { "epoch": 0.6728222183324581, "grad_norm": 0.9379800251188479, "learning_rate": 2.5541995214532066e-06, "loss": 0.11, "step": 23063 }, { "epoch": 0.6728513915631017, "grad_norm": 0.9374131690397216, "learning_rate": 2.5537874781275777e-06, "loss": 0.1059, "step": 23064 }, { "epoch": 0.6728805647937453, "grad_norm": 0.9932545360671732, "learning_rate": 2.5533754566411505e-06, "loss": 0.1339, "step": 23065 }, { "epoch": 0.6729097380243888, "grad_norm": 1.0481182700216478, "learning_rate": 2.5529634569976053e-06, "loss": 0.1207, "step": 23066 }, { "epoch": 0.6729389112550324, "grad_norm": 0.8243975726970814, "learning_rate": 2.552551479200616e-06, "loss": 0.1338, "step": 23067 }, { "epoch": 0.6729680844856759, "grad_norm": 0.7311252626071658, "learning_rate": 2.5521395232538647e-06, "loss": 0.0994, "step": 23068 }, { "epoch": 0.6729972577163195, "grad_norm": 0.8570503512909122, "learning_rate": 2.5517275891610283e-06, "loss": 0.1192, "step": 23069 }, { "epoch": 0.673026430946963, "grad_norm": 0.9790333521513247, "learning_rate": 2.551315676925781e-06, "loss": 0.122, "step": 23070 }, { "epoch": 0.6730556041776067, "grad_norm": 1.162734332338707, "learning_rate": 2.5509037865518026e-06, "loss": 0.1191, "step": 23071 }, { "epoch": 0.6730847774082502, "grad_norm": 0.8244726491109521, "learning_rate": 2.5504919180427723e-06, "loss": 0.1099, "step": 23072 }, { "epoch": 0.6731139506388938, "grad_norm": 0.906523145815668, "learning_rate": 2.5500800714023654e-06, "loss": 0.1252, "step": 23073 }, { "epoch": 0.6731431238695373, "grad_norm": 1.21337257274489, "learning_rate": 2.5496682466342576e-06, "loss": 0.1236, "step": 23074 }, { "epoch": 0.6731722971001809, "grad_norm": 1.0699147048227882, "learning_rate": 2.5492564437421287e-06, "loss": 0.11, "step": 23075 }, { "epoch": 0.6732014703308244, "grad_norm": 0.7315953389906471, "learning_rate": 2.5488446627296525e-06, "loss": 0.1431, "step": 23076 }, { "epoch": 0.673230643561468, "grad_norm": 1.1274509195859086, "learning_rate": 2.5484329036005024e-06, "loss": 0.1323, "step": 23077 }, { "epoch": 0.6732598167921116, "grad_norm": 1.2727683190101309, "learning_rate": 2.548021166358362e-06, "loss": 0.1141, "step": 23078 }, { "epoch": 0.6732889900227551, "grad_norm": 0.8429130965203371, "learning_rate": 2.5476094510069025e-06, "loss": 0.1226, "step": 23079 }, { "epoch": 0.6733181632533987, "grad_norm": 0.7867535761335129, "learning_rate": 2.5471977575497995e-06, "loss": 0.1034, "step": 23080 }, { "epoch": 0.6733473364840422, "grad_norm": 0.7939388142724333, "learning_rate": 2.5467860859907314e-06, "loss": 0.1424, "step": 23081 }, { "epoch": 0.6733765097146858, "grad_norm": 0.9056405970328766, "learning_rate": 2.546374436333371e-06, "loss": 0.1102, "step": 23082 }, { "epoch": 0.6734056829453293, "grad_norm": 0.9434431303885463, "learning_rate": 2.5459628085813924e-06, "loss": 0.1173, "step": 23083 }, { "epoch": 0.6734348561759729, "grad_norm": 0.802263753656373, "learning_rate": 2.5455512027384717e-06, "loss": 0.1192, "step": 23084 }, { "epoch": 0.6734640294066165, "grad_norm": 1.0040014999455318, "learning_rate": 2.5451396188082853e-06, "loss": 0.1215, "step": 23085 }, { "epoch": 0.6734932026372601, "grad_norm": 0.9112107723131383, "learning_rate": 2.5447280567945077e-06, "loss": 0.1028, "step": 23086 }, { "epoch": 0.6735223758679036, "grad_norm": 0.9070384194223957, "learning_rate": 2.544316516700809e-06, "loss": 0.1201, "step": 23087 }, { "epoch": 0.6735515490985472, "grad_norm": 0.8828354313074755, "learning_rate": 2.543904998530868e-06, "loss": 0.1157, "step": 23088 }, { "epoch": 0.6735807223291908, "grad_norm": 0.9397248081116105, "learning_rate": 2.5434935022883557e-06, "loss": 0.0965, "step": 23089 }, { "epoch": 0.6736098955598343, "grad_norm": 0.8354630477146472, "learning_rate": 2.5430820279769487e-06, "loss": 0.1424, "step": 23090 }, { "epoch": 0.6736390687904779, "grad_norm": 0.9887642570087085, "learning_rate": 2.5426705756003167e-06, "loss": 0.1349, "step": 23091 }, { "epoch": 0.6736682420211214, "grad_norm": 0.9275770073484282, "learning_rate": 2.542259145162137e-06, "loss": 0.1225, "step": 23092 }, { "epoch": 0.673697415251765, "grad_norm": 0.792629227694484, "learning_rate": 2.5418477366660808e-06, "loss": 0.1283, "step": 23093 }, { "epoch": 0.6737265884824085, "grad_norm": 0.8784732543703531, "learning_rate": 2.54143635011582e-06, "loss": 0.1232, "step": 23094 }, { "epoch": 0.6737557617130521, "grad_norm": 0.9103302091094058, "learning_rate": 2.541024985515028e-06, "loss": 0.1427, "step": 23095 }, { "epoch": 0.6737849349436956, "grad_norm": 0.8114707191285135, "learning_rate": 2.54061364286738e-06, "loss": 0.1133, "step": 23096 }, { "epoch": 0.6738141081743392, "grad_norm": 0.8646502927639343, "learning_rate": 2.540202322176544e-06, "loss": 0.1179, "step": 23097 }, { "epoch": 0.6738432814049828, "grad_norm": 1.0437988076601405, "learning_rate": 2.539791023446197e-06, "loss": 0.1272, "step": 23098 }, { "epoch": 0.6738724546356264, "grad_norm": 0.8614424288493707, "learning_rate": 2.5393797466800084e-06, "loss": 0.1208, "step": 23099 }, { "epoch": 0.67390162786627, "grad_norm": 1.023278228041395, "learning_rate": 2.5389684918816477e-06, "loss": 0.1463, "step": 23100 }, { "epoch": 0.6739308010969135, "grad_norm": 1.0339157199520221, "learning_rate": 2.5385572590547893e-06, "loss": 0.1144, "step": 23101 }, { "epoch": 0.673959974327557, "grad_norm": 1.023955827056682, "learning_rate": 2.538146048203105e-06, "loss": 0.1308, "step": 23102 }, { "epoch": 0.6739891475582006, "grad_norm": 0.8152534885073576, "learning_rate": 2.5377348593302664e-06, "loss": 0.1205, "step": 23103 }, { "epoch": 0.6740183207888442, "grad_norm": 0.7159288195277267, "learning_rate": 2.5373236924399402e-06, "loss": 0.1116, "step": 23104 }, { "epoch": 0.6740474940194877, "grad_norm": 0.8480227129254346, "learning_rate": 2.5369125475358027e-06, "loss": 0.121, "step": 23105 }, { "epoch": 0.6740766672501313, "grad_norm": 0.8506173762262046, "learning_rate": 2.536501424621522e-06, "loss": 0.1289, "step": 23106 }, { "epoch": 0.6741058404807748, "grad_norm": 0.7761814065000823, "learning_rate": 2.5360903237007647e-06, "loss": 0.1193, "step": 23107 }, { "epoch": 0.6741350137114184, "grad_norm": 0.7300850631860111, "learning_rate": 2.5356792447772084e-06, "loss": 0.1126, "step": 23108 }, { "epoch": 0.6741641869420619, "grad_norm": 0.7781441208719955, "learning_rate": 2.5352681878545195e-06, "loss": 0.1141, "step": 23109 }, { "epoch": 0.6741933601727055, "grad_norm": 0.8628981366748527, "learning_rate": 2.5348571529363658e-06, "loss": 0.1056, "step": 23110 }, { "epoch": 0.674222533403349, "grad_norm": 0.8131652804182944, "learning_rate": 2.534446140026421e-06, "loss": 0.1397, "step": 23111 }, { "epoch": 0.6742517066339927, "grad_norm": 1.1178345651148511, "learning_rate": 2.5340351491283527e-06, "loss": 0.141, "step": 23112 }, { "epoch": 0.6742808798646363, "grad_norm": 1.02264296395628, "learning_rate": 2.5336241802458283e-06, "loss": 0.1127, "step": 23113 }, { "epoch": 0.6743100530952798, "grad_norm": 0.8421944935825467, "learning_rate": 2.5332132333825177e-06, "loss": 0.1017, "step": 23114 }, { "epoch": 0.6743392263259234, "grad_norm": 0.9686863631015941, "learning_rate": 2.5328023085420926e-06, "loss": 0.1251, "step": 23115 }, { "epoch": 0.6743683995565669, "grad_norm": 0.9521066364827775, "learning_rate": 2.5323914057282194e-06, "loss": 0.1168, "step": 23116 }, { "epoch": 0.6743975727872105, "grad_norm": 0.9256275405216868, "learning_rate": 2.531980524944565e-06, "loss": 0.1321, "step": 23117 }, { "epoch": 0.674426746017854, "grad_norm": 1.1994132199038094, "learning_rate": 2.531569666194802e-06, "loss": 0.1153, "step": 23118 }, { "epoch": 0.6744559192484976, "grad_norm": 0.9828038265013251, "learning_rate": 2.531158829482593e-06, "loss": 0.1283, "step": 23119 }, { "epoch": 0.6744850924791411, "grad_norm": 1.0409838458988905, "learning_rate": 2.5307480148116105e-06, "loss": 0.1171, "step": 23120 }, { "epoch": 0.6745142657097847, "grad_norm": 1.014336557559262, "learning_rate": 2.5303372221855183e-06, "loss": 0.1108, "step": 23121 }, { "epoch": 0.6745434389404282, "grad_norm": 0.7821874423805307, "learning_rate": 2.529926451607988e-06, "loss": 0.1219, "step": 23122 }, { "epoch": 0.6745726121710718, "grad_norm": 0.7416077641116059, "learning_rate": 2.5295157030826844e-06, "loss": 0.1171, "step": 23123 }, { "epoch": 0.6746017854017153, "grad_norm": 2.0238546409117313, "learning_rate": 2.529104976613273e-06, "loss": 0.1265, "step": 23124 }, { "epoch": 0.674630958632359, "grad_norm": 0.8823217553091093, "learning_rate": 2.5286942722034226e-06, "loss": 0.1006, "step": 23125 }, { "epoch": 0.6746601318630026, "grad_norm": 1.0362855400209419, "learning_rate": 2.5282835898568014e-06, "loss": 0.1182, "step": 23126 }, { "epoch": 0.6746893050936461, "grad_norm": 0.8452556499659798, "learning_rate": 2.5278729295770733e-06, "loss": 0.1147, "step": 23127 }, { "epoch": 0.6747184783242897, "grad_norm": 1.0348333314274814, "learning_rate": 2.5274622913679063e-06, "loss": 0.107, "step": 23128 }, { "epoch": 0.6747476515549332, "grad_norm": 0.7822383658747245, "learning_rate": 2.5270516752329667e-06, "loss": 0.1183, "step": 23129 }, { "epoch": 0.6747768247855768, "grad_norm": 1.1259794431640837, "learning_rate": 2.526641081175917e-06, "loss": 0.1284, "step": 23130 }, { "epoch": 0.6748059980162203, "grad_norm": 1.5440868563599224, "learning_rate": 2.5262305092004246e-06, "loss": 0.1159, "step": 23131 }, { "epoch": 0.6748351712468639, "grad_norm": 0.9044928359421482, "learning_rate": 2.5258199593101586e-06, "loss": 0.1301, "step": 23132 }, { "epoch": 0.6748643444775074, "grad_norm": 0.7338884647055344, "learning_rate": 2.5254094315087814e-06, "loss": 0.1256, "step": 23133 }, { "epoch": 0.674893517708151, "grad_norm": 1.3777881612596148, "learning_rate": 2.524998925799956e-06, "loss": 0.1367, "step": 23134 }, { "epoch": 0.6749226909387945, "grad_norm": 1.0969808987305798, "learning_rate": 2.5245884421873507e-06, "loss": 0.1078, "step": 23135 }, { "epoch": 0.6749518641694381, "grad_norm": 0.8023092456942991, "learning_rate": 2.524177980674629e-06, "loss": 0.1297, "step": 23136 }, { "epoch": 0.6749810374000816, "grad_norm": 0.9949631306497734, "learning_rate": 2.523767541265452e-06, "loss": 0.1406, "step": 23137 }, { "epoch": 0.6750102106307252, "grad_norm": 1.4188378041600502, "learning_rate": 2.523357123963491e-06, "loss": 0.1247, "step": 23138 }, { "epoch": 0.6750393838613689, "grad_norm": 0.7450129807835786, "learning_rate": 2.5229467287724065e-06, "loss": 0.1013, "step": 23139 }, { "epoch": 0.6750685570920124, "grad_norm": 0.906348749249676, "learning_rate": 2.5225363556958594e-06, "loss": 0.1029, "step": 23140 }, { "epoch": 0.675097730322656, "grad_norm": 0.8819105263323815, "learning_rate": 2.522126004737519e-06, "loss": 0.1214, "step": 23141 }, { "epoch": 0.6751269035532995, "grad_norm": 0.9888744549332726, "learning_rate": 2.521715675901046e-06, "loss": 0.1137, "step": 23142 }, { "epoch": 0.6751560767839431, "grad_norm": 1.0263899287360436, "learning_rate": 2.521305369190102e-06, "loss": 0.1259, "step": 23143 }, { "epoch": 0.6751852500145866, "grad_norm": 0.7178879697205601, "learning_rate": 2.520895084608351e-06, "loss": 0.1275, "step": 23144 }, { "epoch": 0.6752144232452302, "grad_norm": 1.0230194022667145, "learning_rate": 2.5204848221594604e-06, "loss": 0.1074, "step": 23145 }, { "epoch": 0.6752435964758737, "grad_norm": 0.7791910482511467, "learning_rate": 2.5200745818470883e-06, "loss": 0.104, "step": 23146 }, { "epoch": 0.6752727697065173, "grad_norm": 0.9135505313843247, "learning_rate": 2.519664363674897e-06, "loss": 0.1194, "step": 23147 }, { "epoch": 0.6753019429371608, "grad_norm": 0.9800595424980734, "learning_rate": 2.519254167646552e-06, "loss": 0.1422, "step": 23148 }, { "epoch": 0.6753311161678044, "grad_norm": 0.9179820691529096, "learning_rate": 2.518843993765711e-06, "loss": 0.1163, "step": 23149 }, { "epoch": 0.6753602893984479, "grad_norm": 0.8089301134893563, "learning_rate": 2.518433842036041e-06, "loss": 0.1203, "step": 23150 }, { "epoch": 0.6753894626290915, "grad_norm": 0.9199195743294921, "learning_rate": 2.5180237124611996e-06, "loss": 0.1134, "step": 23151 }, { "epoch": 0.6754186358597352, "grad_norm": 1.1399409120340327, "learning_rate": 2.517613605044851e-06, "loss": 0.1124, "step": 23152 }, { "epoch": 0.6754478090903787, "grad_norm": 0.9247520371924938, "learning_rate": 2.5172035197906565e-06, "loss": 0.1116, "step": 23153 }, { "epoch": 0.6754769823210223, "grad_norm": 1.4068196431459592, "learning_rate": 2.516793456702274e-06, "loss": 0.1515, "step": 23154 }, { "epoch": 0.6755061555516658, "grad_norm": 0.8356682916380015, "learning_rate": 2.516383415783367e-06, "loss": 0.1261, "step": 23155 }, { "epoch": 0.6755353287823094, "grad_norm": 0.8440280544594345, "learning_rate": 2.5159733970375978e-06, "loss": 0.1133, "step": 23156 }, { "epoch": 0.6755645020129529, "grad_norm": 0.7475092168729427, "learning_rate": 2.5155634004686226e-06, "loss": 0.1201, "step": 23157 }, { "epoch": 0.6755936752435965, "grad_norm": 0.8433739199698794, "learning_rate": 2.5151534260801068e-06, "loss": 0.1226, "step": 23158 }, { "epoch": 0.67562284847424, "grad_norm": 0.8682621054044367, "learning_rate": 2.5147434738757074e-06, "loss": 0.1373, "step": 23159 }, { "epoch": 0.6756520217048836, "grad_norm": 0.8544899021696603, "learning_rate": 2.5143335438590837e-06, "loss": 0.1284, "step": 23160 }, { "epoch": 0.6756811949355271, "grad_norm": 0.7681728523315022, "learning_rate": 2.513923636033897e-06, "loss": 0.1035, "step": 23161 }, { "epoch": 0.6757103681661707, "grad_norm": 0.7930441141389791, "learning_rate": 2.5135137504038076e-06, "loss": 0.1314, "step": 23162 }, { "epoch": 0.6757395413968142, "grad_norm": 0.8554197424505521, "learning_rate": 2.5131038869724743e-06, "loss": 0.104, "step": 23163 }, { "epoch": 0.6757687146274578, "grad_norm": 1.0880113516953294, "learning_rate": 2.5126940457435543e-06, "loss": 0.1251, "step": 23164 }, { "epoch": 0.6757978878581014, "grad_norm": 0.8938954356306851, "learning_rate": 2.5122842267207092e-06, "loss": 0.0999, "step": 23165 }, { "epoch": 0.675827061088745, "grad_norm": 0.8132571370356921, "learning_rate": 2.511874429907597e-06, "loss": 0.1107, "step": 23166 }, { "epoch": 0.6758562343193886, "grad_norm": 1.1041847456088503, "learning_rate": 2.5114646553078726e-06, "loss": 0.1412, "step": 23167 }, { "epoch": 0.6758854075500321, "grad_norm": 0.9164250587438241, "learning_rate": 2.5110549029252006e-06, "loss": 0.117, "step": 23168 }, { "epoch": 0.6759145807806757, "grad_norm": 0.7557817034171268, "learning_rate": 2.5106451727632374e-06, "loss": 0.1219, "step": 23169 }, { "epoch": 0.6759437540113192, "grad_norm": 0.8457012666065661, "learning_rate": 2.5102354648256373e-06, "loss": 0.1018, "step": 23170 }, { "epoch": 0.6759729272419628, "grad_norm": 1.0266255610670945, "learning_rate": 2.5098257791160623e-06, "loss": 0.1123, "step": 23171 }, { "epoch": 0.6760021004726063, "grad_norm": 0.7980441916870344, "learning_rate": 2.509416115638169e-06, "loss": 0.1038, "step": 23172 }, { "epoch": 0.6760312737032499, "grad_norm": 0.8657842501474882, "learning_rate": 2.509006474395612e-06, "loss": 0.1331, "step": 23173 }, { "epoch": 0.6760604469338934, "grad_norm": 0.8752634771948503, "learning_rate": 2.5085968553920498e-06, "loss": 0.1236, "step": 23174 }, { "epoch": 0.676089620164537, "grad_norm": 1.3284849313228773, "learning_rate": 2.508187258631143e-06, "loss": 0.1306, "step": 23175 }, { "epoch": 0.6761187933951806, "grad_norm": 0.7788279697705117, "learning_rate": 2.507777684116545e-06, "loss": 0.118, "step": 23176 }, { "epoch": 0.6761479666258241, "grad_norm": 0.8955701524584327, "learning_rate": 2.5073681318519106e-06, "loss": 0.1276, "step": 23177 }, { "epoch": 0.6761771398564677, "grad_norm": 0.8405903324101602, "learning_rate": 2.506958601840901e-06, "loss": 0.1032, "step": 23178 }, { "epoch": 0.6762063130871113, "grad_norm": 0.8614963663971591, "learning_rate": 2.5065490940871674e-06, "loss": 0.109, "step": 23179 }, { "epoch": 0.6762354863177549, "grad_norm": 0.7247493205722442, "learning_rate": 2.50613960859437e-06, "loss": 0.1139, "step": 23180 }, { "epoch": 0.6762646595483984, "grad_norm": 0.94560884041923, "learning_rate": 2.505730145366162e-06, "loss": 0.1227, "step": 23181 }, { "epoch": 0.676293832779042, "grad_norm": 0.6935816823347132, "learning_rate": 2.505320704406201e-06, "loss": 0.0908, "step": 23182 }, { "epoch": 0.6763230060096855, "grad_norm": 0.992465071574614, "learning_rate": 2.5049112857181413e-06, "loss": 0.1393, "step": 23183 }, { "epoch": 0.6763521792403291, "grad_norm": 0.7836895910781367, "learning_rate": 2.504501889305636e-06, "loss": 0.1205, "step": 23184 }, { "epoch": 0.6763813524709726, "grad_norm": 0.8185898009347492, "learning_rate": 2.5040925151723428e-06, "loss": 0.1215, "step": 23185 }, { "epoch": 0.6764105257016162, "grad_norm": 0.8721580626868752, "learning_rate": 2.5036831633219173e-06, "loss": 0.1138, "step": 23186 }, { "epoch": 0.6764396989322597, "grad_norm": 0.8459028226000596, "learning_rate": 2.5032738337580107e-06, "loss": 0.1358, "step": 23187 }, { "epoch": 0.6764688721629033, "grad_norm": 0.8127755401220963, "learning_rate": 2.502864526484281e-06, "loss": 0.1182, "step": 23188 }, { "epoch": 0.6764980453935469, "grad_norm": 0.6487232810150235, "learning_rate": 2.5024552415043805e-06, "loss": 0.1226, "step": 23189 }, { "epoch": 0.6765272186241904, "grad_norm": 0.7757210954520243, "learning_rate": 2.502045978821962e-06, "loss": 0.1428, "step": 23190 }, { "epoch": 0.676556391854834, "grad_norm": 1.1467945440571865, "learning_rate": 2.5016367384406803e-06, "loss": 0.1327, "step": 23191 }, { "epoch": 0.6765855650854775, "grad_norm": 1.2988519775489853, "learning_rate": 2.5012275203641917e-06, "loss": 0.1091, "step": 23192 }, { "epoch": 0.6766147383161212, "grad_norm": 0.8683144536525187, "learning_rate": 2.500818324596147e-06, "loss": 0.1483, "step": 23193 }, { "epoch": 0.6766439115467647, "grad_norm": 0.7776973811963013, "learning_rate": 2.500409151140198e-06, "loss": 0.129, "step": 23194 }, { "epoch": 0.6766730847774083, "grad_norm": 1.0010403591909511, "learning_rate": 2.5000000000000015e-06, "loss": 0.116, "step": 23195 }, { "epoch": 0.6767022580080518, "grad_norm": 0.7641616983467394, "learning_rate": 2.4995908711792057e-06, "loss": 0.1117, "step": 23196 }, { "epoch": 0.6767314312386954, "grad_norm": 0.8650166714865422, "learning_rate": 2.499181764681466e-06, "loss": 0.1278, "step": 23197 }, { "epoch": 0.6767606044693389, "grad_norm": 0.7524820034499831, "learning_rate": 2.498772680510436e-06, "loss": 0.1183, "step": 23198 }, { "epoch": 0.6767897776999825, "grad_norm": 0.9294925244917861, "learning_rate": 2.498363618669767e-06, "loss": 0.1315, "step": 23199 }, { "epoch": 0.676818950930626, "grad_norm": 0.8821962518874622, "learning_rate": 2.497954579163108e-06, "loss": 0.1318, "step": 23200 }, { "epoch": 0.6768481241612696, "grad_norm": 0.8173712125778281, "learning_rate": 2.4975455619941158e-06, "loss": 0.1093, "step": 23201 }, { "epoch": 0.6768772973919132, "grad_norm": 0.8654358545193849, "learning_rate": 2.4971365671664373e-06, "loss": 0.1112, "step": 23202 }, { "epoch": 0.6769064706225567, "grad_norm": 1.0899199397860233, "learning_rate": 2.4967275946837276e-06, "loss": 0.1514, "step": 23203 }, { "epoch": 0.6769356438532003, "grad_norm": 0.901102202281327, "learning_rate": 2.496318644549635e-06, "loss": 0.1279, "step": 23204 }, { "epoch": 0.6769648170838438, "grad_norm": 0.8632830006164858, "learning_rate": 2.4959097167678135e-06, "loss": 0.1291, "step": 23205 }, { "epoch": 0.6769939903144875, "grad_norm": 1.075004156894414, "learning_rate": 2.495500811341912e-06, "loss": 0.1289, "step": 23206 }, { "epoch": 0.677023163545131, "grad_norm": 1.0066407992973547, "learning_rate": 2.4950919282755796e-06, "loss": 0.1234, "step": 23207 }, { "epoch": 0.6770523367757746, "grad_norm": 1.9631922248190352, "learning_rate": 2.4946830675724694e-06, "loss": 0.1149, "step": 23208 }, { "epoch": 0.6770815100064181, "grad_norm": 1.09266457228653, "learning_rate": 2.4942742292362316e-06, "loss": 0.1295, "step": 23209 }, { "epoch": 0.6771106832370617, "grad_norm": 1.2044147096693798, "learning_rate": 2.4938654132705154e-06, "loss": 0.1139, "step": 23210 }, { "epoch": 0.6771398564677052, "grad_norm": 0.6787450334175764, "learning_rate": 2.4934566196789687e-06, "loss": 0.1176, "step": 23211 }, { "epoch": 0.6771690296983488, "grad_norm": 1.0005079052068275, "learning_rate": 2.4930478484652447e-06, "loss": 0.1224, "step": 23212 }, { "epoch": 0.6771982029289924, "grad_norm": 0.9564861776070009, "learning_rate": 2.4926390996329912e-06, "loss": 0.1349, "step": 23213 }, { "epoch": 0.6772273761596359, "grad_norm": 0.9929649457165906, "learning_rate": 2.492230373185854e-06, "loss": 0.115, "step": 23214 }, { "epoch": 0.6772565493902795, "grad_norm": 0.9268754442125966, "learning_rate": 2.4918216691274888e-06, "loss": 0.1324, "step": 23215 }, { "epoch": 0.677285722620923, "grad_norm": 1.1756847446121403, "learning_rate": 2.4914129874615404e-06, "loss": 0.1494, "step": 23216 }, { "epoch": 0.6773148958515666, "grad_norm": 1.383475103297312, "learning_rate": 2.491004328191657e-06, "loss": 0.1198, "step": 23217 }, { "epoch": 0.6773440690822101, "grad_norm": 1.0030078487186729, "learning_rate": 2.4905956913214897e-06, "loss": 0.1442, "step": 23218 }, { "epoch": 0.6773732423128537, "grad_norm": 0.7002481196687629, "learning_rate": 2.4901870768546842e-06, "loss": 0.117, "step": 23219 }, { "epoch": 0.6774024155434973, "grad_norm": 0.97035648522706, "learning_rate": 2.4897784847948885e-06, "loss": 0.1112, "step": 23220 }, { "epoch": 0.6774315887741409, "grad_norm": 0.9056461538362122, "learning_rate": 2.4893699151457507e-06, "loss": 0.1136, "step": 23221 }, { "epoch": 0.6774607620047844, "grad_norm": 0.9779488105303187, "learning_rate": 2.4889613679109208e-06, "loss": 0.1217, "step": 23222 }, { "epoch": 0.677489935235428, "grad_norm": 0.7770129971333952, "learning_rate": 2.4885528430940447e-06, "loss": 0.1272, "step": 23223 }, { "epoch": 0.6775191084660716, "grad_norm": 0.9252881197770451, "learning_rate": 2.488144340698767e-06, "loss": 0.1407, "step": 23224 }, { "epoch": 0.6775482816967151, "grad_norm": 0.9574349860481227, "learning_rate": 2.4877358607287393e-06, "loss": 0.1341, "step": 23225 }, { "epoch": 0.6775774549273587, "grad_norm": 1.138847368745896, "learning_rate": 2.4873274031876045e-06, "loss": 0.1203, "step": 23226 }, { "epoch": 0.6776066281580022, "grad_norm": 1.0141871485961593, "learning_rate": 2.48691896807901e-06, "loss": 0.1262, "step": 23227 }, { "epoch": 0.6776358013886458, "grad_norm": 1.0605770219692743, "learning_rate": 2.4865105554066056e-06, "loss": 0.1182, "step": 23228 }, { "epoch": 0.6776649746192893, "grad_norm": 0.7354830297829584, "learning_rate": 2.4861021651740343e-06, "loss": 0.1202, "step": 23229 }, { "epoch": 0.6776941478499329, "grad_norm": 0.7369375301099033, "learning_rate": 2.485693797384941e-06, "loss": 0.1203, "step": 23230 }, { "epoch": 0.6777233210805764, "grad_norm": 1.1670457478783296, "learning_rate": 2.4852854520429754e-06, "loss": 0.1199, "step": 23231 }, { "epoch": 0.67775249431122, "grad_norm": 0.850800968871462, "learning_rate": 2.484877129151779e-06, "loss": 0.1025, "step": 23232 }, { "epoch": 0.6777816675418635, "grad_norm": 0.7398335044777398, "learning_rate": 2.4844688287150014e-06, "loss": 0.1115, "step": 23233 }, { "epoch": 0.6778108407725072, "grad_norm": 0.7725895973816064, "learning_rate": 2.484060550736283e-06, "loss": 0.1315, "step": 23234 }, { "epoch": 0.6778400140031507, "grad_norm": 1.172249753279124, "learning_rate": 2.4836522952192743e-06, "loss": 0.1013, "step": 23235 }, { "epoch": 0.6778691872337943, "grad_norm": 0.8574820241241642, "learning_rate": 2.483244062167616e-06, "loss": 0.1296, "step": 23236 }, { "epoch": 0.6778983604644379, "grad_norm": 0.7456131632183346, "learning_rate": 2.4828358515849532e-06, "loss": 0.1089, "step": 23237 }, { "epoch": 0.6779275336950814, "grad_norm": 1.4838246913619229, "learning_rate": 2.48242766347493e-06, "loss": 0.1022, "step": 23238 }, { "epoch": 0.677956706925725, "grad_norm": 0.9926797533852754, "learning_rate": 2.4820194978411944e-06, "loss": 0.1247, "step": 23239 }, { "epoch": 0.6779858801563685, "grad_norm": 0.8703493259399564, "learning_rate": 2.481611354687387e-06, "loss": 0.1317, "step": 23240 }, { "epoch": 0.6780150533870121, "grad_norm": 0.7748205997640173, "learning_rate": 2.4812032340171504e-06, "loss": 0.1159, "step": 23241 }, { "epoch": 0.6780442266176556, "grad_norm": 0.8303628657101826, "learning_rate": 2.480795135834132e-06, "loss": 0.12, "step": 23242 }, { "epoch": 0.6780733998482992, "grad_norm": 1.0797060426992753, "learning_rate": 2.480387060141974e-06, "loss": 0.1258, "step": 23243 }, { "epoch": 0.6781025730789427, "grad_norm": 0.7337524852911939, "learning_rate": 2.479979006944314e-06, "loss": 0.1139, "step": 23244 }, { "epoch": 0.6781317463095863, "grad_norm": 0.9822969250196009, "learning_rate": 2.479570976244804e-06, "loss": 0.1213, "step": 23245 }, { "epoch": 0.6781609195402298, "grad_norm": 0.9337499917483004, "learning_rate": 2.4791629680470826e-06, "loss": 0.1207, "step": 23246 }, { "epoch": 0.6781900927708735, "grad_norm": 0.7617695585390399, "learning_rate": 2.4787549823547906e-06, "loss": 0.108, "step": 23247 }, { "epoch": 0.678219266001517, "grad_norm": 0.8697977727272415, "learning_rate": 2.478347019171574e-06, "loss": 0.1329, "step": 23248 }, { "epoch": 0.6782484392321606, "grad_norm": 1.1181899706718783, "learning_rate": 2.477939078501074e-06, "loss": 0.1184, "step": 23249 }, { "epoch": 0.6782776124628042, "grad_norm": 0.9719237304218606, "learning_rate": 2.4775311603469294e-06, "loss": 0.1056, "step": 23250 }, { "epoch": 0.6783067856934477, "grad_norm": 0.74337342749288, "learning_rate": 2.4771232647127842e-06, "loss": 0.1435, "step": 23251 }, { "epoch": 0.6783359589240913, "grad_norm": 0.7109216846044918, "learning_rate": 2.4767153916022823e-06, "loss": 0.1187, "step": 23252 }, { "epoch": 0.6783651321547348, "grad_norm": 1.506068787002971, "learning_rate": 2.476307541019063e-06, "loss": 0.1273, "step": 23253 }, { "epoch": 0.6783943053853784, "grad_norm": 0.8728675770934057, "learning_rate": 2.4758997129667654e-06, "loss": 0.1036, "step": 23254 }, { "epoch": 0.6784234786160219, "grad_norm": 0.9665068777593661, "learning_rate": 2.4754919074490353e-06, "loss": 0.1018, "step": 23255 }, { "epoch": 0.6784526518466655, "grad_norm": 0.9857849818076866, "learning_rate": 2.4750841244695076e-06, "loss": 0.1378, "step": 23256 }, { "epoch": 0.678481825077309, "grad_norm": 1.0120718578592216, "learning_rate": 2.4746763640318273e-06, "loss": 0.1201, "step": 23257 }, { "epoch": 0.6785109983079526, "grad_norm": 1.1232237256642754, "learning_rate": 2.474268626139635e-06, "loss": 0.1418, "step": 23258 }, { "epoch": 0.6785401715385961, "grad_norm": 1.0372873118648187, "learning_rate": 2.47386091079657e-06, "loss": 0.1141, "step": 23259 }, { "epoch": 0.6785693447692397, "grad_norm": 0.8610873900766065, "learning_rate": 2.4734532180062694e-06, "loss": 0.1058, "step": 23260 }, { "epoch": 0.6785985179998834, "grad_norm": 0.7117429116053213, "learning_rate": 2.4730455477723768e-06, "loss": 0.094, "step": 23261 }, { "epoch": 0.6786276912305269, "grad_norm": 0.7874848766282998, "learning_rate": 2.472637900098529e-06, "loss": 0.1319, "step": 23262 }, { "epoch": 0.6786568644611705, "grad_norm": 1.1463798161372984, "learning_rate": 2.472230274988368e-06, "loss": 0.1305, "step": 23263 }, { "epoch": 0.678686037691814, "grad_norm": 0.9051252263064438, "learning_rate": 2.4718226724455307e-06, "loss": 0.1164, "step": 23264 }, { "epoch": 0.6787152109224576, "grad_norm": 0.8160666109047723, "learning_rate": 2.4714150924736586e-06, "loss": 0.1357, "step": 23265 }, { "epoch": 0.6787443841531011, "grad_norm": 1.2440188524743452, "learning_rate": 2.4710075350763884e-06, "loss": 0.1135, "step": 23266 }, { "epoch": 0.6787735573837447, "grad_norm": 1.0329275208505135, "learning_rate": 2.4706000002573575e-06, "loss": 0.1193, "step": 23267 }, { "epoch": 0.6788027306143882, "grad_norm": 0.8850802153830637, "learning_rate": 2.4701924880202068e-06, "loss": 0.1219, "step": 23268 }, { "epoch": 0.6788319038450318, "grad_norm": 1.5421189082104674, "learning_rate": 2.4697849983685746e-06, "loss": 0.1263, "step": 23269 }, { "epoch": 0.6788610770756753, "grad_norm": 0.8565440088863334, "learning_rate": 2.469377531306098e-06, "loss": 0.1098, "step": 23270 }, { "epoch": 0.6788902503063189, "grad_norm": 0.8226132125700635, "learning_rate": 2.4689700868364134e-06, "loss": 0.1082, "step": 23271 }, { "epoch": 0.6789194235369624, "grad_norm": 0.7672631597649043, "learning_rate": 2.4685626649631612e-06, "loss": 0.1036, "step": 23272 }, { "epoch": 0.678948596767606, "grad_norm": 0.8840749501854094, "learning_rate": 2.468155265689977e-06, "loss": 0.1105, "step": 23273 }, { "epoch": 0.6789777699982497, "grad_norm": 1.033047072091153, "learning_rate": 2.467747889020495e-06, "loss": 0.1145, "step": 23274 }, { "epoch": 0.6790069432288932, "grad_norm": 1.0287247916203313, "learning_rate": 2.4673405349583584e-06, "loss": 0.1327, "step": 23275 }, { "epoch": 0.6790361164595368, "grad_norm": 0.7547137343987765, "learning_rate": 2.4669332035072015e-06, "loss": 0.1206, "step": 23276 }, { "epoch": 0.6790652896901803, "grad_norm": 0.8254254707409097, "learning_rate": 2.4665258946706584e-06, "loss": 0.1152, "step": 23277 }, { "epoch": 0.6790944629208239, "grad_norm": 1.1018030493050908, "learning_rate": 2.4661186084523687e-06, "loss": 0.1147, "step": 23278 }, { "epoch": 0.6791236361514674, "grad_norm": 0.9821404654476183, "learning_rate": 2.465711344855967e-06, "loss": 0.1059, "step": 23279 }, { "epoch": 0.679152809382111, "grad_norm": 0.7834246814515112, "learning_rate": 2.4653041038850885e-06, "loss": 0.1076, "step": 23280 }, { "epoch": 0.6791819826127545, "grad_norm": 0.9638182775170588, "learning_rate": 2.464896885543369e-06, "loss": 0.1245, "step": 23281 }, { "epoch": 0.6792111558433981, "grad_norm": 0.9872090026624819, "learning_rate": 2.4644896898344474e-06, "loss": 0.1158, "step": 23282 }, { "epoch": 0.6792403290740416, "grad_norm": 1.0236346161098253, "learning_rate": 2.4640825167619565e-06, "loss": 0.1208, "step": 23283 }, { "epoch": 0.6792695023046852, "grad_norm": 0.8528396822851665, "learning_rate": 2.4636753663295293e-06, "loss": 0.1361, "step": 23284 }, { "epoch": 0.6792986755353287, "grad_norm": 1.0815056572565798, "learning_rate": 2.463268238540805e-06, "loss": 0.135, "step": 23285 }, { "epoch": 0.6793278487659723, "grad_norm": 0.8551548620165316, "learning_rate": 2.4628611333994147e-06, "loss": 0.1169, "step": 23286 }, { "epoch": 0.6793570219966159, "grad_norm": 1.1690525290461162, "learning_rate": 2.462454050908994e-06, "loss": 0.1496, "step": 23287 }, { "epoch": 0.6793861952272595, "grad_norm": 1.056309387325001, "learning_rate": 2.4620469910731805e-06, "loss": 0.1404, "step": 23288 }, { "epoch": 0.6794153684579031, "grad_norm": 0.8149182654688826, "learning_rate": 2.461639953895605e-06, "loss": 0.0967, "step": 23289 }, { "epoch": 0.6794445416885466, "grad_norm": 0.7322182970506431, "learning_rate": 2.4612329393799e-06, "loss": 0.1298, "step": 23290 }, { "epoch": 0.6794737149191902, "grad_norm": 0.9476505209053838, "learning_rate": 2.460825947529703e-06, "loss": 0.142, "step": 23291 }, { "epoch": 0.6795028881498337, "grad_norm": 0.9886733701256886, "learning_rate": 2.4604189783486445e-06, "loss": 0.1106, "step": 23292 }, { "epoch": 0.6795320613804773, "grad_norm": 0.8564438238412084, "learning_rate": 2.4600120318403607e-06, "loss": 0.1278, "step": 23293 }, { "epoch": 0.6795612346111208, "grad_norm": 0.8189582019618686, "learning_rate": 2.4596051080084814e-06, "loss": 0.1189, "step": 23294 }, { "epoch": 0.6795904078417644, "grad_norm": 1.2801099694260838, "learning_rate": 2.4591982068566427e-06, "loss": 0.111, "step": 23295 }, { "epoch": 0.6796195810724079, "grad_norm": 1.0073932176205675, "learning_rate": 2.458791328388477e-06, "loss": 0.1099, "step": 23296 }, { "epoch": 0.6796487543030515, "grad_norm": 1.0406894851352735, "learning_rate": 2.4583844726076124e-06, "loss": 0.1475, "step": 23297 }, { "epoch": 0.679677927533695, "grad_norm": 0.8681268505669584, "learning_rate": 2.4579776395176853e-06, "loss": 0.1234, "step": 23298 }, { "epoch": 0.6797071007643386, "grad_norm": 0.8673681118810458, "learning_rate": 2.457570829122329e-06, "loss": 0.1345, "step": 23299 }, { "epoch": 0.6797362739949822, "grad_norm": 1.1524444999473435, "learning_rate": 2.457164041425173e-06, "loss": 0.1229, "step": 23300 }, { "epoch": 0.6797654472256258, "grad_norm": 0.7966303620764652, "learning_rate": 2.4567572764298476e-06, "loss": 0.1173, "step": 23301 }, { "epoch": 0.6797946204562694, "grad_norm": 1.0292047862244138, "learning_rate": 2.456350534139988e-06, "loss": 0.1459, "step": 23302 }, { "epoch": 0.6798237936869129, "grad_norm": 0.7958008433308223, "learning_rate": 2.4559438145592234e-06, "loss": 0.1225, "step": 23303 }, { "epoch": 0.6798529669175565, "grad_norm": 1.1017273521197222, "learning_rate": 2.4555371176911817e-06, "loss": 0.1108, "step": 23304 }, { "epoch": 0.6798821401482, "grad_norm": 0.7683430539172763, "learning_rate": 2.4551304435395007e-06, "loss": 0.0946, "step": 23305 }, { "epoch": 0.6799113133788436, "grad_norm": 0.9074795400999534, "learning_rate": 2.4547237921078077e-06, "loss": 0.1348, "step": 23306 }, { "epoch": 0.6799404866094871, "grad_norm": 0.7755105568608713, "learning_rate": 2.4543171633997314e-06, "loss": 0.1034, "step": 23307 }, { "epoch": 0.6799696598401307, "grad_norm": 0.9758079951548948, "learning_rate": 2.4539105574189052e-06, "loss": 0.1347, "step": 23308 }, { "epoch": 0.6799988330707742, "grad_norm": 0.9060779365644929, "learning_rate": 2.453503974168958e-06, "loss": 0.1168, "step": 23309 }, { "epoch": 0.6800280063014178, "grad_norm": 0.9526106057205321, "learning_rate": 2.453097413653518e-06, "loss": 0.1333, "step": 23310 }, { "epoch": 0.6800571795320614, "grad_norm": 0.7123504495795431, "learning_rate": 2.4526908758762156e-06, "loss": 0.1195, "step": 23311 }, { "epoch": 0.6800863527627049, "grad_norm": 0.9783550740959671, "learning_rate": 2.4522843608406834e-06, "loss": 0.1203, "step": 23312 }, { "epoch": 0.6801155259933485, "grad_norm": 0.7570682363192021, "learning_rate": 2.451877868550548e-06, "loss": 0.1185, "step": 23313 }, { "epoch": 0.680144699223992, "grad_norm": 0.7957456151917327, "learning_rate": 2.451471399009437e-06, "loss": 0.1077, "step": 23314 }, { "epoch": 0.6801738724546357, "grad_norm": 0.7098774031726814, "learning_rate": 2.4510649522209825e-06, "loss": 0.1129, "step": 23315 }, { "epoch": 0.6802030456852792, "grad_norm": 0.7862130562280645, "learning_rate": 2.4506585281888096e-06, "loss": 0.1103, "step": 23316 }, { "epoch": 0.6802322189159228, "grad_norm": 0.7523670724296232, "learning_rate": 2.450252126916549e-06, "loss": 0.1062, "step": 23317 }, { "epoch": 0.6802613921465663, "grad_norm": 0.8176813521525674, "learning_rate": 2.449845748407831e-06, "loss": 0.1055, "step": 23318 }, { "epoch": 0.6802905653772099, "grad_norm": 0.8821584704883535, "learning_rate": 2.4494393926662807e-06, "loss": 0.1234, "step": 23319 }, { "epoch": 0.6803197386078534, "grad_norm": 0.7341871562175372, "learning_rate": 2.4490330596955254e-06, "loss": 0.1178, "step": 23320 }, { "epoch": 0.680348911838497, "grad_norm": 0.7906863096959323, "learning_rate": 2.4486267494991956e-06, "loss": 0.1069, "step": 23321 }, { "epoch": 0.6803780850691405, "grad_norm": 0.8430909846833469, "learning_rate": 2.4482204620809154e-06, "loss": 0.1163, "step": 23322 }, { "epoch": 0.6804072582997841, "grad_norm": 0.9695821294279957, "learning_rate": 2.4478141974443148e-06, "loss": 0.1032, "step": 23323 }, { "epoch": 0.6804364315304277, "grad_norm": 0.8744977225367986, "learning_rate": 2.4474079555930186e-06, "loss": 0.126, "step": 23324 }, { "epoch": 0.6804656047610712, "grad_norm": 0.789867314354223, "learning_rate": 2.447001736530657e-06, "loss": 0.0942, "step": 23325 }, { "epoch": 0.6804947779917148, "grad_norm": 0.8290402506456502, "learning_rate": 2.446595540260854e-06, "loss": 0.1289, "step": 23326 }, { "epoch": 0.6805239512223583, "grad_norm": 0.8054387956901801, "learning_rate": 2.446189366787235e-06, "loss": 0.1369, "step": 23327 }, { "epoch": 0.680553124453002, "grad_norm": 0.902868084152141, "learning_rate": 2.445783216113427e-06, "loss": 0.1257, "step": 23328 }, { "epoch": 0.6805822976836455, "grad_norm": 0.837937168979911, "learning_rate": 2.445377088243059e-06, "loss": 0.1069, "step": 23329 }, { "epoch": 0.6806114709142891, "grad_norm": 0.7170138498137587, "learning_rate": 2.4449709831797546e-06, "loss": 0.1297, "step": 23330 }, { "epoch": 0.6806406441449326, "grad_norm": 0.7305979998787503, "learning_rate": 2.4445649009271373e-06, "loss": 0.1198, "step": 23331 }, { "epoch": 0.6806698173755762, "grad_norm": 0.8470847097306455, "learning_rate": 2.444158841488836e-06, "loss": 0.1161, "step": 23332 }, { "epoch": 0.6806989906062197, "grad_norm": 0.8936664292927026, "learning_rate": 2.4437528048684757e-06, "loss": 0.1345, "step": 23333 }, { "epoch": 0.6807281638368633, "grad_norm": 0.8879470216622817, "learning_rate": 2.4433467910696752e-06, "loss": 0.143, "step": 23334 }, { "epoch": 0.6807573370675069, "grad_norm": 0.8787725909890597, "learning_rate": 2.442940800096068e-06, "loss": 0.1298, "step": 23335 }, { "epoch": 0.6807865102981504, "grad_norm": 0.8491824379892106, "learning_rate": 2.4425348319512753e-06, "loss": 0.1059, "step": 23336 }, { "epoch": 0.680815683528794, "grad_norm": 0.9070472465815405, "learning_rate": 2.4421288866389193e-06, "loss": 0.1427, "step": 23337 }, { "epoch": 0.6808448567594375, "grad_norm": 0.8201645344592756, "learning_rate": 2.441722964162628e-06, "loss": 0.1233, "step": 23338 }, { "epoch": 0.6808740299900811, "grad_norm": 0.673680317420867, "learning_rate": 2.441317064526023e-06, "loss": 0.0976, "step": 23339 }, { "epoch": 0.6809032032207246, "grad_norm": 0.7401891538857096, "learning_rate": 2.440911187732727e-06, "loss": 0.109, "step": 23340 }, { "epoch": 0.6809323764513682, "grad_norm": 0.8609853245740076, "learning_rate": 2.440505333786364e-06, "loss": 0.121, "step": 23341 }, { "epoch": 0.6809615496820118, "grad_norm": 0.852046250461729, "learning_rate": 2.4400995026905612e-06, "loss": 0.1045, "step": 23342 }, { "epoch": 0.6809907229126554, "grad_norm": 0.9457424621288142, "learning_rate": 2.4396936944489384e-06, "loss": 0.1028, "step": 23343 }, { "epoch": 0.6810198961432989, "grad_norm": 0.8938133279893553, "learning_rate": 2.439287909065118e-06, "loss": 0.1303, "step": 23344 }, { "epoch": 0.6810490693739425, "grad_norm": 0.9570157452775915, "learning_rate": 2.4388821465427252e-06, "loss": 0.136, "step": 23345 }, { "epoch": 0.681078242604586, "grad_norm": 0.791793052052301, "learning_rate": 2.4384764068853796e-06, "loss": 0.1152, "step": 23346 }, { "epoch": 0.6811074158352296, "grad_norm": 1.0417459373225932, "learning_rate": 2.4380706900967043e-06, "loss": 0.1149, "step": 23347 }, { "epoch": 0.6811365890658732, "grad_norm": 1.1372400470501136, "learning_rate": 2.437664996180325e-06, "loss": 0.1345, "step": 23348 }, { "epoch": 0.6811657622965167, "grad_norm": 1.0135776593769665, "learning_rate": 2.437259325139861e-06, "loss": 0.1228, "step": 23349 }, { "epoch": 0.6811949355271603, "grad_norm": 1.053747311183959, "learning_rate": 2.436853676978932e-06, "loss": 0.1064, "step": 23350 }, { "epoch": 0.6812241087578038, "grad_norm": 1.011768761569344, "learning_rate": 2.436448051701163e-06, "loss": 0.1208, "step": 23351 }, { "epoch": 0.6812532819884474, "grad_norm": 1.1087044230681251, "learning_rate": 2.436042449310172e-06, "loss": 0.1082, "step": 23352 }, { "epoch": 0.6812824552190909, "grad_norm": 1.2907939712860053, "learning_rate": 2.4356368698095838e-06, "loss": 0.1215, "step": 23353 }, { "epoch": 0.6813116284497345, "grad_norm": 0.9932325754634937, "learning_rate": 2.435231313203016e-06, "loss": 0.1348, "step": 23354 }, { "epoch": 0.6813408016803781, "grad_norm": 1.09646618133375, "learning_rate": 2.4348257794940925e-06, "loss": 0.1049, "step": 23355 }, { "epoch": 0.6813699749110217, "grad_norm": 0.9649665998018226, "learning_rate": 2.4344202686864323e-06, "loss": 0.1439, "step": 23356 }, { "epoch": 0.6813991481416652, "grad_norm": 0.7197520872665651, "learning_rate": 2.434014780783653e-06, "loss": 0.1191, "step": 23357 }, { "epoch": 0.6814283213723088, "grad_norm": 1.0539640534846044, "learning_rate": 2.4336093157893774e-06, "loss": 0.1028, "step": 23358 }, { "epoch": 0.6814574946029524, "grad_norm": 0.8796372851471373, "learning_rate": 2.433203873707227e-06, "loss": 0.1293, "step": 23359 }, { "epoch": 0.6814866678335959, "grad_norm": 0.6840906635886067, "learning_rate": 2.4327984545408203e-06, "loss": 0.1033, "step": 23360 }, { "epoch": 0.6815158410642395, "grad_norm": 0.6951351396440043, "learning_rate": 2.4323930582937737e-06, "loss": 0.1202, "step": 23361 }, { "epoch": 0.681545014294883, "grad_norm": 0.8606714595639731, "learning_rate": 2.4319876849697112e-06, "loss": 0.1125, "step": 23362 }, { "epoch": 0.6815741875255266, "grad_norm": 0.9876280544161635, "learning_rate": 2.431582334572249e-06, "loss": 0.1281, "step": 23363 }, { "epoch": 0.6816033607561701, "grad_norm": 0.8772466950283561, "learning_rate": 2.4311770071050035e-06, "loss": 0.1073, "step": 23364 }, { "epoch": 0.6816325339868137, "grad_norm": 0.6179190694491767, "learning_rate": 2.430771702571599e-06, "loss": 0.1002, "step": 23365 }, { "epoch": 0.6816617072174572, "grad_norm": 0.7588800959108316, "learning_rate": 2.4303664209756526e-06, "loss": 0.1166, "step": 23366 }, { "epoch": 0.6816908804481008, "grad_norm": 1.0027587154496904, "learning_rate": 2.42996116232078e-06, "loss": 0.1236, "step": 23367 }, { "epoch": 0.6817200536787443, "grad_norm": 0.8885632551773563, "learning_rate": 2.429555926610601e-06, "loss": 0.1244, "step": 23368 }, { "epoch": 0.681749226909388, "grad_norm": 0.6295678571108029, "learning_rate": 2.429150713848734e-06, "loss": 0.1096, "step": 23369 }, { "epoch": 0.6817784001400315, "grad_norm": 0.7654281934622027, "learning_rate": 2.428745524038794e-06, "loss": 0.1164, "step": 23370 }, { "epoch": 0.6818075733706751, "grad_norm": 0.7794154015509821, "learning_rate": 2.4283403571843994e-06, "loss": 0.1033, "step": 23371 }, { "epoch": 0.6818367466013187, "grad_norm": 0.8584749842108826, "learning_rate": 2.4279352132891705e-06, "loss": 0.1308, "step": 23372 }, { "epoch": 0.6818659198319622, "grad_norm": 0.7726199129739065, "learning_rate": 2.427530092356722e-06, "loss": 0.1203, "step": 23373 }, { "epoch": 0.6818950930626058, "grad_norm": 0.8598346354331359, "learning_rate": 2.427124994390669e-06, "loss": 0.1148, "step": 23374 }, { "epoch": 0.6819242662932493, "grad_norm": 0.7132961658887523, "learning_rate": 2.4267199193946313e-06, "loss": 0.1044, "step": 23375 }, { "epoch": 0.6819534395238929, "grad_norm": 0.8904430262186038, "learning_rate": 2.426314867372222e-06, "loss": 0.1567, "step": 23376 }, { "epoch": 0.6819826127545364, "grad_norm": 0.816336462451866, "learning_rate": 2.4259098383270596e-06, "loss": 0.1299, "step": 23377 }, { "epoch": 0.68201178598518, "grad_norm": 0.7746041696509879, "learning_rate": 2.425504832262761e-06, "loss": 0.0963, "step": 23378 }, { "epoch": 0.6820409592158235, "grad_norm": 0.8023654822258212, "learning_rate": 2.4250998491829414e-06, "loss": 0.1102, "step": 23379 }, { "epoch": 0.6820701324464671, "grad_norm": 1.3227585921290468, "learning_rate": 2.424694889091213e-06, "loss": 0.1295, "step": 23380 }, { "epoch": 0.6820993056771106, "grad_norm": 0.9538132455914216, "learning_rate": 2.4242899519911966e-06, "loss": 0.1253, "step": 23381 }, { "epoch": 0.6821284789077543, "grad_norm": 0.7747601742899379, "learning_rate": 2.423885037886502e-06, "loss": 0.108, "step": 23382 }, { "epoch": 0.6821576521383979, "grad_norm": 0.7359015483255598, "learning_rate": 2.4234801467807487e-06, "loss": 0.1091, "step": 23383 }, { "epoch": 0.6821868253690414, "grad_norm": 0.8063697329646766, "learning_rate": 2.4230752786775485e-06, "loss": 0.1227, "step": 23384 }, { "epoch": 0.682215998599685, "grad_norm": 1.1138567153813508, "learning_rate": 2.4226704335805186e-06, "loss": 0.1132, "step": 23385 }, { "epoch": 0.6822451718303285, "grad_norm": 0.7221672197416532, "learning_rate": 2.4222656114932713e-06, "loss": 0.1253, "step": 23386 }, { "epoch": 0.6822743450609721, "grad_norm": 0.6745835099362463, "learning_rate": 2.42186081241942e-06, "loss": 0.125, "step": 23387 }, { "epoch": 0.6823035182916156, "grad_norm": 0.7813051552278909, "learning_rate": 2.4214560363625794e-06, "loss": 0.1053, "step": 23388 }, { "epoch": 0.6823326915222592, "grad_norm": 0.8889806545809844, "learning_rate": 2.421051283326366e-06, "loss": 0.1277, "step": 23389 }, { "epoch": 0.6823618647529027, "grad_norm": 0.7596732140411573, "learning_rate": 2.4206465533143906e-06, "loss": 0.1298, "step": 23390 }, { "epoch": 0.6823910379835463, "grad_norm": 0.7427801645031388, "learning_rate": 2.420241846330266e-06, "loss": 0.1117, "step": 23391 }, { "epoch": 0.6824202112141898, "grad_norm": 0.7627876825591198, "learning_rate": 2.4198371623776077e-06, "loss": 0.1102, "step": 23392 }, { "epoch": 0.6824493844448334, "grad_norm": 0.9159339260042754, "learning_rate": 2.4194325014600254e-06, "loss": 0.1354, "step": 23393 }, { "epoch": 0.6824785576754769, "grad_norm": 0.8578802680763596, "learning_rate": 2.4190278635811336e-06, "loss": 0.1204, "step": 23394 }, { "epoch": 0.6825077309061205, "grad_norm": 1.0237255170517903, "learning_rate": 2.418623248744547e-06, "loss": 0.1217, "step": 23395 }, { "epoch": 0.6825369041367642, "grad_norm": 0.7705445245732929, "learning_rate": 2.4182186569538763e-06, "loss": 0.1173, "step": 23396 }, { "epoch": 0.6825660773674077, "grad_norm": 1.022543322282196, "learning_rate": 2.4178140882127304e-06, "loss": 0.1085, "step": 23397 }, { "epoch": 0.6825952505980513, "grad_norm": 0.8291564370473499, "learning_rate": 2.4174095425247263e-06, "loss": 0.105, "step": 23398 }, { "epoch": 0.6826244238286948, "grad_norm": 0.8308205991615312, "learning_rate": 2.4170050198934707e-06, "loss": 0.1126, "step": 23399 }, { "epoch": 0.6826535970593384, "grad_norm": 1.0059662857379013, "learning_rate": 2.4166005203225803e-06, "loss": 0.1048, "step": 23400 }, { "epoch": 0.6826827702899819, "grad_norm": 0.7635590561700802, "learning_rate": 2.416196043815662e-06, "loss": 0.1321, "step": 23401 }, { "epoch": 0.6827119435206255, "grad_norm": 0.91562126553842, "learning_rate": 2.4157915903763295e-06, "loss": 0.1234, "step": 23402 }, { "epoch": 0.682741116751269, "grad_norm": 0.8778772330689085, "learning_rate": 2.4153871600081936e-06, "loss": 0.0964, "step": 23403 }, { "epoch": 0.6827702899819126, "grad_norm": 0.7183699973220936, "learning_rate": 2.414982752714862e-06, "loss": 0.1201, "step": 23404 }, { "epoch": 0.6827994632125561, "grad_norm": 1.0354123592567526, "learning_rate": 2.4145783684999472e-06, "loss": 0.1278, "step": 23405 }, { "epoch": 0.6828286364431997, "grad_norm": 0.9630908241330794, "learning_rate": 2.4141740073670617e-06, "loss": 0.141, "step": 23406 }, { "epoch": 0.6828578096738432, "grad_norm": 0.7248657130182223, "learning_rate": 2.4137696693198113e-06, "loss": 0.1249, "step": 23407 }, { "epoch": 0.6828869829044868, "grad_norm": 0.7290492842727565, "learning_rate": 2.41336535436181e-06, "loss": 0.112, "step": 23408 }, { "epoch": 0.6829161561351305, "grad_norm": 0.6970817164852366, "learning_rate": 2.4129610624966654e-06, "loss": 0.1118, "step": 23409 }, { "epoch": 0.682945329365774, "grad_norm": 0.8694633343773415, "learning_rate": 2.412556793727985e-06, "loss": 0.1289, "step": 23410 }, { "epoch": 0.6829745025964176, "grad_norm": 0.810799855740774, "learning_rate": 2.4121525480593793e-06, "loss": 0.1214, "step": 23411 }, { "epoch": 0.6830036758270611, "grad_norm": 0.7721906771436191, "learning_rate": 2.41174832549446e-06, "loss": 0.1128, "step": 23412 }, { "epoch": 0.6830328490577047, "grad_norm": 0.8932376975416071, "learning_rate": 2.4113441260368335e-06, "loss": 0.1086, "step": 23413 }, { "epoch": 0.6830620222883482, "grad_norm": 0.8040576828677514, "learning_rate": 2.4109399496901074e-06, "loss": 0.1142, "step": 23414 }, { "epoch": 0.6830911955189918, "grad_norm": 1.060179750476534, "learning_rate": 2.4105357964578928e-06, "loss": 0.1019, "step": 23415 }, { "epoch": 0.6831203687496353, "grad_norm": 0.974518952914883, "learning_rate": 2.4101316663437966e-06, "loss": 0.1313, "step": 23416 }, { "epoch": 0.6831495419802789, "grad_norm": 0.9598616199303697, "learning_rate": 2.409727559351425e-06, "loss": 0.1114, "step": 23417 }, { "epoch": 0.6831787152109224, "grad_norm": 0.9963924525348814, "learning_rate": 2.4093234754843873e-06, "loss": 0.1005, "step": 23418 }, { "epoch": 0.683207888441566, "grad_norm": 1.0680388047755742, "learning_rate": 2.408919414746293e-06, "loss": 0.1324, "step": 23419 }, { "epoch": 0.6832370616722095, "grad_norm": 0.9501564668487673, "learning_rate": 2.4085153771407477e-06, "loss": 0.1359, "step": 23420 }, { "epoch": 0.6832662349028531, "grad_norm": 1.0012804483359667, "learning_rate": 2.4081113626713564e-06, "loss": 0.1265, "step": 23421 }, { "epoch": 0.6832954081334967, "grad_norm": 0.9585963243980063, "learning_rate": 2.4077073713417304e-06, "loss": 0.1282, "step": 23422 }, { "epoch": 0.6833245813641403, "grad_norm": 0.8267488349081108, "learning_rate": 2.407303403155472e-06, "loss": 0.14, "step": 23423 }, { "epoch": 0.6833537545947839, "grad_norm": 0.886117093413851, "learning_rate": 2.4068994581161898e-06, "loss": 0.1294, "step": 23424 }, { "epoch": 0.6833829278254274, "grad_norm": 0.8729574453971385, "learning_rate": 2.4064955362274924e-06, "loss": 0.1081, "step": 23425 }, { "epoch": 0.683412101056071, "grad_norm": 0.8968191632170365, "learning_rate": 2.406091637492983e-06, "loss": 0.12, "step": 23426 }, { "epoch": 0.6834412742867145, "grad_norm": 0.8399957092302629, "learning_rate": 2.4056877619162674e-06, "loss": 0.1321, "step": 23427 }, { "epoch": 0.6834704475173581, "grad_norm": 0.9490961480081912, "learning_rate": 2.4052839095009535e-06, "loss": 0.1234, "step": 23428 }, { "epoch": 0.6834996207480016, "grad_norm": 1.1315340538929324, "learning_rate": 2.404880080250643e-06, "loss": 0.1459, "step": 23429 }, { "epoch": 0.6835287939786452, "grad_norm": 0.8190062475940487, "learning_rate": 2.4044762741689464e-06, "loss": 0.1302, "step": 23430 }, { "epoch": 0.6835579672092887, "grad_norm": 1.027433233966887, "learning_rate": 2.404072491259464e-06, "loss": 0.135, "step": 23431 }, { "epoch": 0.6835871404399323, "grad_norm": 1.0991760340368277, "learning_rate": 2.403668731525804e-06, "loss": 0.0995, "step": 23432 }, { "epoch": 0.6836163136705758, "grad_norm": 1.0180612157341427, "learning_rate": 2.4032649949715703e-06, "loss": 0.1236, "step": 23433 }, { "epoch": 0.6836454869012194, "grad_norm": 0.7992345966043113, "learning_rate": 2.402861281600365e-06, "loss": 0.1304, "step": 23434 }, { "epoch": 0.683674660131863, "grad_norm": 1.0817479691060017, "learning_rate": 2.402457591415794e-06, "loss": 0.1264, "step": 23435 }, { "epoch": 0.6837038333625066, "grad_norm": 0.7583473275234861, "learning_rate": 2.402053924421463e-06, "loss": 0.1282, "step": 23436 }, { "epoch": 0.6837330065931502, "grad_norm": 0.8215215240016145, "learning_rate": 2.401650280620973e-06, "loss": 0.1194, "step": 23437 }, { "epoch": 0.6837621798237937, "grad_norm": 0.9652272594761065, "learning_rate": 2.401246660017931e-06, "loss": 0.1548, "step": 23438 }, { "epoch": 0.6837913530544373, "grad_norm": 0.9839096272142991, "learning_rate": 2.4008430626159383e-06, "loss": 0.1234, "step": 23439 }, { "epoch": 0.6838205262850808, "grad_norm": 0.8477160412619479, "learning_rate": 2.4004394884185965e-06, "loss": 0.1224, "step": 23440 }, { "epoch": 0.6838496995157244, "grad_norm": 0.9105366763503833, "learning_rate": 2.40003593742951e-06, "loss": 0.1489, "step": 23441 }, { "epoch": 0.6838788727463679, "grad_norm": 0.8676494595180941, "learning_rate": 2.3996324096522844e-06, "loss": 0.1178, "step": 23442 }, { "epoch": 0.6839080459770115, "grad_norm": 0.8251007198965699, "learning_rate": 2.3992289050905194e-06, "loss": 0.1004, "step": 23443 }, { "epoch": 0.683937219207655, "grad_norm": 0.8984564437788387, "learning_rate": 2.3988254237478164e-06, "loss": 0.1245, "step": 23444 }, { "epoch": 0.6839663924382986, "grad_norm": 0.7892369663173008, "learning_rate": 2.3984219656277807e-06, "loss": 0.1111, "step": 23445 }, { "epoch": 0.6839955656689422, "grad_norm": 1.003280951784191, "learning_rate": 2.3980185307340127e-06, "loss": 0.1026, "step": 23446 }, { "epoch": 0.6840247388995857, "grad_norm": 0.7889691182335306, "learning_rate": 2.3976151190701123e-06, "loss": 0.1122, "step": 23447 }, { "epoch": 0.6840539121302293, "grad_norm": 0.6360571341951934, "learning_rate": 2.3972117306396823e-06, "loss": 0.121, "step": 23448 }, { "epoch": 0.6840830853608728, "grad_norm": 1.0484686636854017, "learning_rate": 2.3968083654463277e-06, "loss": 0.1325, "step": 23449 }, { "epoch": 0.6841122585915165, "grad_norm": 1.1633400083088463, "learning_rate": 2.396405023493646e-06, "loss": 0.1189, "step": 23450 }, { "epoch": 0.68414143182216, "grad_norm": 0.7852866233186016, "learning_rate": 2.3960017047852362e-06, "loss": 0.14, "step": 23451 }, { "epoch": 0.6841706050528036, "grad_norm": 0.9356886034288822, "learning_rate": 2.395598409324704e-06, "loss": 0.1323, "step": 23452 }, { "epoch": 0.6841997782834471, "grad_norm": 0.9572955367971694, "learning_rate": 2.395195137115646e-06, "loss": 0.1307, "step": 23453 }, { "epoch": 0.6842289515140907, "grad_norm": 0.7671869608111163, "learning_rate": 2.394791888161663e-06, "loss": 0.1137, "step": 23454 }, { "epoch": 0.6842581247447342, "grad_norm": 0.9668753842441362, "learning_rate": 2.3943886624663586e-06, "loss": 0.1277, "step": 23455 }, { "epoch": 0.6842872979753778, "grad_norm": 0.7377271997149412, "learning_rate": 2.393985460033331e-06, "loss": 0.1213, "step": 23456 }, { "epoch": 0.6843164712060213, "grad_norm": 0.707175469957673, "learning_rate": 2.393582280866176e-06, "loss": 0.1252, "step": 23457 }, { "epoch": 0.6843456444366649, "grad_norm": 0.8718198956609994, "learning_rate": 2.393179124968498e-06, "loss": 0.1152, "step": 23458 }, { "epoch": 0.6843748176673085, "grad_norm": 0.8221659544288544, "learning_rate": 2.3927759923438936e-06, "loss": 0.1276, "step": 23459 }, { "epoch": 0.684403990897952, "grad_norm": 0.7789014250704809, "learning_rate": 2.392372882995964e-06, "loss": 0.1136, "step": 23460 }, { "epoch": 0.6844331641285956, "grad_norm": 0.7690363735768622, "learning_rate": 2.391969796928305e-06, "loss": 0.1068, "step": 23461 }, { "epoch": 0.6844623373592391, "grad_norm": 0.8627675179824307, "learning_rate": 2.3915667341445194e-06, "loss": 0.1178, "step": 23462 }, { "epoch": 0.6844915105898828, "grad_norm": 0.9365312772700242, "learning_rate": 2.3911636946482024e-06, "loss": 0.1415, "step": 23463 }, { "epoch": 0.6845206838205263, "grad_norm": 1.0855533313368741, "learning_rate": 2.390760678442952e-06, "loss": 0.1212, "step": 23464 }, { "epoch": 0.6845498570511699, "grad_norm": 0.8870097026272866, "learning_rate": 2.3903576855323676e-06, "loss": 0.1198, "step": 23465 }, { "epoch": 0.6845790302818134, "grad_norm": 0.6458907704108451, "learning_rate": 2.3899547159200478e-06, "loss": 0.1033, "step": 23466 }, { "epoch": 0.684608203512457, "grad_norm": 0.7754345369737338, "learning_rate": 2.389551769609588e-06, "loss": 0.1012, "step": 23467 }, { "epoch": 0.6846373767431005, "grad_norm": 0.8907205336921865, "learning_rate": 2.389148846604588e-06, "loss": 0.1336, "step": 23468 }, { "epoch": 0.6846665499737441, "grad_norm": 0.9164401893153775, "learning_rate": 2.388745946908645e-06, "loss": 0.1102, "step": 23469 }, { "epoch": 0.6846957232043877, "grad_norm": 0.9711317861214254, "learning_rate": 2.3883430705253517e-06, "loss": 0.1224, "step": 23470 }, { "epoch": 0.6847248964350312, "grad_norm": 0.9728623130958626, "learning_rate": 2.387940217458309e-06, "loss": 0.1204, "step": 23471 }, { "epoch": 0.6847540696656748, "grad_norm": 0.7746221587657227, "learning_rate": 2.387537387711114e-06, "loss": 0.0982, "step": 23472 }, { "epoch": 0.6847832428963183, "grad_norm": 1.0602203978021345, "learning_rate": 2.3871345812873614e-06, "loss": 0.1259, "step": 23473 }, { "epoch": 0.6848124161269619, "grad_norm": 0.7457289504548994, "learning_rate": 2.386731798190646e-06, "loss": 0.0918, "step": 23474 }, { "epoch": 0.6848415893576054, "grad_norm": 0.9372285937573585, "learning_rate": 2.386329038424567e-06, "loss": 0.1355, "step": 23475 }, { "epoch": 0.684870762588249, "grad_norm": 0.8393391738534541, "learning_rate": 2.3859263019927183e-06, "loss": 0.116, "step": 23476 }, { "epoch": 0.6848999358188926, "grad_norm": 0.8456078402600375, "learning_rate": 2.3855235888986934e-06, "loss": 0.1154, "step": 23477 }, { "epoch": 0.6849291090495362, "grad_norm": 0.9142331599646668, "learning_rate": 2.38512089914609e-06, "loss": 0.1232, "step": 23478 }, { "epoch": 0.6849582822801797, "grad_norm": 0.8034376540527108, "learning_rate": 2.384718232738505e-06, "loss": 0.1045, "step": 23479 }, { "epoch": 0.6849874555108233, "grad_norm": 0.9115998323203565, "learning_rate": 2.3843155896795312e-06, "loss": 0.1163, "step": 23480 }, { "epoch": 0.6850166287414668, "grad_norm": 0.8322445656168338, "learning_rate": 2.383912969972762e-06, "loss": 0.1149, "step": 23481 }, { "epoch": 0.6850458019721104, "grad_norm": 0.9240018036308494, "learning_rate": 2.3835103736217946e-06, "loss": 0.1094, "step": 23482 }, { "epoch": 0.685074975202754, "grad_norm": 0.936371784389214, "learning_rate": 2.38310780063022e-06, "loss": 0.1487, "step": 23483 }, { "epoch": 0.6851041484333975, "grad_norm": 0.8993352478631058, "learning_rate": 2.3827052510016345e-06, "loss": 0.1371, "step": 23484 }, { "epoch": 0.6851333216640411, "grad_norm": 0.866249042145432, "learning_rate": 2.3823027247396336e-06, "loss": 0.0973, "step": 23485 }, { "epoch": 0.6851624948946846, "grad_norm": 0.8120086252175812, "learning_rate": 2.3819002218478095e-06, "loss": 0.1294, "step": 23486 }, { "epoch": 0.6851916681253282, "grad_norm": 0.9075515094803396, "learning_rate": 2.3814977423297525e-06, "loss": 0.1142, "step": 23487 }, { "epoch": 0.6852208413559717, "grad_norm": 0.8194432468452225, "learning_rate": 2.381095286189061e-06, "loss": 0.0967, "step": 23488 }, { "epoch": 0.6852500145866153, "grad_norm": 0.911996593340731, "learning_rate": 2.380692853429324e-06, "loss": 0.1093, "step": 23489 }, { "epoch": 0.6852791878172588, "grad_norm": 0.7433077733892137, "learning_rate": 2.380290444054137e-06, "loss": 0.1153, "step": 23490 }, { "epoch": 0.6853083610479025, "grad_norm": 0.6776526815796657, "learning_rate": 2.37988805806709e-06, "loss": 0.1125, "step": 23491 }, { "epoch": 0.685337534278546, "grad_norm": 0.9523202249609923, "learning_rate": 2.379485695471779e-06, "loss": 0.1318, "step": 23492 }, { "epoch": 0.6853667075091896, "grad_norm": 0.7864939572064003, "learning_rate": 2.3790833562717942e-06, "loss": 0.1148, "step": 23493 }, { "epoch": 0.6853958807398332, "grad_norm": 0.7627730295940247, "learning_rate": 2.3786810404707255e-06, "loss": 0.1162, "step": 23494 }, { "epoch": 0.6854250539704767, "grad_norm": 0.7648168097564796, "learning_rate": 2.3782787480721665e-06, "loss": 0.1123, "step": 23495 }, { "epoch": 0.6854542272011203, "grad_norm": 0.681673275124196, "learning_rate": 2.377876479079711e-06, "loss": 0.1036, "step": 23496 }, { "epoch": 0.6854834004317638, "grad_norm": 0.7671201754749983, "learning_rate": 2.3774742334969463e-06, "loss": 0.1245, "step": 23497 }, { "epoch": 0.6855125736624074, "grad_norm": 0.8428541625357826, "learning_rate": 2.3770720113274683e-06, "loss": 0.127, "step": 23498 }, { "epoch": 0.6855417468930509, "grad_norm": 1.0171462256525663, "learning_rate": 2.3766698125748646e-06, "loss": 0.1248, "step": 23499 }, { "epoch": 0.6855709201236945, "grad_norm": 0.9667422677895646, "learning_rate": 2.3762676372427247e-06, "loss": 0.1088, "step": 23500 }, { "epoch": 0.685600093354338, "grad_norm": 0.6010521933297716, "learning_rate": 2.3758654853346407e-06, "loss": 0.1219, "step": 23501 }, { "epoch": 0.6856292665849816, "grad_norm": 0.93863480082487, "learning_rate": 2.3754633568542056e-06, "loss": 0.112, "step": 23502 }, { "epoch": 0.6856584398156251, "grad_norm": 1.0692147299668187, "learning_rate": 2.375061251805007e-06, "loss": 0.1307, "step": 23503 }, { "epoch": 0.6856876130462688, "grad_norm": 0.7559153663930311, "learning_rate": 2.374659170190633e-06, "loss": 0.1185, "step": 23504 }, { "epoch": 0.6857167862769123, "grad_norm": 0.6871234650276633, "learning_rate": 2.3742571120146767e-06, "loss": 0.1123, "step": 23505 }, { "epoch": 0.6857459595075559, "grad_norm": 1.0453357658913915, "learning_rate": 2.373855077280727e-06, "loss": 0.1149, "step": 23506 }, { "epoch": 0.6857751327381995, "grad_norm": 0.9279121847645624, "learning_rate": 2.3734530659923695e-06, "loss": 0.1181, "step": 23507 }, { "epoch": 0.685804305968843, "grad_norm": 0.7215969775900233, "learning_rate": 2.373051078153196e-06, "loss": 0.1215, "step": 23508 }, { "epoch": 0.6858334791994866, "grad_norm": 0.9561340154637641, "learning_rate": 2.372649113766798e-06, "loss": 0.1324, "step": 23509 }, { "epoch": 0.6858626524301301, "grad_norm": 0.8386322298917255, "learning_rate": 2.3722471728367613e-06, "loss": 0.1242, "step": 23510 }, { "epoch": 0.6858918256607737, "grad_norm": 0.9650038111161826, "learning_rate": 2.371845255366672e-06, "loss": 0.1396, "step": 23511 }, { "epoch": 0.6859209988914172, "grad_norm": 1.0789013915669485, "learning_rate": 2.3714433613601236e-06, "loss": 0.1324, "step": 23512 }, { "epoch": 0.6859501721220608, "grad_norm": 0.8364909352913613, "learning_rate": 2.3710414908206993e-06, "loss": 0.1165, "step": 23513 }, { "epoch": 0.6859793453527043, "grad_norm": 1.068960133563225, "learning_rate": 2.3706396437519884e-06, "loss": 0.1058, "step": 23514 }, { "epoch": 0.6860085185833479, "grad_norm": 1.4907302630864512, "learning_rate": 2.3702378201575813e-06, "loss": 0.1169, "step": 23515 }, { "epoch": 0.6860376918139914, "grad_norm": 0.9768349753225332, "learning_rate": 2.3698360200410637e-06, "loss": 0.1369, "step": 23516 }, { "epoch": 0.686066865044635, "grad_norm": 1.0809606732262722, "learning_rate": 2.3694342434060197e-06, "loss": 0.1306, "step": 23517 }, { "epoch": 0.6860960382752787, "grad_norm": 1.178862064829292, "learning_rate": 2.369032490256041e-06, "loss": 0.11, "step": 23518 }, { "epoch": 0.6861252115059222, "grad_norm": 1.0757149116537466, "learning_rate": 2.36863076059471e-06, "loss": 0.1141, "step": 23519 }, { "epoch": 0.6861543847365658, "grad_norm": 1.0754990610337491, "learning_rate": 2.3682290544256177e-06, "loss": 0.1072, "step": 23520 }, { "epoch": 0.6861835579672093, "grad_norm": 0.9965731383422439, "learning_rate": 2.367827371752346e-06, "loss": 0.1138, "step": 23521 }, { "epoch": 0.6862127311978529, "grad_norm": 1.0173008119921403, "learning_rate": 2.367425712578485e-06, "loss": 0.0927, "step": 23522 }, { "epoch": 0.6862419044284964, "grad_norm": 1.0887430923450627, "learning_rate": 2.367024076907619e-06, "loss": 0.1315, "step": 23523 }, { "epoch": 0.68627107765914, "grad_norm": 0.9403419166037104, "learning_rate": 2.3666224647433316e-06, "loss": 0.1346, "step": 23524 }, { "epoch": 0.6863002508897835, "grad_norm": 1.0754113609847809, "learning_rate": 2.36622087608921e-06, "loss": 0.1456, "step": 23525 }, { "epoch": 0.6863294241204271, "grad_norm": 0.891411413120843, "learning_rate": 2.365819310948842e-06, "loss": 0.1239, "step": 23526 }, { "epoch": 0.6863585973510706, "grad_norm": 0.8652430958392724, "learning_rate": 2.365417769325808e-06, "loss": 0.1306, "step": 23527 }, { "epoch": 0.6863877705817142, "grad_norm": 0.8521861191231433, "learning_rate": 2.3650162512236976e-06, "loss": 0.123, "step": 23528 }, { "epoch": 0.6864169438123577, "grad_norm": 0.7609696299282188, "learning_rate": 2.3646147566460925e-06, "loss": 0.0889, "step": 23529 }, { "epoch": 0.6864461170430013, "grad_norm": 0.8147779881198222, "learning_rate": 2.364213285596576e-06, "loss": 0.1262, "step": 23530 }, { "epoch": 0.686475290273645, "grad_norm": 0.6912119070395232, "learning_rate": 2.3638118380787343e-06, "loss": 0.1108, "step": 23531 }, { "epoch": 0.6865044635042885, "grad_norm": 0.7823489429027166, "learning_rate": 2.3634104140961526e-06, "loss": 0.1118, "step": 23532 }, { "epoch": 0.6865336367349321, "grad_norm": 0.8024496981683377, "learning_rate": 2.363009013652414e-06, "loss": 0.1224, "step": 23533 }, { "epoch": 0.6865628099655756, "grad_norm": 0.8262579075470796, "learning_rate": 2.362607636751099e-06, "loss": 0.1344, "step": 23534 }, { "epoch": 0.6865919831962192, "grad_norm": 0.7887056906078238, "learning_rate": 2.362206283395796e-06, "loss": 0.1151, "step": 23535 }, { "epoch": 0.6866211564268627, "grad_norm": 0.7806153772020534, "learning_rate": 2.361804953590085e-06, "loss": 0.1355, "step": 23536 }, { "epoch": 0.6866503296575063, "grad_norm": 0.8653411093637556, "learning_rate": 2.361403647337548e-06, "loss": 0.1285, "step": 23537 }, { "epoch": 0.6866795028881498, "grad_norm": 1.4017383155451708, "learning_rate": 2.361002364641769e-06, "loss": 0.1322, "step": 23538 }, { "epoch": 0.6867086761187934, "grad_norm": 1.144977948650432, "learning_rate": 2.3606011055063334e-06, "loss": 0.1307, "step": 23539 }, { "epoch": 0.6867378493494369, "grad_norm": 0.7977310722793292, "learning_rate": 2.3601998699348204e-06, "loss": 0.1241, "step": 23540 }, { "epoch": 0.6867670225800805, "grad_norm": 0.9778755835224564, "learning_rate": 2.359798657930811e-06, "loss": 0.1252, "step": 23541 }, { "epoch": 0.686796195810724, "grad_norm": 0.9654940653137173, "learning_rate": 2.359397469497891e-06, "loss": 0.1346, "step": 23542 }, { "epoch": 0.6868253690413676, "grad_norm": 0.8260042062894757, "learning_rate": 2.358996304639638e-06, "loss": 0.1188, "step": 23543 }, { "epoch": 0.6868545422720111, "grad_norm": 0.8274462611216946, "learning_rate": 2.3585951633596355e-06, "loss": 0.1116, "step": 23544 }, { "epoch": 0.6868837155026548, "grad_norm": 1.1001433245277148, "learning_rate": 2.358194045661467e-06, "loss": 0.1436, "step": 23545 }, { "epoch": 0.6869128887332984, "grad_norm": 0.9133574728664574, "learning_rate": 2.3577929515487114e-06, "loss": 0.1107, "step": 23546 }, { "epoch": 0.6869420619639419, "grad_norm": 0.953493345282723, "learning_rate": 2.3573918810249474e-06, "loss": 0.1138, "step": 23547 }, { "epoch": 0.6869712351945855, "grad_norm": 1.194121128930686, "learning_rate": 2.35699083409376e-06, "loss": 0.1361, "step": 23548 }, { "epoch": 0.687000408425229, "grad_norm": 0.8862759394539529, "learning_rate": 2.3565898107587252e-06, "loss": 0.1255, "step": 23549 }, { "epoch": 0.6870295816558726, "grad_norm": 0.9633645413471328, "learning_rate": 2.3561888110234282e-06, "loss": 0.1089, "step": 23550 }, { "epoch": 0.6870587548865161, "grad_norm": 0.7766659798818281, "learning_rate": 2.355787834891444e-06, "loss": 0.1031, "step": 23551 }, { "epoch": 0.6870879281171597, "grad_norm": 0.9616354443413634, "learning_rate": 2.3553868823663566e-06, "loss": 0.1298, "step": 23552 }, { "epoch": 0.6871171013478032, "grad_norm": 0.8658744516447879, "learning_rate": 2.354985953451744e-06, "loss": 0.1279, "step": 23553 }, { "epoch": 0.6871462745784468, "grad_norm": 0.7759188916579862, "learning_rate": 2.354585048151183e-06, "loss": 0.1247, "step": 23554 }, { "epoch": 0.6871754478090903, "grad_norm": 0.9923030343845581, "learning_rate": 2.3541841664682557e-06, "loss": 0.1286, "step": 23555 }, { "epoch": 0.6872046210397339, "grad_norm": 0.9361035162978071, "learning_rate": 2.353783308406542e-06, "loss": 0.1208, "step": 23556 }, { "epoch": 0.6872337942703775, "grad_norm": 0.842792622859444, "learning_rate": 2.3533824739696177e-06, "loss": 0.1457, "step": 23557 }, { "epoch": 0.6872629675010211, "grad_norm": 0.9782272165786156, "learning_rate": 2.352981663161065e-06, "loss": 0.1268, "step": 23558 }, { "epoch": 0.6872921407316647, "grad_norm": 0.8433063502457212, "learning_rate": 2.3525808759844597e-06, "loss": 0.1263, "step": 23559 }, { "epoch": 0.6873213139623082, "grad_norm": 0.8262014928374279, "learning_rate": 2.3521801124433785e-06, "loss": 0.1188, "step": 23560 }, { "epoch": 0.6873504871929518, "grad_norm": 0.8564325546700844, "learning_rate": 2.3517793725414012e-06, "loss": 0.104, "step": 23561 }, { "epoch": 0.6873796604235953, "grad_norm": 0.8299099115305489, "learning_rate": 2.3513786562821074e-06, "loss": 0.12, "step": 23562 }, { "epoch": 0.6874088336542389, "grad_norm": 0.8302422510429294, "learning_rate": 2.350977963669073e-06, "loss": 0.136, "step": 23563 }, { "epoch": 0.6874380068848824, "grad_norm": 1.0926938136093967, "learning_rate": 2.3505772947058724e-06, "loss": 0.1236, "step": 23564 }, { "epoch": 0.687467180115526, "grad_norm": 0.8118820649428803, "learning_rate": 2.3501766493960877e-06, "loss": 0.1447, "step": 23565 }, { "epoch": 0.6874963533461695, "grad_norm": 0.7483750510343007, "learning_rate": 2.349776027743293e-06, "loss": 0.1215, "step": 23566 }, { "epoch": 0.6875255265768131, "grad_norm": 0.9237602169403718, "learning_rate": 2.3493754297510633e-06, "loss": 0.1183, "step": 23567 }, { "epoch": 0.6875546998074566, "grad_norm": 0.9099250676978813, "learning_rate": 2.3489748554229776e-06, "loss": 0.0973, "step": 23568 }, { "epoch": 0.6875838730381002, "grad_norm": 0.8952863050396787, "learning_rate": 2.348574304762613e-06, "loss": 0.1191, "step": 23569 }, { "epoch": 0.6876130462687438, "grad_norm": 0.8276489452699551, "learning_rate": 2.3481737777735442e-06, "loss": 0.1178, "step": 23570 }, { "epoch": 0.6876422194993873, "grad_norm": 1.82843189096159, "learning_rate": 2.3477732744593447e-06, "loss": 0.1413, "step": 23571 }, { "epoch": 0.687671392730031, "grad_norm": 0.906001687367933, "learning_rate": 2.3473727948235942e-06, "loss": 0.1161, "step": 23572 }, { "epoch": 0.6877005659606745, "grad_norm": 0.7681955824426587, "learning_rate": 2.3469723388698647e-06, "loss": 0.1358, "step": 23573 }, { "epoch": 0.6877297391913181, "grad_norm": 0.8080897467609642, "learning_rate": 2.3465719066017323e-06, "loss": 0.1245, "step": 23574 }, { "epoch": 0.6877589124219616, "grad_norm": 0.8433169982377376, "learning_rate": 2.3461714980227744e-06, "loss": 0.1013, "step": 23575 }, { "epoch": 0.6877880856526052, "grad_norm": 0.8831885607690425, "learning_rate": 2.345771113136564e-06, "loss": 0.1329, "step": 23576 }, { "epoch": 0.6878172588832487, "grad_norm": 0.8231088923519766, "learning_rate": 2.345370751946674e-06, "loss": 0.1281, "step": 23577 }, { "epoch": 0.6878464321138923, "grad_norm": 0.8871687192229474, "learning_rate": 2.3449704144566817e-06, "loss": 0.1291, "step": 23578 }, { "epoch": 0.6878756053445358, "grad_norm": 0.8846479280956788, "learning_rate": 2.3445701006701576e-06, "loss": 0.1237, "step": 23579 }, { "epoch": 0.6879047785751794, "grad_norm": 0.8714965548841259, "learning_rate": 2.34416981059068e-06, "loss": 0.1027, "step": 23580 }, { "epoch": 0.687933951805823, "grad_norm": 0.8758882087340438, "learning_rate": 2.3437695442218184e-06, "loss": 0.1043, "step": 23581 }, { "epoch": 0.6879631250364665, "grad_norm": 0.7658119347014151, "learning_rate": 2.3433693015671498e-06, "loss": 0.105, "step": 23582 }, { "epoch": 0.6879922982671101, "grad_norm": 0.9644642036937223, "learning_rate": 2.3429690826302464e-06, "loss": 0.1235, "step": 23583 }, { "epoch": 0.6880214714977536, "grad_norm": 0.7584404917842424, "learning_rate": 2.3425688874146787e-06, "loss": 0.1224, "step": 23584 }, { "epoch": 0.6880506447283973, "grad_norm": 1.0403847680416156, "learning_rate": 2.3421687159240214e-06, "loss": 0.1183, "step": 23585 }, { "epoch": 0.6880798179590408, "grad_norm": 0.9765791641162657, "learning_rate": 2.341768568161849e-06, "loss": 0.1051, "step": 23586 }, { "epoch": 0.6881089911896844, "grad_norm": 0.8023847088816684, "learning_rate": 2.341368444131733e-06, "loss": 0.0974, "step": 23587 }, { "epoch": 0.6881381644203279, "grad_norm": 1.3936574373681199, "learning_rate": 2.3409683438372427e-06, "loss": 0.1291, "step": 23588 }, { "epoch": 0.6881673376509715, "grad_norm": 0.9726354913736999, "learning_rate": 2.3405682672819534e-06, "loss": 0.1296, "step": 23589 }, { "epoch": 0.688196510881615, "grad_norm": 1.2558747537163648, "learning_rate": 2.3401682144694347e-06, "loss": 0.1268, "step": 23590 }, { "epoch": 0.6882256841122586, "grad_norm": 1.049934863308751, "learning_rate": 2.339768185403259e-06, "loss": 0.1142, "step": 23591 }, { "epoch": 0.6882548573429021, "grad_norm": 0.9673425357026465, "learning_rate": 2.339368180087e-06, "loss": 0.1105, "step": 23592 }, { "epoch": 0.6882840305735457, "grad_norm": 0.8431165119550438, "learning_rate": 2.338968198524226e-06, "loss": 0.1207, "step": 23593 }, { "epoch": 0.6883132038041893, "grad_norm": 0.934694810059534, "learning_rate": 2.338568240718508e-06, "loss": 0.1051, "step": 23594 }, { "epoch": 0.6883423770348328, "grad_norm": 1.0677053958956826, "learning_rate": 2.3381683066734182e-06, "loss": 0.1272, "step": 23595 }, { "epoch": 0.6883715502654764, "grad_norm": 0.8241464009186282, "learning_rate": 2.3377683963925252e-06, "loss": 0.1262, "step": 23596 }, { "epoch": 0.6884007234961199, "grad_norm": 0.8876436793994138, "learning_rate": 2.3373685098794017e-06, "loss": 0.11, "step": 23597 }, { "epoch": 0.6884298967267635, "grad_norm": 0.7850310122455091, "learning_rate": 2.336968647137615e-06, "loss": 0.119, "step": 23598 }, { "epoch": 0.6884590699574071, "grad_norm": 0.762904058366715, "learning_rate": 2.3365688081707383e-06, "loss": 0.111, "step": 23599 }, { "epoch": 0.6884882431880507, "grad_norm": 0.8473425197337645, "learning_rate": 2.3361689929823396e-06, "loss": 0.1203, "step": 23600 }, { "epoch": 0.6885174164186942, "grad_norm": 0.7210537937423882, "learning_rate": 2.335769201575986e-06, "loss": 0.1177, "step": 23601 }, { "epoch": 0.6885465896493378, "grad_norm": 0.6633151120612071, "learning_rate": 2.335369433955249e-06, "loss": 0.1084, "step": 23602 }, { "epoch": 0.6885757628799813, "grad_norm": 0.8600509904948564, "learning_rate": 2.3349696901236995e-06, "loss": 0.1313, "step": 23603 }, { "epoch": 0.6886049361106249, "grad_norm": 0.6893909561461504, "learning_rate": 2.334569970084903e-06, "loss": 0.1178, "step": 23604 }, { "epoch": 0.6886341093412685, "grad_norm": 0.698221538234911, "learning_rate": 2.334170273842431e-06, "loss": 0.1129, "step": 23605 }, { "epoch": 0.688663282571912, "grad_norm": 0.8706561084080148, "learning_rate": 2.3337706013998508e-06, "loss": 0.1436, "step": 23606 }, { "epoch": 0.6886924558025556, "grad_norm": 0.7001568737571077, "learning_rate": 2.333370952760728e-06, "loss": 0.1254, "step": 23607 }, { "epoch": 0.6887216290331991, "grad_norm": 0.7872645545012035, "learning_rate": 2.3329713279286325e-06, "loss": 0.1285, "step": 23608 }, { "epoch": 0.6887508022638427, "grad_norm": 0.8688513455437255, "learning_rate": 2.3325717269071346e-06, "loss": 0.1594, "step": 23609 }, { "epoch": 0.6887799754944862, "grad_norm": 0.9447196492204607, "learning_rate": 2.332172149699799e-06, "loss": 0.1018, "step": 23610 }, { "epoch": 0.6888091487251298, "grad_norm": 3.555944155520488, "learning_rate": 2.3317725963101923e-06, "loss": 0.1203, "step": 23611 }, { "epoch": 0.6888383219557734, "grad_norm": 1.043843622347159, "learning_rate": 2.3313730667418846e-06, "loss": 0.1391, "step": 23612 }, { "epoch": 0.688867495186417, "grad_norm": 1.1034160358026852, "learning_rate": 2.3309735609984414e-06, "loss": 0.1444, "step": 23613 }, { "epoch": 0.6888966684170605, "grad_norm": 0.7520143710781712, "learning_rate": 2.3305740790834263e-06, "loss": 0.1004, "step": 23614 }, { "epoch": 0.6889258416477041, "grad_norm": 0.770345667529433, "learning_rate": 2.3301746210004094e-06, "loss": 0.1314, "step": 23615 }, { "epoch": 0.6889550148783476, "grad_norm": 0.7418194805544203, "learning_rate": 2.3297751867529578e-06, "loss": 0.1275, "step": 23616 }, { "epoch": 0.6889841881089912, "grad_norm": 0.991478484394904, "learning_rate": 2.329375776344636e-06, "loss": 0.1157, "step": 23617 }, { "epoch": 0.6890133613396348, "grad_norm": 0.722884649775238, "learning_rate": 2.328976389779008e-06, "loss": 0.1095, "step": 23618 }, { "epoch": 0.6890425345702783, "grad_norm": 0.7579140561546834, "learning_rate": 2.3285770270596424e-06, "loss": 0.1126, "step": 23619 }, { "epoch": 0.6890717078009219, "grad_norm": 0.7967802307443903, "learning_rate": 2.328177688190102e-06, "loss": 0.1139, "step": 23620 }, { "epoch": 0.6891008810315654, "grad_norm": 0.8044319982174284, "learning_rate": 2.3277783731739532e-06, "loss": 0.1157, "step": 23621 }, { "epoch": 0.689130054262209, "grad_norm": 0.593504956725694, "learning_rate": 2.3273790820147634e-06, "loss": 0.12, "step": 23622 }, { "epoch": 0.6891592274928525, "grad_norm": 0.934777912502349, "learning_rate": 2.326979814716095e-06, "loss": 0.1277, "step": 23623 }, { "epoch": 0.6891884007234961, "grad_norm": 0.9523415607532826, "learning_rate": 2.326580571281511e-06, "loss": 0.1304, "step": 23624 }, { "epoch": 0.6892175739541396, "grad_norm": 0.7213247178173505, "learning_rate": 2.3261813517145787e-06, "loss": 0.108, "step": 23625 }, { "epoch": 0.6892467471847833, "grad_norm": 0.5879304315447973, "learning_rate": 2.32578215601886e-06, "loss": 0.0956, "step": 23626 }, { "epoch": 0.6892759204154268, "grad_norm": 0.8829091017552038, "learning_rate": 2.325382984197921e-06, "loss": 0.1355, "step": 23627 }, { "epoch": 0.6893050936460704, "grad_norm": 1.078545661877942, "learning_rate": 2.3249838362553224e-06, "loss": 0.0971, "step": 23628 }, { "epoch": 0.689334266876714, "grad_norm": 0.8490332553845026, "learning_rate": 2.3245847121946314e-06, "loss": 0.111, "step": 23629 }, { "epoch": 0.6893634401073575, "grad_norm": 0.7672113373591892, "learning_rate": 2.3241856120194094e-06, "loss": 0.1227, "step": 23630 }, { "epoch": 0.6893926133380011, "grad_norm": 1.0170675625170897, "learning_rate": 2.3237865357332185e-06, "loss": 0.1175, "step": 23631 }, { "epoch": 0.6894217865686446, "grad_norm": 0.7222560693625352, "learning_rate": 2.3233874833396213e-06, "loss": 0.1017, "step": 23632 }, { "epoch": 0.6894509597992882, "grad_norm": 0.9561382202915274, "learning_rate": 2.3229884548421844e-06, "loss": 0.1212, "step": 23633 }, { "epoch": 0.6894801330299317, "grad_norm": 0.7687430226846184, "learning_rate": 2.322589450244465e-06, "loss": 0.1221, "step": 23634 }, { "epoch": 0.6895093062605753, "grad_norm": 0.8756176070118019, "learning_rate": 2.32219046955003e-06, "loss": 0.1118, "step": 23635 }, { "epoch": 0.6895384794912188, "grad_norm": 0.8219276207287353, "learning_rate": 2.32179151276244e-06, "loss": 0.1428, "step": 23636 }, { "epoch": 0.6895676527218624, "grad_norm": 0.7935008080019541, "learning_rate": 2.3213925798852534e-06, "loss": 0.1393, "step": 23637 }, { "epoch": 0.6895968259525059, "grad_norm": 1.013543929059074, "learning_rate": 2.3209936709220343e-06, "loss": 0.1336, "step": 23638 }, { "epoch": 0.6896259991831496, "grad_norm": 0.8030855768031363, "learning_rate": 2.320594785876346e-06, "loss": 0.1259, "step": 23639 }, { "epoch": 0.6896551724137931, "grad_norm": 0.9639021359278168, "learning_rate": 2.320195924751748e-06, "loss": 0.1383, "step": 23640 }, { "epoch": 0.6896843456444367, "grad_norm": 0.8700975954012806, "learning_rate": 2.3197970875517995e-06, "loss": 0.1281, "step": 23641 }, { "epoch": 0.6897135188750803, "grad_norm": 0.8136724596954176, "learning_rate": 2.3193982742800647e-06, "loss": 0.1443, "step": 23642 }, { "epoch": 0.6897426921057238, "grad_norm": 1.0189556984596226, "learning_rate": 2.3189994849401015e-06, "loss": 0.1379, "step": 23643 }, { "epoch": 0.6897718653363674, "grad_norm": 0.9742464893363215, "learning_rate": 2.31860071953547e-06, "loss": 0.1059, "step": 23644 }, { "epoch": 0.6898010385670109, "grad_norm": 0.7963701868180952, "learning_rate": 2.31820197806973e-06, "loss": 0.1269, "step": 23645 }, { "epoch": 0.6898302117976545, "grad_norm": 0.8386964550617722, "learning_rate": 2.317803260546445e-06, "loss": 0.131, "step": 23646 }, { "epoch": 0.689859385028298, "grad_norm": 0.9893009468384862, "learning_rate": 2.3174045669691724e-06, "loss": 0.1367, "step": 23647 }, { "epoch": 0.6898885582589416, "grad_norm": 0.8806406178431144, "learning_rate": 2.3170058973414696e-06, "loss": 0.1219, "step": 23648 }, { "epoch": 0.6899177314895851, "grad_norm": 0.9552274158140003, "learning_rate": 2.3166072516668992e-06, "loss": 0.1331, "step": 23649 }, { "epoch": 0.6899469047202287, "grad_norm": 1.0930118529298434, "learning_rate": 2.316208629949017e-06, "loss": 0.1157, "step": 23650 }, { "epoch": 0.6899760779508722, "grad_norm": 0.7272985772568562, "learning_rate": 2.3158100321913836e-06, "loss": 0.1108, "step": 23651 }, { "epoch": 0.6900052511815158, "grad_norm": 0.8064813106794204, "learning_rate": 2.315411458397559e-06, "loss": 0.1249, "step": 23652 }, { "epoch": 0.6900344244121595, "grad_norm": 0.9704866191529995, "learning_rate": 2.3150129085710998e-06, "loss": 0.12, "step": 23653 }, { "epoch": 0.690063597642803, "grad_norm": 0.9910425680357245, "learning_rate": 2.314614382715563e-06, "loss": 0.113, "step": 23654 }, { "epoch": 0.6900927708734466, "grad_norm": 0.9334528401558666, "learning_rate": 2.31421588083451e-06, "loss": 0.123, "step": 23655 }, { "epoch": 0.6901219441040901, "grad_norm": 0.9126471488991307, "learning_rate": 2.313817402931494e-06, "loss": 0.1528, "step": 23656 }, { "epoch": 0.6901511173347337, "grad_norm": 0.9908774099220438, "learning_rate": 2.3134189490100773e-06, "loss": 0.1204, "step": 23657 }, { "epoch": 0.6901802905653772, "grad_norm": 0.819538434326238, "learning_rate": 2.313020519073813e-06, "loss": 0.1182, "step": 23658 }, { "epoch": 0.6902094637960208, "grad_norm": 0.800339781816359, "learning_rate": 2.3126221131262614e-06, "loss": 0.1181, "step": 23659 }, { "epoch": 0.6902386370266643, "grad_norm": 0.8457097695409919, "learning_rate": 2.312223731170979e-06, "loss": 0.1153, "step": 23660 }, { "epoch": 0.6902678102573079, "grad_norm": 0.95150576897296, "learning_rate": 2.3118253732115186e-06, "loss": 0.1182, "step": 23661 }, { "epoch": 0.6902969834879514, "grad_norm": 0.8905554901884798, "learning_rate": 2.3114270392514404e-06, "loss": 0.1168, "step": 23662 }, { "epoch": 0.690326156718595, "grad_norm": 1.042156377872879, "learning_rate": 2.311028729294301e-06, "loss": 0.1083, "step": 23663 }, { "epoch": 0.6903553299492385, "grad_norm": 0.8702989898114386, "learning_rate": 2.310630443343654e-06, "loss": 0.1129, "step": 23664 }, { "epoch": 0.6903845031798821, "grad_norm": 1.0316251151985667, "learning_rate": 2.3102321814030577e-06, "loss": 0.1381, "step": 23665 }, { "epoch": 0.6904136764105258, "grad_norm": 1.038377239359597, "learning_rate": 2.309833943476067e-06, "loss": 0.1058, "step": 23666 }, { "epoch": 0.6904428496411693, "grad_norm": 0.8366944025737572, "learning_rate": 2.309435729566234e-06, "loss": 0.1254, "step": 23667 }, { "epoch": 0.6904720228718129, "grad_norm": 0.6714876382825048, "learning_rate": 2.309037539677117e-06, "loss": 0.1089, "step": 23668 }, { "epoch": 0.6905011961024564, "grad_norm": 0.91272258081961, "learning_rate": 2.3086393738122718e-06, "loss": 0.1167, "step": 23669 }, { "epoch": 0.6905303693331, "grad_norm": 1.1446665184893436, "learning_rate": 2.3082412319752525e-06, "loss": 0.142, "step": 23670 }, { "epoch": 0.6905595425637435, "grad_norm": 0.9077590475189545, "learning_rate": 2.307843114169611e-06, "loss": 0.1242, "step": 23671 }, { "epoch": 0.6905887157943871, "grad_norm": 0.6449499241531631, "learning_rate": 2.3074450203989046e-06, "loss": 0.1024, "step": 23672 }, { "epoch": 0.6906178890250306, "grad_norm": 1.0168014356209796, "learning_rate": 2.307046950666687e-06, "loss": 0.1385, "step": 23673 }, { "epoch": 0.6906470622556742, "grad_norm": 0.9092898333155542, "learning_rate": 2.3066489049765096e-06, "loss": 0.1162, "step": 23674 }, { "epoch": 0.6906762354863177, "grad_norm": 0.9193741929850859, "learning_rate": 2.3062508833319273e-06, "loss": 0.1428, "step": 23675 }, { "epoch": 0.6907054087169613, "grad_norm": 0.8523432817325961, "learning_rate": 2.3058528857364963e-06, "loss": 0.1107, "step": 23676 }, { "epoch": 0.6907345819476048, "grad_norm": 0.8534216923236597, "learning_rate": 2.3054549121937674e-06, "loss": 0.1242, "step": 23677 }, { "epoch": 0.6907637551782484, "grad_norm": 0.9272245904195249, "learning_rate": 2.305056962707292e-06, "loss": 0.1297, "step": 23678 }, { "epoch": 0.690792928408892, "grad_norm": 1.1494664409395687, "learning_rate": 2.3046590372806268e-06, "loss": 0.1071, "step": 23679 }, { "epoch": 0.6908221016395356, "grad_norm": 0.9187653162047094, "learning_rate": 2.30426113591732e-06, "loss": 0.1232, "step": 23680 }, { "epoch": 0.6908512748701792, "grad_norm": 0.8784483484111315, "learning_rate": 2.3038632586209264e-06, "loss": 0.1436, "step": 23681 }, { "epoch": 0.6908804481008227, "grad_norm": 0.8719874068627423, "learning_rate": 2.303465405395e-06, "loss": 0.1136, "step": 23682 }, { "epoch": 0.6909096213314663, "grad_norm": 1.1241996492284672, "learning_rate": 2.3030675762430906e-06, "loss": 0.1352, "step": 23683 }, { "epoch": 0.6909387945621098, "grad_norm": 0.9061017200117076, "learning_rate": 2.3026697711687477e-06, "loss": 0.1456, "step": 23684 }, { "epoch": 0.6909679677927534, "grad_norm": 0.9601401034716334, "learning_rate": 2.302271990175528e-06, "loss": 0.1281, "step": 23685 }, { "epoch": 0.6909971410233969, "grad_norm": 0.9931266493491848, "learning_rate": 2.3018742332669775e-06, "loss": 0.11, "step": 23686 }, { "epoch": 0.6910263142540405, "grad_norm": 0.8268954279743886, "learning_rate": 2.301476500446652e-06, "loss": 0.1063, "step": 23687 }, { "epoch": 0.691055487484684, "grad_norm": 0.7578368801598571, "learning_rate": 2.301078791718098e-06, "loss": 0.1133, "step": 23688 }, { "epoch": 0.6910846607153276, "grad_norm": 1.0595097715640953, "learning_rate": 2.30068110708487e-06, "loss": 0.1094, "step": 23689 }, { "epoch": 0.6911138339459711, "grad_norm": 0.7898769517130635, "learning_rate": 2.300283446550517e-06, "loss": 0.1308, "step": 23690 }, { "epoch": 0.6911430071766147, "grad_norm": 1.1145970578791549, "learning_rate": 2.2998858101185873e-06, "loss": 0.1293, "step": 23691 }, { "epoch": 0.6911721804072583, "grad_norm": 0.836399848990084, "learning_rate": 2.299488197792632e-06, "loss": 0.1221, "step": 23692 }, { "epoch": 0.6912013536379019, "grad_norm": 0.7747572602824823, "learning_rate": 2.2990906095762033e-06, "loss": 0.125, "step": 23693 }, { "epoch": 0.6912305268685455, "grad_norm": 0.9102249540121757, "learning_rate": 2.2986930454728474e-06, "loss": 0.1108, "step": 23694 }, { "epoch": 0.691259700099189, "grad_norm": 0.7265958771338673, "learning_rate": 2.2982955054861166e-06, "loss": 0.1251, "step": 23695 }, { "epoch": 0.6912888733298326, "grad_norm": 0.9534936034246931, "learning_rate": 2.2978979896195587e-06, "loss": 0.1257, "step": 23696 }, { "epoch": 0.6913180465604761, "grad_norm": 0.7784759412891694, "learning_rate": 2.2975004978767206e-06, "loss": 0.1051, "step": 23697 }, { "epoch": 0.6913472197911197, "grad_norm": 1.0035190140289736, "learning_rate": 2.297103030261153e-06, "loss": 0.1273, "step": 23698 }, { "epoch": 0.6913763930217632, "grad_norm": 0.8378814986653959, "learning_rate": 2.296705586776406e-06, "loss": 0.1235, "step": 23699 }, { "epoch": 0.6914055662524068, "grad_norm": 0.7593074884696484, "learning_rate": 2.2963081674260267e-06, "loss": 0.1148, "step": 23700 }, { "epoch": 0.6914347394830503, "grad_norm": 0.9424191104104974, "learning_rate": 2.2959107722135603e-06, "loss": 0.1188, "step": 23701 }, { "epoch": 0.6914639127136939, "grad_norm": 0.9697443066315308, "learning_rate": 2.295513401142559e-06, "loss": 0.1372, "step": 23702 }, { "epoch": 0.6914930859443374, "grad_norm": 0.8505360642343872, "learning_rate": 2.2951160542165684e-06, "loss": 0.1226, "step": 23703 }, { "epoch": 0.691522259174981, "grad_norm": 0.6855969249332672, "learning_rate": 2.2947187314391346e-06, "loss": 0.0891, "step": 23704 }, { "epoch": 0.6915514324056246, "grad_norm": 0.906423769640861, "learning_rate": 2.294321432813805e-06, "loss": 0.1031, "step": 23705 }, { "epoch": 0.6915806056362681, "grad_norm": 0.7999588831953454, "learning_rate": 2.2939241583441308e-06, "loss": 0.1298, "step": 23706 }, { "epoch": 0.6916097788669118, "grad_norm": 0.8355790758687139, "learning_rate": 2.2935269080336555e-06, "loss": 0.1134, "step": 23707 }, { "epoch": 0.6916389520975553, "grad_norm": 0.7848295123297799, "learning_rate": 2.2931296818859233e-06, "loss": 0.1226, "step": 23708 }, { "epoch": 0.6916681253281989, "grad_norm": 1.133574432844111, "learning_rate": 2.2927324799044858e-06, "loss": 0.1296, "step": 23709 }, { "epoch": 0.6916972985588424, "grad_norm": 0.8236149312082659, "learning_rate": 2.292335302092884e-06, "loss": 0.127, "step": 23710 }, { "epoch": 0.691726471789486, "grad_norm": 0.8715150804512658, "learning_rate": 2.2919381484546665e-06, "loss": 0.1163, "step": 23711 }, { "epoch": 0.6917556450201295, "grad_norm": 0.9397516361988193, "learning_rate": 2.2915410189933807e-06, "loss": 0.1062, "step": 23712 }, { "epoch": 0.6917848182507731, "grad_norm": 0.8283909892003528, "learning_rate": 2.29114391371257e-06, "loss": 0.124, "step": 23713 }, { "epoch": 0.6918139914814166, "grad_norm": 0.912337308859186, "learning_rate": 2.2907468326157777e-06, "loss": 0.1146, "step": 23714 }, { "epoch": 0.6918431647120602, "grad_norm": 0.8580879529726056, "learning_rate": 2.290349775706553e-06, "loss": 0.1235, "step": 23715 }, { "epoch": 0.6918723379427038, "grad_norm": 0.9868941377315759, "learning_rate": 2.289952742988437e-06, "loss": 0.1247, "step": 23716 }, { "epoch": 0.6919015111733473, "grad_norm": 1.0810218537199563, "learning_rate": 2.2895557344649777e-06, "loss": 0.124, "step": 23717 }, { "epoch": 0.6919306844039909, "grad_norm": 1.0714996206285003, "learning_rate": 2.2891587501397157e-06, "loss": 0.119, "step": 23718 }, { "epoch": 0.6919598576346344, "grad_norm": 1.0334090868066341, "learning_rate": 2.2887617900161996e-06, "loss": 0.1319, "step": 23719 }, { "epoch": 0.6919890308652781, "grad_norm": 1.0229856177819148, "learning_rate": 2.28836485409797e-06, "loss": 0.1351, "step": 23720 }, { "epoch": 0.6920182040959216, "grad_norm": 1.0630437252729676, "learning_rate": 2.2879679423885708e-06, "loss": 0.1147, "step": 23721 }, { "epoch": 0.6920473773265652, "grad_norm": 0.8101866141088259, "learning_rate": 2.2875710548915464e-06, "loss": 0.1241, "step": 23722 }, { "epoch": 0.6920765505572087, "grad_norm": 1.1746830181502605, "learning_rate": 2.2871741916104414e-06, "loss": 0.133, "step": 23723 }, { "epoch": 0.6921057237878523, "grad_norm": 1.3231516665272993, "learning_rate": 2.286777352548796e-06, "loss": 0.1341, "step": 23724 }, { "epoch": 0.6921348970184958, "grad_norm": 0.7157855577153792, "learning_rate": 2.2863805377101565e-06, "loss": 0.1034, "step": 23725 }, { "epoch": 0.6921640702491394, "grad_norm": 0.6594134682756123, "learning_rate": 2.2859837470980638e-06, "loss": 0.1252, "step": 23726 }, { "epoch": 0.692193243479783, "grad_norm": 0.8131794627778848, "learning_rate": 2.2855869807160588e-06, "loss": 0.1151, "step": 23727 }, { "epoch": 0.6922224167104265, "grad_norm": 1.2535305456139525, "learning_rate": 2.285190238567685e-06, "loss": 0.1341, "step": 23728 }, { "epoch": 0.6922515899410701, "grad_norm": 0.778980524726283, "learning_rate": 2.2847935206564865e-06, "loss": 0.1209, "step": 23729 }, { "epoch": 0.6922807631717136, "grad_norm": 0.8349112723321234, "learning_rate": 2.284396826986003e-06, "loss": 0.1353, "step": 23730 }, { "epoch": 0.6923099364023572, "grad_norm": 0.8500357543524237, "learning_rate": 2.284000157559775e-06, "loss": 0.1287, "step": 23731 }, { "epoch": 0.6923391096330007, "grad_norm": 0.7999652808402606, "learning_rate": 2.2836035123813466e-06, "loss": 0.1012, "step": 23732 }, { "epoch": 0.6923682828636443, "grad_norm": 0.967336312167194, "learning_rate": 2.2832068914542575e-06, "loss": 0.1263, "step": 23733 }, { "epoch": 0.6923974560942879, "grad_norm": 0.9032378828977392, "learning_rate": 2.2828102947820476e-06, "loss": 0.1406, "step": 23734 }, { "epoch": 0.6924266293249315, "grad_norm": 0.9399886097246759, "learning_rate": 2.282413722368258e-06, "loss": 0.1121, "step": 23735 }, { "epoch": 0.692455802555575, "grad_norm": 0.8896660325406226, "learning_rate": 2.282017174216432e-06, "loss": 0.12, "step": 23736 }, { "epoch": 0.6924849757862186, "grad_norm": 0.9735989195568626, "learning_rate": 2.281620650330108e-06, "loss": 0.1147, "step": 23737 }, { "epoch": 0.6925141490168621, "grad_norm": 0.8502541808391574, "learning_rate": 2.281224150712824e-06, "loss": 0.1269, "step": 23738 }, { "epoch": 0.6925433222475057, "grad_norm": 0.7925147321656687, "learning_rate": 2.2808276753681243e-06, "loss": 0.1381, "step": 23739 }, { "epoch": 0.6925724954781493, "grad_norm": 0.7868908178188531, "learning_rate": 2.280431224299543e-06, "loss": 0.1406, "step": 23740 }, { "epoch": 0.6926016687087928, "grad_norm": 1.0087179670265431, "learning_rate": 2.280034797510623e-06, "loss": 0.1135, "step": 23741 }, { "epoch": 0.6926308419394364, "grad_norm": 0.7662286779625004, "learning_rate": 2.279638395004905e-06, "loss": 0.1129, "step": 23742 }, { "epoch": 0.6926600151700799, "grad_norm": 0.8378206059463131, "learning_rate": 2.279242016785926e-06, "loss": 0.1106, "step": 23743 }, { "epoch": 0.6926891884007235, "grad_norm": 0.752022430396605, "learning_rate": 2.2788456628572227e-06, "loss": 0.1391, "step": 23744 }, { "epoch": 0.692718361631367, "grad_norm": 1.0032264413742915, "learning_rate": 2.2784493332223375e-06, "loss": 0.1302, "step": 23745 }, { "epoch": 0.6927475348620106, "grad_norm": 0.8548099192713413, "learning_rate": 2.278053027884805e-06, "loss": 0.1223, "step": 23746 }, { "epoch": 0.6927767080926541, "grad_norm": 1.001924147167816, "learning_rate": 2.2776567468481674e-06, "loss": 0.1323, "step": 23747 }, { "epoch": 0.6928058813232978, "grad_norm": 0.7231677851603999, "learning_rate": 2.277260490115959e-06, "loss": 0.1388, "step": 23748 }, { "epoch": 0.6928350545539413, "grad_norm": 0.8067489608143206, "learning_rate": 2.2768642576917206e-06, "loss": 0.101, "step": 23749 }, { "epoch": 0.6928642277845849, "grad_norm": 0.9504971563223352, "learning_rate": 2.2764680495789874e-06, "loss": 0.1103, "step": 23750 }, { "epoch": 0.6928934010152284, "grad_norm": 0.8027638111751217, "learning_rate": 2.2760718657812964e-06, "loss": 0.1183, "step": 23751 }, { "epoch": 0.692922574245872, "grad_norm": 0.9116895444309365, "learning_rate": 2.275675706302185e-06, "loss": 0.1304, "step": 23752 }, { "epoch": 0.6929517474765156, "grad_norm": 0.8590398988302214, "learning_rate": 2.2752795711451926e-06, "loss": 0.116, "step": 23753 }, { "epoch": 0.6929809207071591, "grad_norm": 0.7807675862826012, "learning_rate": 2.274883460313852e-06, "loss": 0.1227, "step": 23754 }, { "epoch": 0.6930100939378027, "grad_norm": 0.8985680026686662, "learning_rate": 2.274487373811703e-06, "loss": 0.1205, "step": 23755 }, { "epoch": 0.6930392671684462, "grad_norm": 0.9048122272525334, "learning_rate": 2.2740913116422796e-06, "loss": 0.0981, "step": 23756 }, { "epoch": 0.6930684403990898, "grad_norm": 0.7144588342863268, "learning_rate": 2.2736952738091173e-06, "loss": 0.088, "step": 23757 }, { "epoch": 0.6930976136297333, "grad_norm": 0.6768792673281033, "learning_rate": 2.273299260315752e-06, "loss": 0.135, "step": 23758 }, { "epoch": 0.6931267868603769, "grad_norm": 0.8819290215802724, "learning_rate": 2.2729032711657224e-06, "loss": 0.1014, "step": 23759 }, { "epoch": 0.6931559600910204, "grad_norm": 0.890492511431514, "learning_rate": 2.272507306362561e-06, "loss": 0.09, "step": 23760 }, { "epoch": 0.6931851333216641, "grad_norm": 0.793613907706273, "learning_rate": 2.2721113659098013e-06, "loss": 0.1237, "step": 23761 }, { "epoch": 0.6932143065523076, "grad_norm": 0.8754751817467925, "learning_rate": 2.271715449810982e-06, "loss": 0.1342, "step": 23762 }, { "epoch": 0.6932434797829512, "grad_norm": 0.8301535246481814, "learning_rate": 2.271319558069637e-06, "loss": 0.1349, "step": 23763 }, { "epoch": 0.6932726530135948, "grad_norm": 0.8639729205611231, "learning_rate": 2.2709236906892967e-06, "loss": 0.1098, "step": 23764 }, { "epoch": 0.6933018262442383, "grad_norm": 0.8410730677761642, "learning_rate": 2.2705278476734984e-06, "loss": 0.1211, "step": 23765 }, { "epoch": 0.6933309994748819, "grad_norm": 0.84913739173186, "learning_rate": 2.270132029025777e-06, "loss": 0.1245, "step": 23766 }, { "epoch": 0.6933601727055254, "grad_norm": 0.8301701775348659, "learning_rate": 2.2697362347496665e-06, "loss": 0.1298, "step": 23767 }, { "epoch": 0.693389345936169, "grad_norm": 0.797361811400939, "learning_rate": 2.269340464848697e-06, "loss": 0.11, "step": 23768 }, { "epoch": 0.6934185191668125, "grad_norm": 0.7244305386077172, "learning_rate": 2.268944719326405e-06, "loss": 0.1346, "step": 23769 }, { "epoch": 0.6934476923974561, "grad_norm": 0.9910751266872838, "learning_rate": 2.268548998186321e-06, "loss": 0.1052, "step": 23770 }, { "epoch": 0.6934768656280996, "grad_norm": 0.6653589091318827, "learning_rate": 2.26815330143198e-06, "loss": 0.1017, "step": 23771 }, { "epoch": 0.6935060388587432, "grad_norm": 0.8051877190686334, "learning_rate": 2.2677576290669157e-06, "loss": 0.1186, "step": 23772 }, { "epoch": 0.6935352120893867, "grad_norm": 0.7642106374320058, "learning_rate": 2.267361981094659e-06, "loss": 0.1434, "step": 23773 }, { "epoch": 0.6935643853200303, "grad_norm": 0.8472071730476597, "learning_rate": 2.2669663575187407e-06, "loss": 0.1052, "step": 23774 }, { "epoch": 0.693593558550674, "grad_norm": 0.8994965254843768, "learning_rate": 2.266570758342696e-06, "loss": 0.1192, "step": 23775 }, { "epoch": 0.6936227317813175, "grad_norm": 0.9891177393959779, "learning_rate": 2.266175183570053e-06, "loss": 0.1448, "step": 23776 }, { "epoch": 0.6936519050119611, "grad_norm": 0.8472114327687793, "learning_rate": 2.2657796332043476e-06, "loss": 0.1149, "step": 23777 }, { "epoch": 0.6936810782426046, "grad_norm": 1.0004615352191524, "learning_rate": 2.265384107249106e-06, "loss": 0.1123, "step": 23778 }, { "epoch": 0.6937102514732482, "grad_norm": 0.9177297504770803, "learning_rate": 2.264988605707865e-06, "loss": 0.1008, "step": 23779 }, { "epoch": 0.6937394247038917, "grad_norm": 0.9711912489017491, "learning_rate": 2.2645931285841533e-06, "loss": 0.1154, "step": 23780 }, { "epoch": 0.6937685979345353, "grad_norm": 0.8451203658486415, "learning_rate": 2.2641976758814966e-06, "loss": 0.1218, "step": 23781 }, { "epoch": 0.6937977711651788, "grad_norm": 0.7791037540092294, "learning_rate": 2.263802247603434e-06, "loss": 0.0981, "step": 23782 }, { "epoch": 0.6938269443958224, "grad_norm": 0.9122742883437229, "learning_rate": 2.263406843753492e-06, "loss": 0.1196, "step": 23783 }, { "epoch": 0.6938561176264659, "grad_norm": 0.9145510104429517, "learning_rate": 2.263011464335198e-06, "loss": 0.136, "step": 23784 }, { "epoch": 0.6938852908571095, "grad_norm": 0.8272283121849784, "learning_rate": 2.2626161093520866e-06, "loss": 0.1124, "step": 23785 }, { "epoch": 0.693914464087753, "grad_norm": 0.9133058887329731, "learning_rate": 2.2622207788076848e-06, "loss": 0.1052, "step": 23786 }, { "epoch": 0.6939436373183966, "grad_norm": 0.7982430957193886, "learning_rate": 2.2618254727055206e-06, "loss": 0.1194, "step": 23787 }, { "epoch": 0.6939728105490403, "grad_norm": 1.0187002693919793, "learning_rate": 2.261430191049125e-06, "loss": 0.1213, "step": 23788 }, { "epoch": 0.6940019837796838, "grad_norm": 0.854908536529277, "learning_rate": 2.2610349338420283e-06, "loss": 0.1149, "step": 23789 }, { "epoch": 0.6940311570103274, "grad_norm": 0.8299541363462521, "learning_rate": 2.2606397010877585e-06, "loss": 0.1414, "step": 23790 }, { "epoch": 0.6940603302409709, "grad_norm": 1.0390679356647874, "learning_rate": 2.2602444927898413e-06, "loss": 0.1295, "step": 23791 }, { "epoch": 0.6940895034716145, "grad_norm": 0.8770794811603092, "learning_rate": 2.2598493089518093e-06, "loss": 0.1317, "step": 23792 }, { "epoch": 0.694118676702258, "grad_norm": 0.818535052142983, "learning_rate": 2.2594541495771866e-06, "loss": 0.1207, "step": 23793 }, { "epoch": 0.6941478499329016, "grad_norm": 0.8774843127880833, "learning_rate": 2.2590590146695053e-06, "loss": 0.1445, "step": 23794 }, { "epoch": 0.6941770231635451, "grad_norm": 0.8805688386968867, "learning_rate": 2.258663904232288e-06, "loss": 0.1111, "step": 23795 }, { "epoch": 0.6942061963941887, "grad_norm": 0.8019469765437267, "learning_rate": 2.2582688182690674e-06, "loss": 0.1041, "step": 23796 }, { "epoch": 0.6942353696248322, "grad_norm": 0.9178817772973342, "learning_rate": 2.2578737567833688e-06, "loss": 0.1313, "step": 23797 }, { "epoch": 0.6942645428554758, "grad_norm": 0.7250859828107897, "learning_rate": 2.2574787197787155e-06, "loss": 0.1207, "step": 23798 }, { "epoch": 0.6942937160861193, "grad_norm": 0.8084459599289897, "learning_rate": 2.257083707258639e-06, "loss": 0.1133, "step": 23799 }, { "epoch": 0.6943228893167629, "grad_norm": 0.9793508490564601, "learning_rate": 2.256688719226665e-06, "loss": 0.1258, "step": 23800 }, { "epoch": 0.6943520625474064, "grad_norm": 0.6774895543187904, "learning_rate": 2.256293755686318e-06, "loss": 0.1306, "step": 23801 }, { "epoch": 0.6943812357780501, "grad_norm": 0.7826264163921307, "learning_rate": 2.255898816641127e-06, "loss": 0.1206, "step": 23802 }, { "epoch": 0.6944104090086937, "grad_norm": 1.5171655582646877, "learning_rate": 2.2555039020946163e-06, "loss": 0.1008, "step": 23803 }, { "epoch": 0.6944395822393372, "grad_norm": 1.0481798594953093, "learning_rate": 2.25510901205031e-06, "loss": 0.1161, "step": 23804 }, { "epoch": 0.6944687554699808, "grad_norm": 0.7933968532635306, "learning_rate": 2.254714146511735e-06, "loss": 0.1198, "step": 23805 }, { "epoch": 0.6944979287006243, "grad_norm": 0.9066459719823079, "learning_rate": 2.2543193054824185e-06, "loss": 0.1133, "step": 23806 }, { "epoch": 0.6945271019312679, "grad_norm": 1.2308532338379525, "learning_rate": 2.253924488965884e-06, "loss": 0.1051, "step": 23807 }, { "epoch": 0.6945562751619114, "grad_norm": 0.9219605085205348, "learning_rate": 2.2535296969656547e-06, "loss": 0.1212, "step": 23808 }, { "epoch": 0.694585448392555, "grad_norm": 0.9368588291885176, "learning_rate": 2.253134929485257e-06, "loss": 0.118, "step": 23809 }, { "epoch": 0.6946146216231985, "grad_norm": 0.8434970865606336, "learning_rate": 2.252740186528216e-06, "loss": 0.1143, "step": 23810 }, { "epoch": 0.6946437948538421, "grad_norm": 1.1473254781592623, "learning_rate": 2.252345468098051e-06, "loss": 0.0986, "step": 23811 }, { "epoch": 0.6946729680844856, "grad_norm": 0.8269881720214735, "learning_rate": 2.251950774198294e-06, "loss": 0.1225, "step": 23812 }, { "epoch": 0.6947021413151292, "grad_norm": 0.9458941903347555, "learning_rate": 2.2515561048324637e-06, "loss": 0.097, "step": 23813 }, { "epoch": 0.6947313145457727, "grad_norm": 0.7442355627088012, "learning_rate": 2.251161460004083e-06, "loss": 0.1228, "step": 23814 }, { "epoch": 0.6947604877764164, "grad_norm": 0.9360742571652984, "learning_rate": 2.2507668397166778e-06, "loss": 0.1091, "step": 23815 }, { "epoch": 0.69478966100706, "grad_norm": 0.9932204254641761, "learning_rate": 2.25037224397377e-06, "loss": 0.1336, "step": 23816 }, { "epoch": 0.6948188342377035, "grad_norm": 1.0242302724736048, "learning_rate": 2.2499776727788815e-06, "loss": 0.114, "step": 23817 }, { "epoch": 0.6948480074683471, "grad_norm": 0.816982814096064, "learning_rate": 2.249583126135535e-06, "loss": 0.1075, "step": 23818 }, { "epoch": 0.6948771806989906, "grad_norm": 1.0143184258574376, "learning_rate": 2.249188604047256e-06, "loss": 0.1252, "step": 23819 }, { "epoch": 0.6949063539296342, "grad_norm": 0.9985708969606071, "learning_rate": 2.2487941065175646e-06, "loss": 0.1165, "step": 23820 }, { "epoch": 0.6949355271602777, "grad_norm": 2.625312710630318, "learning_rate": 2.2483996335499804e-06, "loss": 0.1235, "step": 23821 }, { "epoch": 0.6949647003909213, "grad_norm": 0.8656596326522185, "learning_rate": 2.2480051851480296e-06, "loss": 0.1151, "step": 23822 }, { "epoch": 0.6949938736215648, "grad_norm": 1.0129732594232637, "learning_rate": 2.247610761315229e-06, "loss": 0.1367, "step": 23823 }, { "epoch": 0.6950230468522084, "grad_norm": 0.968424923186662, "learning_rate": 2.247216362055105e-06, "loss": 0.1194, "step": 23824 }, { "epoch": 0.695052220082852, "grad_norm": 1.0327727585847357, "learning_rate": 2.2468219873711737e-06, "loss": 0.114, "step": 23825 }, { "epoch": 0.6950813933134955, "grad_norm": 0.9663400679826721, "learning_rate": 2.2464276372669615e-06, "loss": 0.1176, "step": 23826 }, { "epoch": 0.695110566544139, "grad_norm": 1.0742687267998512, "learning_rate": 2.246033311745985e-06, "loss": 0.1364, "step": 23827 }, { "epoch": 0.6951397397747826, "grad_norm": 0.8803108750502666, "learning_rate": 2.245639010811764e-06, "loss": 0.1052, "step": 23828 }, { "epoch": 0.6951689130054263, "grad_norm": 0.8469585046262382, "learning_rate": 2.245244734467821e-06, "loss": 0.1337, "step": 23829 }, { "epoch": 0.6951980862360698, "grad_norm": 0.8042952636604691, "learning_rate": 2.2448504827176767e-06, "loss": 0.1168, "step": 23830 }, { "epoch": 0.6952272594667134, "grad_norm": 1.339756206521757, "learning_rate": 2.2444562555648474e-06, "loss": 0.1357, "step": 23831 }, { "epoch": 0.6952564326973569, "grad_norm": 0.7626741984499985, "learning_rate": 2.2440620530128572e-06, "loss": 0.1198, "step": 23832 }, { "epoch": 0.6952856059280005, "grad_norm": 0.623325959472245, "learning_rate": 2.243667875065223e-06, "loss": 0.1342, "step": 23833 }, { "epoch": 0.695314779158644, "grad_norm": 1.1762003072366538, "learning_rate": 2.2432737217254617e-06, "loss": 0.1132, "step": 23834 }, { "epoch": 0.6953439523892876, "grad_norm": 0.8413067163168597, "learning_rate": 2.2428795929970952e-06, "loss": 0.1132, "step": 23835 }, { "epoch": 0.6953731256199311, "grad_norm": 0.7803250474449454, "learning_rate": 2.2424854888836434e-06, "loss": 0.1185, "step": 23836 }, { "epoch": 0.6954022988505747, "grad_norm": 0.7790511825822023, "learning_rate": 2.2420914093886227e-06, "loss": 0.1202, "step": 23837 }, { "epoch": 0.6954314720812182, "grad_norm": 0.9583893332195279, "learning_rate": 2.2416973545155496e-06, "loss": 0.1339, "step": 23838 }, { "epoch": 0.6954606453118618, "grad_norm": 0.8739164437819695, "learning_rate": 2.2413033242679456e-06, "loss": 0.1181, "step": 23839 }, { "epoch": 0.6954898185425054, "grad_norm": 0.959295105755949, "learning_rate": 2.2409093186493276e-06, "loss": 0.1506, "step": 23840 }, { "epoch": 0.6955189917731489, "grad_norm": 0.8142623408323446, "learning_rate": 2.240515337663208e-06, "loss": 0.1186, "step": 23841 }, { "epoch": 0.6955481650037926, "grad_norm": 1.064582232367325, "learning_rate": 2.2401213813131133e-06, "loss": 0.1152, "step": 23842 }, { "epoch": 0.6955773382344361, "grad_norm": 1.2373366780234374, "learning_rate": 2.239727449602556e-06, "loss": 0.118, "step": 23843 }, { "epoch": 0.6956065114650797, "grad_norm": 0.7205967437679306, "learning_rate": 2.239333542535051e-06, "loss": 0.1287, "step": 23844 }, { "epoch": 0.6956356846957232, "grad_norm": 0.8186552784652059, "learning_rate": 2.2389396601141188e-06, "loss": 0.1103, "step": 23845 }, { "epoch": 0.6956648579263668, "grad_norm": 0.8412739325173756, "learning_rate": 2.2385458023432742e-06, "loss": 0.1075, "step": 23846 }, { "epoch": 0.6956940311570103, "grad_norm": 1.1737791197407967, "learning_rate": 2.2381519692260318e-06, "loss": 0.105, "step": 23847 }, { "epoch": 0.6957232043876539, "grad_norm": 0.7235996361954059, "learning_rate": 2.2377581607659095e-06, "loss": 0.1274, "step": 23848 }, { "epoch": 0.6957523776182974, "grad_norm": 0.7677692176770433, "learning_rate": 2.2373643769664243e-06, "loss": 0.126, "step": 23849 }, { "epoch": 0.695781550848941, "grad_norm": 0.9732730500905161, "learning_rate": 2.236970617831091e-06, "loss": 0.1165, "step": 23850 }, { "epoch": 0.6958107240795846, "grad_norm": 1.2348858461385546, "learning_rate": 2.236576883363422e-06, "loss": 0.1369, "step": 23851 }, { "epoch": 0.6958398973102281, "grad_norm": 1.0381658883900549, "learning_rate": 2.236183173566937e-06, "loss": 0.1144, "step": 23852 }, { "epoch": 0.6958690705408717, "grad_norm": 0.6237444371201205, "learning_rate": 2.235789488445147e-06, "loss": 0.0984, "step": 23853 }, { "epoch": 0.6958982437715152, "grad_norm": 0.7176785407206546, "learning_rate": 2.2353958280015703e-06, "loss": 0.1085, "step": 23854 }, { "epoch": 0.6959274170021588, "grad_norm": 0.9308137456355501, "learning_rate": 2.235002192239718e-06, "loss": 0.1161, "step": 23855 }, { "epoch": 0.6959565902328024, "grad_norm": 0.7946992493326904, "learning_rate": 2.234608581163108e-06, "loss": 0.1126, "step": 23856 }, { "epoch": 0.695985763463446, "grad_norm": 1.000559179116095, "learning_rate": 2.234214994775252e-06, "loss": 0.1274, "step": 23857 }, { "epoch": 0.6960149366940895, "grad_norm": 0.6762911103499605, "learning_rate": 2.2338214330796633e-06, "loss": 0.1051, "step": 23858 }, { "epoch": 0.6960441099247331, "grad_norm": 1.0782851143233587, "learning_rate": 2.233427896079856e-06, "loss": 0.0999, "step": 23859 }, { "epoch": 0.6960732831553766, "grad_norm": 0.8465079972471626, "learning_rate": 2.233034383779346e-06, "loss": 0.1377, "step": 23860 }, { "epoch": 0.6961024563860202, "grad_norm": 1.0538568765976666, "learning_rate": 2.2326408961816425e-06, "loss": 0.123, "step": 23861 }, { "epoch": 0.6961316296166637, "grad_norm": 1.0404147483084252, "learning_rate": 2.232247433290262e-06, "loss": 0.1282, "step": 23862 }, { "epoch": 0.6961608028473073, "grad_norm": 1.304273376189725, "learning_rate": 2.231853995108716e-06, "loss": 0.1027, "step": 23863 }, { "epoch": 0.6961899760779509, "grad_norm": 1.1026164216958099, "learning_rate": 2.231460581640515e-06, "loss": 0.1075, "step": 23864 }, { "epoch": 0.6962191493085944, "grad_norm": 0.8837947441898395, "learning_rate": 2.231067192889173e-06, "loss": 0.136, "step": 23865 }, { "epoch": 0.696248322539238, "grad_norm": 0.8644623049803704, "learning_rate": 2.2306738288582036e-06, "loss": 0.1273, "step": 23866 }, { "epoch": 0.6962774957698815, "grad_norm": 0.7718012780034156, "learning_rate": 2.2302804895511177e-06, "loss": 0.0974, "step": 23867 }, { "epoch": 0.6963066690005251, "grad_norm": 0.8881331523894219, "learning_rate": 2.229887174971424e-06, "loss": 0.1084, "step": 23868 }, { "epoch": 0.6963358422311687, "grad_norm": 0.9709450734308829, "learning_rate": 2.2294938851226387e-06, "loss": 0.1181, "step": 23869 }, { "epoch": 0.6963650154618123, "grad_norm": 1.0097910025405348, "learning_rate": 2.2291006200082705e-06, "loss": 0.1385, "step": 23870 }, { "epoch": 0.6963941886924558, "grad_norm": 1.2827005423027638, "learning_rate": 2.2287073796318266e-06, "loss": 0.0969, "step": 23871 }, { "epoch": 0.6964233619230994, "grad_norm": 0.708415974217797, "learning_rate": 2.2283141639968254e-06, "loss": 0.099, "step": 23872 }, { "epoch": 0.696452535153743, "grad_norm": 0.9949620544624855, "learning_rate": 2.2279209731067736e-06, "loss": 0.0995, "step": 23873 }, { "epoch": 0.6964817083843865, "grad_norm": 1.0254344330917917, "learning_rate": 2.22752780696518e-06, "loss": 0.136, "step": 23874 }, { "epoch": 0.69651088161503, "grad_norm": 0.9702845823420303, "learning_rate": 2.2271346655755577e-06, "loss": 0.1169, "step": 23875 }, { "epoch": 0.6965400548456736, "grad_norm": 0.7426787782128693, "learning_rate": 2.226741548941416e-06, "loss": 0.1129, "step": 23876 }, { "epoch": 0.6965692280763172, "grad_norm": 0.8579837307191444, "learning_rate": 2.226348457066261e-06, "loss": 0.1492, "step": 23877 }, { "epoch": 0.6965984013069607, "grad_norm": 0.9695186357645721, "learning_rate": 2.225955389953605e-06, "loss": 0.1313, "step": 23878 }, { "epoch": 0.6966275745376043, "grad_norm": 1.0191855552721403, "learning_rate": 2.2255623476069595e-06, "loss": 0.1281, "step": 23879 }, { "epoch": 0.6966567477682478, "grad_norm": 0.9127613348863842, "learning_rate": 2.2251693300298306e-06, "loss": 0.108, "step": 23880 }, { "epoch": 0.6966859209988914, "grad_norm": 1.0601321119294365, "learning_rate": 2.2247763372257253e-06, "loss": 0.1284, "step": 23881 }, { "epoch": 0.6967150942295349, "grad_norm": 0.7341699527352541, "learning_rate": 2.224383369198157e-06, "loss": 0.1079, "step": 23882 }, { "epoch": 0.6967442674601786, "grad_norm": 0.904234228900846, "learning_rate": 2.223990425950629e-06, "loss": 0.1073, "step": 23883 }, { "epoch": 0.6967734406908221, "grad_norm": 0.7317430894835597, "learning_rate": 2.223597507486654e-06, "loss": 0.1243, "step": 23884 }, { "epoch": 0.6968026139214657, "grad_norm": 0.8553002245374118, "learning_rate": 2.223204613809736e-06, "loss": 0.1077, "step": 23885 }, { "epoch": 0.6968317871521092, "grad_norm": 0.8570750394948702, "learning_rate": 2.2228117449233853e-06, "loss": 0.098, "step": 23886 }, { "epoch": 0.6968609603827528, "grad_norm": 0.8867475923460965, "learning_rate": 2.2224189008311088e-06, "loss": 0.1422, "step": 23887 }, { "epoch": 0.6968901336133964, "grad_norm": 0.9701536648985049, "learning_rate": 2.2220260815364113e-06, "loss": 0.1207, "step": 23888 }, { "epoch": 0.6969193068440399, "grad_norm": 0.8527957005656143, "learning_rate": 2.2216332870428025e-06, "loss": 0.1097, "step": 23889 }, { "epoch": 0.6969484800746835, "grad_norm": 0.8667439819336876, "learning_rate": 2.22124051735379e-06, "loss": 0.1198, "step": 23890 }, { "epoch": 0.696977653305327, "grad_norm": 0.6934904212398832, "learning_rate": 2.2208477724728765e-06, "loss": 0.1255, "step": 23891 }, { "epoch": 0.6970068265359706, "grad_norm": 1.0531606372921636, "learning_rate": 2.220455052403573e-06, "loss": 0.1344, "step": 23892 }, { "epoch": 0.6970359997666141, "grad_norm": 1.079285750979756, "learning_rate": 2.220062357149383e-06, "loss": 0.1121, "step": 23893 }, { "epoch": 0.6970651729972577, "grad_norm": 0.6888322008395154, "learning_rate": 2.219669686713811e-06, "loss": 0.1346, "step": 23894 }, { "epoch": 0.6970943462279012, "grad_norm": 0.9814590642120479, "learning_rate": 2.2192770411003638e-06, "loss": 0.1348, "step": 23895 }, { "epoch": 0.6971235194585449, "grad_norm": 1.0259025493308334, "learning_rate": 2.21888442031255e-06, "loss": 0.1356, "step": 23896 }, { "epoch": 0.6971526926891884, "grad_norm": 0.8542920359060812, "learning_rate": 2.2184918243538717e-06, "loss": 0.1181, "step": 23897 }, { "epoch": 0.697181865919832, "grad_norm": 0.8486031292820476, "learning_rate": 2.218099253227832e-06, "loss": 0.1371, "step": 23898 }, { "epoch": 0.6972110391504756, "grad_norm": 0.9135604801082281, "learning_rate": 2.217706706937941e-06, "loss": 0.1202, "step": 23899 }, { "epoch": 0.6972402123811191, "grad_norm": 1.0489641449001594, "learning_rate": 2.2173141854877e-06, "loss": 0.1291, "step": 23900 }, { "epoch": 0.6972693856117627, "grad_norm": 0.9118790562128938, "learning_rate": 2.21692168888061e-06, "loss": 0.1281, "step": 23901 }, { "epoch": 0.6972985588424062, "grad_norm": 0.8182972087568022, "learning_rate": 2.216529217120182e-06, "loss": 0.1277, "step": 23902 }, { "epoch": 0.6973277320730498, "grad_norm": 1.023858829391313, "learning_rate": 2.2161367702099172e-06, "loss": 0.1135, "step": 23903 }, { "epoch": 0.6973569053036933, "grad_norm": 0.8734826668400932, "learning_rate": 2.2157443481533165e-06, "loss": 0.1329, "step": 23904 }, { "epoch": 0.6973860785343369, "grad_norm": 0.7654454023172019, "learning_rate": 2.215351950953888e-06, "loss": 0.1125, "step": 23905 }, { "epoch": 0.6974152517649804, "grad_norm": 1.0366101945281585, "learning_rate": 2.214959578615132e-06, "loss": 0.1042, "step": 23906 }, { "epoch": 0.697444424995624, "grad_norm": 0.8992822757576193, "learning_rate": 2.2145672311405505e-06, "loss": 0.0937, "step": 23907 }, { "epoch": 0.6974735982262675, "grad_norm": 0.8214520386037066, "learning_rate": 2.2141749085336476e-06, "loss": 0.1563, "step": 23908 }, { "epoch": 0.6975027714569111, "grad_norm": 0.849531784424523, "learning_rate": 2.213782610797928e-06, "loss": 0.0978, "step": 23909 }, { "epoch": 0.6975319446875548, "grad_norm": 0.9702522776466446, "learning_rate": 2.213390337936892e-06, "loss": 0.1175, "step": 23910 }, { "epoch": 0.6975611179181983, "grad_norm": 0.6985722729712117, "learning_rate": 2.2129980899540403e-06, "loss": 0.1029, "step": 23911 }, { "epoch": 0.6975902911488419, "grad_norm": 0.6521646867903083, "learning_rate": 2.2126058668528784e-06, "loss": 0.1161, "step": 23912 }, { "epoch": 0.6976194643794854, "grad_norm": 0.8726392551150108, "learning_rate": 2.2122136686369038e-06, "loss": 0.1104, "step": 23913 }, { "epoch": 0.697648637610129, "grad_norm": 1.0279031197328325, "learning_rate": 2.2118214953096218e-06, "loss": 0.1098, "step": 23914 }, { "epoch": 0.6976778108407725, "grad_norm": 0.724605864266765, "learning_rate": 2.2114293468745302e-06, "loss": 0.1183, "step": 23915 }, { "epoch": 0.6977069840714161, "grad_norm": 0.8200723129100194, "learning_rate": 2.2110372233351334e-06, "loss": 0.1206, "step": 23916 }, { "epoch": 0.6977361573020596, "grad_norm": 0.9265442451272636, "learning_rate": 2.2106451246949307e-06, "loss": 0.1191, "step": 23917 }, { "epoch": 0.6977653305327032, "grad_norm": 0.6997308658185218, "learning_rate": 2.2102530509574204e-06, "loss": 0.1108, "step": 23918 }, { "epoch": 0.6977945037633467, "grad_norm": 0.7151739698896734, "learning_rate": 2.2098610021261046e-06, "loss": 0.1191, "step": 23919 }, { "epoch": 0.6978236769939903, "grad_norm": 0.9734372742959848, "learning_rate": 2.2094689782044857e-06, "loss": 0.1135, "step": 23920 }, { "epoch": 0.6978528502246338, "grad_norm": 0.7242728998374421, "learning_rate": 2.2090769791960604e-06, "loss": 0.1104, "step": 23921 }, { "epoch": 0.6978820234552774, "grad_norm": 1.1515168965564313, "learning_rate": 2.2086850051043314e-06, "loss": 0.1116, "step": 23922 }, { "epoch": 0.697911196685921, "grad_norm": 0.7441902855450077, "learning_rate": 2.2082930559327955e-06, "loss": 0.1283, "step": 23923 }, { "epoch": 0.6979403699165646, "grad_norm": 0.8467353741366159, "learning_rate": 2.2079011316849515e-06, "loss": 0.1259, "step": 23924 }, { "epoch": 0.6979695431472082, "grad_norm": 0.7578443751350888, "learning_rate": 2.207509232364299e-06, "loss": 0.1087, "step": 23925 }, { "epoch": 0.6979987163778517, "grad_norm": 0.7694819193593879, "learning_rate": 2.2071173579743405e-06, "loss": 0.1455, "step": 23926 }, { "epoch": 0.6980278896084953, "grad_norm": 0.8157212116854585, "learning_rate": 2.2067255085185707e-06, "loss": 0.1081, "step": 23927 }, { "epoch": 0.6980570628391388, "grad_norm": 0.8149733385337679, "learning_rate": 2.2063336840004868e-06, "loss": 0.124, "step": 23928 }, { "epoch": 0.6980862360697824, "grad_norm": 0.7288876374731305, "learning_rate": 2.2059418844235912e-06, "loss": 0.1165, "step": 23929 }, { "epoch": 0.6981154093004259, "grad_norm": 0.7677110662916525, "learning_rate": 2.205550109791379e-06, "loss": 0.1052, "step": 23930 }, { "epoch": 0.6981445825310695, "grad_norm": 0.8223754948929111, "learning_rate": 2.205158360107345e-06, "loss": 0.1192, "step": 23931 }, { "epoch": 0.698173755761713, "grad_norm": 0.8390319313943528, "learning_rate": 2.2047666353749936e-06, "loss": 0.1118, "step": 23932 }, { "epoch": 0.6982029289923566, "grad_norm": 0.7492390754786155, "learning_rate": 2.2043749355978183e-06, "loss": 0.1139, "step": 23933 }, { "epoch": 0.6982321022230001, "grad_norm": 0.7380429326903747, "learning_rate": 2.203983260779314e-06, "loss": 0.1162, "step": 23934 }, { "epoch": 0.6982612754536437, "grad_norm": 0.8982197852339344, "learning_rate": 2.203591610922982e-06, "loss": 0.1161, "step": 23935 }, { "epoch": 0.6982904486842872, "grad_norm": 0.8736667339876081, "learning_rate": 2.2031999860323165e-06, "loss": 0.1392, "step": 23936 }, { "epoch": 0.6983196219149309, "grad_norm": 0.6820437877376293, "learning_rate": 2.2028083861108123e-06, "loss": 0.126, "step": 23937 }, { "epoch": 0.6983487951455745, "grad_norm": 1.1657274485988667, "learning_rate": 2.2024168111619666e-06, "loss": 0.1394, "step": 23938 }, { "epoch": 0.698377968376218, "grad_norm": 0.720350106438394, "learning_rate": 2.202025261189278e-06, "loss": 0.1027, "step": 23939 }, { "epoch": 0.6984071416068616, "grad_norm": 0.842727327060568, "learning_rate": 2.20163373619624e-06, "loss": 0.1467, "step": 23940 }, { "epoch": 0.6984363148375051, "grad_norm": 0.7339955273043258, "learning_rate": 2.2012422361863457e-06, "loss": 0.1152, "step": 23941 }, { "epoch": 0.6984654880681487, "grad_norm": 0.9130713180378784, "learning_rate": 2.200850761163095e-06, "loss": 0.1123, "step": 23942 }, { "epoch": 0.6984946612987922, "grad_norm": 0.8363496227683581, "learning_rate": 2.200459311129978e-06, "loss": 0.1122, "step": 23943 }, { "epoch": 0.6985238345294358, "grad_norm": 0.9935560450590118, "learning_rate": 2.200067886090494e-06, "loss": 0.1146, "step": 23944 }, { "epoch": 0.6985530077600793, "grad_norm": 0.7691839559194878, "learning_rate": 2.1996764860481334e-06, "loss": 0.1337, "step": 23945 }, { "epoch": 0.6985821809907229, "grad_norm": 0.7693515060529519, "learning_rate": 2.1992851110063953e-06, "loss": 0.1089, "step": 23946 }, { "epoch": 0.6986113542213664, "grad_norm": 0.9126551268711051, "learning_rate": 2.1988937609687707e-06, "loss": 0.1246, "step": 23947 }, { "epoch": 0.69864052745201, "grad_norm": 0.7305308384724504, "learning_rate": 2.198502435938752e-06, "loss": 0.1282, "step": 23948 }, { "epoch": 0.6986697006826535, "grad_norm": 1.1475363486012335, "learning_rate": 2.198111135919834e-06, "loss": 0.1286, "step": 23949 }, { "epoch": 0.6986988739132972, "grad_norm": 1.3417151758689219, "learning_rate": 2.197719860915514e-06, "loss": 0.1108, "step": 23950 }, { "epoch": 0.6987280471439408, "grad_norm": 0.6706037887177457, "learning_rate": 2.19732861092928e-06, "loss": 0.1268, "step": 23951 }, { "epoch": 0.6987572203745843, "grad_norm": 0.86667105947099, "learning_rate": 2.1969373859646287e-06, "loss": 0.1082, "step": 23952 }, { "epoch": 0.6987863936052279, "grad_norm": 1.02242976326532, "learning_rate": 2.1965461860250515e-06, "loss": 0.1122, "step": 23953 }, { "epoch": 0.6988155668358714, "grad_norm": 0.8762455769409735, "learning_rate": 2.196155011114039e-06, "loss": 0.1131, "step": 23954 }, { "epoch": 0.698844740066515, "grad_norm": 0.89106618415591, "learning_rate": 2.1957638612350846e-06, "loss": 0.1103, "step": 23955 }, { "epoch": 0.6988739132971585, "grad_norm": 0.999620249948512, "learning_rate": 2.1953727363916833e-06, "loss": 0.136, "step": 23956 }, { "epoch": 0.6989030865278021, "grad_norm": 0.9502451622796657, "learning_rate": 2.194981636587325e-06, "loss": 0.1146, "step": 23957 }, { "epoch": 0.6989322597584456, "grad_norm": 0.7329955936712534, "learning_rate": 2.1945905618254985e-06, "loss": 0.098, "step": 23958 }, { "epoch": 0.6989614329890892, "grad_norm": 0.771353050670828, "learning_rate": 2.1941995121096997e-06, "loss": 0.1229, "step": 23959 }, { "epoch": 0.6989906062197327, "grad_norm": 0.9019231796641504, "learning_rate": 2.1938084874434184e-06, "loss": 0.1359, "step": 23960 }, { "epoch": 0.6990197794503763, "grad_norm": 0.8461351662171521, "learning_rate": 2.193417487830141e-06, "loss": 0.1132, "step": 23961 }, { "epoch": 0.6990489526810199, "grad_norm": 0.9079653025212666, "learning_rate": 2.1930265132733663e-06, "loss": 0.1577, "step": 23962 }, { "epoch": 0.6990781259116634, "grad_norm": 0.9916480713194713, "learning_rate": 2.1926355637765805e-06, "loss": 0.1072, "step": 23963 }, { "epoch": 0.6991072991423071, "grad_norm": 0.675691073335476, "learning_rate": 2.192244639343272e-06, "loss": 0.1155, "step": 23964 }, { "epoch": 0.6991364723729506, "grad_norm": 0.74496196362572, "learning_rate": 2.1918537399769358e-06, "loss": 0.1145, "step": 23965 }, { "epoch": 0.6991656456035942, "grad_norm": 0.8457333082543786, "learning_rate": 2.191462865681058e-06, "loss": 0.1355, "step": 23966 }, { "epoch": 0.6991948188342377, "grad_norm": 0.9140121292847121, "learning_rate": 2.191072016459129e-06, "loss": 0.1184, "step": 23967 }, { "epoch": 0.6992239920648813, "grad_norm": 0.9728469106059102, "learning_rate": 2.190681192314637e-06, "loss": 0.1065, "step": 23968 }, { "epoch": 0.6992531652955248, "grad_norm": 0.8514995422077425, "learning_rate": 2.1902903932510748e-06, "loss": 0.1197, "step": 23969 }, { "epoch": 0.6992823385261684, "grad_norm": 1.085130976010377, "learning_rate": 2.1898996192719297e-06, "loss": 0.1294, "step": 23970 }, { "epoch": 0.6993115117568119, "grad_norm": 0.8148898121292455, "learning_rate": 2.1895088703806877e-06, "loss": 0.1123, "step": 23971 }, { "epoch": 0.6993406849874555, "grad_norm": 0.8148589495926012, "learning_rate": 2.189118146580842e-06, "loss": 0.1368, "step": 23972 }, { "epoch": 0.699369858218099, "grad_norm": 0.8087628675117582, "learning_rate": 2.188727447875876e-06, "loss": 0.1269, "step": 23973 }, { "epoch": 0.6993990314487426, "grad_norm": 0.8024056352754089, "learning_rate": 2.1883367742692824e-06, "loss": 0.116, "step": 23974 }, { "epoch": 0.6994282046793862, "grad_norm": 0.807474093646638, "learning_rate": 2.1879461257645453e-06, "loss": 0.1368, "step": 23975 }, { "epoch": 0.6994573779100297, "grad_norm": 1.0470631070479548, "learning_rate": 2.1875555023651552e-06, "loss": 0.1174, "step": 23976 }, { "epoch": 0.6994865511406733, "grad_norm": 0.8551891079223305, "learning_rate": 2.1871649040745984e-06, "loss": 0.1115, "step": 23977 }, { "epoch": 0.6995157243713169, "grad_norm": 0.7515422966205455, "learning_rate": 2.1867743308963585e-06, "loss": 0.0985, "step": 23978 }, { "epoch": 0.6995448976019605, "grad_norm": 0.8912524447396101, "learning_rate": 2.186383782833929e-06, "loss": 0.1059, "step": 23979 }, { "epoch": 0.699574070832604, "grad_norm": 0.9428279664495918, "learning_rate": 2.1859932598907933e-06, "loss": 0.1312, "step": 23980 }, { "epoch": 0.6996032440632476, "grad_norm": 1.4812676271230822, "learning_rate": 2.1856027620704367e-06, "loss": 0.1031, "step": 23981 }, { "epoch": 0.6996324172938911, "grad_norm": 0.8477961693049424, "learning_rate": 2.1852122893763484e-06, "loss": 0.0992, "step": 23982 }, { "epoch": 0.6996615905245347, "grad_norm": 0.8264979109448708, "learning_rate": 2.1848218418120134e-06, "loss": 0.126, "step": 23983 }, { "epoch": 0.6996907637551782, "grad_norm": 1.1011081401884897, "learning_rate": 2.184431419380914e-06, "loss": 0.1329, "step": 23984 }, { "epoch": 0.6997199369858218, "grad_norm": 1.0591944467203402, "learning_rate": 2.1840410220865394e-06, "loss": 0.1013, "step": 23985 }, { "epoch": 0.6997491102164654, "grad_norm": 0.789704705507825, "learning_rate": 2.183650649932376e-06, "loss": 0.1472, "step": 23986 }, { "epoch": 0.6997782834471089, "grad_norm": 0.9502062566605105, "learning_rate": 2.1832603029219074e-06, "loss": 0.1409, "step": 23987 }, { "epoch": 0.6998074566777525, "grad_norm": 0.8025599434789594, "learning_rate": 2.182869981058617e-06, "loss": 0.1229, "step": 23988 }, { "epoch": 0.699836629908396, "grad_norm": 0.8018413521504824, "learning_rate": 2.1824796843459916e-06, "loss": 0.1063, "step": 23989 }, { "epoch": 0.6998658031390396, "grad_norm": 0.8754198713761392, "learning_rate": 2.182089412787514e-06, "loss": 0.1156, "step": 23990 }, { "epoch": 0.6998949763696832, "grad_norm": 0.991015694336839, "learning_rate": 2.1816991663866692e-06, "loss": 0.1253, "step": 23991 }, { "epoch": 0.6999241496003268, "grad_norm": 0.8127540572983639, "learning_rate": 2.1813089451469436e-06, "loss": 0.1432, "step": 23992 }, { "epoch": 0.6999533228309703, "grad_norm": 0.9592306562067225, "learning_rate": 2.1809187490718185e-06, "loss": 0.1228, "step": 23993 }, { "epoch": 0.6999824960616139, "grad_norm": 0.8551294010357323, "learning_rate": 2.180528578164776e-06, "loss": 0.1225, "step": 23994 }, { "epoch": 0.7000116692922574, "grad_norm": 1.0370886387783538, "learning_rate": 2.1801384324293036e-06, "loss": 0.1251, "step": 23995 }, { "epoch": 0.700040842522901, "grad_norm": 1.1952787806638514, "learning_rate": 2.17974831186888e-06, "loss": 0.1325, "step": 23996 }, { "epoch": 0.7000700157535446, "grad_norm": 0.7795203523082717, "learning_rate": 2.179358216486992e-06, "loss": 0.133, "step": 23997 }, { "epoch": 0.7000991889841881, "grad_norm": 0.8025040785672175, "learning_rate": 2.178968146287119e-06, "loss": 0.1296, "step": 23998 }, { "epoch": 0.7001283622148317, "grad_norm": 1.0730825866736446, "learning_rate": 2.1785781012727457e-06, "loss": 0.1214, "step": 23999 }, { "epoch": 0.7001575354454752, "grad_norm": 0.6690442535288196, "learning_rate": 2.1781880814473545e-06, "loss": 0.1107, "step": 24000 }, { "epoch": 0.7001867086761188, "grad_norm": 0.8040909148802236, "learning_rate": 2.1777980868144245e-06, "loss": 0.1437, "step": 24001 }, { "epoch": 0.7002158819067623, "grad_norm": 1.084027438536715, "learning_rate": 2.17740811737744e-06, "loss": 0.1134, "step": 24002 }, { "epoch": 0.7002450551374059, "grad_norm": 1.0520530415588623, "learning_rate": 2.177018173139883e-06, "loss": 0.133, "step": 24003 }, { "epoch": 0.7002742283680494, "grad_norm": 0.8252624810035873, "learning_rate": 2.176628254105234e-06, "loss": 0.102, "step": 24004 }, { "epoch": 0.7003034015986931, "grad_norm": 0.9945593723678332, "learning_rate": 2.176238360276972e-06, "loss": 0.1294, "step": 24005 }, { "epoch": 0.7003325748293366, "grad_norm": 1.7703537817533679, "learning_rate": 2.1758484916585828e-06, "loss": 0.0969, "step": 24006 }, { "epoch": 0.7003617480599802, "grad_norm": 0.8443668737230785, "learning_rate": 2.175458648253543e-06, "loss": 0.1137, "step": 24007 }, { "epoch": 0.7003909212906237, "grad_norm": 1.1310173847517508, "learning_rate": 2.1750688300653307e-06, "loss": 0.1452, "step": 24008 }, { "epoch": 0.7004200945212673, "grad_norm": 0.9392790970680069, "learning_rate": 2.174679037097433e-06, "loss": 0.1296, "step": 24009 }, { "epoch": 0.7004492677519109, "grad_norm": 1.1362254631985376, "learning_rate": 2.1742892693533263e-06, "loss": 0.1098, "step": 24010 }, { "epoch": 0.7004784409825544, "grad_norm": 0.7340869338528412, "learning_rate": 2.1738995268364893e-06, "loss": 0.1163, "step": 24011 }, { "epoch": 0.700507614213198, "grad_norm": 0.7099221677250814, "learning_rate": 2.1735098095504036e-06, "loss": 0.1208, "step": 24012 }, { "epoch": 0.7005367874438415, "grad_norm": 1.126775946889232, "learning_rate": 2.1731201174985484e-06, "loss": 0.1133, "step": 24013 }, { "epoch": 0.7005659606744851, "grad_norm": 0.8955328508740298, "learning_rate": 2.1727304506843998e-06, "loss": 0.1221, "step": 24014 }, { "epoch": 0.7005951339051286, "grad_norm": 0.8803672228134498, "learning_rate": 2.172340809111439e-06, "loss": 0.1302, "step": 24015 }, { "epoch": 0.7006243071357722, "grad_norm": 0.7970264155337651, "learning_rate": 2.171951192783146e-06, "loss": 0.1226, "step": 24016 }, { "epoch": 0.7006534803664157, "grad_norm": 0.9698896300487313, "learning_rate": 2.171561601702998e-06, "loss": 0.1264, "step": 24017 }, { "epoch": 0.7006826535970594, "grad_norm": 0.8072280167044456, "learning_rate": 2.1711720358744704e-06, "loss": 0.1267, "step": 24018 }, { "epoch": 0.7007118268277029, "grad_norm": 0.7550862104546195, "learning_rate": 2.170782495301046e-06, "loss": 0.1096, "step": 24019 }, { "epoch": 0.7007410000583465, "grad_norm": 1.1206125222080219, "learning_rate": 2.170392979986198e-06, "loss": 0.1383, "step": 24020 }, { "epoch": 0.70077017328899, "grad_norm": 0.7952524095088658, "learning_rate": 2.1700034899334056e-06, "loss": 0.11, "step": 24021 }, { "epoch": 0.7007993465196336, "grad_norm": 0.6707995429890307, "learning_rate": 2.169614025146149e-06, "loss": 0.1164, "step": 24022 }, { "epoch": 0.7008285197502772, "grad_norm": 1.335294007389897, "learning_rate": 2.169224585627902e-06, "loss": 0.122, "step": 24023 }, { "epoch": 0.7008576929809207, "grad_norm": 0.8019056674990741, "learning_rate": 2.168835171382141e-06, "loss": 0.1076, "step": 24024 }, { "epoch": 0.7008868662115643, "grad_norm": 0.9430484276438996, "learning_rate": 2.168445782412345e-06, "loss": 0.0955, "step": 24025 }, { "epoch": 0.7009160394422078, "grad_norm": 0.9831013496598711, "learning_rate": 2.1680564187219877e-06, "loss": 0.1253, "step": 24026 }, { "epoch": 0.7009452126728514, "grad_norm": 0.9485212158111505, "learning_rate": 2.1676670803145483e-06, "loss": 0.115, "step": 24027 }, { "epoch": 0.7009743859034949, "grad_norm": 1.155568103595562, "learning_rate": 2.167277767193499e-06, "loss": 0.1115, "step": 24028 }, { "epoch": 0.7010035591341385, "grad_norm": 1.0984865711311678, "learning_rate": 2.1668884793623202e-06, "loss": 0.1152, "step": 24029 }, { "epoch": 0.701032732364782, "grad_norm": 0.7952488702239149, "learning_rate": 2.166499216824484e-06, "loss": 0.1273, "step": 24030 }, { "epoch": 0.7010619055954256, "grad_norm": 0.8422218644867794, "learning_rate": 2.166109979583465e-06, "loss": 0.1226, "step": 24031 }, { "epoch": 0.7010910788260692, "grad_norm": 0.8822999547685475, "learning_rate": 2.1657207676427395e-06, "loss": 0.1188, "step": 24032 }, { "epoch": 0.7011202520567128, "grad_norm": 1.0246611848438882, "learning_rate": 2.165331581005784e-06, "loss": 0.0944, "step": 24033 }, { "epoch": 0.7011494252873564, "grad_norm": 1.0312611627298989, "learning_rate": 2.1649424196760717e-06, "loss": 0.1456, "step": 24034 }, { "epoch": 0.7011785985179999, "grad_norm": 0.7709151509435308, "learning_rate": 2.1645532836570744e-06, "loss": 0.1109, "step": 24035 }, { "epoch": 0.7012077717486435, "grad_norm": 0.8516470804910463, "learning_rate": 2.1641641729522705e-06, "loss": 0.1331, "step": 24036 }, { "epoch": 0.701236944979287, "grad_norm": 1.006222626001592, "learning_rate": 2.163775087565132e-06, "loss": 0.1055, "step": 24037 }, { "epoch": 0.7012661182099306, "grad_norm": 2.064272706827919, "learning_rate": 2.163386027499129e-06, "loss": 0.1243, "step": 24038 }, { "epoch": 0.7012952914405741, "grad_norm": 1.2481984411755531, "learning_rate": 2.1629969927577417e-06, "loss": 0.1146, "step": 24039 }, { "epoch": 0.7013244646712177, "grad_norm": 0.6976811043586211, "learning_rate": 2.16260798334444e-06, "loss": 0.0884, "step": 24040 }, { "epoch": 0.7013536379018612, "grad_norm": 0.7852034296196531, "learning_rate": 2.1622189992626956e-06, "loss": 0.1081, "step": 24041 }, { "epoch": 0.7013828111325048, "grad_norm": 1.2743735386200303, "learning_rate": 2.1618300405159844e-06, "loss": 0.1288, "step": 24042 }, { "epoch": 0.7014119843631483, "grad_norm": 1.1277866592727162, "learning_rate": 2.1614411071077764e-06, "loss": 0.1128, "step": 24043 }, { "epoch": 0.7014411575937919, "grad_norm": 0.8391056870980765, "learning_rate": 2.161052199041543e-06, "loss": 0.1128, "step": 24044 }, { "epoch": 0.7014703308244356, "grad_norm": 0.7315497197928056, "learning_rate": 2.160663316320758e-06, "loss": 0.1439, "step": 24045 }, { "epoch": 0.7014995040550791, "grad_norm": 0.7604841604689835, "learning_rate": 2.1602744589488944e-06, "loss": 0.104, "step": 24046 }, { "epoch": 0.7015286772857227, "grad_norm": 0.9601341189787869, "learning_rate": 2.1598856269294234e-06, "loss": 0.1232, "step": 24047 }, { "epoch": 0.7015578505163662, "grad_norm": 0.9800180073239532, "learning_rate": 2.159496820265813e-06, "loss": 0.1305, "step": 24048 }, { "epoch": 0.7015870237470098, "grad_norm": 0.9845668064896056, "learning_rate": 2.1591080389615386e-06, "loss": 0.1205, "step": 24049 }, { "epoch": 0.7016161969776533, "grad_norm": 0.8112349456134588, "learning_rate": 2.1587192830200683e-06, "loss": 0.0993, "step": 24050 }, { "epoch": 0.7016453702082969, "grad_norm": 1.1056914242147518, "learning_rate": 2.158330552444874e-06, "loss": 0.1197, "step": 24051 }, { "epoch": 0.7016745434389404, "grad_norm": 1.0860628694299137, "learning_rate": 2.1579418472394274e-06, "loss": 0.1228, "step": 24052 }, { "epoch": 0.701703716669584, "grad_norm": 0.9195609440088643, "learning_rate": 2.157553167407198e-06, "loss": 0.1199, "step": 24053 }, { "epoch": 0.7017328899002275, "grad_norm": 0.8267432432912861, "learning_rate": 2.1571645129516533e-06, "loss": 0.1103, "step": 24054 }, { "epoch": 0.7017620631308711, "grad_norm": 0.9119221876899662, "learning_rate": 2.156775883876267e-06, "loss": 0.1102, "step": 24055 }, { "epoch": 0.7017912363615146, "grad_norm": 1.1060395320976149, "learning_rate": 2.156387280184505e-06, "loss": 0.1258, "step": 24056 }, { "epoch": 0.7018204095921582, "grad_norm": 1.2418193467656058, "learning_rate": 2.1559987018798407e-06, "loss": 0.108, "step": 24057 }, { "epoch": 0.7018495828228017, "grad_norm": 0.6941779523718972, "learning_rate": 2.155610148965739e-06, "loss": 0.1126, "step": 24058 }, { "epoch": 0.7018787560534454, "grad_norm": 0.7468745610187636, "learning_rate": 2.155221621445673e-06, "loss": 0.106, "step": 24059 }, { "epoch": 0.701907929284089, "grad_norm": 1.3963220490013728, "learning_rate": 2.154833119323109e-06, "loss": 0.1153, "step": 24060 }, { "epoch": 0.7019371025147325, "grad_norm": 1.010510213635547, "learning_rate": 2.1544446426015137e-06, "loss": 0.119, "step": 24061 }, { "epoch": 0.7019662757453761, "grad_norm": 0.843969770784331, "learning_rate": 2.1540561912843577e-06, "loss": 0.1247, "step": 24062 }, { "epoch": 0.7019954489760196, "grad_norm": 0.9308770573205402, "learning_rate": 2.1536677653751103e-06, "loss": 0.1054, "step": 24063 }, { "epoch": 0.7020246222066632, "grad_norm": 1.1167229292695007, "learning_rate": 2.1532793648772376e-06, "loss": 0.1071, "step": 24064 }, { "epoch": 0.7020537954373067, "grad_norm": 0.6777538264692602, "learning_rate": 2.152890989794205e-06, "loss": 0.1162, "step": 24065 }, { "epoch": 0.7020829686679503, "grad_norm": 0.7712540637357135, "learning_rate": 2.1525026401294846e-06, "loss": 0.1106, "step": 24066 }, { "epoch": 0.7021121418985938, "grad_norm": 1.0568101289978415, "learning_rate": 2.1521143158865403e-06, "loss": 0.1249, "step": 24067 }, { "epoch": 0.7021413151292374, "grad_norm": 0.7967857620394879, "learning_rate": 2.1517260170688357e-06, "loss": 0.1057, "step": 24068 }, { "epoch": 0.7021704883598809, "grad_norm": 0.6882672674262725, "learning_rate": 2.1513377436798454e-06, "loss": 0.1286, "step": 24069 }, { "epoch": 0.7021996615905245, "grad_norm": 0.8283639053360913, "learning_rate": 2.150949495723032e-06, "loss": 0.1169, "step": 24070 }, { "epoch": 0.702228834821168, "grad_norm": 0.9816232198395284, "learning_rate": 2.1505612732018588e-06, "loss": 0.098, "step": 24071 }, { "epoch": 0.7022580080518117, "grad_norm": 0.8261055672546759, "learning_rate": 2.1501730761197962e-06, "loss": 0.1189, "step": 24072 }, { "epoch": 0.7022871812824553, "grad_norm": 0.6476406893872703, "learning_rate": 2.1497849044803088e-06, "loss": 0.1215, "step": 24073 }, { "epoch": 0.7023163545130988, "grad_norm": 0.8205112972150803, "learning_rate": 2.149396758286859e-06, "loss": 0.1185, "step": 24074 }, { "epoch": 0.7023455277437424, "grad_norm": 1.075725340492133, "learning_rate": 2.1490086375429146e-06, "loss": 0.1242, "step": 24075 }, { "epoch": 0.7023747009743859, "grad_norm": 0.777168613703323, "learning_rate": 2.1486205422519426e-06, "loss": 0.1072, "step": 24076 }, { "epoch": 0.7024038742050295, "grad_norm": 0.9361846554945391, "learning_rate": 2.1482324724174052e-06, "loss": 0.1484, "step": 24077 }, { "epoch": 0.702433047435673, "grad_norm": 0.8325886361286319, "learning_rate": 2.1478444280427657e-06, "loss": 0.1271, "step": 24078 }, { "epoch": 0.7024622206663166, "grad_norm": 0.706949477634099, "learning_rate": 2.1474564091314925e-06, "loss": 0.1249, "step": 24079 }, { "epoch": 0.7024913938969601, "grad_norm": 0.8312963660706355, "learning_rate": 2.1470684156870454e-06, "loss": 0.1086, "step": 24080 }, { "epoch": 0.7025205671276037, "grad_norm": 1.151050570465602, "learning_rate": 2.1466804477128905e-06, "loss": 0.1077, "step": 24081 }, { "epoch": 0.7025497403582472, "grad_norm": 0.8598811482320603, "learning_rate": 2.1462925052124934e-06, "loss": 0.1149, "step": 24082 }, { "epoch": 0.7025789135888908, "grad_norm": 0.789679013115193, "learning_rate": 2.1459045881893154e-06, "loss": 0.1103, "step": 24083 }, { "epoch": 0.7026080868195343, "grad_norm": 0.7495025644090153, "learning_rate": 2.1455166966468177e-06, "loss": 0.1204, "step": 24084 }, { "epoch": 0.7026372600501779, "grad_norm": 1.095025131927912, "learning_rate": 2.1451288305884683e-06, "loss": 0.1316, "step": 24085 }, { "epoch": 0.7026664332808216, "grad_norm": 0.7283491155742952, "learning_rate": 2.144740990017725e-06, "loss": 0.1111, "step": 24086 }, { "epoch": 0.7026956065114651, "grad_norm": 0.8667311375326322, "learning_rate": 2.1443531749380538e-06, "loss": 0.1185, "step": 24087 }, { "epoch": 0.7027247797421087, "grad_norm": 0.6838411245661873, "learning_rate": 2.143965385352914e-06, "loss": 0.1259, "step": 24088 }, { "epoch": 0.7027539529727522, "grad_norm": 1.052758190353409, "learning_rate": 2.1435776212657715e-06, "loss": 0.1414, "step": 24089 }, { "epoch": 0.7027831262033958, "grad_norm": 0.823716241046075, "learning_rate": 2.1431898826800866e-06, "loss": 0.1294, "step": 24090 }, { "epoch": 0.7028122994340393, "grad_norm": 0.9334328281490369, "learning_rate": 2.1428021695993184e-06, "loss": 0.1433, "step": 24091 }, { "epoch": 0.7028414726646829, "grad_norm": 0.8547639693236345, "learning_rate": 2.14241448202693e-06, "loss": 0.122, "step": 24092 }, { "epoch": 0.7028706458953264, "grad_norm": 0.6344574823642141, "learning_rate": 2.1420268199663854e-06, "loss": 0.1006, "step": 24093 }, { "epoch": 0.70289981912597, "grad_norm": 0.8307319846047498, "learning_rate": 2.141639183421142e-06, "loss": 0.1277, "step": 24094 }, { "epoch": 0.7029289923566135, "grad_norm": 0.792771071556801, "learning_rate": 2.141251572394661e-06, "loss": 0.1368, "step": 24095 }, { "epoch": 0.7029581655872571, "grad_norm": 0.6623395859811816, "learning_rate": 2.1408639868904046e-06, "loss": 0.1212, "step": 24096 }, { "epoch": 0.7029873388179007, "grad_norm": 0.8286602012946489, "learning_rate": 2.140476426911832e-06, "loss": 0.1317, "step": 24097 }, { "epoch": 0.7030165120485442, "grad_norm": 0.8179453115130316, "learning_rate": 2.1400888924623995e-06, "loss": 0.1441, "step": 24098 }, { "epoch": 0.7030456852791879, "grad_norm": 1.248903539581955, "learning_rate": 2.139701383545575e-06, "loss": 0.0981, "step": 24099 }, { "epoch": 0.7030748585098314, "grad_norm": 0.8845381821470489, "learning_rate": 2.139313900164813e-06, "loss": 0.1057, "step": 24100 }, { "epoch": 0.703104031740475, "grad_norm": 0.8787044640074702, "learning_rate": 2.1389264423235725e-06, "loss": 0.1166, "step": 24101 }, { "epoch": 0.7031332049711185, "grad_norm": 0.9290614703170608, "learning_rate": 2.138539010025315e-06, "loss": 0.1373, "step": 24102 }, { "epoch": 0.7031623782017621, "grad_norm": 0.8282834718855829, "learning_rate": 2.1381516032734985e-06, "loss": 0.1069, "step": 24103 }, { "epoch": 0.7031915514324056, "grad_norm": 0.8220651876508512, "learning_rate": 2.137764222071579e-06, "loss": 0.1075, "step": 24104 }, { "epoch": 0.7032207246630492, "grad_norm": 0.9565262772610033, "learning_rate": 2.137376866423018e-06, "loss": 0.1057, "step": 24105 }, { "epoch": 0.7032498978936927, "grad_norm": 1.1878622190301287, "learning_rate": 2.1369895363312735e-06, "loss": 0.1295, "step": 24106 }, { "epoch": 0.7032790711243363, "grad_norm": 0.9622737987465088, "learning_rate": 2.1366022317998042e-06, "loss": 0.1148, "step": 24107 }, { "epoch": 0.7033082443549799, "grad_norm": 0.9526874065691252, "learning_rate": 2.1362149528320646e-06, "loss": 0.1295, "step": 24108 }, { "epoch": 0.7033374175856234, "grad_norm": 1.354666136293969, "learning_rate": 2.135827699431516e-06, "loss": 0.1227, "step": 24109 }, { "epoch": 0.703366590816267, "grad_norm": 1.2650871130812544, "learning_rate": 2.135440471601612e-06, "loss": 0.1212, "step": 24110 }, { "epoch": 0.7033957640469105, "grad_norm": 0.847655465678781, "learning_rate": 2.1350532693458117e-06, "loss": 0.1034, "step": 24111 }, { "epoch": 0.7034249372775541, "grad_norm": 0.9253122018880892, "learning_rate": 2.1346660926675732e-06, "loss": 0.135, "step": 24112 }, { "epoch": 0.7034541105081977, "grad_norm": 0.8972131725035827, "learning_rate": 2.1342789415703524e-06, "loss": 0.111, "step": 24113 }, { "epoch": 0.7034832837388413, "grad_norm": 0.9270774916417938, "learning_rate": 2.1338918160576033e-06, "loss": 0.1209, "step": 24114 }, { "epoch": 0.7035124569694848, "grad_norm": 0.9024003077394835, "learning_rate": 2.1335047161327853e-06, "loss": 0.1249, "step": 24115 }, { "epoch": 0.7035416302001284, "grad_norm": 0.9166456653557409, "learning_rate": 2.1331176417993517e-06, "loss": 0.1104, "step": 24116 }, { "epoch": 0.7035708034307719, "grad_norm": 1.0659154021750539, "learning_rate": 2.1327305930607605e-06, "loss": 0.1047, "step": 24117 }, { "epoch": 0.7035999766614155, "grad_norm": 1.8382653879684283, "learning_rate": 2.1323435699204646e-06, "loss": 0.1144, "step": 24118 }, { "epoch": 0.703629149892059, "grad_norm": 0.8872106360749761, "learning_rate": 2.131956572381923e-06, "loss": 0.1426, "step": 24119 }, { "epoch": 0.7036583231227026, "grad_norm": 0.7547890595575304, "learning_rate": 2.131569600448588e-06, "loss": 0.1437, "step": 24120 }, { "epoch": 0.7036874963533462, "grad_norm": 1.0143542045822773, "learning_rate": 2.1311826541239133e-06, "loss": 0.1078, "step": 24121 }, { "epoch": 0.7037166695839897, "grad_norm": 0.7800617755717483, "learning_rate": 2.130795733411355e-06, "loss": 0.1429, "step": 24122 }, { "epoch": 0.7037458428146333, "grad_norm": 0.8782027026296402, "learning_rate": 2.130408838314369e-06, "loss": 0.1302, "step": 24123 }, { "epoch": 0.7037750160452768, "grad_norm": 0.9126599810947121, "learning_rate": 2.1300219688364078e-06, "loss": 0.1213, "step": 24124 }, { "epoch": 0.7038041892759204, "grad_norm": 0.8159882221397627, "learning_rate": 2.1296351249809237e-06, "loss": 0.1027, "step": 24125 }, { "epoch": 0.703833362506564, "grad_norm": 0.8295197981214928, "learning_rate": 2.129248306751374e-06, "loss": 0.1145, "step": 24126 }, { "epoch": 0.7038625357372076, "grad_norm": 1.2707174944966297, "learning_rate": 2.1288615141512098e-06, "loss": 0.1279, "step": 24127 }, { "epoch": 0.7038917089678511, "grad_norm": 0.9607515254219565, "learning_rate": 2.128474747183881e-06, "loss": 0.114, "step": 24128 }, { "epoch": 0.7039208821984947, "grad_norm": 0.8996369639105667, "learning_rate": 2.128088005852848e-06, "loss": 0.143, "step": 24129 }, { "epoch": 0.7039500554291382, "grad_norm": 0.9824472072603855, "learning_rate": 2.1277012901615595e-06, "loss": 0.135, "step": 24130 }, { "epoch": 0.7039792286597818, "grad_norm": 1.1287121871293229, "learning_rate": 2.1273146001134672e-06, "loss": 0.1507, "step": 24131 }, { "epoch": 0.7040084018904254, "grad_norm": 0.7840416532663803, "learning_rate": 2.126927935712025e-06, "loss": 0.1395, "step": 24132 }, { "epoch": 0.7040375751210689, "grad_norm": 1.1163970757759962, "learning_rate": 2.1265412969606846e-06, "loss": 0.1093, "step": 24133 }, { "epoch": 0.7040667483517125, "grad_norm": 0.8538029088499534, "learning_rate": 2.126154683862896e-06, "loss": 0.1193, "step": 24134 }, { "epoch": 0.704095921582356, "grad_norm": 0.7897568904118584, "learning_rate": 2.125768096422113e-06, "loss": 0.1015, "step": 24135 }, { "epoch": 0.7041250948129996, "grad_norm": 0.583966894031173, "learning_rate": 2.1253815346417873e-06, "loss": 0.0895, "step": 24136 }, { "epoch": 0.7041542680436431, "grad_norm": 0.7785901180582534, "learning_rate": 2.1249949985253686e-06, "loss": 0.1098, "step": 24137 }, { "epoch": 0.7041834412742867, "grad_norm": 0.7387064927061772, "learning_rate": 2.1246084880763073e-06, "loss": 0.1239, "step": 24138 }, { "epoch": 0.7042126145049302, "grad_norm": 0.8556757756360723, "learning_rate": 2.1242220032980563e-06, "loss": 0.1164, "step": 24139 }, { "epoch": 0.7042417877355739, "grad_norm": 0.899373524585836, "learning_rate": 2.1238355441940634e-06, "loss": 0.1032, "step": 24140 }, { "epoch": 0.7042709609662174, "grad_norm": 0.7693524759253813, "learning_rate": 2.1234491107677802e-06, "loss": 0.123, "step": 24141 }, { "epoch": 0.704300134196861, "grad_norm": 0.8646075646004397, "learning_rate": 2.123062703022658e-06, "loss": 0.098, "step": 24142 }, { "epoch": 0.7043293074275045, "grad_norm": 1.0283003112861857, "learning_rate": 2.1226763209621452e-06, "loss": 0.0983, "step": 24143 }, { "epoch": 0.7043584806581481, "grad_norm": 0.7773822812720221, "learning_rate": 2.12228996458969e-06, "loss": 0.1226, "step": 24144 }, { "epoch": 0.7043876538887917, "grad_norm": 1.1564431781030586, "learning_rate": 2.1219036339087447e-06, "loss": 0.1237, "step": 24145 }, { "epoch": 0.7044168271194352, "grad_norm": 0.8573595090024431, "learning_rate": 2.121517328922754e-06, "loss": 0.1174, "step": 24146 }, { "epoch": 0.7044460003500788, "grad_norm": 1.0830024680069363, "learning_rate": 2.1211310496351724e-06, "loss": 0.1275, "step": 24147 }, { "epoch": 0.7044751735807223, "grad_norm": 0.8617612438167792, "learning_rate": 2.120744796049443e-06, "loss": 0.0998, "step": 24148 }, { "epoch": 0.7045043468113659, "grad_norm": 1.0647450513089958, "learning_rate": 2.120358568169019e-06, "loss": 0.1069, "step": 24149 }, { "epoch": 0.7045335200420094, "grad_norm": 0.891693853593121, "learning_rate": 2.1199723659973466e-06, "loss": 0.1153, "step": 24150 }, { "epoch": 0.704562693272653, "grad_norm": 0.7222184254171092, "learning_rate": 2.1195861895378704e-06, "loss": 0.1167, "step": 24151 }, { "epoch": 0.7045918665032965, "grad_norm": 1.0003390452738457, "learning_rate": 2.119200038794042e-06, "loss": 0.1014, "step": 24152 }, { "epoch": 0.7046210397339402, "grad_norm": 1.0467032394858526, "learning_rate": 2.11881391376931e-06, "loss": 0.1533, "step": 24153 }, { "epoch": 0.7046502129645837, "grad_norm": 0.8797110334678755, "learning_rate": 2.118427814467119e-06, "loss": 0.0998, "step": 24154 }, { "epoch": 0.7046793861952273, "grad_norm": 1.316442117975834, "learning_rate": 2.118041740890915e-06, "loss": 0.1305, "step": 24155 }, { "epoch": 0.7047085594258709, "grad_norm": 0.8996957206478129, "learning_rate": 2.117655693044148e-06, "loss": 0.1029, "step": 24156 }, { "epoch": 0.7047377326565144, "grad_norm": 0.9367241576178393, "learning_rate": 2.117269670930263e-06, "loss": 0.1178, "step": 24157 }, { "epoch": 0.704766905887158, "grad_norm": 0.7263129627173697, "learning_rate": 2.116883674552703e-06, "loss": 0.1061, "step": 24158 }, { "epoch": 0.7047960791178015, "grad_norm": 1.4787850330764907, "learning_rate": 2.1164977039149203e-06, "loss": 0.1385, "step": 24159 }, { "epoch": 0.7048252523484451, "grad_norm": 0.798915100234943, "learning_rate": 2.116111759020358e-06, "loss": 0.1044, "step": 24160 }, { "epoch": 0.7048544255790886, "grad_norm": 1.0609816344790315, "learning_rate": 2.1157258398724593e-06, "loss": 0.1362, "step": 24161 }, { "epoch": 0.7048835988097322, "grad_norm": 0.64276459998314, "learning_rate": 2.1153399464746736e-06, "loss": 0.1334, "step": 24162 }, { "epoch": 0.7049127720403757, "grad_norm": 0.8832299790936908, "learning_rate": 2.1149540788304452e-06, "loss": 0.1103, "step": 24163 }, { "epoch": 0.7049419452710193, "grad_norm": 1.2314604621688052, "learning_rate": 2.1145682369432153e-06, "loss": 0.1119, "step": 24164 }, { "epoch": 0.7049711185016628, "grad_norm": 1.0568850293044327, "learning_rate": 2.114182420816432e-06, "loss": 0.1112, "step": 24165 }, { "epoch": 0.7050002917323064, "grad_norm": 0.9384272229348917, "learning_rate": 2.1137966304535407e-06, "loss": 0.1089, "step": 24166 }, { "epoch": 0.70502946496295, "grad_norm": 0.7942333966452567, "learning_rate": 2.1134108658579837e-06, "loss": 0.1192, "step": 24167 }, { "epoch": 0.7050586381935936, "grad_norm": 0.8440102570808579, "learning_rate": 2.1130251270332042e-06, "loss": 0.1113, "step": 24168 }, { "epoch": 0.7050878114242372, "grad_norm": 1.235031544150926, "learning_rate": 2.1126394139826468e-06, "loss": 0.1291, "step": 24169 }, { "epoch": 0.7051169846548807, "grad_norm": 1.1967882927456939, "learning_rate": 2.112253726709757e-06, "loss": 0.1146, "step": 24170 }, { "epoch": 0.7051461578855243, "grad_norm": 0.9534341476626986, "learning_rate": 2.111868065217975e-06, "loss": 0.1207, "step": 24171 }, { "epoch": 0.7051753311161678, "grad_norm": 0.8584359280085143, "learning_rate": 2.111482429510748e-06, "loss": 0.1253, "step": 24172 }, { "epoch": 0.7052045043468114, "grad_norm": 0.8391558648829219, "learning_rate": 2.1110968195915153e-06, "loss": 0.1258, "step": 24173 }, { "epoch": 0.7052336775774549, "grad_norm": 0.9438799524487655, "learning_rate": 2.1107112354637194e-06, "loss": 0.1256, "step": 24174 }, { "epoch": 0.7052628508080985, "grad_norm": 0.7457031989603938, "learning_rate": 2.1103256771308033e-06, "loss": 0.1043, "step": 24175 }, { "epoch": 0.705292024038742, "grad_norm": 0.9067185036060394, "learning_rate": 2.109940144596212e-06, "loss": 0.1295, "step": 24176 }, { "epoch": 0.7053211972693856, "grad_norm": 0.8066912346339389, "learning_rate": 2.109554637863385e-06, "loss": 0.1361, "step": 24177 }, { "epoch": 0.7053503705000291, "grad_norm": 0.7969519618243077, "learning_rate": 2.1091691569357626e-06, "loss": 0.1051, "step": 24178 }, { "epoch": 0.7053795437306727, "grad_norm": 0.8721099242465415, "learning_rate": 2.1087837018167893e-06, "loss": 0.1085, "step": 24179 }, { "epoch": 0.7054087169613164, "grad_norm": 1.0506240532952547, "learning_rate": 2.1083982725099055e-06, "loss": 0.1113, "step": 24180 }, { "epoch": 0.7054378901919599, "grad_norm": 1.008842102514274, "learning_rate": 2.108012869018549e-06, "loss": 0.1162, "step": 24181 }, { "epoch": 0.7054670634226035, "grad_norm": 0.814216316192569, "learning_rate": 2.107627491346164e-06, "loss": 0.1041, "step": 24182 }, { "epoch": 0.705496236653247, "grad_norm": 1.0236797961912931, "learning_rate": 2.107242139496192e-06, "loss": 0.1177, "step": 24183 }, { "epoch": 0.7055254098838906, "grad_norm": 1.13023933406375, "learning_rate": 2.1068568134720714e-06, "loss": 0.1356, "step": 24184 }, { "epoch": 0.7055545831145341, "grad_norm": 0.838739405281954, "learning_rate": 2.1064715132772406e-06, "loss": 0.1187, "step": 24185 }, { "epoch": 0.7055837563451777, "grad_norm": 0.8367746237079655, "learning_rate": 2.106086238915143e-06, "loss": 0.105, "step": 24186 }, { "epoch": 0.7056129295758212, "grad_norm": 0.6874452186299084, "learning_rate": 2.1057009903892155e-06, "loss": 0.1326, "step": 24187 }, { "epoch": 0.7056421028064648, "grad_norm": 0.8836810306274153, "learning_rate": 2.1053157677028985e-06, "loss": 0.1312, "step": 24188 }, { "epoch": 0.7056712760371083, "grad_norm": 0.8753607875102475, "learning_rate": 2.1049305708596322e-06, "loss": 0.1117, "step": 24189 }, { "epoch": 0.7057004492677519, "grad_norm": 0.7125699108263962, "learning_rate": 2.1045453998628555e-06, "loss": 0.1341, "step": 24190 }, { "epoch": 0.7057296224983954, "grad_norm": 0.8221614538570634, "learning_rate": 2.1041602547160043e-06, "loss": 0.1089, "step": 24191 }, { "epoch": 0.705758795729039, "grad_norm": 1.0271439627238976, "learning_rate": 2.103775135422521e-06, "loss": 0.1272, "step": 24192 }, { "epoch": 0.7057879689596825, "grad_norm": 0.7532382507475655, "learning_rate": 2.10339004198584e-06, "loss": 0.1149, "step": 24193 }, { "epoch": 0.7058171421903262, "grad_norm": 0.7786412246151072, "learning_rate": 2.1030049744094033e-06, "loss": 0.1363, "step": 24194 }, { "epoch": 0.7058463154209698, "grad_norm": 0.8096487627525432, "learning_rate": 2.1026199326966447e-06, "loss": 0.1155, "step": 24195 }, { "epoch": 0.7058754886516133, "grad_norm": 0.802356014632334, "learning_rate": 2.1022349168510047e-06, "loss": 0.1113, "step": 24196 }, { "epoch": 0.7059046618822569, "grad_norm": 0.7717028846688864, "learning_rate": 2.10184992687592e-06, "loss": 0.1165, "step": 24197 }, { "epoch": 0.7059338351129004, "grad_norm": 0.8428359819795502, "learning_rate": 2.1014649627748262e-06, "loss": 0.1439, "step": 24198 }, { "epoch": 0.705963008343544, "grad_norm": 1.003031010013575, "learning_rate": 2.101080024551161e-06, "loss": 0.1208, "step": 24199 }, { "epoch": 0.7059921815741875, "grad_norm": 0.8247121288769128, "learning_rate": 2.1006951122083626e-06, "loss": 0.1353, "step": 24200 }, { "epoch": 0.7060213548048311, "grad_norm": 1.0078679597901747, "learning_rate": 2.100310225749865e-06, "loss": 0.1276, "step": 24201 }, { "epoch": 0.7060505280354746, "grad_norm": 1.0383652940890002, "learning_rate": 2.099925365179107e-06, "loss": 0.1086, "step": 24202 }, { "epoch": 0.7060797012661182, "grad_norm": 1.040648646029909, "learning_rate": 2.0995405304995227e-06, "loss": 0.1285, "step": 24203 }, { "epoch": 0.7061088744967617, "grad_norm": 0.9445478766082092, "learning_rate": 2.0991557217145464e-06, "loss": 0.1142, "step": 24204 }, { "epoch": 0.7061380477274053, "grad_norm": 0.7634335003745596, "learning_rate": 2.0987709388276155e-06, "loss": 0.1146, "step": 24205 }, { "epoch": 0.7061672209580488, "grad_norm": 0.8196127515404932, "learning_rate": 2.098386181842167e-06, "loss": 0.1126, "step": 24206 }, { "epoch": 0.7061963941886925, "grad_norm": 1.191064054478148, "learning_rate": 2.0980014507616334e-06, "loss": 0.1299, "step": 24207 }, { "epoch": 0.7062255674193361, "grad_norm": 0.7970035370632912, "learning_rate": 2.097616745589449e-06, "loss": 0.124, "step": 24208 }, { "epoch": 0.7062547406499796, "grad_norm": 0.8143835814356327, "learning_rate": 2.097232066329051e-06, "loss": 0.1377, "step": 24209 }, { "epoch": 0.7062839138806232, "grad_norm": 0.8335081394825827, "learning_rate": 2.0968474129838724e-06, "loss": 0.1233, "step": 24210 }, { "epoch": 0.7063130871112667, "grad_norm": 0.8513562958901598, "learning_rate": 2.096462785557345e-06, "loss": 0.1177, "step": 24211 }, { "epoch": 0.7063422603419103, "grad_norm": 0.8739121203912802, "learning_rate": 2.096078184052905e-06, "loss": 0.1476, "step": 24212 }, { "epoch": 0.7063714335725538, "grad_norm": 0.9402304193632139, "learning_rate": 2.095693608473987e-06, "loss": 0.1411, "step": 24213 }, { "epoch": 0.7064006068031974, "grad_norm": 0.8652467146531886, "learning_rate": 2.095309058824024e-06, "loss": 0.1398, "step": 24214 }, { "epoch": 0.7064297800338409, "grad_norm": 0.7144904842046801, "learning_rate": 2.0949245351064456e-06, "loss": 0.0986, "step": 24215 }, { "epoch": 0.7064589532644845, "grad_norm": 0.8454451122292086, "learning_rate": 2.09454003732469e-06, "loss": 0.1203, "step": 24216 }, { "epoch": 0.706488126495128, "grad_norm": 0.8771193551816933, "learning_rate": 2.094155565482185e-06, "loss": 0.1024, "step": 24217 }, { "epoch": 0.7065172997257716, "grad_norm": 0.963960932283189, "learning_rate": 2.0937711195823658e-06, "loss": 0.1068, "step": 24218 }, { "epoch": 0.7065464729564152, "grad_norm": 0.8074460197791143, "learning_rate": 2.0933866996286656e-06, "loss": 0.1263, "step": 24219 }, { "epoch": 0.7065756461870587, "grad_norm": 0.9488724299713767, "learning_rate": 2.0930023056245156e-06, "loss": 0.1471, "step": 24220 }, { "epoch": 0.7066048194177024, "grad_norm": 0.8089768162625798, "learning_rate": 2.092617937573345e-06, "loss": 0.1039, "step": 24221 }, { "epoch": 0.7066339926483459, "grad_norm": 0.8412545528659938, "learning_rate": 2.0922335954785893e-06, "loss": 0.1205, "step": 24222 }, { "epoch": 0.7066631658789895, "grad_norm": 0.8330026310860443, "learning_rate": 2.091849279343676e-06, "loss": 0.1301, "step": 24223 }, { "epoch": 0.706692339109633, "grad_norm": 1.028899814171179, "learning_rate": 2.09146498917204e-06, "loss": 0.1276, "step": 24224 }, { "epoch": 0.7067215123402766, "grad_norm": 0.9870883693822461, "learning_rate": 2.0910807249671085e-06, "loss": 0.1294, "step": 24225 }, { "epoch": 0.7067506855709201, "grad_norm": 0.7054511588804655, "learning_rate": 2.0906964867323154e-06, "loss": 0.1016, "step": 24226 }, { "epoch": 0.7067798588015637, "grad_norm": 0.8912403946712257, "learning_rate": 2.0903122744710896e-06, "loss": 0.1027, "step": 24227 }, { "epoch": 0.7068090320322072, "grad_norm": 1.1862637949493537, "learning_rate": 2.08992808818686e-06, "loss": 0.1303, "step": 24228 }, { "epoch": 0.7068382052628508, "grad_norm": 0.7571432834830695, "learning_rate": 2.089543927883057e-06, "loss": 0.0978, "step": 24229 }, { "epoch": 0.7068673784934943, "grad_norm": 0.7935119833746682, "learning_rate": 2.0891597935631134e-06, "loss": 0.1307, "step": 24230 }, { "epoch": 0.7068965517241379, "grad_norm": 0.9305514332896524, "learning_rate": 2.088775685230454e-06, "loss": 0.1373, "step": 24231 }, { "epoch": 0.7069257249547815, "grad_norm": 1.0160859880353177, "learning_rate": 2.0883916028885126e-06, "loss": 0.1078, "step": 24232 }, { "epoch": 0.706954898185425, "grad_norm": 0.6911868767085836, "learning_rate": 2.0880075465407156e-06, "loss": 0.1099, "step": 24233 }, { "epoch": 0.7069840714160686, "grad_norm": 0.8930064859506339, "learning_rate": 2.08762351619049e-06, "loss": 0.106, "step": 24234 }, { "epoch": 0.7070132446467122, "grad_norm": 0.9108804388813418, "learning_rate": 2.0872395118412667e-06, "loss": 0.1099, "step": 24235 }, { "epoch": 0.7070424178773558, "grad_norm": 0.8310533896476148, "learning_rate": 2.086855533496476e-06, "loss": 0.1188, "step": 24236 }, { "epoch": 0.7070715911079993, "grad_norm": 0.7936093721437792, "learning_rate": 2.0864715811595433e-06, "loss": 0.109, "step": 24237 }, { "epoch": 0.7071007643386429, "grad_norm": 0.8778617281934136, "learning_rate": 2.0860876548338948e-06, "loss": 0.1226, "step": 24238 }, { "epoch": 0.7071299375692864, "grad_norm": 0.9491890116658876, "learning_rate": 2.085703754522962e-06, "loss": 0.1257, "step": 24239 }, { "epoch": 0.70715911079993, "grad_norm": 0.7687823104756915, "learning_rate": 2.0853198802301705e-06, "loss": 0.1164, "step": 24240 }, { "epoch": 0.7071882840305735, "grad_norm": 0.7390914642422536, "learning_rate": 2.0849360319589456e-06, "loss": 0.0944, "step": 24241 }, { "epoch": 0.7072174572612171, "grad_norm": 0.8037881174333962, "learning_rate": 2.0845522097127156e-06, "loss": 0.1069, "step": 24242 }, { "epoch": 0.7072466304918607, "grad_norm": 0.8585303715610015, "learning_rate": 2.08416841349491e-06, "loss": 0.1247, "step": 24243 }, { "epoch": 0.7072758037225042, "grad_norm": 0.9026023504426673, "learning_rate": 2.0837846433089516e-06, "loss": 0.118, "step": 24244 }, { "epoch": 0.7073049769531478, "grad_norm": 0.9478598025329202, "learning_rate": 2.0834008991582666e-06, "loss": 0.1312, "step": 24245 }, { "epoch": 0.7073341501837913, "grad_norm": 1.3842624142818118, "learning_rate": 2.083017181046284e-06, "loss": 0.1089, "step": 24246 }, { "epoch": 0.7073633234144349, "grad_norm": 0.8416271506515415, "learning_rate": 2.0826334889764254e-06, "loss": 0.1181, "step": 24247 }, { "epoch": 0.7073924966450785, "grad_norm": 0.9323315489613925, "learning_rate": 2.0822498229521195e-06, "loss": 0.1063, "step": 24248 }, { "epoch": 0.7074216698757221, "grad_norm": 1.0434978980243386, "learning_rate": 2.0818661829767915e-06, "loss": 0.148, "step": 24249 }, { "epoch": 0.7074508431063656, "grad_norm": 1.0057344394718042, "learning_rate": 2.081482569053866e-06, "loss": 0.1129, "step": 24250 }, { "epoch": 0.7074800163370092, "grad_norm": 0.8645375539330916, "learning_rate": 2.0810989811867656e-06, "loss": 0.136, "step": 24251 }, { "epoch": 0.7075091895676527, "grad_norm": 1.0281069821712148, "learning_rate": 2.0807154193789185e-06, "loss": 0.1147, "step": 24252 }, { "epoch": 0.7075383627982963, "grad_norm": 0.9376559980706509, "learning_rate": 2.0803318836337453e-06, "loss": 0.1102, "step": 24253 }, { "epoch": 0.7075675360289398, "grad_norm": 0.9411723033745302, "learning_rate": 2.0799483739546745e-06, "loss": 0.1153, "step": 24254 }, { "epoch": 0.7075967092595834, "grad_norm": 0.7318044208269741, "learning_rate": 2.0795648903451247e-06, "loss": 0.1083, "step": 24255 }, { "epoch": 0.707625882490227, "grad_norm": 0.8638369251224559, "learning_rate": 2.079181432808525e-06, "loss": 0.0976, "step": 24256 }, { "epoch": 0.7076550557208705, "grad_norm": 1.1350800421229454, "learning_rate": 2.0787980013482963e-06, "loss": 0.1056, "step": 24257 }, { "epoch": 0.7076842289515141, "grad_norm": 0.9195655840391993, "learning_rate": 2.0784145959678592e-06, "loss": 0.1165, "step": 24258 }, { "epoch": 0.7077134021821576, "grad_norm": 0.8873180874749146, "learning_rate": 2.0780312166706396e-06, "loss": 0.1298, "step": 24259 }, { "epoch": 0.7077425754128012, "grad_norm": 0.8909275358850974, "learning_rate": 2.0776478634600616e-06, "loss": 0.1162, "step": 24260 }, { "epoch": 0.7077717486434447, "grad_norm": 0.802875624164899, "learning_rate": 2.077264536339544e-06, "loss": 0.1368, "step": 24261 }, { "epoch": 0.7078009218740884, "grad_norm": 0.9929377481657958, "learning_rate": 2.076881235312512e-06, "loss": 0.1111, "step": 24262 }, { "epoch": 0.7078300951047319, "grad_norm": 1.263614547830916, "learning_rate": 2.0764979603823877e-06, "loss": 0.1292, "step": 24263 }, { "epoch": 0.7078592683353755, "grad_norm": 0.8915764046134468, "learning_rate": 2.076114711552589e-06, "loss": 0.1119, "step": 24264 }, { "epoch": 0.707888441566019, "grad_norm": 0.9872741977126354, "learning_rate": 2.0757314888265404e-06, "loss": 0.1076, "step": 24265 }, { "epoch": 0.7079176147966626, "grad_norm": 1.0237986457693453, "learning_rate": 2.075348292207665e-06, "loss": 0.1314, "step": 24266 }, { "epoch": 0.7079467880273062, "grad_norm": 1.2074351477954173, "learning_rate": 2.074965121699382e-06, "loss": 0.1433, "step": 24267 }, { "epoch": 0.7079759612579497, "grad_norm": 1.128488236197808, "learning_rate": 2.0745819773051103e-06, "loss": 0.1077, "step": 24268 }, { "epoch": 0.7080051344885933, "grad_norm": 0.6981280622894253, "learning_rate": 2.074198859028274e-06, "loss": 0.115, "step": 24269 }, { "epoch": 0.7080343077192368, "grad_norm": 1.0420269362369856, "learning_rate": 2.073815766872292e-06, "loss": 0.115, "step": 24270 }, { "epoch": 0.7080634809498804, "grad_norm": 0.8642225907002064, "learning_rate": 2.073432700840582e-06, "loss": 0.1263, "step": 24271 }, { "epoch": 0.7080926541805239, "grad_norm": 1.2527782822964186, "learning_rate": 2.073049660936567e-06, "loss": 0.1155, "step": 24272 }, { "epoch": 0.7081218274111675, "grad_norm": 1.03955845050689, "learning_rate": 2.072666647163667e-06, "loss": 0.1222, "step": 24273 }, { "epoch": 0.708151000641811, "grad_norm": 1.1281478865665038, "learning_rate": 2.0722836595253004e-06, "loss": 0.1242, "step": 24274 }, { "epoch": 0.7081801738724547, "grad_norm": 0.638413992170037, "learning_rate": 2.071900698024885e-06, "loss": 0.1027, "step": 24275 }, { "epoch": 0.7082093471030982, "grad_norm": 1.2789385595384781, "learning_rate": 2.0715177626658427e-06, "loss": 0.1112, "step": 24276 }, { "epoch": 0.7082385203337418, "grad_norm": 0.9262113633692753, "learning_rate": 2.071134853451589e-06, "loss": 0.1096, "step": 24277 }, { "epoch": 0.7082676935643853, "grad_norm": 0.9699446681662852, "learning_rate": 2.0707519703855446e-06, "loss": 0.1058, "step": 24278 }, { "epoch": 0.7082968667950289, "grad_norm": 0.8942764422388755, "learning_rate": 2.0703691134711284e-06, "loss": 0.1041, "step": 24279 }, { "epoch": 0.7083260400256725, "grad_norm": 0.8257282047171776, "learning_rate": 2.0699862827117576e-06, "loss": 0.1106, "step": 24280 }, { "epoch": 0.708355213256316, "grad_norm": 0.8997744291216947, "learning_rate": 2.069603478110848e-06, "loss": 0.1266, "step": 24281 }, { "epoch": 0.7083843864869596, "grad_norm": 0.8860122375314241, "learning_rate": 2.069220699671821e-06, "loss": 0.1163, "step": 24282 }, { "epoch": 0.7084135597176031, "grad_norm": 0.7941591419252904, "learning_rate": 2.0688379473980904e-06, "loss": 0.1143, "step": 24283 }, { "epoch": 0.7084427329482467, "grad_norm": 1.0087317633787842, "learning_rate": 2.068455221293076e-06, "loss": 0.1213, "step": 24284 }, { "epoch": 0.7084719061788902, "grad_norm": 0.8425652259654369, "learning_rate": 2.068072521360192e-06, "loss": 0.1065, "step": 24285 }, { "epoch": 0.7085010794095338, "grad_norm": 0.839806586690853, "learning_rate": 2.067689847602859e-06, "loss": 0.1361, "step": 24286 }, { "epoch": 0.7085302526401773, "grad_norm": 0.6598314202266496, "learning_rate": 2.0673072000244902e-06, "loss": 0.122, "step": 24287 }, { "epoch": 0.7085594258708209, "grad_norm": 0.9482659430429057, "learning_rate": 2.0669245786285015e-06, "loss": 0.1239, "step": 24288 }, { "epoch": 0.7085885991014645, "grad_norm": 0.8901788825063378, "learning_rate": 2.0665419834183093e-06, "loss": 0.1107, "step": 24289 }, { "epoch": 0.7086177723321081, "grad_norm": 0.7752036240710392, "learning_rate": 2.0661594143973323e-06, "loss": 0.1134, "step": 24290 }, { "epoch": 0.7086469455627517, "grad_norm": 0.9534933633836123, "learning_rate": 2.065776871568982e-06, "loss": 0.1192, "step": 24291 }, { "epoch": 0.7086761187933952, "grad_norm": 0.7428279053234718, "learning_rate": 2.0653943549366768e-06, "loss": 0.1058, "step": 24292 }, { "epoch": 0.7087052920240388, "grad_norm": 0.8873485982934974, "learning_rate": 2.0650118645038304e-06, "loss": 0.0999, "step": 24293 }, { "epoch": 0.7087344652546823, "grad_norm": 0.6832893915531372, "learning_rate": 2.0646294002738555e-06, "loss": 0.1284, "step": 24294 }, { "epoch": 0.7087636384853259, "grad_norm": 0.9197184713157679, "learning_rate": 2.0642469622501686e-06, "loss": 0.1205, "step": 24295 }, { "epoch": 0.7087928117159694, "grad_norm": 0.7557684728704201, "learning_rate": 2.0638645504361858e-06, "loss": 0.1118, "step": 24296 }, { "epoch": 0.708821984946613, "grad_norm": 0.6931123673153663, "learning_rate": 2.0634821648353197e-06, "loss": 0.1037, "step": 24297 }, { "epoch": 0.7088511581772565, "grad_norm": 0.855582365801452, "learning_rate": 2.063099805450982e-06, "loss": 0.1334, "step": 24298 }, { "epoch": 0.7088803314079001, "grad_norm": 0.8469217777266271, "learning_rate": 2.0627174722865894e-06, "loss": 0.1021, "step": 24299 }, { "epoch": 0.7089095046385436, "grad_norm": 0.8223315291781041, "learning_rate": 2.062335165345555e-06, "loss": 0.1028, "step": 24300 }, { "epoch": 0.7089386778691872, "grad_norm": 0.8780101299214722, "learning_rate": 2.0619528846312882e-06, "loss": 0.1283, "step": 24301 }, { "epoch": 0.7089678510998308, "grad_norm": 0.6556728397295637, "learning_rate": 2.061570630147205e-06, "loss": 0.1347, "step": 24302 }, { "epoch": 0.7089970243304744, "grad_norm": 0.9950550449399701, "learning_rate": 2.0611884018967195e-06, "loss": 0.1252, "step": 24303 }, { "epoch": 0.709026197561118, "grad_norm": 0.8847755573130276, "learning_rate": 2.0608061998832423e-06, "loss": 0.1046, "step": 24304 }, { "epoch": 0.7090553707917615, "grad_norm": 0.7599713932287432, "learning_rate": 2.0604240241101843e-06, "loss": 0.1364, "step": 24305 }, { "epoch": 0.7090845440224051, "grad_norm": 1.1163898676023372, "learning_rate": 2.0600418745809602e-06, "loss": 0.1397, "step": 24306 }, { "epoch": 0.7091137172530486, "grad_norm": 0.7909592846118836, "learning_rate": 2.059659751298979e-06, "loss": 0.1252, "step": 24307 }, { "epoch": 0.7091428904836922, "grad_norm": 0.9164340973327898, "learning_rate": 2.0592776542676535e-06, "loss": 0.157, "step": 24308 }, { "epoch": 0.7091720637143357, "grad_norm": 0.9238638354559386, "learning_rate": 2.0588955834903966e-06, "loss": 0.1241, "step": 24309 }, { "epoch": 0.7092012369449793, "grad_norm": 0.8793393489014347, "learning_rate": 2.0585135389706185e-06, "loss": 0.1131, "step": 24310 }, { "epoch": 0.7092304101756228, "grad_norm": 0.6373542181329356, "learning_rate": 2.058131520711727e-06, "loss": 0.1114, "step": 24311 }, { "epoch": 0.7092595834062664, "grad_norm": 0.8941034902251721, "learning_rate": 2.0577495287171374e-06, "loss": 0.1167, "step": 24312 }, { "epoch": 0.7092887566369099, "grad_norm": 0.7132029447358353, "learning_rate": 2.057367562990255e-06, "loss": 0.1386, "step": 24313 }, { "epoch": 0.7093179298675535, "grad_norm": 1.4641892671757206, "learning_rate": 2.0569856235344947e-06, "loss": 0.1013, "step": 24314 }, { "epoch": 0.709347103098197, "grad_norm": 0.9844328535665606, "learning_rate": 2.0566037103532628e-06, "loss": 0.1235, "step": 24315 }, { "epoch": 0.7093762763288407, "grad_norm": 0.9058529002089648, "learning_rate": 2.0562218234499714e-06, "loss": 0.1089, "step": 24316 }, { "epoch": 0.7094054495594843, "grad_norm": 0.828066655121191, "learning_rate": 2.055839962828029e-06, "loss": 0.1327, "step": 24317 }, { "epoch": 0.7094346227901278, "grad_norm": 0.7649869333445348, "learning_rate": 2.055458128490843e-06, "loss": 0.1158, "step": 24318 }, { "epoch": 0.7094637960207714, "grad_norm": 0.8032000343573327, "learning_rate": 2.055076320441824e-06, "loss": 0.128, "step": 24319 }, { "epoch": 0.7094929692514149, "grad_norm": 0.9063916433840569, "learning_rate": 2.0546945386843826e-06, "loss": 0.1304, "step": 24320 }, { "epoch": 0.7095221424820585, "grad_norm": 0.7374586396840892, "learning_rate": 2.0543127832219246e-06, "loss": 0.1068, "step": 24321 }, { "epoch": 0.709551315712702, "grad_norm": 0.7476665841702215, "learning_rate": 2.053931054057857e-06, "loss": 0.122, "step": 24322 }, { "epoch": 0.7095804889433456, "grad_norm": 0.8550252027256865, "learning_rate": 2.0535493511955925e-06, "loss": 0.1347, "step": 24323 }, { "epoch": 0.7096096621739891, "grad_norm": 0.8523804032655143, "learning_rate": 2.053167674638533e-06, "loss": 0.1207, "step": 24324 }, { "epoch": 0.7096388354046327, "grad_norm": 0.6336614375024203, "learning_rate": 2.0527860243900898e-06, "loss": 0.0878, "step": 24325 }, { "epoch": 0.7096680086352762, "grad_norm": 0.7924964057217228, "learning_rate": 2.0524044004536716e-06, "loss": 0.1415, "step": 24326 }, { "epoch": 0.7096971818659198, "grad_norm": 0.8762698703750451, "learning_rate": 2.052022802832682e-06, "loss": 0.1052, "step": 24327 }, { "epoch": 0.7097263550965633, "grad_norm": 0.9481505755848734, "learning_rate": 2.0516412315305282e-06, "loss": 0.1204, "step": 24328 }, { "epoch": 0.709755528327207, "grad_norm": 0.9267284542420775, "learning_rate": 2.0512596865506195e-06, "loss": 0.1297, "step": 24329 }, { "epoch": 0.7097847015578506, "grad_norm": 0.8690038560467187, "learning_rate": 2.05087816789636e-06, "loss": 0.1129, "step": 24330 }, { "epoch": 0.7098138747884941, "grad_norm": 0.9277653152803282, "learning_rate": 2.0504966755711547e-06, "loss": 0.107, "step": 24331 }, { "epoch": 0.7098430480191377, "grad_norm": 0.9933305147356886, "learning_rate": 2.0501152095784105e-06, "loss": 0.1085, "step": 24332 }, { "epoch": 0.7098722212497812, "grad_norm": 0.9288477974599171, "learning_rate": 2.049733769921536e-06, "loss": 0.1298, "step": 24333 }, { "epoch": 0.7099013944804248, "grad_norm": 1.02337141711138, "learning_rate": 2.0493523566039334e-06, "loss": 0.1251, "step": 24334 }, { "epoch": 0.7099305677110683, "grad_norm": 1.1745951293250398, "learning_rate": 2.0489709696290073e-06, "loss": 0.1173, "step": 24335 }, { "epoch": 0.7099597409417119, "grad_norm": 0.8583992001256617, "learning_rate": 2.0485896090001657e-06, "loss": 0.1343, "step": 24336 }, { "epoch": 0.7099889141723554, "grad_norm": 0.8578304036689427, "learning_rate": 2.0482082747208092e-06, "loss": 0.1042, "step": 24337 }, { "epoch": 0.710018087402999, "grad_norm": 0.942450149665433, "learning_rate": 2.0478269667943453e-06, "loss": 0.1128, "step": 24338 }, { "epoch": 0.7100472606336425, "grad_norm": 0.9497845138690882, "learning_rate": 2.047445685224179e-06, "loss": 0.1183, "step": 24339 }, { "epoch": 0.7100764338642861, "grad_norm": 0.7537585600108339, "learning_rate": 2.047064430013713e-06, "loss": 0.1251, "step": 24340 }, { "epoch": 0.7101056070949296, "grad_norm": 0.9194084478200095, "learning_rate": 2.0466832011663486e-06, "loss": 0.1167, "step": 24341 }, { "epoch": 0.7101347803255732, "grad_norm": 0.7379825674920547, "learning_rate": 2.0463019986854932e-06, "loss": 0.1152, "step": 24342 }, { "epoch": 0.7101639535562169, "grad_norm": 0.8357722724183001, "learning_rate": 2.045920822574547e-06, "loss": 0.1121, "step": 24343 }, { "epoch": 0.7101931267868604, "grad_norm": 0.9264669869312903, "learning_rate": 2.0455396728369165e-06, "loss": 0.1283, "step": 24344 }, { "epoch": 0.710222300017504, "grad_norm": 0.8289058552466958, "learning_rate": 2.045158549476e-06, "loss": 0.1166, "step": 24345 }, { "epoch": 0.7102514732481475, "grad_norm": 0.8389492003646455, "learning_rate": 2.0447774524952054e-06, "loss": 0.1097, "step": 24346 }, { "epoch": 0.7102806464787911, "grad_norm": 0.7369558637489452, "learning_rate": 2.0443963818979318e-06, "loss": 0.1299, "step": 24347 }, { "epoch": 0.7103098197094346, "grad_norm": 1.345947586898109, "learning_rate": 2.0440153376875797e-06, "loss": 0.1277, "step": 24348 }, { "epoch": 0.7103389929400782, "grad_norm": 0.8749608023462779, "learning_rate": 2.0436343198675535e-06, "loss": 0.1122, "step": 24349 }, { "epoch": 0.7103681661707217, "grad_norm": 0.9801531819234112, "learning_rate": 2.0432533284412556e-06, "loss": 0.1079, "step": 24350 }, { "epoch": 0.7103973394013653, "grad_norm": 0.7246153212649536, "learning_rate": 2.0428723634120864e-06, "loss": 0.1288, "step": 24351 }, { "epoch": 0.7104265126320088, "grad_norm": 0.8065126575482403, "learning_rate": 2.042491424783445e-06, "loss": 0.0999, "step": 24352 }, { "epoch": 0.7104556858626524, "grad_norm": 0.8847770818441456, "learning_rate": 2.042110512558736e-06, "loss": 0.1155, "step": 24353 }, { "epoch": 0.710484859093296, "grad_norm": 0.86288922139475, "learning_rate": 2.0417296267413562e-06, "loss": 0.1134, "step": 24354 }, { "epoch": 0.7105140323239395, "grad_norm": 1.185462512963098, "learning_rate": 2.0413487673347083e-06, "loss": 0.1066, "step": 24355 }, { "epoch": 0.7105432055545832, "grad_norm": 1.3018141769592924, "learning_rate": 2.040967934342194e-06, "loss": 0.1435, "step": 24356 }, { "epoch": 0.7105723787852267, "grad_norm": 0.8400628496943888, "learning_rate": 2.040587127767212e-06, "loss": 0.1224, "step": 24357 }, { "epoch": 0.7106015520158703, "grad_norm": 0.8008670181168767, "learning_rate": 2.0402063476131593e-06, "loss": 0.1306, "step": 24358 }, { "epoch": 0.7106307252465138, "grad_norm": 0.8951122619476052, "learning_rate": 2.03982559388344e-06, "loss": 0.1312, "step": 24359 }, { "epoch": 0.7106598984771574, "grad_norm": 1.170453836271604, "learning_rate": 2.039444866581451e-06, "loss": 0.1099, "step": 24360 }, { "epoch": 0.7106890717078009, "grad_norm": 1.05398976968565, "learning_rate": 2.03906416571059e-06, "loss": 0.1231, "step": 24361 }, { "epoch": 0.7107182449384445, "grad_norm": 4.280590207084984, "learning_rate": 2.0386834912742566e-06, "loss": 0.1252, "step": 24362 }, { "epoch": 0.710747418169088, "grad_norm": 0.8509599858629096, "learning_rate": 2.0383028432758522e-06, "loss": 0.1259, "step": 24363 }, { "epoch": 0.7107765913997316, "grad_norm": 1.0586904553300105, "learning_rate": 2.037922221718773e-06, "loss": 0.1291, "step": 24364 }, { "epoch": 0.7108057646303751, "grad_norm": 0.7578885581554284, "learning_rate": 2.037541626606416e-06, "loss": 0.1159, "step": 24365 }, { "epoch": 0.7108349378610187, "grad_norm": 0.9950357619484919, "learning_rate": 2.037161057942179e-06, "loss": 0.125, "step": 24366 }, { "epoch": 0.7108641110916623, "grad_norm": 0.938517775451267, "learning_rate": 2.036780515729463e-06, "loss": 0.1073, "step": 24367 }, { "epoch": 0.7108932843223058, "grad_norm": 1.1184516395218789, "learning_rate": 2.0363999999716618e-06, "loss": 0.128, "step": 24368 }, { "epoch": 0.7109224575529494, "grad_norm": 0.9023110909870798, "learning_rate": 2.036019510672175e-06, "loss": 0.1273, "step": 24369 }, { "epoch": 0.710951630783593, "grad_norm": 1.4001399121659068, "learning_rate": 2.035639047834399e-06, "loss": 0.1337, "step": 24370 }, { "epoch": 0.7109808040142366, "grad_norm": 2.0301668528336987, "learning_rate": 2.035258611461728e-06, "loss": 0.1075, "step": 24371 }, { "epoch": 0.7110099772448801, "grad_norm": 1.1384594601914078, "learning_rate": 2.03487820155756e-06, "loss": 0.1136, "step": 24372 }, { "epoch": 0.7110391504755237, "grad_norm": 1.107004423559062, "learning_rate": 2.034497818125294e-06, "loss": 0.1233, "step": 24373 }, { "epoch": 0.7110683237061672, "grad_norm": 1.0133925683935907, "learning_rate": 2.0341174611683235e-06, "loss": 0.13, "step": 24374 }, { "epoch": 0.7110974969368108, "grad_norm": 0.7104769885495591, "learning_rate": 2.033737130690042e-06, "loss": 0.0905, "step": 24375 }, { "epoch": 0.7111266701674543, "grad_norm": 1.422098025882749, "learning_rate": 2.0333568266938498e-06, "loss": 0.1442, "step": 24376 }, { "epoch": 0.7111558433980979, "grad_norm": 1.0058471267758462, "learning_rate": 2.032976549183139e-06, "loss": 0.116, "step": 24377 }, { "epoch": 0.7111850166287415, "grad_norm": 0.927592664479417, "learning_rate": 2.0325962981613036e-06, "loss": 0.1098, "step": 24378 }, { "epoch": 0.711214189859385, "grad_norm": 0.7931636033481047, "learning_rate": 2.0322160736317404e-06, "loss": 0.0929, "step": 24379 }, { "epoch": 0.7112433630900286, "grad_norm": 0.9806693334105764, "learning_rate": 2.031835875597845e-06, "loss": 0.1099, "step": 24380 }, { "epoch": 0.7112725363206721, "grad_norm": 1.0011884145644832, "learning_rate": 2.0314557040630106e-06, "loss": 0.1041, "step": 24381 }, { "epoch": 0.7113017095513157, "grad_norm": 0.9600760848975869, "learning_rate": 2.031075559030629e-06, "loss": 0.1219, "step": 24382 }, { "epoch": 0.7113308827819593, "grad_norm": 0.8918457468674876, "learning_rate": 2.0306954405040984e-06, "loss": 0.1084, "step": 24383 }, { "epoch": 0.7113600560126029, "grad_norm": 0.9725798074427667, "learning_rate": 2.0303153484868077e-06, "loss": 0.1124, "step": 24384 }, { "epoch": 0.7113892292432464, "grad_norm": 0.9163828993242817, "learning_rate": 2.0299352829821535e-06, "loss": 0.1243, "step": 24385 }, { "epoch": 0.71141840247389, "grad_norm": 1.2597823766770704, "learning_rate": 2.029555243993529e-06, "loss": 0.1184, "step": 24386 }, { "epoch": 0.7114475757045335, "grad_norm": 1.1218957050688485, "learning_rate": 2.029175231524326e-06, "loss": 0.1266, "step": 24387 }, { "epoch": 0.7114767489351771, "grad_norm": 0.8709065580069354, "learning_rate": 2.0287952455779365e-06, "loss": 0.1238, "step": 24388 }, { "epoch": 0.7115059221658206, "grad_norm": 0.8367507895732899, "learning_rate": 2.028415286157755e-06, "loss": 0.0999, "step": 24389 }, { "epoch": 0.7115350953964642, "grad_norm": 0.9890103427532823, "learning_rate": 2.0280353532671704e-06, "loss": 0.1289, "step": 24390 }, { "epoch": 0.7115642686271078, "grad_norm": 1.0479322528274433, "learning_rate": 2.0276554469095787e-06, "loss": 0.1257, "step": 24391 }, { "epoch": 0.7115934418577513, "grad_norm": 0.9110123973017815, "learning_rate": 2.027275567088368e-06, "loss": 0.0999, "step": 24392 }, { "epoch": 0.7116226150883949, "grad_norm": 0.8516766976858033, "learning_rate": 2.0268957138069336e-06, "loss": 0.1203, "step": 24393 }, { "epoch": 0.7116517883190384, "grad_norm": 0.8787287052162154, "learning_rate": 2.0265158870686636e-06, "loss": 0.1271, "step": 24394 }, { "epoch": 0.711680961549682, "grad_norm": 1.320506184430548, "learning_rate": 2.0261360868769487e-06, "loss": 0.1239, "step": 24395 }, { "epoch": 0.7117101347803255, "grad_norm": 0.9819746947924424, "learning_rate": 2.0257563132351808e-06, "loss": 0.1338, "step": 24396 }, { "epoch": 0.7117393080109692, "grad_norm": 0.8328015881292921, "learning_rate": 2.0253765661467523e-06, "loss": 0.1228, "step": 24397 }, { "epoch": 0.7117684812416127, "grad_norm": 0.9631663464689367, "learning_rate": 2.0249968456150497e-06, "loss": 0.1373, "step": 24398 }, { "epoch": 0.7117976544722563, "grad_norm": 1.0840115522703666, "learning_rate": 2.024617151643467e-06, "loss": 0.099, "step": 24399 }, { "epoch": 0.7118268277028998, "grad_norm": 0.7550602826241688, "learning_rate": 2.024237484235392e-06, "loss": 0.0898, "step": 24400 }, { "epoch": 0.7118560009335434, "grad_norm": 0.8204516668833315, "learning_rate": 2.023857843394213e-06, "loss": 0.1115, "step": 24401 }, { "epoch": 0.711885174164187, "grad_norm": 0.9812200065073052, "learning_rate": 2.0234782291233207e-06, "loss": 0.1138, "step": 24402 }, { "epoch": 0.7119143473948305, "grad_norm": 1.069486808913791, "learning_rate": 2.0230986414261056e-06, "loss": 0.1037, "step": 24403 }, { "epoch": 0.7119435206254741, "grad_norm": 0.7081987164093422, "learning_rate": 2.0227190803059554e-06, "loss": 0.1047, "step": 24404 }, { "epoch": 0.7119726938561176, "grad_norm": 0.6843058317568871, "learning_rate": 2.0223395457662572e-06, "loss": 0.1349, "step": 24405 }, { "epoch": 0.7120018670867612, "grad_norm": 0.8905275066612361, "learning_rate": 2.0219600378104014e-06, "loss": 0.1478, "step": 24406 }, { "epoch": 0.7120310403174047, "grad_norm": 0.8933715644007169, "learning_rate": 2.021580556441776e-06, "loss": 0.0965, "step": 24407 }, { "epoch": 0.7120602135480483, "grad_norm": 1.0223465465187809, "learning_rate": 2.0212011016637667e-06, "loss": 0.1127, "step": 24408 }, { "epoch": 0.7120893867786918, "grad_norm": 0.7702065574061616, "learning_rate": 2.0208216734797632e-06, "loss": 0.1162, "step": 24409 }, { "epoch": 0.7121185600093355, "grad_norm": 0.9735199034492721, "learning_rate": 2.0204422718931538e-06, "loss": 0.1361, "step": 24410 }, { "epoch": 0.712147733239979, "grad_norm": 0.8188490262918654, "learning_rate": 2.0200628969073248e-06, "loss": 0.1074, "step": 24411 }, { "epoch": 0.7121769064706226, "grad_norm": 1.2201197741130494, "learning_rate": 2.019683548525661e-06, "loss": 0.1255, "step": 24412 }, { "epoch": 0.7122060797012661, "grad_norm": 1.057590408778701, "learning_rate": 2.0193042267515526e-06, "loss": 0.1107, "step": 24413 }, { "epoch": 0.7122352529319097, "grad_norm": 0.7202570838943055, "learning_rate": 2.018924931588383e-06, "loss": 0.1212, "step": 24414 }, { "epoch": 0.7122644261625533, "grad_norm": 0.7679363626552997, "learning_rate": 2.01854566303954e-06, "loss": 0.1191, "step": 24415 }, { "epoch": 0.7122935993931968, "grad_norm": 1.1375598628888894, "learning_rate": 2.0181664211084114e-06, "loss": 0.1163, "step": 24416 }, { "epoch": 0.7123227726238404, "grad_norm": 0.903098200513465, "learning_rate": 2.017787205798381e-06, "loss": 0.1203, "step": 24417 }, { "epoch": 0.7123519458544839, "grad_norm": 0.7710524434318854, "learning_rate": 2.017408017112833e-06, "loss": 0.1228, "step": 24418 }, { "epoch": 0.7123811190851275, "grad_norm": 0.8126255961949865, "learning_rate": 2.017028855055156e-06, "loss": 0.1241, "step": 24419 }, { "epoch": 0.712410292315771, "grad_norm": 0.8710504505193271, "learning_rate": 2.016649719628731e-06, "loss": 0.1165, "step": 24420 }, { "epoch": 0.7124394655464146, "grad_norm": 0.7745623945296772, "learning_rate": 2.0162706108369473e-06, "loss": 0.1196, "step": 24421 }, { "epoch": 0.7124686387770581, "grad_norm": 0.9979151301807482, "learning_rate": 2.0158915286831852e-06, "loss": 0.1055, "step": 24422 }, { "epoch": 0.7124978120077017, "grad_norm": 0.7711457160494226, "learning_rate": 2.0155124731708337e-06, "loss": 0.1141, "step": 24423 }, { "epoch": 0.7125269852383453, "grad_norm": 0.771775854309958, "learning_rate": 2.015133444303274e-06, "loss": 0.1099, "step": 24424 }, { "epoch": 0.7125561584689889, "grad_norm": 0.9915515612916951, "learning_rate": 2.0147544420838883e-06, "loss": 0.1063, "step": 24425 }, { "epoch": 0.7125853316996325, "grad_norm": 0.7671812323739688, "learning_rate": 2.014375466516062e-06, "loss": 0.1266, "step": 24426 }, { "epoch": 0.712614504930276, "grad_norm": 0.9770055202806369, "learning_rate": 2.013996517603181e-06, "loss": 0.1341, "step": 24427 }, { "epoch": 0.7126436781609196, "grad_norm": 0.8835667304460261, "learning_rate": 2.013617595348625e-06, "loss": 0.1451, "step": 24428 }, { "epoch": 0.7126728513915631, "grad_norm": 0.7354664119934039, "learning_rate": 2.0132386997557795e-06, "loss": 0.1238, "step": 24429 }, { "epoch": 0.7127020246222067, "grad_norm": 0.7179745594529715, "learning_rate": 2.0128598308280255e-06, "loss": 0.1088, "step": 24430 }, { "epoch": 0.7127311978528502, "grad_norm": 0.866216764674149, "learning_rate": 2.0124809885687448e-06, "loss": 0.1171, "step": 24431 }, { "epoch": 0.7127603710834938, "grad_norm": 0.8510728509096759, "learning_rate": 2.0121021729813207e-06, "loss": 0.133, "step": 24432 }, { "epoch": 0.7127895443141373, "grad_norm": 0.7179885008171474, "learning_rate": 2.0117233840691364e-06, "loss": 0.1385, "step": 24433 }, { "epoch": 0.7128187175447809, "grad_norm": 0.7350504815158918, "learning_rate": 2.0113446218355727e-06, "loss": 0.1302, "step": 24434 }, { "epoch": 0.7128478907754244, "grad_norm": 1.4719421673535458, "learning_rate": 2.0109658862840085e-06, "loss": 0.1232, "step": 24435 }, { "epoch": 0.712877064006068, "grad_norm": 0.8215421131912853, "learning_rate": 2.0105871774178293e-06, "loss": 0.1176, "step": 24436 }, { "epoch": 0.7129062372367116, "grad_norm": 0.8400742032577411, "learning_rate": 2.0102084952404145e-06, "loss": 0.1281, "step": 24437 }, { "epoch": 0.7129354104673552, "grad_norm": 0.9365317718177184, "learning_rate": 2.0098298397551423e-06, "loss": 0.1291, "step": 24438 }, { "epoch": 0.7129645836979988, "grad_norm": 0.964290388631854, "learning_rate": 2.009451210965396e-06, "loss": 0.1121, "step": 24439 }, { "epoch": 0.7129937569286423, "grad_norm": 1.1698157614872702, "learning_rate": 2.0090726088745566e-06, "loss": 0.1408, "step": 24440 }, { "epoch": 0.7130229301592859, "grad_norm": 0.8204052766706701, "learning_rate": 2.008694033486003e-06, "loss": 0.1213, "step": 24441 }, { "epoch": 0.7130521033899294, "grad_norm": 0.7812560378061456, "learning_rate": 2.008315484803114e-06, "loss": 0.1138, "step": 24442 }, { "epoch": 0.713081276620573, "grad_norm": 1.079505207040131, "learning_rate": 2.007936962829271e-06, "loss": 0.1454, "step": 24443 }, { "epoch": 0.7131104498512165, "grad_norm": 0.8291730172821995, "learning_rate": 2.0075584675678516e-06, "loss": 0.1129, "step": 24444 }, { "epoch": 0.7131396230818601, "grad_norm": 0.7556905008613477, "learning_rate": 2.007179999022235e-06, "loss": 0.1177, "step": 24445 }, { "epoch": 0.7131687963125036, "grad_norm": 1.0318833643097356, "learning_rate": 2.006801557195803e-06, "loss": 0.1074, "step": 24446 }, { "epoch": 0.7131979695431472, "grad_norm": 0.9405996013697249, "learning_rate": 2.006423142091933e-06, "loss": 0.1127, "step": 24447 }, { "epoch": 0.7132271427737907, "grad_norm": 0.9335895698959623, "learning_rate": 2.006044753714e-06, "loss": 0.1317, "step": 24448 }, { "epoch": 0.7132563160044343, "grad_norm": 1.1095474684342954, "learning_rate": 2.0056663920653865e-06, "loss": 0.1236, "step": 24449 }, { "epoch": 0.7132854892350778, "grad_norm": 0.7760666738870904, "learning_rate": 2.0052880571494665e-06, "loss": 0.1319, "step": 24450 }, { "epoch": 0.7133146624657215, "grad_norm": 0.7959525494247813, "learning_rate": 2.004909748969622e-06, "loss": 0.129, "step": 24451 }, { "epoch": 0.7133438356963651, "grad_norm": 1.0227122749939923, "learning_rate": 2.0045314675292265e-06, "loss": 0.119, "step": 24452 }, { "epoch": 0.7133730089270086, "grad_norm": 0.8146023359839762, "learning_rate": 2.004153212831661e-06, "loss": 0.1148, "step": 24453 }, { "epoch": 0.7134021821576522, "grad_norm": 1.1375692905926735, "learning_rate": 2.0037749848803002e-06, "loss": 0.1268, "step": 24454 }, { "epoch": 0.7134313553882957, "grad_norm": 0.8212560295520828, "learning_rate": 2.0033967836785196e-06, "loss": 0.1198, "step": 24455 }, { "epoch": 0.7134605286189393, "grad_norm": 0.9104696570081723, "learning_rate": 2.0030186092296965e-06, "loss": 0.1333, "step": 24456 }, { "epoch": 0.7134897018495828, "grad_norm": 0.9347454139472186, "learning_rate": 2.00264046153721e-06, "loss": 0.1143, "step": 24457 }, { "epoch": 0.7135188750802264, "grad_norm": 0.6949505851193986, "learning_rate": 2.002262340604432e-06, "loss": 0.1094, "step": 24458 }, { "epoch": 0.7135480483108699, "grad_norm": 0.7705963383300264, "learning_rate": 2.0018842464347427e-06, "loss": 0.1431, "step": 24459 }, { "epoch": 0.7135772215415135, "grad_norm": 0.9104706678332688, "learning_rate": 2.001506179031514e-06, "loss": 0.1031, "step": 24460 }, { "epoch": 0.713606394772157, "grad_norm": 0.7063606372601169, "learning_rate": 2.001128138398121e-06, "loss": 0.116, "step": 24461 }, { "epoch": 0.7136355680028006, "grad_norm": 0.7953551910751111, "learning_rate": 2.0007501245379408e-06, "loss": 0.1087, "step": 24462 }, { "epoch": 0.7136647412334441, "grad_norm": 1.077359935229099, "learning_rate": 2.000372137454349e-06, "loss": 0.138, "step": 24463 }, { "epoch": 0.7136939144640878, "grad_norm": 0.9097983110699532, "learning_rate": 1.999994177150718e-06, "loss": 0.1165, "step": 24464 }, { "epoch": 0.7137230876947314, "grad_norm": 0.7102176656054635, "learning_rate": 1.9996162436304217e-06, "loss": 0.1092, "step": 24465 }, { "epoch": 0.7137522609253749, "grad_norm": 0.8545017317221432, "learning_rate": 1.9992383368968364e-06, "loss": 0.1377, "step": 24466 }, { "epoch": 0.7137814341560185, "grad_norm": 0.9044676141278017, "learning_rate": 1.9988604569533353e-06, "loss": 0.1191, "step": 24467 }, { "epoch": 0.713810607386662, "grad_norm": 0.7449057819921429, "learning_rate": 1.99848260380329e-06, "loss": 0.121, "step": 24468 }, { "epoch": 0.7138397806173056, "grad_norm": 0.8174339871375383, "learning_rate": 1.9981047774500755e-06, "loss": 0.1368, "step": 24469 }, { "epoch": 0.7138689538479491, "grad_norm": 0.8317875323554248, "learning_rate": 1.9977269778970666e-06, "loss": 0.1111, "step": 24470 }, { "epoch": 0.7138981270785927, "grad_norm": 0.864772193549524, "learning_rate": 1.9973492051476345e-06, "loss": 0.0925, "step": 24471 }, { "epoch": 0.7139273003092362, "grad_norm": 0.719659647315192, "learning_rate": 1.9969714592051506e-06, "loss": 0.1008, "step": 24472 }, { "epoch": 0.7139564735398798, "grad_norm": 0.884086106888888, "learning_rate": 1.9965937400729895e-06, "loss": 0.1215, "step": 24473 }, { "epoch": 0.7139856467705233, "grad_norm": 1.1009363270742796, "learning_rate": 1.996216047754521e-06, "loss": 0.1207, "step": 24474 }, { "epoch": 0.7140148200011669, "grad_norm": 0.8531369406368245, "learning_rate": 1.995838382253119e-06, "loss": 0.0985, "step": 24475 }, { "epoch": 0.7140439932318104, "grad_norm": 0.7103027521649443, "learning_rate": 1.995460743572156e-06, "loss": 0.1192, "step": 24476 }, { "epoch": 0.714073166462454, "grad_norm": 1.0738885162982494, "learning_rate": 1.995083131715003e-06, "loss": 0.1364, "step": 24477 }, { "epoch": 0.7141023396930977, "grad_norm": 0.9695200049495182, "learning_rate": 1.9947055466850283e-06, "loss": 0.1333, "step": 24478 }, { "epoch": 0.7141315129237412, "grad_norm": 1.0123868722289286, "learning_rate": 1.9943279884856065e-06, "loss": 0.1164, "step": 24479 }, { "epoch": 0.7141606861543848, "grad_norm": 1.0769255112161285, "learning_rate": 1.9939504571201055e-06, "loss": 0.1268, "step": 24480 }, { "epoch": 0.7141898593850283, "grad_norm": 0.9194611986223226, "learning_rate": 1.993572952591899e-06, "loss": 0.1328, "step": 24481 }, { "epoch": 0.7142190326156719, "grad_norm": 0.8962777665479411, "learning_rate": 1.9931954749043535e-06, "loss": 0.1041, "step": 24482 }, { "epoch": 0.7142482058463154, "grad_norm": 0.7625310960114333, "learning_rate": 1.992818024060843e-06, "loss": 0.1178, "step": 24483 }, { "epoch": 0.714277379076959, "grad_norm": 1.265964283956046, "learning_rate": 1.9924406000647354e-06, "loss": 0.125, "step": 24484 }, { "epoch": 0.7143065523076025, "grad_norm": 1.0880981977655597, "learning_rate": 1.992063202919398e-06, "loss": 0.1163, "step": 24485 }, { "epoch": 0.7143357255382461, "grad_norm": 0.8285814688788601, "learning_rate": 1.991685832628202e-06, "loss": 0.1187, "step": 24486 }, { "epoch": 0.7143648987688896, "grad_norm": 0.9923470221096321, "learning_rate": 1.9913084891945195e-06, "loss": 0.1306, "step": 24487 }, { "epoch": 0.7143940719995332, "grad_norm": 0.997270190992129, "learning_rate": 1.9909311726217144e-06, "loss": 0.1234, "step": 24488 }, { "epoch": 0.7144232452301768, "grad_norm": 0.8188236531698319, "learning_rate": 1.9905538829131594e-06, "loss": 0.1136, "step": 24489 }, { "epoch": 0.7144524184608203, "grad_norm": 0.6162367141874275, "learning_rate": 1.9901766200722205e-06, "loss": 0.1076, "step": 24490 }, { "epoch": 0.7144815916914639, "grad_norm": 1.061775721054025, "learning_rate": 1.9897993841022643e-06, "loss": 0.1173, "step": 24491 }, { "epoch": 0.7145107649221075, "grad_norm": 0.8772880924671139, "learning_rate": 1.989422175006661e-06, "loss": 0.1603, "step": 24492 }, { "epoch": 0.7145399381527511, "grad_norm": 0.9203970716723464, "learning_rate": 1.9890449927887796e-06, "loss": 0.103, "step": 24493 }, { "epoch": 0.7145691113833946, "grad_norm": 0.798478354769862, "learning_rate": 1.988667837451986e-06, "loss": 0.1119, "step": 24494 }, { "epoch": 0.7145982846140382, "grad_norm": 0.9197447757365986, "learning_rate": 1.9882907089996453e-06, "loss": 0.1174, "step": 24495 }, { "epoch": 0.7146274578446817, "grad_norm": 0.6913238297298403, "learning_rate": 1.9879136074351276e-06, "loss": 0.1149, "step": 24496 }, { "epoch": 0.7146566310753253, "grad_norm": 0.762813617857346, "learning_rate": 1.987536532761798e-06, "loss": 0.126, "step": 24497 }, { "epoch": 0.7146858043059688, "grad_norm": 0.6810020343943598, "learning_rate": 1.9871594849830213e-06, "loss": 0.104, "step": 24498 }, { "epoch": 0.7147149775366124, "grad_norm": 0.6555628971500574, "learning_rate": 1.986782464102166e-06, "loss": 0.1173, "step": 24499 }, { "epoch": 0.714744150767256, "grad_norm": 1.1312181162398898, "learning_rate": 1.9864054701225986e-06, "loss": 0.1133, "step": 24500 }, { "epoch": 0.7147733239978995, "grad_norm": 0.8451512766943347, "learning_rate": 1.9860285030476844e-06, "loss": 0.1326, "step": 24501 }, { "epoch": 0.714802497228543, "grad_norm": 0.9255673831474458, "learning_rate": 1.9856515628807865e-06, "loss": 0.1262, "step": 24502 }, { "epoch": 0.7148316704591866, "grad_norm": 0.6826310324274596, "learning_rate": 1.9852746496252735e-06, "loss": 0.1007, "step": 24503 }, { "epoch": 0.7148608436898302, "grad_norm": 0.7154138159018919, "learning_rate": 1.984897763284507e-06, "loss": 0.1248, "step": 24504 }, { "epoch": 0.7148900169204738, "grad_norm": 0.856043848945111, "learning_rate": 1.984520903861853e-06, "loss": 0.1311, "step": 24505 }, { "epoch": 0.7149191901511174, "grad_norm": 0.9118751512671261, "learning_rate": 1.984144071360679e-06, "loss": 0.1547, "step": 24506 }, { "epoch": 0.7149483633817609, "grad_norm": 0.8120519707589329, "learning_rate": 1.9837672657843467e-06, "loss": 0.1119, "step": 24507 }, { "epoch": 0.7149775366124045, "grad_norm": 0.9660033861049498, "learning_rate": 1.983390487136218e-06, "loss": 0.1214, "step": 24508 }, { "epoch": 0.715006709843048, "grad_norm": 0.7301469264130491, "learning_rate": 1.983013735419661e-06, "loss": 0.1246, "step": 24509 }, { "epoch": 0.7150358830736916, "grad_norm": 0.9168246822075329, "learning_rate": 1.982637010638035e-06, "loss": 0.1148, "step": 24510 }, { "epoch": 0.7150650563043351, "grad_norm": 0.802518765472926, "learning_rate": 1.9822603127947076e-06, "loss": 0.1095, "step": 24511 }, { "epoch": 0.7150942295349787, "grad_norm": 1.0253697786433456, "learning_rate": 1.981883641893038e-06, "loss": 0.1368, "step": 24512 }, { "epoch": 0.7151234027656223, "grad_norm": 1.0496533173020024, "learning_rate": 1.9815069979363927e-06, "loss": 0.1242, "step": 24513 }, { "epoch": 0.7151525759962658, "grad_norm": 0.9436918956143182, "learning_rate": 1.9811303809281318e-06, "loss": 0.1204, "step": 24514 }, { "epoch": 0.7151817492269094, "grad_norm": 0.9034584311781312, "learning_rate": 1.980753790871617e-06, "loss": 0.1138, "step": 24515 }, { "epoch": 0.7152109224575529, "grad_norm": 0.9629977658340991, "learning_rate": 1.980377227770211e-06, "loss": 0.1262, "step": 24516 }, { "epoch": 0.7152400956881965, "grad_norm": 0.7637325551091692, "learning_rate": 1.9800006916272785e-06, "loss": 0.1073, "step": 24517 }, { "epoch": 0.71526926891884, "grad_norm": 0.9062947721927214, "learning_rate": 1.979624182446177e-06, "loss": 0.1225, "step": 24518 }, { "epoch": 0.7152984421494837, "grad_norm": 0.9542482483793993, "learning_rate": 1.9792477002302713e-06, "loss": 0.1096, "step": 24519 }, { "epoch": 0.7153276153801272, "grad_norm": 0.9360330193100261, "learning_rate": 1.9788712449829213e-06, "loss": 0.1276, "step": 24520 }, { "epoch": 0.7153567886107708, "grad_norm": 1.2684726042405354, "learning_rate": 1.9784948167074856e-06, "loss": 0.1171, "step": 24521 }, { "epoch": 0.7153859618414143, "grad_norm": 1.0361858156709687, "learning_rate": 1.9781184154073273e-06, "loss": 0.1395, "step": 24522 }, { "epoch": 0.7154151350720579, "grad_norm": 0.8714331582704156, "learning_rate": 1.977742041085808e-06, "loss": 0.1205, "step": 24523 }, { "epoch": 0.7154443083027014, "grad_norm": 1.257545422964162, "learning_rate": 1.9773656937462867e-06, "loss": 0.1198, "step": 24524 }, { "epoch": 0.715473481533345, "grad_norm": 0.8290693970217324, "learning_rate": 1.97698937339212e-06, "loss": 0.0947, "step": 24525 }, { "epoch": 0.7155026547639886, "grad_norm": 0.7808933215197137, "learning_rate": 1.976613080026673e-06, "loss": 0.1198, "step": 24526 }, { "epoch": 0.7155318279946321, "grad_norm": 0.9400499470621698, "learning_rate": 1.976236813653303e-06, "loss": 0.1228, "step": 24527 }, { "epoch": 0.7155610012252757, "grad_norm": 0.8092859321774526, "learning_rate": 1.9758605742753665e-06, "loss": 0.0983, "step": 24528 }, { "epoch": 0.7155901744559192, "grad_norm": 0.9741125357720664, "learning_rate": 1.9754843618962255e-06, "loss": 0.1192, "step": 24529 }, { "epoch": 0.7156193476865628, "grad_norm": 0.9429728273169273, "learning_rate": 1.975108176519239e-06, "loss": 0.1304, "step": 24530 }, { "epoch": 0.7156485209172063, "grad_norm": 0.8955526795595488, "learning_rate": 1.974732018147766e-06, "loss": 0.1215, "step": 24531 }, { "epoch": 0.71567769414785, "grad_norm": 0.8555186048254568, "learning_rate": 1.9743558867851605e-06, "loss": 0.0976, "step": 24532 }, { "epoch": 0.7157068673784935, "grad_norm": 0.9543932338643378, "learning_rate": 1.973979782434785e-06, "loss": 0.0959, "step": 24533 }, { "epoch": 0.7157360406091371, "grad_norm": 0.9182165873944451, "learning_rate": 1.9736037050999946e-06, "loss": 0.1214, "step": 24534 }, { "epoch": 0.7157652138397806, "grad_norm": 0.8839100903731244, "learning_rate": 1.9732276547841473e-06, "loss": 0.1114, "step": 24535 }, { "epoch": 0.7157943870704242, "grad_norm": 0.783510492837181, "learning_rate": 1.9728516314906034e-06, "loss": 0.1316, "step": 24536 }, { "epoch": 0.7158235603010678, "grad_norm": 0.9539275579482954, "learning_rate": 1.9724756352227163e-06, "loss": 0.1344, "step": 24537 }, { "epoch": 0.7158527335317113, "grad_norm": 0.9102475346754038, "learning_rate": 1.9720996659838433e-06, "loss": 0.1017, "step": 24538 }, { "epoch": 0.7158819067623549, "grad_norm": 1.0290131903244208, "learning_rate": 1.9717237237773428e-06, "loss": 0.1271, "step": 24539 }, { "epoch": 0.7159110799929984, "grad_norm": 0.8750834969039144, "learning_rate": 1.9713478086065686e-06, "loss": 0.1263, "step": 24540 }, { "epoch": 0.715940253223642, "grad_norm": 0.7595404203221637, "learning_rate": 1.97097192047488e-06, "loss": 0.1269, "step": 24541 }, { "epoch": 0.7159694264542855, "grad_norm": 1.070364503939263, "learning_rate": 1.9705960593856287e-06, "loss": 0.1346, "step": 24542 }, { "epoch": 0.7159985996849291, "grad_norm": 0.7964632149951953, "learning_rate": 1.970220225342175e-06, "loss": 0.1214, "step": 24543 }, { "epoch": 0.7160277729155726, "grad_norm": 0.7370001324735711, "learning_rate": 1.9698444183478715e-06, "loss": 0.1262, "step": 24544 }, { "epoch": 0.7160569461462162, "grad_norm": 0.9609390321102597, "learning_rate": 1.9694686384060726e-06, "loss": 0.1458, "step": 24545 }, { "epoch": 0.7160861193768598, "grad_norm": 0.6317432156243418, "learning_rate": 1.969092885520133e-06, "loss": 0.1063, "step": 24546 }, { "epoch": 0.7161152926075034, "grad_norm": 0.7703462693873755, "learning_rate": 1.9687171596934112e-06, "loss": 0.1112, "step": 24547 }, { "epoch": 0.716144465838147, "grad_norm": 0.7059559100543018, "learning_rate": 1.9683414609292573e-06, "loss": 0.1171, "step": 24548 }, { "epoch": 0.7161736390687905, "grad_norm": 0.718228407163729, "learning_rate": 1.967965789231028e-06, "loss": 0.1234, "step": 24549 }, { "epoch": 0.716202812299434, "grad_norm": 0.7392834156275169, "learning_rate": 1.967590144602077e-06, "loss": 0.0972, "step": 24550 }, { "epoch": 0.7162319855300776, "grad_norm": 0.966034325521619, "learning_rate": 1.9672145270457553e-06, "loss": 0.1226, "step": 24551 }, { "epoch": 0.7162611587607212, "grad_norm": 1.193308368363112, "learning_rate": 1.966838936565419e-06, "loss": 0.1279, "step": 24552 }, { "epoch": 0.7162903319913647, "grad_norm": 0.6950364685412184, "learning_rate": 1.9664633731644215e-06, "loss": 0.0993, "step": 24553 }, { "epoch": 0.7163195052220083, "grad_norm": 0.7244842209202247, "learning_rate": 1.9660878368461156e-06, "loss": 0.0979, "step": 24554 }, { "epoch": 0.7163486784526518, "grad_norm": 0.8063818433393074, "learning_rate": 1.9657123276138507e-06, "loss": 0.1345, "step": 24555 }, { "epoch": 0.7163778516832954, "grad_norm": 0.8561936995955228, "learning_rate": 1.9653368454709844e-06, "loss": 0.1138, "step": 24556 }, { "epoch": 0.7164070249139389, "grad_norm": 0.8185377889627363, "learning_rate": 1.9649613904208637e-06, "loss": 0.0989, "step": 24557 }, { "epoch": 0.7164361981445825, "grad_norm": 0.7543619490448702, "learning_rate": 1.9645859624668455e-06, "loss": 0.1232, "step": 24558 }, { "epoch": 0.7164653713752261, "grad_norm": 0.8433671728047157, "learning_rate": 1.9642105616122768e-06, "loss": 0.1097, "step": 24559 }, { "epoch": 0.7164945446058697, "grad_norm": 0.8634624804529792, "learning_rate": 1.963835187860514e-06, "loss": 0.1122, "step": 24560 }, { "epoch": 0.7165237178365133, "grad_norm": 0.7328743190271523, "learning_rate": 1.9634598412149056e-06, "loss": 0.1302, "step": 24561 }, { "epoch": 0.7165528910671568, "grad_norm": 0.936673912964834, "learning_rate": 1.9630845216788016e-06, "loss": 0.1269, "step": 24562 }, { "epoch": 0.7165820642978004, "grad_norm": 0.8894157851527578, "learning_rate": 1.9627092292555534e-06, "loss": 0.0939, "step": 24563 }, { "epoch": 0.7166112375284439, "grad_norm": 1.090818452881439, "learning_rate": 1.9623339639485133e-06, "loss": 0.1186, "step": 24564 }, { "epoch": 0.7166404107590875, "grad_norm": 0.9359231326597849, "learning_rate": 1.9619587257610296e-06, "loss": 0.1198, "step": 24565 }, { "epoch": 0.716669583989731, "grad_norm": 0.7439177195500215, "learning_rate": 1.9615835146964547e-06, "loss": 0.1407, "step": 24566 }, { "epoch": 0.7166987572203746, "grad_norm": 0.9931004330733285, "learning_rate": 1.961208330758137e-06, "loss": 0.1209, "step": 24567 }, { "epoch": 0.7167279304510181, "grad_norm": 1.208322531175956, "learning_rate": 1.960833173949424e-06, "loss": 0.1027, "step": 24568 }, { "epoch": 0.7167571036816617, "grad_norm": 0.8527797878275589, "learning_rate": 1.960458044273667e-06, "loss": 0.1251, "step": 24569 }, { "epoch": 0.7167862769123052, "grad_norm": 0.8364728856212879, "learning_rate": 1.9600829417342166e-06, "loss": 0.1251, "step": 24570 }, { "epoch": 0.7168154501429488, "grad_norm": 0.8511367927807031, "learning_rate": 1.95970786633442e-06, "loss": 0.1095, "step": 24571 }, { "epoch": 0.7168446233735923, "grad_norm": 0.7868164798833528, "learning_rate": 1.959332818077624e-06, "loss": 0.1273, "step": 24572 }, { "epoch": 0.716873796604236, "grad_norm": 0.8839613641074617, "learning_rate": 1.9589577969671808e-06, "loss": 0.1394, "step": 24573 }, { "epoch": 0.7169029698348796, "grad_norm": 1.0469856369584865, "learning_rate": 1.958582803006436e-06, "loss": 0.1357, "step": 24574 }, { "epoch": 0.7169321430655231, "grad_norm": 0.7026496730411657, "learning_rate": 1.9582078361987345e-06, "loss": 0.1099, "step": 24575 }, { "epoch": 0.7169613162961667, "grad_norm": 0.7384995100826249, "learning_rate": 1.9578328965474306e-06, "loss": 0.1227, "step": 24576 }, { "epoch": 0.7169904895268102, "grad_norm": 0.7931694308950297, "learning_rate": 1.957457984055869e-06, "loss": 0.1175, "step": 24577 }, { "epoch": 0.7170196627574538, "grad_norm": 0.6955838590790396, "learning_rate": 1.9570830987273944e-06, "loss": 0.1035, "step": 24578 }, { "epoch": 0.7170488359880973, "grad_norm": 0.9837980017332323, "learning_rate": 1.9567082405653565e-06, "loss": 0.115, "step": 24579 }, { "epoch": 0.7170780092187409, "grad_norm": 0.7276555376558514, "learning_rate": 1.956333409573102e-06, "loss": 0.1079, "step": 24580 }, { "epoch": 0.7171071824493844, "grad_norm": 0.7667172904919755, "learning_rate": 1.9559586057539737e-06, "loss": 0.1113, "step": 24581 }, { "epoch": 0.717136355680028, "grad_norm": 0.7961658916482846, "learning_rate": 1.9555838291113205e-06, "loss": 0.1291, "step": 24582 }, { "epoch": 0.7171655289106715, "grad_norm": 0.785833715893844, "learning_rate": 1.9552090796484896e-06, "loss": 0.1193, "step": 24583 }, { "epoch": 0.7171947021413151, "grad_norm": 0.8239789626906368, "learning_rate": 1.9548343573688256e-06, "loss": 0.1407, "step": 24584 }, { "epoch": 0.7172238753719586, "grad_norm": 0.9487799334267942, "learning_rate": 1.9544596622756716e-06, "loss": 0.1259, "step": 24585 }, { "epoch": 0.7172530486026023, "grad_norm": 0.7663403839748532, "learning_rate": 1.954084994372376e-06, "loss": 0.1075, "step": 24586 }, { "epoch": 0.7172822218332459, "grad_norm": 0.7928594910962723, "learning_rate": 1.9537103536622813e-06, "loss": 0.1141, "step": 24587 }, { "epoch": 0.7173113950638894, "grad_norm": 0.9657934542018983, "learning_rate": 1.9533357401487352e-06, "loss": 0.1147, "step": 24588 }, { "epoch": 0.717340568294533, "grad_norm": 0.9545381189212526, "learning_rate": 1.9529611538350785e-06, "loss": 0.1211, "step": 24589 }, { "epoch": 0.7173697415251765, "grad_norm": 0.8322622904427825, "learning_rate": 1.9525865947246587e-06, "loss": 0.1509, "step": 24590 }, { "epoch": 0.7173989147558201, "grad_norm": 0.9111697633194339, "learning_rate": 1.9522120628208186e-06, "loss": 0.1383, "step": 24591 }, { "epoch": 0.7174280879864636, "grad_norm": 0.8057984566038064, "learning_rate": 1.9518375581268993e-06, "loss": 0.1187, "step": 24592 }, { "epoch": 0.7174572612171072, "grad_norm": 0.8861541754385086, "learning_rate": 1.951463080646247e-06, "loss": 0.115, "step": 24593 }, { "epoch": 0.7174864344477507, "grad_norm": 0.8192781769631182, "learning_rate": 1.951088630382206e-06, "loss": 0.1174, "step": 24594 }, { "epoch": 0.7175156076783943, "grad_norm": 0.9404181893206632, "learning_rate": 1.9507142073381167e-06, "loss": 0.124, "step": 24595 }, { "epoch": 0.7175447809090378, "grad_norm": 2.2864494159241726, "learning_rate": 1.950339811517325e-06, "loss": 0.1146, "step": 24596 }, { "epoch": 0.7175739541396814, "grad_norm": 0.8838459596196421, "learning_rate": 1.949965442923171e-06, "loss": 0.1373, "step": 24597 }, { "epoch": 0.717603127370325, "grad_norm": 0.7264295874041378, "learning_rate": 1.9495911015589957e-06, "loss": 0.1183, "step": 24598 }, { "epoch": 0.7176323006009685, "grad_norm": 0.7934864666564094, "learning_rate": 1.9492167874281425e-06, "loss": 0.1234, "step": 24599 }, { "epoch": 0.7176614738316122, "grad_norm": 0.8892097528522984, "learning_rate": 1.9488425005339555e-06, "loss": 0.1149, "step": 24600 }, { "epoch": 0.7176906470622557, "grad_norm": 1.0318295023072517, "learning_rate": 1.948468240879775e-06, "loss": 0.141, "step": 24601 }, { "epoch": 0.7177198202928993, "grad_norm": 0.8342628317211979, "learning_rate": 1.9480940084689394e-06, "loss": 0.1156, "step": 24602 }, { "epoch": 0.7177489935235428, "grad_norm": 0.9779654918887197, "learning_rate": 1.9477198033047933e-06, "loss": 0.104, "step": 24603 }, { "epoch": 0.7177781667541864, "grad_norm": 0.6842079680824421, "learning_rate": 1.9473456253906764e-06, "loss": 0.0947, "step": 24604 }, { "epoch": 0.7178073399848299, "grad_norm": 0.9837934333206318, "learning_rate": 1.946971474729926e-06, "loss": 0.1245, "step": 24605 }, { "epoch": 0.7178365132154735, "grad_norm": 0.7094418965261642, "learning_rate": 1.946597351325888e-06, "loss": 0.1154, "step": 24606 }, { "epoch": 0.717865686446117, "grad_norm": 0.8102633184500629, "learning_rate": 1.9462232551819006e-06, "loss": 0.0963, "step": 24607 }, { "epoch": 0.7178948596767606, "grad_norm": 1.2047205682955549, "learning_rate": 1.9458491863013006e-06, "loss": 0.1071, "step": 24608 }, { "epoch": 0.7179240329074041, "grad_norm": 0.6767291762863835, "learning_rate": 1.9454751446874328e-06, "loss": 0.1197, "step": 24609 }, { "epoch": 0.7179532061380477, "grad_norm": 0.790391055486654, "learning_rate": 1.945101130343633e-06, "loss": 0.1211, "step": 24610 }, { "epoch": 0.7179823793686912, "grad_norm": 0.905758586230062, "learning_rate": 1.944727143273239e-06, "loss": 0.1238, "step": 24611 }, { "epoch": 0.7180115525993348, "grad_norm": 0.6817967211946586, "learning_rate": 1.9443531834795927e-06, "loss": 0.1193, "step": 24612 }, { "epoch": 0.7180407258299785, "grad_norm": 1.153489879461069, "learning_rate": 1.943979250966033e-06, "loss": 0.1255, "step": 24613 }, { "epoch": 0.718069899060622, "grad_norm": 0.914770188982417, "learning_rate": 1.943605345735897e-06, "loss": 0.1097, "step": 24614 }, { "epoch": 0.7180990722912656, "grad_norm": 0.6866580799115839, "learning_rate": 1.9432314677925207e-06, "loss": 0.1216, "step": 24615 }, { "epoch": 0.7181282455219091, "grad_norm": 0.722579575940004, "learning_rate": 1.9428576171392462e-06, "loss": 0.1432, "step": 24616 }, { "epoch": 0.7181574187525527, "grad_norm": 0.937653794165693, "learning_rate": 1.942483793779407e-06, "loss": 0.118, "step": 24617 }, { "epoch": 0.7181865919831962, "grad_norm": 0.8842048175150861, "learning_rate": 1.942109997716345e-06, "loss": 0.1082, "step": 24618 }, { "epoch": 0.7182157652138398, "grad_norm": 0.8019699131399003, "learning_rate": 1.9417362289533933e-06, "loss": 0.1352, "step": 24619 }, { "epoch": 0.7182449384444833, "grad_norm": 0.9522965798772958, "learning_rate": 1.9413624874938915e-06, "loss": 0.1234, "step": 24620 }, { "epoch": 0.7182741116751269, "grad_norm": 0.9299597553798609, "learning_rate": 1.940988773341176e-06, "loss": 0.1119, "step": 24621 }, { "epoch": 0.7183032849057704, "grad_norm": 0.8737066950837651, "learning_rate": 1.94061508649858e-06, "loss": 0.1155, "step": 24622 }, { "epoch": 0.718332458136414, "grad_norm": 0.7935505559490547, "learning_rate": 1.9402414269694425e-06, "loss": 0.1489, "step": 24623 }, { "epoch": 0.7183616313670576, "grad_norm": 0.8908221747073154, "learning_rate": 1.939867794757101e-06, "loss": 0.1266, "step": 24624 }, { "epoch": 0.7183908045977011, "grad_norm": 0.7495130405297756, "learning_rate": 1.9394941898648874e-06, "loss": 0.11, "step": 24625 }, { "epoch": 0.7184199778283447, "grad_norm": 0.8056942599242546, "learning_rate": 1.939120612296141e-06, "loss": 0.1098, "step": 24626 }, { "epoch": 0.7184491510589883, "grad_norm": 0.856716002032613, "learning_rate": 1.938747062054195e-06, "loss": 0.1222, "step": 24627 }, { "epoch": 0.7184783242896319, "grad_norm": 0.8408169293756518, "learning_rate": 1.9383735391423826e-06, "loss": 0.104, "step": 24628 }, { "epoch": 0.7185074975202754, "grad_norm": 1.0431592999174712, "learning_rate": 1.9380000435640407e-06, "loss": 0.109, "step": 24629 }, { "epoch": 0.718536670750919, "grad_norm": 1.024265358311274, "learning_rate": 1.9376265753225047e-06, "loss": 0.1316, "step": 24630 }, { "epoch": 0.7185658439815625, "grad_norm": 0.9089588791649065, "learning_rate": 1.9372531344211076e-06, "loss": 0.1205, "step": 24631 }, { "epoch": 0.7185950172122061, "grad_norm": 0.9693610113332035, "learning_rate": 1.9368797208631822e-06, "loss": 0.1135, "step": 24632 }, { "epoch": 0.7186241904428496, "grad_norm": 0.8042025428336325, "learning_rate": 1.9365063346520645e-06, "loss": 0.1305, "step": 24633 }, { "epoch": 0.7186533636734932, "grad_norm": 0.8853523697634652, "learning_rate": 1.9361329757910875e-06, "loss": 0.1296, "step": 24634 }, { "epoch": 0.7186825369041367, "grad_norm": 0.9365344251950367, "learning_rate": 1.93575964428358e-06, "loss": 0.1328, "step": 24635 }, { "epoch": 0.7187117101347803, "grad_norm": 0.7650189822371252, "learning_rate": 1.9353863401328827e-06, "loss": 0.1083, "step": 24636 }, { "epoch": 0.7187408833654239, "grad_norm": 0.9849746651651492, "learning_rate": 1.9350130633423247e-06, "loss": 0.1307, "step": 24637 }, { "epoch": 0.7187700565960674, "grad_norm": 0.7478866575294362, "learning_rate": 1.934639813915236e-06, "loss": 0.1339, "step": 24638 }, { "epoch": 0.718799229826711, "grad_norm": 0.8851299734756334, "learning_rate": 1.9342665918549534e-06, "loss": 0.1089, "step": 24639 }, { "epoch": 0.7188284030573546, "grad_norm": 0.9144544786870371, "learning_rate": 1.933893397164807e-06, "loss": 0.1196, "step": 24640 }, { "epoch": 0.7188575762879982, "grad_norm": 0.9876098518976142, "learning_rate": 1.9335202298481267e-06, "loss": 0.1251, "step": 24641 }, { "epoch": 0.7188867495186417, "grad_norm": 0.7706470829293179, "learning_rate": 1.9331470899082457e-06, "loss": 0.1144, "step": 24642 }, { "epoch": 0.7189159227492853, "grad_norm": 0.7820956399378031, "learning_rate": 1.9327739773484968e-06, "loss": 0.1186, "step": 24643 }, { "epoch": 0.7189450959799288, "grad_norm": 0.8181877304881421, "learning_rate": 1.93240089217221e-06, "loss": 0.121, "step": 24644 }, { "epoch": 0.7189742692105724, "grad_norm": 1.3578256533319701, "learning_rate": 1.932027834382714e-06, "loss": 0.1276, "step": 24645 }, { "epoch": 0.719003442441216, "grad_norm": 1.3414013371998352, "learning_rate": 1.9316548039833423e-06, "loss": 0.1142, "step": 24646 }, { "epoch": 0.7190326156718595, "grad_norm": 0.7576906087842425, "learning_rate": 1.9312818009774227e-06, "loss": 0.1113, "step": 24647 }, { "epoch": 0.719061788902503, "grad_norm": 0.8616321200133674, "learning_rate": 1.9309088253682884e-06, "loss": 0.1269, "step": 24648 }, { "epoch": 0.7190909621331466, "grad_norm": 0.9510041875764456, "learning_rate": 1.930535877159265e-06, "loss": 0.1167, "step": 24649 }, { "epoch": 0.7191201353637902, "grad_norm": 1.021867022805468, "learning_rate": 1.930162956353687e-06, "loss": 0.1108, "step": 24650 }, { "epoch": 0.7191493085944337, "grad_norm": 0.9265718907821453, "learning_rate": 1.9297900629548817e-06, "loss": 0.1138, "step": 24651 }, { "epoch": 0.7191784818250773, "grad_norm": 0.8233226303401939, "learning_rate": 1.9294171969661756e-06, "loss": 0.1268, "step": 24652 }, { "epoch": 0.7192076550557208, "grad_norm": 0.8893054989634243, "learning_rate": 1.9290443583908996e-06, "loss": 0.1181, "step": 24653 }, { "epoch": 0.7192368282863645, "grad_norm": 0.8629398360858149, "learning_rate": 1.928671547232384e-06, "loss": 0.1029, "step": 24654 }, { "epoch": 0.719266001517008, "grad_norm": 0.880249060721745, "learning_rate": 1.928298763493954e-06, "loss": 0.1289, "step": 24655 }, { "epoch": 0.7192951747476516, "grad_norm": 1.0290716867981546, "learning_rate": 1.927926007178942e-06, "loss": 0.1135, "step": 24656 }, { "epoch": 0.7193243479782951, "grad_norm": 0.9821125362305515, "learning_rate": 1.9275532782906726e-06, "loss": 0.0944, "step": 24657 }, { "epoch": 0.7193535212089387, "grad_norm": 0.9376056462310045, "learning_rate": 1.927180576832472e-06, "loss": 0.1052, "step": 24658 }, { "epoch": 0.7193826944395822, "grad_norm": 0.9270168486362149, "learning_rate": 1.9268079028076705e-06, "loss": 0.1325, "step": 24659 }, { "epoch": 0.7194118676702258, "grad_norm": 0.758389496752336, "learning_rate": 1.9264352562195953e-06, "loss": 0.1186, "step": 24660 }, { "epoch": 0.7194410409008694, "grad_norm": 0.836732520175048, "learning_rate": 1.926062637071573e-06, "loss": 0.1135, "step": 24661 }, { "epoch": 0.7194702141315129, "grad_norm": 0.8483575778597005, "learning_rate": 1.9256900453669273e-06, "loss": 0.1073, "step": 24662 }, { "epoch": 0.7194993873621565, "grad_norm": 0.8408754717403444, "learning_rate": 1.9253174811089892e-06, "loss": 0.1134, "step": 24663 }, { "epoch": 0.7195285605928, "grad_norm": 1.1023176611870964, "learning_rate": 1.9249449443010825e-06, "loss": 0.1243, "step": 24664 }, { "epoch": 0.7195577338234436, "grad_norm": 0.917530972983815, "learning_rate": 1.92457243494653e-06, "loss": 0.1195, "step": 24665 }, { "epoch": 0.7195869070540871, "grad_norm": 0.8137738459398199, "learning_rate": 1.9241999530486636e-06, "loss": 0.1069, "step": 24666 }, { "epoch": 0.7196160802847308, "grad_norm": 1.033795804264537, "learning_rate": 1.923827498610806e-06, "loss": 0.1276, "step": 24667 }, { "epoch": 0.7196452535153743, "grad_norm": 0.8122321928286242, "learning_rate": 1.923455071636281e-06, "loss": 0.1468, "step": 24668 }, { "epoch": 0.7196744267460179, "grad_norm": 1.1422027589284771, "learning_rate": 1.923082672128416e-06, "loss": 0.1115, "step": 24669 }, { "epoch": 0.7197035999766614, "grad_norm": 1.1093760544730045, "learning_rate": 1.9227103000905346e-06, "loss": 0.1183, "step": 24670 }, { "epoch": 0.719732773207305, "grad_norm": 0.862616174265635, "learning_rate": 1.9223379555259587e-06, "loss": 0.1089, "step": 24671 }, { "epoch": 0.7197619464379486, "grad_norm": 0.8495067077110474, "learning_rate": 1.921965638438015e-06, "loss": 0.1222, "step": 24672 }, { "epoch": 0.7197911196685921, "grad_norm": 1.1431146322021712, "learning_rate": 1.921593348830029e-06, "loss": 0.1173, "step": 24673 }, { "epoch": 0.7198202928992357, "grad_norm": 0.8670236887396516, "learning_rate": 1.9212210867053235e-06, "loss": 0.1199, "step": 24674 }, { "epoch": 0.7198494661298792, "grad_norm": 0.8445353815309623, "learning_rate": 1.9208488520672185e-06, "loss": 0.0979, "step": 24675 }, { "epoch": 0.7198786393605228, "grad_norm": 0.9907716916234535, "learning_rate": 1.9204766449190422e-06, "loss": 0.1297, "step": 24676 }, { "epoch": 0.7199078125911663, "grad_norm": 1.2213455095106163, "learning_rate": 1.9201044652641134e-06, "loss": 0.1272, "step": 24677 }, { "epoch": 0.7199369858218099, "grad_norm": 0.8837624913930062, "learning_rate": 1.9197323131057582e-06, "loss": 0.1103, "step": 24678 }, { "epoch": 0.7199661590524534, "grad_norm": 0.8819692157390737, "learning_rate": 1.9193601884472963e-06, "loss": 0.1219, "step": 24679 }, { "epoch": 0.719995332283097, "grad_norm": 0.9645443741095989, "learning_rate": 1.918988091292052e-06, "loss": 0.0995, "step": 24680 }, { "epoch": 0.7200245055137406, "grad_norm": 1.0528319556078618, "learning_rate": 1.9186160216433475e-06, "loss": 0.0999, "step": 24681 }, { "epoch": 0.7200536787443842, "grad_norm": 1.3322366515643702, "learning_rate": 1.9182439795045014e-06, "loss": 0.1169, "step": 24682 }, { "epoch": 0.7200828519750277, "grad_norm": 0.7331645887220264, "learning_rate": 1.917871964878838e-06, "loss": 0.1282, "step": 24683 }, { "epoch": 0.7201120252056713, "grad_norm": 0.8474656686504487, "learning_rate": 1.917499977769679e-06, "loss": 0.1031, "step": 24684 }, { "epoch": 0.7201411984363149, "grad_norm": 1.2961017605312743, "learning_rate": 1.9171280181803427e-06, "loss": 0.1291, "step": 24685 }, { "epoch": 0.7201703716669584, "grad_norm": 0.9402362561328396, "learning_rate": 1.916756086114153e-06, "loss": 0.111, "step": 24686 }, { "epoch": 0.720199544897602, "grad_norm": 1.0759894057376236, "learning_rate": 1.9163841815744295e-06, "loss": 0.1293, "step": 24687 }, { "epoch": 0.7202287181282455, "grad_norm": 0.9344755495602592, "learning_rate": 1.91601230456449e-06, "loss": 0.1148, "step": 24688 }, { "epoch": 0.7202578913588891, "grad_norm": 1.008159646040027, "learning_rate": 1.9156404550876563e-06, "loss": 0.119, "step": 24689 }, { "epoch": 0.7202870645895326, "grad_norm": 1.2464510090837626, "learning_rate": 1.9152686331472505e-06, "loss": 0.1168, "step": 24690 }, { "epoch": 0.7203162378201762, "grad_norm": 1.0474997117594915, "learning_rate": 1.9148968387465895e-06, "loss": 0.1237, "step": 24691 }, { "epoch": 0.7203454110508197, "grad_norm": 0.7978159082470053, "learning_rate": 1.914525071888991e-06, "loss": 0.1018, "step": 24692 }, { "epoch": 0.7203745842814633, "grad_norm": 0.77938685141896, "learning_rate": 1.9141533325777785e-06, "loss": 0.122, "step": 24693 }, { "epoch": 0.720403757512107, "grad_norm": 0.7419460825517156, "learning_rate": 1.913781620816268e-06, "loss": 0.1304, "step": 24694 }, { "epoch": 0.7204329307427505, "grad_norm": 1.1909270497883744, "learning_rate": 1.913409936607775e-06, "loss": 0.1239, "step": 24695 }, { "epoch": 0.720462103973394, "grad_norm": 0.7898081197878707, "learning_rate": 1.9130382799556253e-06, "loss": 0.1277, "step": 24696 }, { "epoch": 0.7204912772040376, "grad_norm": 0.7746590602597376, "learning_rate": 1.9126666508631324e-06, "loss": 0.115, "step": 24697 }, { "epoch": 0.7205204504346812, "grad_norm": 0.9502854095795547, "learning_rate": 1.912295049333613e-06, "loss": 0.1323, "step": 24698 }, { "epoch": 0.7205496236653247, "grad_norm": 1.0002000160621727, "learning_rate": 1.911923475370388e-06, "loss": 0.128, "step": 24699 }, { "epoch": 0.7205787968959683, "grad_norm": 0.8516728307827025, "learning_rate": 1.911551928976773e-06, "loss": 0.1122, "step": 24700 }, { "epoch": 0.7206079701266118, "grad_norm": 0.7974658657158188, "learning_rate": 1.911180410156083e-06, "loss": 0.1325, "step": 24701 }, { "epoch": 0.7206371433572554, "grad_norm": 0.8218616731306487, "learning_rate": 1.9108089189116374e-06, "loss": 0.1095, "step": 24702 }, { "epoch": 0.7206663165878989, "grad_norm": 0.771458787265972, "learning_rate": 1.9104374552467542e-06, "loss": 0.1242, "step": 24703 }, { "epoch": 0.7206954898185425, "grad_norm": 0.9622223656294758, "learning_rate": 1.910066019164748e-06, "loss": 0.1117, "step": 24704 }, { "epoch": 0.720724663049186, "grad_norm": 0.8817943316492531, "learning_rate": 1.9096946106689322e-06, "loss": 0.1244, "step": 24705 }, { "epoch": 0.7207538362798296, "grad_norm": 0.8791277836520458, "learning_rate": 1.9093232297626278e-06, "loss": 0.14, "step": 24706 }, { "epoch": 0.7207830095104731, "grad_norm": 0.868448183477267, "learning_rate": 1.9089518764491453e-06, "loss": 0.0991, "step": 24707 }, { "epoch": 0.7208121827411168, "grad_norm": 0.821549394991177, "learning_rate": 1.908580550731805e-06, "loss": 0.1266, "step": 24708 }, { "epoch": 0.7208413559717604, "grad_norm": 0.7550515482338176, "learning_rate": 1.9082092526139175e-06, "loss": 0.1109, "step": 24709 }, { "epoch": 0.7208705292024039, "grad_norm": 0.8146217101684364, "learning_rate": 1.9078379820988014e-06, "loss": 0.0992, "step": 24710 }, { "epoch": 0.7208997024330475, "grad_norm": 0.9098167480942209, "learning_rate": 1.9074667391897694e-06, "loss": 0.1129, "step": 24711 }, { "epoch": 0.720928875663691, "grad_norm": 0.8360465489386156, "learning_rate": 1.9070955238901352e-06, "loss": 0.1102, "step": 24712 }, { "epoch": 0.7209580488943346, "grad_norm": 1.012354929219495, "learning_rate": 1.9067243362032128e-06, "loss": 0.144, "step": 24713 }, { "epoch": 0.7209872221249781, "grad_norm": 0.6986973803491306, "learning_rate": 1.9063531761323195e-06, "loss": 0.1103, "step": 24714 }, { "epoch": 0.7210163953556217, "grad_norm": 0.7190303083356872, "learning_rate": 1.9059820436807646e-06, "loss": 0.127, "step": 24715 }, { "epoch": 0.7210455685862652, "grad_norm": 0.7472901284349369, "learning_rate": 1.9056109388518652e-06, "loss": 0.1149, "step": 24716 }, { "epoch": 0.7210747418169088, "grad_norm": 0.8931698248751406, "learning_rate": 1.9052398616489325e-06, "loss": 0.1285, "step": 24717 }, { "epoch": 0.7211039150475523, "grad_norm": 0.6280847754916922, "learning_rate": 1.9048688120752785e-06, "loss": 0.0837, "step": 24718 }, { "epoch": 0.7211330882781959, "grad_norm": 0.664066802237107, "learning_rate": 1.904497790134216e-06, "loss": 0.129, "step": 24719 }, { "epoch": 0.7211622615088394, "grad_norm": 0.9312753488511102, "learning_rate": 1.9041267958290604e-06, "loss": 0.1249, "step": 24720 }, { "epoch": 0.721191434739483, "grad_norm": 0.9114255991829319, "learning_rate": 1.9037558291631215e-06, "loss": 0.098, "step": 24721 }, { "epoch": 0.7212206079701267, "grad_norm": 0.8471499709460484, "learning_rate": 1.9033848901397101e-06, "loss": 0.1275, "step": 24722 }, { "epoch": 0.7212497812007702, "grad_norm": 0.8323807539702123, "learning_rate": 1.9030139787621405e-06, "loss": 0.1179, "step": 24723 }, { "epoch": 0.7212789544314138, "grad_norm": 0.9121685995045828, "learning_rate": 1.9026430950337227e-06, "loss": 0.1273, "step": 24724 }, { "epoch": 0.7213081276620573, "grad_norm": 0.8911803084946347, "learning_rate": 1.9022722389577648e-06, "loss": 0.143, "step": 24725 }, { "epoch": 0.7213373008927009, "grad_norm": 0.8695092308868242, "learning_rate": 1.9019014105375843e-06, "loss": 0.1317, "step": 24726 }, { "epoch": 0.7213664741233444, "grad_norm": 1.005277111469028, "learning_rate": 1.9015306097764885e-06, "loss": 0.1261, "step": 24727 }, { "epoch": 0.721395647353988, "grad_norm": 0.9282153258894128, "learning_rate": 1.9011598366777855e-06, "loss": 0.1347, "step": 24728 }, { "epoch": 0.7214248205846315, "grad_norm": 0.823270135603786, "learning_rate": 1.9007890912447902e-06, "loss": 0.1142, "step": 24729 }, { "epoch": 0.7214539938152751, "grad_norm": 0.7616597909380265, "learning_rate": 1.9004183734808097e-06, "loss": 0.1229, "step": 24730 }, { "epoch": 0.7214831670459186, "grad_norm": 1.1750734034252135, "learning_rate": 1.9000476833891518e-06, "loss": 0.1339, "step": 24731 }, { "epoch": 0.7215123402765622, "grad_norm": 1.0496333424439988, "learning_rate": 1.8996770209731291e-06, "loss": 0.1049, "step": 24732 }, { "epoch": 0.7215415135072057, "grad_norm": 0.8141086163389445, "learning_rate": 1.8993063862360512e-06, "loss": 0.1375, "step": 24733 }, { "epoch": 0.7215706867378493, "grad_norm": 0.8109456699197971, "learning_rate": 1.8989357791812253e-06, "loss": 0.1074, "step": 24734 }, { "epoch": 0.721599859968493, "grad_norm": 0.9727899938713124, "learning_rate": 1.8985651998119592e-06, "loss": 0.114, "step": 24735 }, { "epoch": 0.7216290331991365, "grad_norm": 0.8227569421172481, "learning_rate": 1.8981946481315645e-06, "loss": 0.1009, "step": 24736 }, { "epoch": 0.7216582064297801, "grad_norm": 0.8011658588593519, "learning_rate": 1.8978241241433454e-06, "loss": 0.0992, "step": 24737 }, { "epoch": 0.7216873796604236, "grad_norm": 0.7985781412771426, "learning_rate": 1.8974536278506134e-06, "loss": 0.1265, "step": 24738 }, { "epoch": 0.7217165528910672, "grad_norm": 0.6669561995164679, "learning_rate": 1.8970831592566734e-06, "loss": 0.1188, "step": 24739 }, { "epoch": 0.7217457261217107, "grad_norm": 0.894483025862555, "learning_rate": 1.8967127183648365e-06, "loss": 0.1097, "step": 24740 }, { "epoch": 0.7217748993523543, "grad_norm": 0.9363555923795046, "learning_rate": 1.896342305178407e-06, "loss": 0.1145, "step": 24741 }, { "epoch": 0.7218040725829978, "grad_norm": 0.8364978455189405, "learning_rate": 1.8959719197006909e-06, "loss": 0.1218, "step": 24742 }, { "epoch": 0.7218332458136414, "grad_norm": 0.8276715893798426, "learning_rate": 1.8956015619349966e-06, "loss": 0.1287, "step": 24743 }, { "epoch": 0.7218624190442849, "grad_norm": 0.8775399174639029, "learning_rate": 1.8952312318846323e-06, "loss": 0.135, "step": 24744 }, { "epoch": 0.7218915922749285, "grad_norm": 1.222685766362629, "learning_rate": 1.8948609295529002e-06, "loss": 0.0928, "step": 24745 }, { "epoch": 0.721920765505572, "grad_norm": 0.961631466462116, "learning_rate": 1.8944906549431108e-06, "loss": 0.1305, "step": 24746 }, { "epoch": 0.7219499387362156, "grad_norm": 1.0119973346594153, "learning_rate": 1.8941204080585667e-06, "loss": 0.1436, "step": 24747 }, { "epoch": 0.7219791119668592, "grad_norm": 1.1602630861195822, "learning_rate": 1.8937501889025732e-06, "loss": 0.1256, "step": 24748 }, { "epoch": 0.7220082851975028, "grad_norm": 1.1980093307519621, "learning_rate": 1.893379997478436e-06, "loss": 0.1248, "step": 24749 }, { "epoch": 0.7220374584281464, "grad_norm": 1.202511537934108, "learning_rate": 1.8930098337894626e-06, "loss": 0.1243, "step": 24750 }, { "epoch": 0.7220666316587899, "grad_norm": 0.7734793074211808, "learning_rate": 1.8926396978389554e-06, "loss": 0.1225, "step": 24751 }, { "epoch": 0.7220958048894335, "grad_norm": 0.9730790432600112, "learning_rate": 1.892269589630218e-06, "loss": 0.1315, "step": 24752 }, { "epoch": 0.722124978120077, "grad_norm": 1.2765906400452278, "learning_rate": 1.891899509166557e-06, "loss": 0.1545, "step": 24753 }, { "epoch": 0.7221541513507206, "grad_norm": 0.9315206832768864, "learning_rate": 1.8915294564512737e-06, "loss": 0.1215, "step": 24754 }, { "epoch": 0.7221833245813641, "grad_norm": 0.745017946426986, "learning_rate": 1.8911594314876736e-06, "loss": 0.1117, "step": 24755 }, { "epoch": 0.7222124978120077, "grad_norm": 0.8528260442145464, "learning_rate": 1.8907894342790617e-06, "loss": 0.1519, "step": 24756 }, { "epoch": 0.7222416710426512, "grad_norm": 0.9805517312070635, "learning_rate": 1.8904194648287394e-06, "loss": 0.1131, "step": 24757 }, { "epoch": 0.7222708442732948, "grad_norm": 0.7800230015632025, "learning_rate": 1.8900495231400079e-06, "loss": 0.1179, "step": 24758 }, { "epoch": 0.7223000175039384, "grad_norm": 0.9897083552934886, "learning_rate": 1.8896796092161735e-06, "loss": 0.1299, "step": 24759 }, { "epoch": 0.7223291907345819, "grad_norm": 0.8757966254637451, "learning_rate": 1.8893097230605356e-06, "loss": 0.1204, "step": 24760 }, { "epoch": 0.7223583639652255, "grad_norm": 1.1702030094005131, "learning_rate": 1.888939864676399e-06, "loss": 0.1134, "step": 24761 }, { "epoch": 0.7223875371958691, "grad_norm": 0.8368577500639368, "learning_rate": 1.8885700340670638e-06, "loss": 0.12, "step": 24762 }, { "epoch": 0.7224167104265127, "grad_norm": 0.8328544580460618, "learning_rate": 1.8882002312358337e-06, "loss": 0.1248, "step": 24763 }, { "epoch": 0.7224458836571562, "grad_norm": 0.7411473191067168, "learning_rate": 1.8878304561860094e-06, "loss": 0.0925, "step": 24764 }, { "epoch": 0.7224750568877998, "grad_norm": 0.8998334911980462, "learning_rate": 1.8874607089208901e-06, "loss": 0.131, "step": 24765 }, { "epoch": 0.7225042301184433, "grad_norm": 0.8310207083478498, "learning_rate": 1.8870909894437783e-06, "loss": 0.1492, "step": 24766 }, { "epoch": 0.7225334033490869, "grad_norm": 0.6911958778317953, "learning_rate": 1.886721297757977e-06, "loss": 0.1091, "step": 24767 }, { "epoch": 0.7225625765797304, "grad_norm": 0.7323282250704054, "learning_rate": 1.8863516338667847e-06, "loss": 0.1067, "step": 24768 }, { "epoch": 0.722591749810374, "grad_norm": 0.7857957241548171, "learning_rate": 1.8859819977735e-06, "loss": 0.1111, "step": 24769 }, { "epoch": 0.7226209230410175, "grad_norm": 0.758324899343193, "learning_rate": 1.885612389481426e-06, "loss": 0.1268, "step": 24770 }, { "epoch": 0.7226500962716611, "grad_norm": 0.8682851454231227, "learning_rate": 1.885242808993862e-06, "loss": 0.1303, "step": 24771 }, { "epoch": 0.7226792695023047, "grad_norm": 0.7170521968542871, "learning_rate": 1.8848732563141026e-06, "loss": 0.1328, "step": 24772 }, { "epoch": 0.7227084427329482, "grad_norm": 0.945809200661344, "learning_rate": 1.8845037314454544e-06, "loss": 0.1194, "step": 24773 }, { "epoch": 0.7227376159635918, "grad_norm": 0.8845968746110616, "learning_rate": 1.8841342343912134e-06, "loss": 0.1156, "step": 24774 }, { "epoch": 0.7227667891942353, "grad_norm": 0.953148813227204, "learning_rate": 1.8837647651546765e-06, "loss": 0.1114, "step": 24775 }, { "epoch": 0.722795962424879, "grad_norm": 0.8157125894220421, "learning_rate": 1.8833953237391456e-06, "loss": 0.1212, "step": 24776 }, { "epoch": 0.7228251356555225, "grad_norm": 1.2872659325605247, "learning_rate": 1.883025910147917e-06, "loss": 0.1063, "step": 24777 }, { "epoch": 0.7228543088861661, "grad_norm": 0.9941306782651606, "learning_rate": 1.8826565243842877e-06, "loss": 0.1342, "step": 24778 }, { "epoch": 0.7228834821168096, "grad_norm": 0.8272135801291861, "learning_rate": 1.8822871664515562e-06, "loss": 0.1115, "step": 24779 }, { "epoch": 0.7229126553474532, "grad_norm": 0.8707805844517327, "learning_rate": 1.8819178363530226e-06, "loss": 0.101, "step": 24780 }, { "epoch": 0.7229418285780967, "grad_norm": 1.0051174270341248, "learning_rate": 1.8815485340919825e-06, "loss": 0.1169, "step": 24781 }, { "epoch": 0.7229710018087403, "grad_norm": 1.081533538448045, "learning_rate": 1.881179259671731e-06, "loss": 0.0972, "step": 24782 }, { "epoch": 0.7230001750393839, "grad_norm": 0.6500338650694056, "learning_rate": 1.8808100130955676e-06, "loss": 0.1131, "step": 24783 }, { "epoch": 0.7230293482700274, "grad_norm": 0.8756280400598867, "learning_rate": 1.8804407943667869e-06, "loss": 0.1173, "step": 24784 }, { "epoch": 0.723058521500671, "grad_norm": 0.861538997600829, "learning_rate": 1.880071603488685e-06, "loss": 0.0986, "step": 24785 }, { "epoch": 0.7230876947313145, "grad_norm": 0.740512192965018, "learning_rate": 1.879702440464562e-06, "loss": 0.1009, "step": 24786 }, { "epoch": 0.7231168679619581, "grad_norm": 0.9904485291951682, "learning_rate": 1.8793333052977098e-06, "loss": 0.1195, "step": 24787 }, { "epoch": 0.7231460411926016, "grad_norm": 0.8104199356771068, "learning_rate": 1.8789641979914237e-06, "loss": 0.1142, "step": 24788 }, { "epoch": 0.7231752144232453, "grad_norm": 0.7682817340857583, "learning_rate": 1.8785951185490014e-06, "loss": 0.1132, "step": 24789 }, { "epoch": 0.7232043876538888, "grad_norm": 0.8446815571771478, "learning_rate": 1.8782260669737357e-06, "loss": 0.1306, "step": 24790 }, { "epoch": 0.7232335608845324, "grad_norm": 0.7795864492754341, "learning_rate": 1.8778570432689236e-06, "loss": 0.1157, "step": 24791 }, { "epoch": 0.7232627341151759, "grad_norm": 0.7912001186866039, "learning_rate": 1.8774880474378571e-06, "loss": 0.1042, "step": 24792 }, { "epoch": 0.7232919073458195, "grad_norm": 1.1973824302252325, "learning_rate": 1.8771190794838333e-06, "loss": 0.1379, "step": 24793 }, { "epoch": 0.723321080576463, "grad_norm": 0.8313170598836324, "learning_rate": 1.876750139410145e-06, "loss": 0.1326, "step": 24794 }, { "epoch": 0.7233502538071066, "grad_norm": 0.7775042501154492, "learning_rate": 1.8763812272200843e-06, "loss": 0.1114, "step": 24795 }, { "epoch": 0.7233794270377502, "grad_norm": 0.9891694611844759, "learning_rate": 1.8760123429169464e-06, "loss": 0.119, "step": 24796 }, { "epoch": 0.7234086002683937, "grad_norm": 0.7944133919249249, "learning_rate": 1.8756434865040262e-06, "loss": 0.1205, "step": 24797 }, { "epoch": 0.7234377734990373, "grad_norm": 0.7436630045707892, "learning_rate": 1.8752746579846148e-06, "loss": 0.1104, "step": 24798 }, { "epoch": 0.7234669467296808, "grad_norm": 1.0096995984949833, "learning_rate": 1.8749058573620039e-06, "loss": 0.1255, "step": 24799 }, { "epoch": 0.7234961199603244, "grad_norm": 0.8741876356871757, "learning_rate": 1.8745370846394894e-06, "loss": 0.0964, "step": 24800 }, { "epoch": 0.7235252931909679, "grad_norm": 0.8314448827100319, "learning_rate": 1.8741683398203614e-06, "loss": 0.1074, "step": 24801 }, { "epoch": 0.7235544664216115, "grad_norm": 0.966429973558122, "learning_rate": 1.8737996229079086e-06, "loss": 0.1169, "step": 24802 }, { "epoch": 0.7235836396522551, "grad_norm": 0.8734627794263268, "learning_rate": 1.8734309339054308e-06, "loss": 0.1107, "step": 24803 }, { "epoch": 0.7236128128828987, "grad_norm": 1.170058015271319, "learning_rate": 1.8730622728162146e-06, "loss": 0.1326, "step": 24804 }, { "epoch": 0.7236419861135422, "grad_norm": 0.9210037414578498, "learning_rate": 1.8726936396435502e-06, "loss": 0.1201, "step": 24805 }, { "epoch": 0.7236711593441858, "grad_norm": 0.7295522152079452, "learning_rate": 1.8723250343907323e-06, "loss": 0.1044, "step": 24806 }, { "epoch": 0.7237003325748294, "grad_norm": 0.9642615316670107, "learning_rate": 1.8719564570610494e-06, "loss": 0.1201, "step": 24807 }, { "epoch": 0.7237295058054729, "grad_norm": 0.8333528157145845, "learning_rate": 1.8715879076577915e-06, "loss": 0.1115, "step": 24808 }, { "epoch": 0.7237586790361165, "grad_norm": 0.8918676910593354, "learning_rate": 1.8712193861842498e-06, "loss": 0.1096, "step": 24809 }, { "epoch": 0.72378785226676, "grad_norm": 0.9016907346872983, "learning_rate": 1.8708508926437157e-06, "loss": 0.1103, "step": 24810 }, { "epoch": 0.7238170254974036, "grad_norm": 1.0659294528626322, "learning_rate": 1.8704824270394783e-06, "loss": 0.1153, "step": 24811 }, { "epoch": 0.7238461987280471, "grad_norm": 0.9068302087451171, "learning_rate": 1.870113989374825e-06, "loss": 0.1364, "step": 24812 }, { "epoch": 0.7238753719586907, "grad_norm": 1.0306284501844045, "learning_rate": 1.8697455796530483e-06, "loss": 0.1066, "step": 24813 }, { "epoch": 0.7239045451893342, "grad_norm": 1.1067347553094529, "learning_rate": 1.8693771978774345e-06, "loss": 0.1244, "step": 24814 }, { "epoch": 0.7239337184199778, "grad_norm": 0.9523719170841769, "learning_rate": 1.8690088440512738e-06, "loss": 0.1223, "step": 24815 }, { "epoch": 0.7239628916506214, "grad_norm": 0.7094855379979579, "learning_rate": 1.8686405181778562e-06, "loss": 0.1037, "step": 24816 }, { "epoch": 0.723992064881265, "grad_norm": 1.1803413326087289, "learning_rate": 1.8682722202604681e-06, "loss": 0.1171, "step": 24817 }, { "epoch": 0.7240212381119085, "grad_norm": 1.0737906151829388, "learning_rate": 1.8679039503023972e-06, "loss": 0.1283, "step": 24818 }, { "epoch": 0.7240504113425521, "grad_norm": 0.8839486697134835, "learning_rate": 1.8675357083069328e-06, "loss": 0.1355, "step": 24819 }, { "epoch": 0.7240795845731957, "grad_norm": 0.9648421191721859, "learning_rate": 1.867167494277361e-06, "loss": 0.1232, "step": 24820 }, { "epoch": 0.7241087578038392, "grad_norm": 1.0422206619084138, "learning_rate": 1.8667993082169712e-06, "loss": 0.1416, "step": 24821 }, { "epoch": 0.7241379310344828, "grad_norm": 1.1206186102912703, "learning_rate": 1.8664311501290478e-06, "loss": 0.1203, "step": 24822 }, { "epoch": 0.7241671042651263, "grad_norm": 1.2156421710641572, "learning_rate": 1.8660630200168806e-06, "loss": 0.1068, "step": 24823 }, { "epoch": 0.7241962774957699, "grad_norm": 0.7844188174103284, "learning_rate": 1.8656949178837547e-06, "loss": 0.122, "step": 24824 }, { "epoch": 0.7242254507264134, "grad_norm": 0.707850413303378, "learning_rate": 1.8653268437329542e-06, "loss": 0.1119, "step": 24825 }, { "epoch": 0.724254623957057, "grad_norm": 1.2279871966228981, "learning_rate": 1.864958797567768e-06, "loss": 0.1229, "step": 24826 }, { "epoch": 0.7242837971877005, "grad_norm": 0.9589869772952597, "learning_rate": 1.8645907793914826e-06, "loss": 0.1132, "step": 24827 }, { "epoch": 0.7243129704183441, "grad_norm": 0.753126405914351, "learning_rate": 1.864222789207382e-06, "loss": 0.1301, "step": 24828 }, { "epoch": 0.7243421436489876, "grad_norm": 0.7513366561510253, "learning_rate": 1.8638548270187505e-06, "loss": 0.1166, "step": 24829 }, { "epoch": 0.7243713168796313, "grad_norm": 0.9634536537768936, "learning_rate": 1.8634868928288757e-06, "loss": 0.115, "step": 24830 }, { "epoch": 0.7244004901102749, "grad_norm": 0.8460884475997975, "learning_rate": 1.863118986641042e-06, "loss": 0.1171, "step": 24831 }, { "epoch": 0.7244296633409184, "grad_norm": 0.8555442129602252, "learning_rate": 1.8627511084585293e-06, "loss": 0.1067, "step": 24832 }, { "epoch": 0.724458836571562, "grad_norm": 0.7231277466632015, "learning_rate": 1.8623832582846291e-06, "loss": 0.1054, "step": 24833 }, { "epoch": 0.7244880098022055, "grad_norm": 1.2338736174024925, "learning_rate": 1.8620154361226218e-06, "loss": 0.1427, "step": 24834 }, { "epoch": 0.7245171830328491, "grad_norm": 0.8570689943214541, "learning_rate": 1.8616476419757907e-06, "loss": 0.0983, "step": 24835 }, { "epoch": 0.7245463562634926, "grad_norm": 0.7948866818502712, "learning_rate": 1.861279875847421e-06, "loss": 0.139, "step": 24836 }, { "epoch": 0.7245755294941362, "grad_norm": 0.8685926764892993, "learning_rate": 1.8609121377407963e-06, "loss": 0.1235, "step": 24837 }, { "epoch": 0.7246047027247797, "grad_norm": 0.8490860723033917, "learning_rate": 1.8605444276591961e-06, "loss": 0.1263, "step": 24838 }, { "epoch": 0.7246338759554233, "grad_norm": 0.9168573640964627, "learning_rate": 1.8601767456059062e-06, "loss": 0.1075, "step": 24839 }, { "epoch": 0.7246630491860668, "grad_norm": 0.7589705049956947, "learning_rate": 1.8598090915842105e-06, "loss": 0.1204, "step": 24840 }, { "epoch": 0.7246922224167104, "grad_norm": 0.7442359511864499, "learning_rate": 1.8594414655973898e-06, "loss": 0.1145, "step": 24841 }, { "epoch": 0.7247213956473539, "grad_norm": 0.8111783044886859, "learning_rate": 1.8590738676487242e-06, "loss": 0.1094, "step": 24842 }, { "epoch": 0.7247505688779976, "grad_norm": 1.3516789518929335, "learning_rate": 1.8587062977414987e-06, "loss": 0.1195, "step": 24843 }, { "epoch": 0.7247797421086412, "grad_norm": 0.7454359558463477, "learning_rate": 1.8583387558789916e-06, "loss": 0.1217, "step": 24844 }, { "epoch": 0.7248089153392847, "grad_norm": 0.8358274388093241, "learning_rate": 1.8579712420644869e-06, "loss": 0.1352, "step": 24845 }, { "epoch": 0.7248380885699283, "grad_norm": 0.6239342387495486, "learning_rate": 1.8576037563012662e-06, "loss": 0.0952, "step": 24846 }, { "epoch": 0.7248672618005718, "grad_norm": 0.7656401194383812, "learning_rate": 1.857236298592609e-06, "loss": 0.1604, "step": 24847 }, { "epoch": 0.7248964350312154, "grad_norm": 1.0440809395948463, "learning_rate": 1.856868868941794e-06, "loss": 0.1326, "step": 24848 }, { "epoch": 0.7249256082618589, "grad_norm": 0.8446142292096692, "learning_rate": 1.8565014673521053e-06, "loss": 0.1331, "step": 24849 }, { "epoch": 0.7249547814925025, "grad_norm": 0.814132853791011, "learning_rate": 1.8561340938268196e-06, "loss": 0.1386, "step": 24850 }, { "epoch": 0.724983954723146, "grad_norm": 0.7609068087208614, "learning_rate": 1.8557667483692193e-06, "loss": 0.1274, "step": 24851 }, { "epoch": 0.7250131279537896, "grad_norm": 0.9634910209977473, "learning_rate": 1.8553994309825818e-06, "loss": 0.1122, "step": 24852 }, { "epoch": 0.7250423011844331, "grad_norm": 0.7403786149102356, "learning_rate": 1.8550321416701888e-06, "loss": 0.114, "step": 24853 }, { "epoch": 0.7250714744150767, "grad_norm": 0.8961740815132433, "learning_rate": 1.8546648804353185e-06, "loss": 0.1115, "step": 24854 }, { "epoch": 0.7251006476457202, "grad_norm": 0.7385042438001875, "learning_rate": 1.8542976472812474e-06, "loss": 0.0985, "step": 24855 }, { "epoch": 0.7251298208763638, "grad_norm": 1.104591922664808, "learning_rate": 1.8539304422112558e-06, "loss": 0.1389, "step": 24856 }, { "epoch": 0.7251589941070075, "grad_norm": 0.6378269624568983, "learning_rate": 1.853563265228624e-06, "loss": 0.1044, "step": 24857 }, { "epoch": 0.725188167337651, "grad_norm": 0.8157868387104741, "learning_rate": 1.853196116336628e-06, "loss": 0.1358, "step": 24858 }, { "epoch": 0.7252173405682946, "grad_norm": 0.7862279870745205, "learning_rate": 1.8528289955385443e-06, "loss": 0.1112, "step": 24859 }, { "epoch": 0.7252465137989381, "grad_norm": 0.9385475456855338, "learning_rate": 1.8524619028376539e-06, "loss": 0.11, "step": 24860 }, { "epoch": 0.7252756870295817, "grad_norm": 0.7027755140495263, "learning_rate": 1.8520948382372323e-06, "loss": 0.1069, "step": 24861 }, { "epoch": 0.7253048602602252, "grad_norm": 0.8164494945298956, "learning_rate": 1.8517278017405532e-06, "loss": 0.125, "step": 24862 }, { "epoch": 0.7253340334908688, "grad_norm": 0.7517332609638138, "learning_rate": 1.8513607933508999e-06, "loss": 0.1097, "step": 24863 }, { "epoch": 0.7253632067215123, "grad_norm": 0.7674813511763025, "learning_rate": 1.8509938130715455e-06, "loss": 0.0931, "step": 24864 }, { "epoch": 0.7253923799521559, "grad_norm": 0.8343587845088293, "learning_rate": 1.8506268609057653e-06, "loss": 0.115, "step": 24865 }, { "epoch": 0.7254215531827994, "grad_norm": 0.7310635549617341, "learning_rate": 1.8502599368568387e-06, "loss": 0.1136, "step": 24866 }, { "epoch": 0.725450726413443, "grad_norm": 0.6595938915712233, "learning_rate": 1.8498930409280392e-06, "loss": 0.1036, "step": 24867 }, { "epoch": 0.7254798996440865, "grad_norm": 0.7563890394954799, "learning_rate": 1.8495261731226404e-06, "loss": 0.0987, "step": 24868 }, { "epoch": 0.7255090728747301, "grad_norm": 0.8098649358955136, "learning_rate": 1.8491593334439206e-06, "loss": 0.1129, "step": 24869 }, { "epoch": 0.7255382461053738, "grad_norm": 0.7967892081724012, "learning_rate": 1.8487925218951553e-06, "loss": 0.1011, "step": 24870 }, { "epoch": 0.7255674193360173, "grad_norm": 0.9593705090427759, "learning_rate": 1.8484257384796184e-06, "loss": 0.1015, "step": 24871 }, { "epoch": 0.7255965925666609, "grad_norm": 0.8414936894015588, "learning_rate": 1.8480589832005824e-06, "loss": 0.1047, "step": 24872 }, { "epoch": 0.7256257657973044, "grad_norm": 0.8877625900422822, "learning_rate": 1.8476922560613247e-06, "loss": 0.1357, "step": 24873 }, { "epoch": 0.725654939027948, "grad_norm": 0.9182396538995153, "learning_rate": 1.8473255570651167e-06, "loss": 0.1372, "step": 24874 }, { "epoch": 0.7256841122585915, "grad_norm": 0.7526035537212655, "learning_rate": 1.8469588862152338e-06, "loss": 0.1043, "step": 24875 }, { "epoch": 0.7257132854892351, "grad_norm": 0.7054903788913072, "learning_rate": 1.8465922435149502e-06, "loss": 0.1259, "step": 24876 }, { "epoch": 0.7257424587198786, "grad_norm": 0.8085560954450418, "learning_rate": 1.846225628967539e-06, "loss": 0.1112, "step": 24877 }, { "epoch": 0.7257716319505222, "grad_norm": 0.8898010958408633, "learning_rate": 1.8458590425762707e-06, "loss": 0.1112, "step": 24878 }, { "epoch": 0.7258008051811657, "grad_norm": 0.6961278063567853, "learning_rate": 1.8454924843444216e-06, "loss": 0.1043, "step": 24879 }, { "epoch": 0.7258299784118093, "grad_norm": 0.9587722189224249, "learning_rate": 1.8451259542752603e-06, "loss": 0.1149, "step": 24880 }, { "epoch": 0.7258591516424528, "grad_norm": 0.7762580689387995, "learning_rate": 1.8447594523720636e-06, "loss": 0.1154, "step": 24881 }, { "epoch": 0.7258883248730964, "grad_norm": 0.8285094579084039, "learning_rate": 1.8443929786380994e-06, "loss": 0.0861, "step": 24882 }, { "epoch": 0.72591749810374, "grad_norm": 0.7726023454221855, "learning_rate": 1.8440265330766432e-06, "loss": 0.1172, "step": 24883 }, { "epoch": 0.7259466713343836, "grad_norm": 0.7417302916526094, "learning_rate": 1.8436601156909645e-06, "loss": 0.1268, "step": 24884 }, { "epoch": 0.7259758445650272, "grad_norm": 0.8036061521060736, "learning_rate": 1.8432937264843338e-06, "loss": 0.1074, "step": 24885 }, { "epoch": 0.7260050177956707, "grad_norm": 0.8508839085544653, "learning_rate": 1.8429273654600221e-06, "loss": 0.1046, "step": 24886 }, { "epoch": 0.7260341910263143, "grad_norm": 0.8193173401699769, "learning_rate": 1.8425610326213034e-06, "loss": 0.1253, "step": 24887 }, { "epoch": 0.7260633642569578, "grad_norm": 0.7690229197874942, "learning_rate": 1.8421947279714464e-06, "loss": 0.1248, "step": 24888 }, { "epoch": 0.7260925374876014, "grad_norm": 0.8534059239823847, "learning_rate": 1.8418284515137192e-06, "loss": 0.0993, "step": 24889 }, { "epoch": 0.7261217107182449, "grad_norm": 1.0321327097637345, "learning_rate": 1.8414622032513952e-06, "loss": 0.1436, "step": 24890 }, { "epoch": 0.7261508839488885, "grad_norm": 0.7811226031964913, "learning_rate": 1.8410959831877423e-06, "loss": 0.1093, "step": 24891 }, { "epoch": 0.726180057179532, "grad_norm": 0.8800829918309522, "learning_rate": 1.8407297913260274e-06, "loss": 0.1217, "step": 24892 }, { "epoch": 0.7262092304101756, "grad_norm": 1.0248503224959675, "learning_rate": 1.8403636276695263e-06, "loss": 0.1079, "step": 24893 }, { "epoch": 0.7262384036408192, "grad_norm": 0.7391089423226811, "learning_rate": 1.8399974922215042e-06, "loss": 0.107, "step": 24894 }, { "epoch": 0.7262675768714627, "grad_norm": 1.0746904798867014, "learning_rate": 1.8396313849852281e-06, "loss": 0.1146, "step": 24895 }, { "epoch": 0.7262967501021063, "grad_norm": 0.8649387591485509, "learning_rate": 1.8392653059639709e-06, "loss": 0.116, "step": 24896 }, { "epoch": 0.7263259233327499, "grad_norm": 0.9121882526351213, "learning_rate": 1.838899255160998e-06, "loss": 0.1134, "step": 24897 }, { "epoch": 0.7263550965633935, "grad_norm": 0.965498288637953, "learning_rate": 1.838533232579577e-06, "loss": 0.1362, "step": 24898 }, { "epoch": 0.726384269794037, "grad_norm": 0.836395609570536, "learning_rate": 1.838167238222976e-06, "loss": 0.1197, "step": 24899 }, { "epoch": 0.7264134430246806, "grad_norm": 0.7118901604503297, "learning_rate": 1.8378012720944649e-06, "loss": 0.1145, "step": 24900 }, { "epoch": 0.7264426162553241, "grad_norm": 0.7555779815364517, "learning_rate": 1.837435334197309e-06, "loss": 0.1266, "step": 24901 }, { "epoch": 0.7264717894859677, "grad_norm": 0.9109027979813684, "learning_rate": 1.8370694245347736e-06, "loss": 0.1153, "step": 24902 }, { "epoch": 0.7265009627166112, "grad_norm": 1.0025817004775293, "learning_rate": 1.8367035431101293e-06, "loss": 0.1309, "step": 24903 }, { "epoch": 0.7265301359472548, "grad_norm": 0.8049345831601331, "learning_rate": 1.8363376899266394e-06, "loss": 0.1137, "step": 24904 }, { "epoch": 0.7265593091778983, "grad_norm": 0.8043646836958517, "learning_rate": 1.8359718649875708e-06, "loss": 0.1176, "step": 24905 }, { "epoch": 0.7265884824085419, "grad_norm": 1.1209706309388336, "learning_rate": 1.8356060682961918e-06, "loss": 0.1536, "step": 24906 }, { "epoch": 0.7266176556391855, "grad_norm": 0.7374726980267576, "learning_rate": 1.8352402998557667e-06, "loss": 0.1154, "step": 24907 }, { "epoch": 0.726646828869829, "grad_norm": 0.8924252687802675, "learning_rate": 1.834874559669559e-06, "loss": 0.0989, "step": 24908 }, { "epoch": 0.7266760021004726, "grad_norm": 0.830323596731376, "learning_rate": 1.8345088477408368e-06, "loss": 0.1395, "step": 24909 }, { "epoch": 0.7267051753311161, "grad_norm": 1.0699065525042704, "learning_rate": 1.834143164072863e-06, "loss": 0.1019, "step": 24910 }, { "epoch": 0.7267343485617598, "grad_norm": 1.0995388157216845, "learning_rate": 1.8337775086689047e-06, "loss": 0.1228, "step": 24911 }, { "epoch": 0.7267635217924033, "grad_norm": 0.9635517309064618, "learning_rate": 1.8334118815322233e-06, "loss": 0.1294, "step": 24912 }, { "epoch": 0.7267926950230469, "grad_norm": 1.1126481145869898, "learning_rate": 1.833046282666086e-06, "loss": 0.1157, "step": 24913 }, { "epoch": 0.7268218682536904, "grad_norm": 1.0521044133004638, "learning_rate": 1.832680712073756e-06, "loss": 0.1127, "step": 24914 }, { "epoch": 0.726851041484334, "grad_norm": 0.8842440041359714, "learning_rate": 1.8323151697584946e-06, "loss": 0.1122, "step": 24915 }, { "epoch": 0.7268802147149775, "grad_norm": 0.7412429925750073, "learning_rate": 1.8319496557235667e-06, "loss": 0.1134, "step": 24916 }, { "epoch": 0.7269093879456211, "grad_norm": 0.8617024744473915, "learning_rate": 1.8315841699722386e-06, "loss": 0.1233, "step": 24917 }, { "epoch": 0.7269385611762647, "grad_norm": 0.7391870650891543, "learning_rate": 1.8312187125077703e-06, "loss": 0.0984, "step": 24918 }, { "epoch": 0.7269677344069082, "grad_norm": 0.8149588234882889, "learning_rate": 1.830853283333423e-06, "loss": 0.1067, "step": 24919 }, { "epoch": 0.7269969076375518, "grad_norm": 0.9990044124623102, "learning_rate": 1.8304878824524625e-06, "loss": 0.1382, "step": 24920 }, { "epoch": 0.7270260808681953, "grad_norm": 0.9153443954209698, "learning_rate": 1.8301225098681502e-06, "loss": 0.091, "step": 24921 }, { "epoch": 0.7270552540988389, "grad_norm": 0.6446411753985442, "learning_rate": 1.8297571655837437e-06, "loss": 0.1143, "step": 24922 }, { "epoch": 0.7270844273294824, "grad_norm": 0.7362736806102024, "learning_rate": 1.829391849602512e-06, "loss": 0.0873, "step": 24923 }, { "epoch": 0.7271136005601261, "grad_norm": 0.8609760765727609, "learning_rate": 1.8290265619277125e-06, "loss": 0.1237, "step": 24924 }, { "epoch": 0.7271427737907696, "grad_norm": 1.6472467366255574, "learning_rate": 1.8286613025626054e-06, "loss": 0.112, "step": 24925 }, { "epoch": 0.7271719470214132, "grad_norm": 0.8878191010116272, "learning_rate": 1.8282960715104553e-06, "loss": 0.1197, "step": 24926 }, { "epoch": 0.7272011202520567, "grad_norm": 0.8555477765704576, "learning_rate": 1.82793086877452e-06, "loss": 0.1128, "step": 24927 }, { "epoch": 0.7272302934827003, "grad_norm": 0.848971547739859, "learning_rate": 1.8275656943580594e-06, "loss": 0.1244, "step": 24928 }, { "epoch": 0.7272594667133438, "grad_norm": 0.7995870031921165, "learning_rate": 1.8272005482643352e-06, "loss": 0.1204, "step": 24929 }, { "epoch": 0.7272886399439874, "grad_norm": 0.8450287379984195, "learning_rate": 1.8268354304966084e-06, "loss": 0.1249, "step": 24930 }, { "epoch": 0.727317813174631, "grad_norm": 0.8166409415337273, "learning_rate": 1.8264703410581375e-06, "loss": 0.1092, "step": 24931 }, { "epoch": 0.7273469864052745, "grad_norm": 0.8586100750549512, "learning_rate": 1.82610527995218e-06, "loss": 0.1089, "step": 24932 }, { "epoch": 0.7273761596359181, "grad_norm": 0.9073711672332692, "learning_rate": 1.8257402471819991e-06, "loss": 0.1217, "step": 24933 }, { "epoch": 0.7274053328665616, "grad_norm": 0.8195584608933435, "learning_rate": 1.8253752427508493e-06, "loss": 0.1119, "step": 24934 }, { "epoch": 0.7274345060972052, "grad_norm": 1.159609683019026, "learning_rate": 1.8250102666619917e-06, "loss": 0.0917, "step": 24935 }, { "epoch": 0.7274636793278487, "grad_norm": 0.7340335281291133, "learning_rate": 1.8246453189186857e-06, "loss": 0.1053, "step": 24936 }, { "epoch": 0.7274928525584923, "grad_norm": 0.855828491858628, "learning_rate": 1.8242803995241887e-06, "loss": 0.1448, "step": 24937 }, { "epoch": 0.7275220257891359, "grad_norm": 0.9658714695652126, "learning_rate": 1.8239155084817567e-06, "loss": 0.1476, "step": 24938 }, { "epoch": 0.7275511990197795, "grad_norm": 1.1787234210589748, "learning_rate": 1.8235506457946505e-06, "loss": 0.1184, "step": 24939 }, { "epoch": 0.727580372250423, "grad_norm": 0.7264636736826157, "learning_rate": 1.8231858114661238e-06, "loss": 0.1262, "step": 24940 }, { "epoch": 0.7276095454810666, "grad_norm": 1.0263366576540314, "learning_rate": 1.8228210054994377e-06, "loss": 0.109, "step": 24941 }, { "epoch": 0.7276387187117102, "grad_norm": 1.0271232640973176, "learning_rate": 1.8224562278978452e-06, "loss": 0.096, "step": 24942 }, { "epoch": 0.7276678919423537, "grad_norm": 0.8297968739124395, "learning_rate": 1.8220914786646071e-06, "loss": 0.0961, "step": 24943 }, { "epoch": 0.7276970651729973, "grad_norm": 0.8402880182245117, "learning_rate": 1.821726757802978e-06, "loss": 0.1192, "step": 24944 }, { "epoch": 0.7277262384036408, "grad_norm": 1.144508257056658, "learning_rate": 1.8213620653162111e-06, "loss": 0.1172, "step": 24945 }, { "epoch": 0.7277554116342844, "grad_norm": 1.160513505920864, "learning_rate": 1.8209974012075654e-06, "loss": 0.1118, "step": 24946 }, { "epoch": 0.7277845848649279, "grad_norm": 0.85734785080866, "learning_rate": 1.8206327654802975e-06, "loss": 0.1146, "step": 24947 }, { "epoch": 0.7278137580955715, "grad_norm": 0.8636575756890902, "learning_rate": 1.8202681581376614e-06, "loss": 0.116, "step": 24948 }, { "epoch": 0.727842931326215, "grad_norm": 1.0040967361817545, "learning_rate": 1.8199035791829105e-06, "loss": 0.1373, "step": 24949 }, { "epoch": 0.7278721045568586, "grad_norm": 1.1790709096440317, "learning_rate": 1.8195390286193027e-06, "loss": 0.1018, "step": 24950 }, { "epoch": 0.7279012777875022, "grad_norm": 0.9110015649735659, "learning_rate": 1.81917450645009e-06, "loss": 0.108, "step": 24951 }, { "epoch": 0.7279304510181458, "grad_norm": 0.8987094341283958, "learning_rate": 1.8188100126785273e-06, "loss": 0.1067, "step": 24952 }, { "epoch": 0.7279596242487893, "grad_norm": 0.9444427978391019, "learning_rate": 1.8184455473078717e-06, "loss": 0.114, "step": 24953 }, { "epoch": 0.7279887974794329, "grad_norm": 1.0161299164683342, "learning_rate": 1.8180811103413743e-06, "loss": 0.0994, "step": 24954 }, { "epoch": 0.7280179707100765, "grad_norm": 0.8559058581243981, "learning_rate": 1.8177167017822878e-06, "loss": 0.1099, "step": 24955 }, { "epoch": 0.72804714394072, "grad_norm": 0.9927475393473875, "learning_rate": 1.8173523216338685e-06, "loss": 0.1209, "step": 24956 }, { "epoch": 0.7280763171713636, "grad_norm": 0.8647802236615696, "learning_rate": 1.8169879698993665e-06, "loss": 0.1179, "step": 24957 }, { "epoch": 0.7281054904020071, "grad_norm": 0.7483838827480895, "learning_rate": 1.8166236465820375e-06, "loss": 0.1006, "step": 24958 }, { "epoch": 0.7281346636326507, "grad_norm": 1.0133208695063225, "learning_rate": 1.8162593516851308e-06, "loss": 0.1194, "step": 24959 }, { "epoch": 0.7281638368632942, "grad_norm": 0.8257053792932199, "learning_rate": 1.8158950852119024e-06, "loss": 0.1186, "step": 24960 }, { "epoch": 0.7281930100939378, "grad_norm": 0.7500533278285161, "learning_rate": 1.8155308471656024e-06, "loss": 0.1047, "step": 24961 }, { "epoch": 0.7282221833245813, "grad_norm": 1.0483414779193183, "learning_rate": 1.8151666375494815e-06, "loss": 0.1224, "step": 24962 }, { "epoch": 0.7282513565552249, "grad_norm": 0.8956709519972501, "learning_rate": 1.8148024563667926e-06, "loss": 0.1093, "step": 24963 }, { "epoch": 0.7282805297858684, "grad_norm": 1.3678983466767172, "learning_rate": 1.8144383036207886e-06, "loss": 0.1462, "step": 24964 }, { "epoch": 0.7283097030165121, "grad_norm": 0.762653630359437, "learning_rate": 1.8140741793147172e-06, "loss": 0.1057, "step": 24965 }, { "epoch": 0.7283388762471557, "grad_norm": 0.9833261476048476, "learning_rate": 1.8137100834518323e-06, "loss": 0.1346, "step": 24966 }, { "epoch": 0.7283680494777992, "grad_norm": 1.3145407068180195, "learning_rate": 1.8133460160353832e-06, "loss": 0.112, "step": 24967 }, { "epoch": 0.7283972227084428, "grad_norm": 1.1744594263215486, "learning_rate": 1.8129819770686192e-06, "loss": 0.0967, "step": 24968 }, { "epoch": 0.7284263959390863, "grad_norm": 0.6977637276046625, "learning_rate": 1.8126179665547905e-06, "loss": 0.1148, "step": 24969 }, { "epoch": 0.7284555691697299, "grad_norm": 0.8791605244855828, "learning_rate": 1.8122539844971498e-06, "loss": 0.1244, "step": 24970 }, { "epoch": 0.7284847424003734, "grad_norm": 1.3230451343526601, "learning_rate": 1.8118900308989446e-06, "loss": 0.1262, "step": 24971 }, { "epoch": 0.728513915631017, "grad_norm": 1.0614937945500669, "learning_rate": 1.8115261057634226e-06, "loss": 0.1222, "step": 24972 }, { "epoch": 0.7285430888616605, "grad_norm": 0.8326956579195125, "learning_rate": 1.8111622090938357e-06, "loss": 0.1358, "step": 24973 }, { "epoch": 0.7285722620923041, "grad_norm": 0.8717068756020058, "learning_rate": 1.8107983408934315e-06, "loss": 0.1004, "step": 24974 }, { "epoch": 0.7286014353229476, "grad_norm": 1.1216425684225206, "learning_rate": 1.8104345011654566e-06, "loss": 0.1105, "step": 24975 }, { "epoch": 0.7286306085535912, "grad_norm": 0.7841207142089922, "learning_rate": 1.810070689913161e-06, "loss": 0.1049, "step": 24976 }, { "epoch": 0.7286597817842347, "grad_norm": 0.8524723388647767, "learning_rate": 1.8097069071397943e-06, "loss": 0.1108, "step": 24977 }, { "epoch": 0.7286889550148783, "grad_norm": 0.9369938320213941, "learning_rate": 1.8093431528486034e-06, "loss": 0.137, "step": 24978 }, { "epoch": 0.728718128245522, "grad_norm": 0.8796896474443129, "learning_rate": 1.808979427042833e-06, "loss": 0.1249, "step": 24979 }, { "epoch": 0.7287473014761655, "grad_norm": 0.9817665157287899, "learning_rate": 1.8086157297257346e-06, "loss": 0.1179, "step": 24980 }, { "epoch": 0.7287764747068091, "grad_norm": 0.8483677780760432, "learning_rate": 1.808252060900551e-06, "loss": 0.1111, "step": 24981 }, { "epoch": 0.7288056479374526, "grad_norm": 0.7097324491120331, "learning_rate": 1.8078884205705311e-06, "loss": 0.1133, "step": 24982 }, { "epoch": 0.7288348211680962, "grad_norm": 0.7778382295973635, "learning_rate": 1.8075248087389236e-06, "loss": 0.124, "step": 24983 }, { "epoch": 0.7288639943987397, "grad_norm": 0.7890920754294309, "learning_rate": 1.8071612254089722e-06, "loss": 0.1055, "step": 24984 }, { "epoch": 0.7288931676293833, "grad_norm": 0.7835489966044812, "learning_rate": 1.8067976705839208e-06, "loss": 0.1204, "step": 24985 }, { "epoch": 0.7289223408600268, "grad_norm": 0.8632739473909581, "learning_rate": 1.8064341442670203e-06, "loss": 0.1078, "step": 24986 }, { "epoch": 0.7289515140906704, "grad_norm": 0.747518900822516, "learning_rate": 1.8060706464615108e-06, "loss": 0.1176, "step": 24987 }, { "epoch": 0.7289806873213139, "grad_norm": 0.9503703476024202, "learning_rate": 1.8057071771706424e-06, "loss": 0.129, "step": 24988 }, { "epoch": 0.7290098605519575, "grad_norm": 0.9969023318960142, "learning_rate": 1.8053437363976556e-06, "loss": 0.1269, "step": 24989 }, { "epoch": 0.729039033782601, "grad_norm": 0.8066353602916551, "learning_rate": 1.8049803241457996e-06, "loss": 0.1094, "step": 24990 }, { "epoch": 0.7290682070132446, "grad_norm": 0.923421178093087, "learning_rate": 1.8046169404183162e-06, "loss": 0.1116, "step": 24991 }, { "epoch": 0.7290973802438883, "grad_norm": 0.7958522972223832, "learning_rate": 1.8042535852184484e-06, "loss": 0.1343, "step": 24992 }, { "epoch": 0.7291265534745318, "grad_norm": 0.9977349501167543, "learning_rate": 1.8038902585494417e-06, "loss": 0.1024, "step": 24993 }, { "epoch": 0.7291557267051754, "grad_norm": 0.9675218438224169, "learning_rate": 1.803526960414541e-06, "loss": 0.133, "step": 24994 }, { "epoch": 0.7291848999358189, "grad_norm": 0.7603232056663293, "learning_rate": 1.8031636908169876e-06, "loss": 0.1067, "step": 24995 }, { "epoch": 0.7292140731664625, "grad_norm": 0.788021842468877, "learning_rate": 1.8028004497600265e-06, "loss": 0.1142, "step": 24996 }, { "epoch": 0.729243246397106, "grad_norm": 0.7482645623370769, "learning_rate": 1.8024372372469008e-06, "loss": 0.1343, "step": 24997 }, { "epoch": 0.7292724196277496, "grad_norm": 0.8244271398921885, "learning_rate": 1.8020740532808495e-06, "loss": 0.1108, "step": 24998 }, { "epoch": 0.7293015928583931, "grad_norm": 1.3435602789569605, "learning_rate": 1.8017108978651182e-06, "loss": 0.1211, "step": 24999 }, { "epoch": 0.7293307660890367, "grad_norm": 1.025693838787472, "learning_rate": 1.8013477710029498e-06, "loss": 0.1218, "step": 25000 }, { "epoch": 0.7293599393196802, "grad_norm": 0.7885644961583933, "learning_rate": 1.8009846726975849e-06, "loss": 0.1245, "step": 25001 }, { "epoch": 0.7293891125503238, "grad_norm": 0.8563145559061879, "learning_rate": 1.8006216029522638e-06, "loss": 0.1336, "step": 25002 }, { "epoch": 0.7294182857809673, "grad_norm": 0.7020424218793577, "learning_rate": 1.8002585617702313e-06, "loss": 0.1084, "step": 25003 }, { "epoch": 0.7294474590116109, "grad_norm": 1.0573065590986708, "learning_rate": 1.7998955491547254e-06, "loss": 0.1287, "step": 25004 }, { "epoch": 0.7294766322422545, "grad_norm": 0.7594585977249245, "learning_rate": 1.7995325651089873e-06, "loss": 0.1316, "step": 25005 }, { "epoch": 0.7295058054728981, "grad_norm": 0.7442154971434948, "learning_rate": 1.7991696096362582e-06, "loss": 0.1058, "step": 25006 }, { "epoch": 0.7295349787035417, "grad_norm": 0.914630533632953, "learning_rate": 1.7988066827397805e-06, "loss": 0.0873, "step": 25007 }, { "epoch": 0.7295641519341852, "grad_norm": 0.8939894431960431, "learning_rate": 1.7984437844227925e-06, "loss": 0.1189, "step": 25008 }, { "epoch": 0.7295933251648288, "grad_norm": 1.074739453093371, "learning_rate": 1.7980809146885325e-06, "loss": 0.1462, "step": 25009 }, { "epoch": 0.7296224983954723, "grad_norm": 0.7462093769197482, "learning_rate": 1.7977180735402433e-06, "loss": 0.1088, "step": 25010 }, { "epoch": 0.7296516716261159, "grad_norm": 0.9067292778305343, "learning_rate": 1.797355260981161e-06, "loss": 0.1046, "step": 25011 }, { "epoch": 0.7296808448567594, "grad_norm": 0.707973753350686, "learning_rate": 1.7969924770145264e-06, "loss": 0.1181, "step": 25012 }, { "epoch": 0.729710018087403, "grad_norm": 0.7722361338043253, "learning_rate": 1.79662972164358e-06, "loss": 0.0997, "step": 25013 }, { "epoch": 0.7297391913180465, "grad_norm": 0.8776022001151548, "learning_rate": 1.7962669948715594e-06, "loss": 0.1326, "step": 25014 }, { "epoch": 0.7297683645486901, "grad_norm": 0.9531958295800378, "learning_rate": 1.7959042967016998e-06, "loss": 0.1354, "step": 25015 }, { "epoch": 0.7297975377793336, "grad_norm": 0.9746917684306904, "learning_rate": 1.7955416271372438e-06, "loss": 0.1289, "step": 25016 }, { "epoch": 0.7298267110099772, "grad_norm": 0.9164394104439284, "learning_rate": 1.7951789861814251e-06, "loss": 0.0989, "step": 25017 }, { "epoch": 0.7298558842406208, "grad_norm": 0.727406514704139, "learning_rate": 1.7948163738374858e-06, "loss": 0.1026, "step": 25018 }, { "epoch": 0.7298850574712644, "grad_norm": 0.8699623298374827, "learning_rate": 1.7944537901086585e-06, "loss": 0.1146, "step": 25019 }, { "epoch": 0.729914230701908, "grad_norm": 0.847542047979384, "learning_rate": 1.7940912349981844e-06, "loss": 0.114, "step": 25020 }, { "epoch": 0.7299434039325515, "grad_norm": 0.8382347962971295, "learning_rate": 1.793728708509298e-06, "loss": 0.096, "step": 25021 }, { "epoch": 0.7299725771631951, "grad_norm": 0.6323190932166773, "learning_rate": 1.7933662106452349e-06, "loss": 0.1168, "step": 25022 }, { "epoch": 0.7300017503938386, "grad_norm": 0.7482440952761785, "learning_rate": 1.7930037414092333e-06, "loss": 0.1158, "step": 25023 }, { "epoch": 0.7300309236244822, "grad_norm": 0.8202781357756733, "learning_rate": 1.7926413008045296e-06, "loss": 0.1095, "step": 25024 }, { "epoch": 0.7300600968551257, "grad_norm": 0.7695006838706988, "learning_rate": 1.7922788888343574e-06, "loss": 0.135, "step": 25025 }, { "epoch": 0.7300892700857693, "grad_norm": 0.7794378186839271, "learning_rate": 1.7919165055019555e-06, "loss": 0.1222, "step": 25026 }, { "epoch": 0.7301184433164128, "grad_norm": 0.8003448262206493, "learning_rate": 1.7915541508105566e-06, "loss": 0.1027, "step": 25027 }, { "epoch": 0.7301476165470564, "grad_norm": 0.9064438954442364, "learning_rate": 1.7911918247633953e-06, "loss": 0.1165, "step": 25028 }, { "epoch": 0.7301767897777, "grad_norm": 0.8687801863595818, "learning_rate": 1.7908295273637066e-06, "loss": 0.099, "step": 25029 }, { "epoch": 0.7302059630083435, "grad_norm": 0.743202190253119, "learning_rate": 1.790467258614728e-06, "loss": 0.1135, "step": 25030 }, { "epoch": 0.7302351362389871, "grad_norm": 0.847381812584872, "learning_rate": 1.7901050185196916e-06, "loss": 0.1356, "step": 25031 }, { "epoch": 0.7302643094696306, "grad_norm": 0.8066910304199605, "learning_rate": 1.7897428070818295e-06, "loss": 0.1318, "step": 25032 }, { "epoch": 0.7302934827002743, "grad_norm": 0.6904417673296508, "learning_rate": 1.7893806243043794e-06, "loss": 0.1082, "step": 25033 }, { "epoch": 0.7303226559309178, "grad_norm": 0.8160857484736981, "learning_rate": 1.7890184701905723e-06, "loss": 0.1323, "step": 25034 }, { "epoch": 0.7303518291615614, "grad_norm": 0.8490542342399159, "learning_rate": 1.7886563447436394e-06, "loss": 0.1166, "step": 25035 }, { "epoch": 0.7303810023922049, "grad_norm": 0.9124067324639801, "learning_rate": 1.788294247966817e-06, "loss": 0.143, "step": 25036 }, { "epoch": 0.7304101756228485, "grad_norm": 1.24884392752143, "learning_rate": 1.7879321798633381e-06, "loss": 0.1181, "step": 25037 }, { "epoch": 0.730439348853492, "grad_norm": 0.7764460148314785, "learning_rate": 1.7875701404364337e-06, "loss": 0.095, "step": 25038 }, { "epoch": 0.7304685220841356, "grad_norm": 0.7370511598440967, "learning_rate": 1.787208129689335e-06, "loss": 0.0896, "step": 25039 }, { "epoch": 0.7304976953147791, "grad_norm": 0.8654637129815397, "learning_rate": 1.786846147625277e-06, "loss": 0.1231, "step": 25040 }, { "epoch": 0.7305268685454227, "grad_norm": 0.9567156856282536, "learning_rate": 1.7864841942474876e-06, "loss": 0.1301, "step": 25041 }, { "epoch": 0.7305560417760663, "grad_norm": 0.9964630297756714, "learning_rate": 1.7861222695592e-06, "loss": 0.111, "step": 25042 }, { "epoch": 0.7305852150067098, "grad_norm": 0.8372688229347758, "learning_rate": 1.7857603735636475e-06, "loss": 0.1111, "step": 25043 }, { "epoch": 0.7306143882373534, "grad_norm": 0.7720023441235836, "learning_rate": 1.7853985062640589e-06, "loss": 0.1134, "step": 25044 }, { "epoch": 0.7306435614679969, "grad_norm": 0.7857774481382417, "learning_rate": 1.7850366676636632e-06, "loss": 0.1057, "step": 25045 }, { "epoch": 0.7306727346986406, "grad_norm": 0.674889643651943, "learning_rate": 1.7846748577656947e-06, "loss": 0.106, "step": 25046 }, { "epoch": 0.7307019079292841, "grad_norm": 0.9857571154306463, "learning_rate": 1.7843130765733797e-06, "loss": 0.1297, "step": 25047 }, { "epoch": 0.7307310811599277, "grad_norm": 0.9845988416842765, "learning_rate": 1.7839513240899513e-06, "loss": 0.1473, "step": 25048 }, { "epoch": 0.7307602543905712, "grad_norm": 0.8661578830324631, "learning_rate": 1.7835896003186366e-06, "loss": 0.1024, "step": 25049 }, { "epoch": 0.7307894276212148, "grad_norm": 0.9033248694122342, "learning_rate": 1.7832279052626677e-06, "loss": 0.1505, "step": 25050 }, { "epoch": 0.7308186008518583, "grad_norm": 1.0295070321884365, "learning_rate": 1.7828662389252722e-06, "loss": 0.145, "step": 25051 }, { "epoch": 0.7308477740825019, "grad_norm": 0.8899622472839388, "learning_rate": 1.7825046013096769e-06, "loss": 0.1216, "step": 25052 }, { "epoch": 0.7308769473131455, "grad_norm": 1.0187479512197988, "learning_rate": 1.7821429924191125e-06, "loss": 0.1072, "step": 25053 }, { "epoch": 0.730906120543789, "grad_norm": 1.1032520339193246, "learning_rate": 1.781781412256809e-06, "loss": 0.1129, "step": 25054 }, { "epoch": 0.7309352937744326, "grad_norm": 0.9820851148701948, "learning_rate": 1.7814198608259931e-06, "loss": 0.1361, "step": 25055 }, { "epoch": 0.7309644670050761, "grad_norm": 0.7417339043079931, "learning_rate": 1.7810583381298902e-06, "loss": 0.0986, "step": 25056 }, { "epoch": 0.7309936402357197, "grad_norm": 0.9315353013191379, "learning_rate": 1.780696844171732e-06, "loss": 0.1075, "step": 25057 }, { "epoch": 0.7310228134663632, "grad_norm": 0.6483906621132459, "learning_rate": 1.7803353789547422e-06, "loss": 0.1147, "step": 25058 }, { "epoch": 0.7310519866970068, "grad_norm": 1.0000328173101294, "learning_rate": 1.7799739424821494e-06, "loss": 0.1553, "step": 25059 }, { "epoch": 0.7310811599276504, "grad_norm": 0.7642304727740475, "learning_rate": 1.7796125347571825e-06, "loss": 0.128, "step": 25060 }, { "epoch": 0.731110333158294, "grad_norm": 1.00476660559154, "learning_rate": 1.779251155783066e-06, "loss": 0.123, "step": 25061 }, { "epoch": 0.7311395063889375, "grad_norm": 0.818370657229027, "learning_rate": 1.7788898055630243e-06, "loss": 0.1273, "step": 25062 }, { "epoch": 0.7311686796195811, "grad_norm": 0.9591897965233913, "learning_rate": 1.7785284841002876e-06, "loss": 0.1256, "step": 25063 }, { "epoch": 0.7311978528502246, "grad_norm": 0.9170908665415384, "learning_rate": 1.7781671913980797e-06, "loss": 0.1303, "step": 25064 }, { "epoch": 0.7312270260808682, "grad_norm": 0.687266642683632, "learning_rate": 1.7778059274596237e-06, "loss": 0.1316, "step": 25065 }, { "epoch": 0.7312561993115118, "grad_norm": 1.069808344895264, "learning_rate": 1.7774446922881477e-06, "loss": 0.1159, "step": 25066 }, { "epoch": 0.7312853725421553, "grad_norm": 1.1827380260337113, "learning_rate": 1.7770834858868774e-06, "loss": 0.13, "step": 25067 }, { "epoch": 0.7313145457727989, "grad_norm": 0.8296872513374155, "learning_rate": 1.7767223082590368e-06, "loss": 0.0954, "step": 25068 }, { "epoch": 0.7313437190034424, "grad_norm": 1.013042974837889, "learning_rate": 1.7763611594078484e-06, "loss": 0.131, "step": 25069 }, { "epoch": 0.731372892234086, "grad_norm": 0.7654800282624646, "learning_rate": 1.7760000393365396e-06, "loss": 0.1373, "step": 25070 }, { "epoch": 0.7314020654647295, "grad_norm": 0.8389137047012237, "learning_rate": 1.775638948048331e-06, "loss": 0.099, "step": 25071 }, { "epoch": 0.7314312386953731, "grad_norm": 0.9852190149238672, "learning_rate": 1.7752778855464482e-06, "loss": 0.1134, "step": 25072 }, { "epoch": 0.7314604119260167, "grad_norm": 0.8183655474665511, "learning_rate": 1.7749168518341159e-06, "loss": 0.1088, "step": 25073 }, { "epoch": 0.7314895851566603, "grad_norm": 1.214453919438412, "learning_rate": 1.7745558469145563e-06, "loss": 0.1254, "step": 25074 }, { "epoch": 0.7315187583873038, "grad_norm": 0.9299908590710989, "learning_rate": 1.7741948707909906e-06, "loss": 0.1075, "step": 25075 }, { "epoch": 0.7315479316179474, "grad_norm": 0.7162708437097317, "learning_rate": 1.7738339234666453e-06, "loss": 0.118, "step": 25076 }, { "epoch": 0.731577104848591, "grad_norm": 0.9828499682152776, "learning_rate": 1.773473004944738e-06, "loss": 0.1276, "step": 25077 }, { "epoch": 0.7316062780792345, "grad_norm": 0.9106745842008812, "learning_rate": 1.7731121152284952e-06, "loss": 0.1032, "step": 25078 }, { "epoch": 0.7316354513098781, "grad_norm": 0.8463589980104531, "learning_rate": 1.7727512543211356e-06, "loss": 0.1061, "step": 25079 }, { "epoch": 0.7316646245405216, "grad_norm": 0.8227355149757087, "learning_rate": 1.7723904222258842e-06, "loss": 0.1241, "step": 25080 }, { "epoch": 0.7316937977711652, "grad_norm": 1.0125654110241649, "learning_rate": 1.7720296189459607e-06, "loss": 0.115, "step": 25081 }, { "epoch": 0.7317229710018087, "grad_norm": 0.8123603753577865, "learning_rate": 1.7716688444845841e-06, "loss": 0.1227, "step": 25082 }, { "epoch": 0.7317521442324523, "grad_norm": 1.2190760843158348, "learning_rate": 1.7713080988449783e-06, "loss": 0.148, "step": 25083 }, { "epoch": 0.7317813174630958, "grad_norm": 1.058355541610057, "learning_rate": 1.770947382030364e-06, "loss": 0.1319, "step": 25084 }, { "epoch": 0.7318104906937394, "grad_norm": 1.142991790627484, "learning_rate": 1.7705866940439604e-06, "loss": 0.1205, "step": 25085 }, { "epoch": 0.7318396639243829, "grad_norm": 0.9925538806801744, "learning_rate": 1.7702260348889865e-06, "loss": 0.1185, "step": 25086 }, { "epoch": 0.7318688371550266, "grad_norm": 0.9204263589105607, "learning_rate": 1.7698654045686654e-06, "loss": 0.1065, "step": 25087 }, { "epoch": 0.7318980103856702, "grad_norm": 1.01427959243275, "learning_rate": 1.7695048030862133e-06, "loss": 0.1213, "step": 25088 }, { "epoch": 0.7319271836163137, "grad_norm": 0.7468746509361103, "learning_rate": 1.7691442304448508e-06, "loss": 0.1171, "step": 25089 }, { "epoch": 0.7319563568469573, "grad_norm": 0.7073049353332799, "learning_rate": 1.7687836866477992e-06, "loss": 0.1103, "step": 25090 }, { "epoch": 0.7319855300776008, "grad_norm": 0.9058603297741257, "learning_rate": 1.7684231716982753e-06, "loss": 0.1062, "step": 25091 }, { "epoch": 0.7320147033082444, "grad_norm": 0.7907075778553803, "learning_rate": 1.7680626855994964e-06, "loss": 0.1273, "step": 25092 }, { "epoch": 0.7320438765388879, "grad_norm": 0.8180062151490706, "learning_rate": 1.7677022283546835e-06, "loss": 0.1212, "step": 25093 }, { "epoch": 0.7320730497695315, "grad_norm": 0.976544821222924, "learning_rate": 1.7673417999670538e-06, "loss": 0.1147, "step": 25094 }, { "epoch": 0.732102223000175, "grad_norm": 0.9447337914495483, "learning_rate": 1.7669814004398234e-06, "loss": 0.1204, "step": 25095 }, { "epoch": 0.7321313962308186, "grad_norm": 0.8780045298183542, "learning_rate": 1.766621029776211e-06, "loss": 0.123, "step": 25096 }, { "epoch": 0.7321605694614621, "grad_norm": 0.9278161234312494, "learning_rate": 1.7662606879794364e-06, "loss": 0.1117, "step": 25097 }, { "epoch": 0.7321897426921057, "grad_norm": 1.030305282599738, "learning_rate": 1.7659003750527137e-06, "loss": 0.1301, "step": 25098 }, { "epoch": 0.7322189159227492, "grad_norm": 0.93053434249989, "learning_rate": 1.7655400909992592e-06, "loss": 0.1201, "step": 25099 }, { "epoch": 0.7322480891533929, "grad_norm": 0.8230394303780576, "learning_rate": 1.765179835822292e-06, "loss": 0.1065, "step": 25100 }, { "epoch": 0.7322772623840365, "grad_norm": 0.7766468427595344, "learning_rate": 1.7648196095250252e-06, "loss": 0.1074, "step": 25101 }, { "epoch": 0.73230643561468, "grad_norm": 0.8429244286159766, "learning_rate": 1.7644594121106773e-06, "loss": 0.1082, "step": 25102 }, { "epoch": 0.7323356088453236, "grad_norm": 0.8269239382489441, "learning_rate": 1.7640992435824644e-06, "loss": 0.1158, "step": 25103 }, { "epoch": 0.7323647820759671, "grad_norm": 0.7961464558235928, "learning_rate": 1.7637391039436013e-06, "loss": 0.1565, "step": 25104 }, { "epoch": 0.7323939553066107, "grad_norm": 0.7701526148099882, "learning_rate": 1.7633789931973011e-06, "loss": 0.126, "step": 25105 }, { "epoch": 0.7324231285372542, "grad_norm": 0.9857449419649804, "learning_rate": 1.7630189113467827e-06, "loss": 0.1365, "step": 25106 }, { "epoch": 0.7324523017678978, "grad_norm": 0.8301893093873627, "learning_rate": 1.7626588583952564e-06, "loss": 0.1344, "step": 25107 }, { "epoch": 0.7324814749985413, "grad_norm": 0.8109699195045951, "learning_rate": 1.7622988343459412e-06, "loss": 0.1064, "step": 25108 }, { "epoch": 0.7325106482291849, "grad_norm": 1.0554056330856585, "learning_rate": 1.761938839202047e-06, "loss": 0.1366, "step": 25109 }, { "epoch": 0.7325398214598284, "grad_norm": 0.8565797345130763, "learning_rate": 1.761578872966792e-06, "loss": 0.1208, "step": 25110 }, { "epoch": 0.732568994690472, "grad_norm": 0.7180030160198854, "learning_rate": 1.7612189356433873e-06, "loss": 0.0986, "step": 25111 }, { "epoch": 0.7325981679211155, "grad_norm": 0.9382257106450259, "learning_rate": 1.7608590272350452e-06, "loss": 0.1468, "step": 25112 }, { "epoch": 0.7326273411517591, "grad_norm": 1.027195069618291, "learning_rate": 1.7604991477449806e-06, "loss": 0.1235, "step": 25113 }, { "epoch": 0.7326565143824028, "grad_norm": 0.7406530075175138, "learning_rate": 1.760139297176408e-06, "loss": 0.1103, "step": 25114 }, { "epoch": 0.7326856876130463, "grad_norm": 0.7793079944136352, "learning_rate": 1.7597794755325381e-06, "loss": 0.1247, "step": 25115 }, { "epoch": 0.7327148608436899, "grad_norm": 0.9175384882801038, "learning_rate": 1.7594196828165822e-06, "loss": 0.1104, "step": 25116 }, { "epoch": 0.7327440340743334, "grad_norm": 0.830111568881049, "learning_rate": 1.7590599190317553e-06, "loss": 0.1234, "step": 25117 }, { "epoch": 0.732773207304977, "grad_norm": 0.8619153060807219, "learning_rate": 1.7587001841812661e-06, "loss": 0.1198, "step": 25118 }, { "epoch": 0.7328023805356205, "grad_norm": 1.142988479214071, "learning_rate": 1.7583404782683278e-06, "loss": 0.1471, "step": 25119 }, { "epoch": 0.7328315537662641, "grad_norm": 0.8390633501782565, "learning_rate": 1.7579808012961535e-06, "loss": 0.1392, "step": 25120 }, { "epoch": 0.7328607269969076, "grad_norm": 0.8210486107886237, "learning_rate": 1.7576211532679526e-06, "loss": 0.1083, "step": 25121 }, { "epoch": 0.7328899002275512, "grad_norm": 1.0812884348190424, "learning_rate": 1.7572615341869348e-06, "loss": 0.1453, "step": 25122 }, { "epoch": 0.7329190734581947, "grad_norm": 1.1511242222907994, "learning_rate": 1.7569019440563134e-06, "loss": 0.113, "step": 25123 }, { "epoch": 0.7329482466888383, "grad_norm": 0.8570069548126423, "learning_rate": 1.7565423828792971e-06, "loss": 0.1043, "step": 25124 }, { "epoch": 0.7329774199194818, "grad_norm": 0.8266584965000707, "learning_rate": 1.7561828506590944e-06, "loss": 0.1092, "step": 25125 }, { "epoch": 0.7330065931501254, "grad_norm": 0.7430619098612392, "learning_rate": 1.7558233473989172e-06, "loss": 0.1058, "step": 25126 }, { "epoch": 0.7330357663807691, "grad_norm": 1.0697237572037825, "learning_rate": 1.7554638731019757e-06, "loss": 0.1212, "step": 25127 }, { "epoch": 0.7330649396114126, "grad_norm": 0.7909565495721145, "learning_rate": 1.755104427771479e-06, "loss": 0.0987, "step": 25128 }, { "epoch": 0.7330941128420562, "grad_norm": 0.8600045561286982, "learning_rate": 1.7547450114106335e-06, "loss": 0.1182, "step": 25129 }, { "epoch": 0.7331232860726997, "grad_norm": 0.7879225465652433, "learning_rate": 1.754385624022651e-06, "loss": 0.1266, "step": 25130 }, { "epoch": 0.7331524593033433, "grad_norm": 0.9075453655824288, "learning_rate": 1.7540262656107376e-06, "loss": 0.1237, "step": 25131 }, { "epoch": 0.7331816325339868, "grad_norm": 0.8839351725820979, "learning_rate": 1.7536669361781028e-06, "loss": 0.1029, "step": 25132 }, { "epoch": 0.7332108057646304, "grad_norm": 0.7682383820730142, "learning_rate": 1.753307635727956e-06, "loss": 0.1175, "step": 25133 }, { "epoch": 0.7332399789952739, "grad_norm": 0.7750322859507258, "learning_rate": 1.7529483642635042e-06, "loss": 0.1293, "step": 25134 }, { "epoch": 0.7332691522259175, "grad_norm": 1.0730597542982712, "learning_rate": 1.752589121787952e-06, "loss": 0.1144, "step": 25135 }, { "epoch": 0.733298325456561, "grad_norm": 1.0052931011938175, "learning_rate": 1.7522299083045109e-06, "loss": 0.1178, "step": 25136 }, { "epoch": 0.7333274986872046, "grad_norm": 0.7550743786223016, "learning_rate": 1.751870723816384e-06, "loss": 0.1164, "step": 25137 }, { "epoch": 0.7333566719178481, "grad_norm": 0.8236347357126722, "learning_rate": 1.7515115683267818e-06, "loss": 0.1176, "step": 25138 }, { "epoch": 0.7333858451484917, "grad_norm": 0.9729512647434295, "learning_rate": 1.751152441838907e-06, "loss": 0.1096, "step": 25139 }, { "epoch": 0.7334150183791353, "grad_norm": 0.8616297450036582, "learning_rate": 1.7507933443559694e-06, "loss": 0.1069, "step": 25140 }, { "epoch": 0.7334441916097789, "grad_norm": 0.8177603369689589, "learning_rate": 1.7504342758811732e-06, "loss": 0.1131, "step": 25141 }, { "epoch": 0.7334733648404225, "grad_norm": 0.7055999696259391, "learning_rate": 1.750075236417722e-06, "loss": 0.1174, "step": 25142 }, { "epoch": 0.733502538071066, "grad_norm": 0.8013556770362623, "learning_rate": 1.7497162259688238e-06, "loss": 0.1383, "step": 25143 }, { "epoch": 0.7335317113017096, "grad_norm": 0.7566002384298631, "learning_rate": 1.7493572445376845e-06, "loss": 0.1082, "step": 25144 }, { "epoch": 0.7335608845323531, "grad_norm": 0.7498020039586966, "learning_rate": 1.7489982921275077e-06, "loss": 0.1188, "step": 25145 }, { "epoch": 0.7335900577629967, "grad_norm": 0.7120447492160659, "learning_rate": 1.748639368741497e-06, "loss": 0.1182, "step": 25146 }, { "epoch": 0.7336192309936402, "grad_norm": 0.7978790389234697, "learning_rate": 1.748280474382859e-06, "loss": 0.1074, "step": 25147 }, { "epoch": 0.7336484042242838, "grad_norm": 0.8372600013761905, "learning_rate": 1.7479216090547952e-06, "loss": 0.1378, "step": 25148 }, { "epoch": 0.7336775774549273, "grad_norm": 0.7960443512718854, "learning_rate": 1.747562772760511e-06, "loss": 0.1008, "step": 25149 }, { "epoch": 0.7337067506855709, "grad_norm": 0.8518957134997182, "learning_rate": 1.7472039655032113e-06, "loss": 0.123, "step": 25150 }, { "epoch": 0.7337359239162144, "grad_norm": 0.835281475119563, "learning_rate": 1.7468451872860986e-06, "loss": 0.1102, "step": 25151 }, { "epoch": 0.733765097146858, "grad_norm": 0.8287815632191993, "learning_rate": 1.746486438112373e-06, "loss": 0.1209, "step": 25152 }, { "epoch": 0.7337942703775016, "grad_norm": 0.8533193339576118, "learning_rate": 1.746127717985242e-06, "loss": 0.0999, "step": 25153 }, { "epoch": 0.7338234436081452, "grad_norm": 0.6564936950655875, "learning_rate": 1.7457690269079047e-06, "loss": 0.0973, "step": 25154 }, { "epoch": 0.7338526168387888, "grad_norm": 1.1398355247475862, "learning_rate": 1.7454103648835656e-06, "loss": 0.1268, "step": 25155 }, { "epoch": 0.7338817900694323, "grad_norm": 1.463932068213724, "learning_rate": 1.7450517319154247e-06, "loss": 0.117, "step": 25156 }, { "epoch": 0.7339109633000759, "grad_norm": 1.0145696826910793, "learning_rate": 1.7446931280066865e-06, "loss": 0.1307, "step": 25157 }, { "epoch": 0.7339401365307194, "grad_norm": 1.158387489395097, "learning_rate": 1.7443345531605505e-06, "loss": 0.1153, "step": 25158 }, { "epoch": 0.733969309761363, "grad_norm": 1.115998650436188, "learning_rate": 1.743976007380217e-06, "loss": 0.1181, "step": 25159 }, { "epoch": 0.7339984829920065, "grad_norm": 0.8625697536330599, "learning_rate": 1.7436174906688886e-06, "loss": 0.1408, "step": 25160 }, { "epoch": 0.7340276562226501, "grad_norm": 0.8435931153180534, "learning_rate": 1.7432590030297674e-06, "loss": 0.1307, "step": 25161 }, { "epoch": 0.7340568294532936, "grad_norm": 1.1569062914021235, "learning_rate": 1.7429005444660508e-06, "loss": 0.1177, "step": 25162 }, { "epoch": 0.7340860026839372, "grad_norm": 1.1099160223533906, "learning_rate": 1.7425421149809424e-06, "loss": 0.1002, "step": 25163 }, { "epoch": 0.7341151759145808, "grad_norm": 1.2895727092868505, "learning_rate": 1.7421837145776399e-06, "loss": 0.1209, "step": 25164 }, { "epoch": 0.7341443491452243, "grad_norm": 0.7167036218626416, "learning_rate": 1.7418253432593423e-06, "loss": 0.1126, "step": 25165 }, { "epoch": 0.7341735223758679, "grad_norm": 0.7890719376869357, "learning_rate": 1.74146700102925e-06, "loss": 0.1005, "step": 25166 }, { "epoch": 0.7342026956065114, "grad_norm": 0.8660126753722375, "learning_rate": 1.741108687890564e-06, "loss": 0.1112, "step": 25167 }, { "epoch": 0.7342318688371551, "grad_norm": 1.0932106365105552, "learning_rate": 1.7407504038464818e-06, "loss": 0.1098, "step": 25168 }, { "epoch": 0.7342610420677986, "grad_norm": 0.9895097506666662, "learning_rate": 1.7403921489002008e-06, "loss": 0.128, "step": 25169 }, { "epoch": 0.7342902152984422, "grad_norm": 0.7495357288382063, "learning_rate": 1.7400339230549212e-06, "loss": 0.1228, "step": 25170 }, { "epoch": 0.7343193885290857, "grad_norm": 0.7804076536488669, "learning_rate": 1.7396757263138415e-06, "loss": 0.106, "step": 25171 }, { "epoch": 0.7343485617597293, "grad_norm": 0.8424890119681546, "learning_rate": 1.7393175586801564e-06, "loss": 0.1157, "step": 25172 }, { "epoch": 0.7343777349903728, "grad_norm": 0.9289119281859283, "learning_rate": 1.738959420157066e-06, "loss": 0.1095, "step": 25173 }, { "epoch": 0.7344069082210164, "grad_norm": 0.9027837895824324, "learning_rate": 1.738601310747769e-06, "loss": 0.1034, "step": 25174 }, { "epoch": 0.73443608145166, "grad_norm": 0.8428337517467128, "learning_rate": 1.7382432304554609e-06, "loss": 0.1128, "step": 25175 }, { "epoch": 0.7344652546823035, "grad_norm": 0.9047097875695138, "learning_rate": 1.7378851792833368e-06, "loss": 0.1119, "step": 25176 }, { "epoch": 0.7344944279129471, "grad_norm": 0.8795970607848003, "learning_rate": 1.737527157234597e-06, "loss": 0.1248, "step": 25177 }, { "epoch": 0.7345236011435906, "grad_norm": 0.7990268085180928, "learning_rate": 1.7371691643124338e-06, "loss": 0.1036, "step": 25178 }, { "epoch": 0.7345527743742342, "grad_norm": 0.793030213764276, "learning_rate": 1.736811200520046e-06, "loss": 0.1164, "step": 25179 }, { "epoch": 0.7345819476048777, "grad_norm": 1.0068930119539534, "learning_rate": 1.7364532658606304e-06, "loss": 0.1253, "step": 25180 }, { "epoch": 0.7346111208355214, "grad_norm": 0.800583865149948, "learning_rate": 1.736095360337381e-06, "loss": 0.1095, "step": 25181 }, { "epoch": 0.7346402940661649, "grad_norm": 1.045965267800114, "learning_rate": 1.7357374839534907e-06, "loss": 0.0936, "step": 25182 }, { "epoch": 0.7346694672968085, "grad_norm": 1.3238259145904048, "learning_rate": 1.7353796367121594e-06, "loss": 0.1062, "step": 25183 }, { "epoch": 0.734698640527452, "grad_norm": 0.8951687075971192, "learning_rate": 1.7350218186165774e-06, "loss": 0.129, "step": 25184 }, { "epoch": 0.7347278137580956, "grad_norm": 0.9953421898794453, "learning_rate": 1.7346640296699424e-06, "loss": 0.1017, "step": 25185 }, { "epoch": 0.7347569869887391, "grad_norm": 2.017025881446626, "learning_rate": 1.7343062698754465e-06, "loss": 0.1061, "step": 25186 }, { "epoch": 0.7347861602193827, "grad_norm": 0.6387944226869747, "learning_rate": 1.733948539236286e-06, "loss": 0.1083, "step": 25187 }, { "epoch": 0.7348153334500263, "grad_norm": 1.7999285802911165, "learning_rate": 1.7335908377556533e-06, "loss": 0.1064, "step": 25188 }, { "epoch": 0.7348445066806698, "grad_norm": 0.8051056740792877, "learning_rate": 1.73323316543674e-06, "loss": 0.1074, "step": 25189 }, { "epoch": 0.7348736799113134, "grad_norm": 0.7310949681442523, "learning_rate": 1.7328755222827414e-06, "loss": 0.1384, "step": 25190 }, { "epoch": 0.7349028531419569, "grad_norm": 0.836118745669304, "learning_rate": 1.732517908296852e-06, "loss": 0.1124, "step": 25191 }, { "epoch": 0.7349320263726005, "grad_norm": 0.8581381736726805, "learning_rate": 1.7321603234822608e-06, "loss": 0.1232, "step": 25192 }, { "epoch": 0.734961199603244, "grad_norm": 0.7946901559174058, "learning_rate": 1.7318027678421638e-06, "loss": 0.0923, "step": 25193 }, { "epoch": 0.7349903728338876, "grad_norm": 0.987180674950145, "learning_rate": 1.7314452413797517e-06, "loss": 0.1269, "step": 25194 }, { "epoch": 0.7350195460645312, "grad_norm": 0.8264194715410034, "learning_rate": 1.7310877440982144e-06, "loss": 0.1069, "step": 25195 }, { "epoch": 0.7350487192951748, "grad_norm": 0.6918364701282613, "learning_rate": 1.730730276000745e-06, "loss": 0.1106, "step": 25196 }, { "epoch": 0.7350778925258183, "grad_norm": 0.6190853723432631, "learning_rate": 1.7303728370905377e-06, "loss": 0.1043, "step": 25197 }, { "epoch": 0.7351070657564619, "grad_norm": 0.7734153098972022, "learning_rate": 1.7300154273707803e-06, "loss": 0.117, "step": 25198 }, { "epoch": 0.7351362389871055, "grad_norm": 1.5546059569207948, "learning_rate": 1.7296580468446638e-06, "loss": 0.1432, "step": 25199 }, { "epoch": 0.735165412217749, "grad_norm": 0.8826353310839861, "learning_rate": 1.7293006955153808e-06, "loss": 0.1054, "step": 25200 }, { "epoch": 0.7351945854483926, "grad_norm": 0.8863678540996224, "learning_rate": 1.7289433733861206e-06, "loss": 0.1189, "step": 25201 }, { "epoch": 0.7352237586790361, "grad_norm": 0.7047809478398082, "learning_rate": 1.7285860804600708e-06, "loss": 0.1327, "step": 25202 }, { "epoch": 0.7352529319096797, "grad_norm": 0.9617434252540195, "learning_rate": 1.7282288167404243e-06, "loss": 0.118, "step": 25203 }, { "epoch": 0.7352821051403232, "grad_norm": 0.6185126927262421, "learning_rate": 1.727871582230371e-06, "loss": 0.1262, "step": 25204 }, { "epoch": 0.7353112783709668, "grad_norm": 0.7054006349378011, "learning_rate": 1.7275143769330994e-06, "loss": 0.1045, "step": 25205 }, { "epoch": 0.7353404516016103, "grad_norm": 0.7796769073497872, "learning_rate": 1.7271572008517968e-06, "loss": 0.1218, "step": 25206 }, { "epoch": 0.7353696248322539, "grad_norm": 0.788207880874118, "learning_rate": 1.7268000539896545e-06, "loss": 0.1114, "step": 25207 }, { "epoch": 0.7353987980628975, "grad_norm": 0.8660248515564732, "learning_rate": 1.7264429363498587e-06, "loss": 0.1444, "step": 25208 }, { "epoch": 0.7354279712935411, "grad_norm": 0.7349303066376833, "learning_rate": 1.7260858479355986e-06, "loss": 0.1372, "step": 25209 }, { "epoch": 0.7354571445241846, "grad_norm": 0.6860352387906503, "learning_rate": 1.7257287887500645e-06, "loss": 0.1188, "step": 25210 }, { "epoch": 0.7354863177548282, "grad_norm": 0.8315110432364982, "learning_rate": 1.7253717587964419e-06, "loss": 0.0989, "step": 25211 }, { "epoch": 0.7355154909854718, "grad_norm": 0.8596638787874854, "learning_rate": 1.725014758077917e-06, "loss": 0.1108, "step": 25212 }, { "epoch": 0.7355446642161153, "grad_norm": 0.752770762716311, "learning_rate": 1.72465778659768e-06, "loss": 0.111, "step": 25213 }, { "epoch": 0.7355738374467589, "grad_norm": 0.6867116489178011, "learning_rate": 1.7243008443589148e-06, "loss": 0.1021, "step": 25214 }, { "epoch": 0.7356030106774024, "grad_norm": 0.7910658847762618, "learning_rate": 1.7239439313648115e-06, "loss": 0.098, "step": 25215 }, { "epoch": 0.735632183908046, "grad_norm": 0.7865668459400588, "learning_rate": 1.7235870476185528e-06, "loss": 0.1053, "step": 25216 }, { "epoch": 0.7356613571386895, "grad_norm": 0.7660660298399999, "learning_rate": 1.7232301931233287e-06, "loss": 0.113, "step": 25217 }, { "epoch": 0.7356905303693331, "grad_norm": 0.774868231792155, "learning_rate": 1.7228733678823234e-06, "loss": 0.1273, "step": 25218 }, { "epoch": 0.7357197035999766, "grad_norm": 0.6727820209920885, "learning_rate": 1.7225165718987203e-06, "loss": 0.1022, "step": 25219 }, { "epoch": 0.7357488768306202, "grad_norm": 0.8086784982776921, "learning_rate": 1.7221598051757066e-06, "loss": 0.1196, "step": 25220 }, { "epoch": 0.7357780500612637, "grad_norm": 0.9249791247127002, "learning_rate": 1.7218030677164698e-06, "loss": 0.1384, "step": 25221 }, { "epoch": 0.7358072232919074, "grad_norm": 0.7257236343971472, "learning_rate": 1.7214463595241909e-06, "loss": 0.1221, "step": 25222 }, { "epoch": 0.735836396522551, "grad_norm": 0.7856001053922799, "learning_rate": 1.7210896806020583e-06, "loss": 0.122, "step": 25223 }, { "epoch": 0.7358655697531945, "grad_norm": 1.2352753135807606, "learning_rate": 1.720733030953254e-06, "loss": 0.111, "step": 25224 }, { "epoch": 0.7358947429838381, "grad_norm": 0.8579676431534605, "learning_rate": 1.72037641058096e-06, "loss": 0.1101, "step": 25225 }, { "epoch": 0.7359239162144816, "grad_norm": 0.8398343325198167, "learning_rate": 1.7200198194883632e-06, "loss": 0.1289, "step": 25226 }, { "epoch": 0.7359530894451252, "grad_norm": 1.547312546179713, "learning_rate": 1.7196632576786481e-06, "loss": 0.1173, "step": 25227 }, { "epoch": 0.7359822626757687, "grad_norm": 0.9003318672602055, "learning_rate": 1.7193067251549966e-06, "loss": 0.1165, "step": 25228 }, { "epoch": 0.7360114359064123, "grad_norm": 0.8140376947371802, "learning_rate": 1.7189502219205894e-06, "loss": 0.1351, "step": 25229 }, { "epoch": 0.7360406091370558, "grad_norm": 0.8741857773815951, "learning_rate": 1.718593747978613e-06, "loss": 0.1243, "step": 25230 }, { "epoch": 0.7360697823676994, "grad_norm": 0.8652461061178748, "learning_rate": 1.7182373033322485e-06, "loss": 0.1204, "step": 25231 }, { "epoch": 0.7360989555983429, "grad_norm": 0.8595575707027153, "learning_rate": 1.7178808879846763e-06, "loss": 0.1228, "step": 25232 }, { "epoch": 0.7361281288289865, "grad_norm": 0.9282324690575567, "learning_rate": 1.7175245019390801e-06, "loss": 0.1098, "step": 25233 }, { "epoch": 0.73615730205963, "grad_norm": 0.8532936953628621, "learning_rate": 1.7171681451986428e-06, "loss": 0.139, "step": 25234 }, { "epoch": 0.7361864752902736, "grad_norm": 1.1356813078641073, "learning_rate": 1.716811817766545e-06, "loss": 0.1135, "step": 25235 }, { "epoch": 0.7362156485209173, "grad_norm": 1.1125982455851906, "learning_rate": 1.7164555196459659e-06, "loss": 0.1417, "step": 25236 }, { "epoch": 0.7362448217515608, "grad_norm": 0.8874185666031077, "learning_rate": 1.7160992508400892e-06, "loss": 0.1096, "step": 25237 }, { "epoch": 0.7362739949822044, "grad_norm": 1.101421033845138, "learning_rate": 1.7157430113520934e-06, "loss": 0.1236, "step": 25238 }, { "epoch": 0.7363031682128479, "grad_norm": 0.8578668617025079, "learning_rate": 1.71538680118516e-06, "loss": 0.0967, "step": 25239 }, { "epoch": 0.7363323414434915, "grad_norm": 1.02838406227728, "learning_rate": 1.7150306203424705e-06, "loss": 0.1108, "step": 25240 }, { "epoch": 0.736361514674135, "grad_norm": 1.0423443265487888, "learning_rate": 1.7146744688272033e-06, "loss": 0.1232, "step": 25241 }, { "epoch": 0.7363906879047786, "grad_norm": 0.7404149203167731, "learning_rate": 1.7143183466425366e-06, "loss": 0.1003, "step": 25242 }, { "epoch": 0.7364198611354221, "grad_norm": 0.8153272341392377, "learning_rate": 1.7139622537916533e-06, "loss": 0.0916, "step": 25243 }, { "epoch": 0.7364490343660657, "grad_norm": 0.8857841917298593, "learning_rate": 1.7136061902777286e-06, "loss": 0.1231, "step": 25244 }, { "epoch": 0.7364782075967092, "grad_norm": 0.9688457467265948, "learning_rate": 1.713250156103945e-06, "loss": 0.1233, "step": 25245 }, { "epoch": 0.7365073808273528, "grad_norm": 1.1345142642731583, "learning_rate": 1.7128941512734781e-06, "loss": 0.1212, "step": 25246 }, { "epoch": 0.7365365540579963, "grad_norm": 0.8563042071323728, "learning_rate": 1.7125381757895088e-06, "loss": 0.1213, "step": 25247 }, { "epoch": 0.7365657272886399, "grad_norm": 0.9739527917204357, "learning_rate": 1.7121822296552138e-06, "loss": 0.1225, "step": 25248 }, { "epoch": 0.7365949005192836, "grad_norm": 1.0025426988035975, "learning_rate": 1.7118263128737693e-06, "loss": 0.1434, "step": 25249 }, { "epoch": 0.7366240737499271, "grad_norm": 0.9172220347337707, "learning_rate": 1.7114704254483549e-06, "loss": 0.1053, "step": 25250 }, { "epoch": 0.7366532469805707, "grad_norm": 0.8207826382201381, "learning_rate": 1.7111145673821489e-06, "loss": 0.1275, "step": 25251 }, { "epoch": 0.7366824202112142, "grad_norm": 0.8933047248882343, "learning_rate": 1.7107587386783258e-06, "loss": 0.1207, "step": 25252 }, { "epoch": 0.7367115934418578, "grad_norm": 0.8904389300151773, "learning_rate": 1.7104029393400646e-06, "loss": 0.1452, "step": 25253 }, { "epoch": 0.7367407666725013, "grad_norm": 0.7367043057960646, "learning_rate": 1.7100471693705405e-06, "loss": 0.1029, "step": 25254 }, { "epoch": 0.7367699399031449, "grad_norm": 0.7648136268051936, "learning_rate": 1.7096914287729287e-06, "loss": 0.1084, "step": 25255 }, { "epoch": 0.7367991131337884, "grad_norm": 0.9378611376088117, "learning_rate": 1.709335717550406e-06, "loss": 0.1306, "step": 25256 }, { "epoch": 0.736828286364432, "grad_norm": 0.7537964662440101, "learning_rate": 1.7089800357061504e-06, "loss": 0.1039, "step": 25257 }, { "epoch": 0.7368574595950755, "grad_norm": 0.7520104198875354, "learning_rate": 1.7086243832433353e-06, "loss": 0.1193, "step": 25258 }, { "epoch": 0.7368866328257191, "grad_norm": 0.9008341568376169, "learning_rate": 1.7082687601651344e-06, "loss": 0.1153, "step": 25259 }, { "epoch": 0.7369158060563626, "grad_norm": 1.059608005032094, "learning_rate": 1.7079131664747256e-06, "loss": 0.1162, "step": 25260 }, { "epoch": 0.7369449792870062, "grad_norm": 0.7660511675565401, "learning_rate": 1.7075576021752826e-06, "loss": 0.1007, "step": 25261 }, { "epoch": 0.7369741525176497, "grad_norm": 0.6890316702609873, "learning_rate": 1.7072020672699775e-06, "loss": 0.1112, "step": 25262 }, { "epoch": 0.7370033257482934, "grad_norm": 0.9422067141317693, "learning_rate": 1.7068465617619861e-06, "loss": 0.1307, "step": 25263 }, { "epoch": 0.737032498978937, "grad_norm": 1.4778763290020256, "learning_rate": 1.7064910856544842e-06, "loss": 0.1211, "step": 25264 }, { "epoch": 0.7370616722095805, "grad_norm": 0.8505609361935126, "learning_rate": 1.7061356389506439e-06, "loss": 0.1218, "step": 25265 }, { "epoch": 0.7370908454402241, "grad_norm": 0.7946341945130069, "learning_rate": 1.7057802216536369e-06, "loss": 0.1034, "step": 25266 }, { "epoch": 0.7371200186708676, "grad_norm": 0.8677088750994095, "learning_rate": 1.7054248337666385e-06, "loss": 0.1366, "step": 25267 }, { "epoch": 0.7371491919015112, "grad_norm": 0.7919279927712066, "learning_rate": 1.7050694752928198e-06, "loss": 0.1092, "step": 25268 }, { "epoch": 0.7371783651321547, "grad_norm": 1.0481757496621407, "learning_rate": 1.7047141462353538e-06, "loss": 0.1315, "step": 25269 }, { "epoch": 0.7372075383627983, "grad_norm": 1.3248705217274135, "learning_rate": 1.7043588465974148e-06, "loss": 0.1469, "step": 25270 }, { "epoch": 0.7372367115934418, "grad_norm": 0.9862843422901132, "learning_rate": 1.7040035763821738e-06, "loss": 0.0943, "step": 25271 }, { "epoch": 0.7372658848240854, "grad_norm": 0.8694817549761541, "learning_rate": 1.7036483355928002e-06, "loss": 0.1223, "step": 25272 }, { "epoch": 0.737295058054729, "grad_norm": 0.8065121355796986, "learning_rate": 1.7032931242324691e-06, "loss": 0.1148, "step": 25273 }, { "epoch": 0.7373242312853725, "grad_norm": 1.054999863812303, "learning_rate": 1.7029379423043479e-06, "loss": 0.1084, "step": 25274 }, { "epoch": 0.737353404516016, "grad_norm": 0.9127816940580294, "learning_rate": 1.7025827898116115e-06, "loss": 0.1153, "step": 25275 }, { "epoch": 0.7373825777466597, "grad_norm": 0.9339764236338165, "learning_rate": 1.7022276667574272e-06, "loss": 0.1155, "step": 25276 }, { "epoch": 0.7374117509773033, "grad_norm": 0.9194037018513296, "learning_rate": 1.7018725731449692e-06, "loss": 0.1329, "step": 25277 }, { "epoch": 0.7374409242079468, "grad_norm": 1.0288101664648228, "learning_rate": 1.701517508977405e-06, "loss": 0.1047, "step": 25278 }, { "epoch": 0.7374700974385904, "grad_norm": 0.8035757981797365, "learning_rate": 1.7011624742579037e-06, "loss": 0.1256, "step": 25279 }, { "epoch": 0.7374992706692339, "grad_norm": 1.0381347895455237, "learning_rate": 1.7008074689896359e-06, "loss": 0.1134, "step": 25280 }, { "epoch": 0.7375284438998775, "grad_norm": 0.9140663289613524, "learning_rate": 1.7004524931757733e-06, "loss": 0.1438, "step": 25281 }, { "epoch": 0.737557617130521, "grad_norm": 1.1388058220901165, "learning_rate": 1.700097546819482e-06, "loss": 0.1339, "step": 25282 }, { "epoch": 0.7375867903611646, "grad_norm": 0.744424852543238, "learning_rate": 1.6997426299239327e-06, "loss": 0.111, "step": 25283 }, { "epoch": 0.7376159635918081, "grad_norm": 0.6704914742716379, "learning_rate": 1.6993877424922945e-06, "loss": 0.1108, "step": 25284 }, { "epoch": 0.7376451368224517, "grad_norm": 0.7887579127255342, "learning_rate": 1.699032884527732e-06, "loss": 0.1444, "step": 25285 }, { "epoch": 0.7376743100530953, "grad_norm": 1.2477765701714825, "learning_rate": 1.6986780560334165e-06, "loss": 0.0984, "step": 25286 }, { "epoch": 0.7377034832837388, "grad_norm": 1.09505236161158, "learning_rate": 1.698323257012517e-06, "loss": 0.1425, "step": 25287 }, { "epoch": 0.7377326565143824, "grad_norm": 1.1090470372073882, "learning_rate": 1.6979684874681983e-06, "loss": 0.1025, "step": 25288 }, { "epoch": 0.7377618297450259, "grad_norm": 0.7474682054854008, "learning_rate": 1.697613747403628e-06, "loss": 0.0979, "step": 25289 }, { "epoch": 0.7377910029756696, "grad_norm": 0.9958266496411424, "learning_rate": 1.6972590368219755e-06, "loss": 0.122, "step": 25290 }, { "epoch": 0.7378201762063131, "grad_norm": 1.4041187903991246, "learning_rate": 1.6969043557264053e-06, "loss": 0.1197, "step": 25291 }, { "epoch": 0.7378493494369567, "grad_norm": 0.873790759024061, "learning_rate": 1.6965497041200829e-06, "loss": 0.1109, "step": 25292 }, { "epoch": 0.7378785226676002, "grad_norm": 0.7870163084088219, "learning_rate": 1.6961950820061767e-06, "loss": 0.105, "step": 25293 }, { "epoch": 0.7379076958982438, "grad_norm": 0.8080892165558088, "learning_rate": 1.6958404893878534e-06, "loss": 0.1, "step": 25294 }, { "epoch": 0.7379368691288873, "grad_norm": 0.9422983683712554, "learning_rate": 1.6954859262682777e-06, "loss": 0.1259, "step": 25295 }, { "epoch": 0.7379660423595309, "grad_norm": 0.8727555335092521, "learning_rate": 1.6951313926506124e-06, "loss": 0.1172, "step": 25296 }, { "epoch": 0.7379952155901744, "grad_norm": 0.7652748839371435, "learning_rate": 1.6947768885380278e-06, "loss": 0.1222, "step": 25297 }, { "epoch": 0.738024388820818, "grad_norm": 1.0887679184307082, "learning_rate": 1.6944224139336835e-06, "loss": 0.1234, "step": 25298 }, { "epoch": 0.7380535620514616, "grad_norm": 1.2673148003698989, "learning_rate": 1.6940679688407474e-06, "loss": 0.1361, "step": 25299 }, { "epoch": 0.7380827352821051, "grad_norm": 0.8633692029700694, "learning_rate": 1.6937135532623849e-06, "loss": 0.1088, "step": 25300 }, { "epoch": 0.7381119085127487, "grad_norm": 0.7747075654055182, "learning_rate": 1.6933591672017585e-06, "loss": 0.1455, "step": 25301 }, { "epoch": 0.7381410817433922, "grad_norm": 0.677240042499947, "learning_rate": 1.69300481066203e-06, "loss": 0.1236, "step": 25302 }, { "epoch": 0.7381702549740359, "grad_norm": 0.9029406886127779, "learning_rate": 1.692650483646367e-06, "loss": 0.1178, "step": 25303 }, { "epoch": 0.7381994282046794, "grad_norm": 0.7303228048236673, "learning_rate": 1.6922961861579295e-06, "loss": 0.1231, "step": 25304 }, { "epoch": 0.738228601435323, "grad_norm": 1.0257079461165068, "learning_rate": 1.6919419181998835e-06, "loss": 0.12, "step": 25305 }, { "epoch": 0.7382577746659665, "grad_norm": 0.826062206882135, "learning_rate": 1.691587679775389e-06, "loss": 0.0988, "step": 25306 }, { "epoch": 0.7382869478966101, "grad_norm": 1.0108898120413579, "learning_rate": 1.6912334708876116e-06, "loss": 0.1151, "step": 25307 }, { "epoch": 0.7383161211272536, "grad_norm": 0.9036026203197851, "learning_rate": 1.6908792915397115e-06, "loss": 0.1466, "step": 25308 }, { "epoch": 0.7383452943578972, "grad_norm": 0.770493745646177, "learning_rate": 1.6905251417348496e-06, "loss": 0.1222, "step": 25309 }, { "epoch": 0.7383744675885408, "grad_norm": 1.0709504522731002, "learning_rate": 1.690171021476189e-06, "loss": 0.1103, "step": 25310 }, { "epoch": 0.7384036408191843, "grad_norm": 0.7943863876776105, "learning_rate": 1.6898169307668932e-06, "loss": 0.1019, "step": 25311 }, { "epoch": 0.7384328140498279, "grad_norm": 0.9259974212396024, "learning_rate": 1.6894628696101201e-06, "loss": 0.1235, "step": 25312 }, { "epoch": 0.7384619872804714, "grad_norm": 1.07402123444298, "learning_rate": 1.6891088380090342e-06, "loss": 0.1198, "step": 25313 }, { "epoch": 0.738491160511115, "grad_norm": 0.7311273303407267, "learning_rate": 1.6887548359667939e-06, "loss": 0.1168, "step": 25314 }, { "epoch": 0.7385203337417585, "grad_norm": 0.8746853956529446, "learning_rate": 1.6884008634865584e-06, "loss": 0.1197, "step": 25315 }, { "epoch": 0.7385495069724021, "grad_norm": 0.9412831713663902, "learning_rate": 1.6880469205714888e-06, "loss": 0.1156, "step": 25316 }, { "epoch": 0.7385786802030457, "grad_norm": 0.7666779693899957, "learning_rate": 1.6876930072247482e-06, "loss": 0.1141, "step": 25317 }, { "epoch": 0.7386078534336893, "grad_norm": 0.8145189765663631, "learning_rate": 1.6873391234494936e-06, "loss": 0.1234, "step": 25318 }, { "epoch": 0.7386370266643328, "grad_norm": 1.2152461541244233, "learning_rate": 1.6869852692488826e-06, "loss": 0.1284, "step": 25319 }, { "epoch": 0.7386661998949764, "grad_norm": 1.1502248798022294, "learning_rate": 1.6866314446260778e-06, "loss": 0.1454, "step": 25320 }, { "epoch": 0.73869537312562, "grad_norm": 1.0078035979676785, "learning_rate": 1.6862776495842365e-06, "loss": 0.1252, "step": 25321 }, { "epoch": 0.7387245463562635, "grad_norm": 0.8911224581332606, "learning_rate": 1.6859238841265157e-06, "loss": 0.1189, "step": 25322 }, { "epoch": 0.738753719586907, "grad_norm": 0.7859438445214656, "learning_rate": 1.6855701482560754e-06, "loss": 0.1257, "step": 25323 }, { "epoch": 0.7387828928175506, "grad_norm": 0.8271070160879815, "learning_rate": 1.6852164419760752e-06, "loss": 0.1133, "step": 25324 }, { "epoch": 0.7388120660481942, "grad_norm": 1.007895153492105, "learning_rate": 1.6848627652896716e-06, "loss": 0.1056, "step": 25325 }, { "epoch": 0.7388412392788377, "grad_norm": 0.9271857457001312, "learning_rate": 1.6845091182000196e-06, "loss": 0.1399, "step": 25326 }, { "epoch": 0.7388704125094813, "grad_norm": 0.7724191013336708, "learning_rate": 1.6841555007102806e-06, "loss": 0.1216, "step": 25327 }, { "epoch": 0.7388995857401248, "grad_norm": 0.7769602395806887, "learning_rate": 1.6838019128236083e-06, "loss": 0.0879, "step": 25328 }, { "epoch": 0.7389287589707684, "grad_norm": 0.9950594960387527, "learning_rate": 1.6834483545431606e-06, "loss": 0.1168, "step": 25329 }, { "epoch": 0.738957932201412, "grad_norm": 0.8284338149531952, "learning_rate": 1.6830948258720964e-06, "loss": 0.1219, "step": 25330 }, { "epoch": 0.7389871054320556, "grad_norm": 0.7465268207472059, "learning_rate": 1.6827413268135694e-06, "loss": 0.1082, "step": 25331 }, { "epoch": 0.7390162786626991, "grad_norm": 0.9532600911478566, "learning_rate": 1.6823878573707341e-06, "loss": 0.1024, "step": 25332 }, { "epoch": 0.7390454518933427, "grad_norm": 1.2348503494756145, "learning_rate": 1.6820344175467502e-06, "loss": 0.1254, "step": 25333 }, { "epoch": 0.7390746251239863, "grad_norm": 0.8048644288093839, "learning_rate": 1.6816810073447686e-06, "loss": 0.1249, "step": 25334 }, { "epoch": 0.7391037983546298, "grad_norm": 0.7463363458690977, "learning_rate": 1.681327626767949e-06, "loss": 0.1013, "step": 25335 }, { "epoch": 0.7391329715852734, "grad_norm": 0.8456477130553874, "learning_rate": 1.6809742758194426e-06, "loss": 0.113, "step": 25336 }, { "epoch": 0.7391621448159169, "grad_norm": 0.9659166131898201, "learning_rate": 1.680620954502407e-06, "loss": 0.1072, "step": 25337 }, { "epoch": 0.7391913180465605, "grad_norm": 0.8320748594693623, "learning_rate": 1.6802676628199948e-06, "loss": 0.1112, "step": 25338 }, { "epoch": 0.739220491277204, "grad_norm": 1.0040201288794541, "learning_rate": 1.6799144007753576e-06, "loss": 0.1024, "step": 25339 }, { "epoch": 0.7392496645078476, "grad_norm": 0.8786702076063105, "learning_rate": 1.6795611683716555e-06, "loss": 0.0968, "step": 25340 }, { "epoch": 0.7392788377384911, "grad_norm": 0.8722965065660602, "learning_rate": 1.679207965612038e-06, "loss": 0.0985, "step": 25341 }, { "epoch": 0.7393080109691347, "grad_norm": 0.8566522122966621, "learning_rate": 1.6788547924996578e-06, "loss": 0.1287, "step": 25342 }, { "epoch": 0.7393371841997782, "grad_norm": 0.8130467367070658, "learning_rate": 1.678501649037671e-06, "loss": 0.1162, "step": 25343 }, { "epoch": 0.7393663574304219, "grad_norm": 0.8761452407573056, "learning_rate": 1.6781485352292281e-06, "loss": 0.1175, "step": 25344 }, { "epoch": 0.7393955306610654, "grad_norm": 1.2940274165400936, "learning_rate": 1.6777954510774808e-06, "loss": 0.1206, "step": 25345 }, { "epoch": 0.739424703891709, "grad_norm": 1.0570273790806137, "learning_rate": 1.6774423965855823e-06, "loss": 0.1351, "step": 25346 }, { "epoch": 0.7394538771223526, "grad_norm": 0.9344682524090563, "learning_rate": 1.6770893717566872e-06, "loss": 0.111, "step": 25347 }, { "epoch": 0.7394830503529961, "grad_norm": 1.2793674111916882, "learning_rate": 1.6767363765939444e-06, "loss": 0.136, "step": 25348 }, { "epoch": 0.7395122235836397, "grad_norm": 1.0608057392286048, "learning_rate": 1.6763834111005039e-06, "loss": 0.1162, "step": 25349 }, { "epoch": 0.7395413968142832, "grad_norm": 0.7445686660391058, "learning_rate": 1.6760304752795215e-06, "loss": 0.1417, "step": 25350 }, { "epoch": 0.7395705700449268, "grad_norm": 0.9735687822159999, "learning_rate": 1.675677569134143e-06, "loss": 0.1121, "step": 25351 }, { "epoch": 0.7395997432755703, "grad_norm": 1.0102545417041977, "learning_rate": 1.6753246926675237e-06, "loss": 0.1304, "step": 25352 }, { "epoch": 0.7396289165062139, "grad_norm": 1.3639402008031616, "learning_rate": 1.6749718458828102e-06, "loss": 0.1373, "step": 25353 }, { "epoch": 0.7396580897368574, "grad_norm": 0.8620142432377433, "learning_rate": 1.6746190287831559e-06, "loss": 0.1103, "step": 25354 }, { "epoch": 0.739687262967501, "grad_norm": 1.014938079064612, "learning_rate": 1.6742662413717092e-06, "loss": 0.1222, "step": 25355 }, { "epoch": 0.7397164361981445, "grad_norm": 0.8723643333151515, "learning_rate": 1.673913483651618e-06, "loss": 0.1183, "step": 25356 }, { "epoch": 0.7397456094287882, "grad_norm": 0.8299990124103854, "learning_rate": 1.673560755626033e-06, "loss": 0.1075, "step": 25357 }, { "epoch": 0.7397747826594318, "grad_norm": 0.9832267034035805, "learning_rate": 1.6732080572981052e-06, "loss": 0.1156, "step": 25358 }, { "epoch": 0.7398039558900753, "grad_norm": 1.3896315058880548, "learning_rate": 1.6728553886709798e-06, "loss": 0.1327, "step": 25359 }, { "epoch": 0.7398331291207189, "grad_norm": 0.8507942262110857, "learning_rate": 1.6725027497478092e-06, "loss": 0.0972, "step": 25360 }, { "epoch": 0.7398623023513624, "grad_norm": 0.9933799142356851, "learning_rate": 1.6721501405317398e-06, "loss": 0.1407, "step": 25361 }, { "epoch": 0.739891475582006, "grad_norm": 1.0911925315744393, "learning_rate": 1.6717975610259175e-06, "loss": 0.1361, "step": 25362 }, { "epoch": 0.7399206488126495, "grad_norm": 0.9913835876354764, "learning_rate": 1.6714450112334924e-06, "loss": 0.1185, "step": 25363 }, { "epoch": 0.7399498220432931, "grad_norm": 0.9459508176023983, "learning_rate": 1.671092491157613e-06, "loss": 0.0895, "step": 25364 }, { "epoch": 0.7399789952739366, "grad_norm": 1.0171833360019018, "learning_rate": 1.6707400008014257e-06, "loss": 0.1016, "step": 25365 }, { "epoch": 0.7400081685045802, "grad_norm": 0.9135241923001208, "learning_rate": 1.6703875401680747e-06, "loss": 0.1179, "step": 25366 }, { "epoch": 0.7400373417352237, "grad_norm": 0.7377216085161417, "learning_rate": 1.670035109260711e-06, "loss": 0.1096, "step": 25367 }, { "epoch": 0.7400665149658673, "grad_norm": 0.781221612603949, "learning_rate": 1.6696827080824784e-06, "loss": 0.094, "step": 25368 }, { "epoch": 0.7400956881965108, "grad_norm": 1.2490721073710185, "learning_rate": 1.6693303366365205e-06, "loss": 0.1285, "step": 25369 }, { "epoch": 0.7401248614271544, "grad_norm": 1.140595970412751, "learning_rate": 1.6689779949259894e-06, "loss": 0.1423, "step": 25370 }, { "epoch": 0.740154034657798, "grad_norm": 1.1134970041962298, "learning_rate": 1.6686256829540282e-06, "loss": 0.1228, "step": 25371 }, { "epoch": 0.7401832078884416, "grad_norm": 0.9452682178795438, "learning_rate": 1.6682734007237793e-06, "loss": 0.1272, "step": 25372 }, { "epoch": 0.7402123811190852, "grad_norm": 1.035749661831755, "learning_rate": 1.6679211482383923e-06, "loss": 0.0964, "step": 25373 }, { "epoch": 0.7402415543497287, "grad_norm": 0.7226682253429818, "learning_rate": 1.6675689255010098e-06, "loss": 0.1007, "step": 25374 }, { "epoch": 0.7402707275803723, "grad_norm": 0.8096881255591155, "learning_rate": 1.6672167325147741e-06, "loss": 0.1082, "step": 25375 }, { "epoch": 0.7402999008110158, "grad_norm": 0.897310494971551, "learning_rate": 1.6668645692828323e-06, "loss": 0.0941, "step": 25376 }, { "epoch": 0.7403290740416594, "grad_norm": 0.9521364435395179, "learning_rate": 1.6665124358083296e-06, "loss": 0.1282, "step": 25377 }, { "epoch": 0.7403582472723029, "grad_norm": 1.2353152986653075, "learning_rate": 1.666160332094408e-06, "loss": 0.1183, "step": 25378 }, { "epoch": 0.7403874205029465, "grad_norm": 0.7693406591470277, "learning_rate": 1.6658082581442098e-06, "loss": 0.1218, "step": 25379 }, { "epoch": 0.74041659373359, "grad_norm": 0.7983395794475866, "learning_rate": 1.665456213960881e-06, "loss": 0.1289, "step": 25380 }, { "epoch": 0.7404457669642336, "grad_norm": 0.8501129599977921, "learning_rate": 1.6651041995475613e-06, "loss": 0.1125, "step": 25381 }, { "epoch": 0.7404749401948771, "grad_norm": 0.8171686588634324, "learning_rate": 1.664752214907397e-06, "loss": 0.1608, "step": 25382 }, { "epoch": 0.7405041134255207, "grad_norm": 0.927952847881401, "learning_rate": 1.6644002600435267e-06, "loss": 0.1194, "step": 25383 }, { "epoch": 0.7405332866561644, "grad_norm": 0.8322235908395166, "learning_rate": 1.664048334959097e-06, "loss": 0.1007, "step": 25384 }, { "epoch": 0.7405624598868079, "grad_norm": 0.7438857772698875, "learning_rate": 1.663696439657247e-06, "loss": 0.105, "step": 25385 }, { "epoch": 0.7405916331174515, "grad_norm": 0.7538778802375633, "learning_rate": 1.6633445741411169e-06, "loss": 0.1027, "step": 25386 }, { "epoch": 0.740620806348095, "grad_norm": 1.1904370590106432, "learning_rate": 1.66299273841385e-06, "loss": 0.1211, "step": 25387 }, { "epoch": 0.7406499795787386, "grad_norm": 1.5398770808674112, "learning_rate": 1.662640932478589e-06, "loss": 0.1079, "step": 25388 }, { "epoch": 0.7406791528093821, "grad_norm": 0.8759431557643031, "learning_rate": 1.6622891563384714e-06, "loss": 0.1098, "step": 25389 }, { "epoch": 0.7407083260400257, "grad_norm": 0.7902504275915255, "learning_rate": 1.6619374099966412e-06, "loss": 0.1224, "step": 25390 }, { "epoch": 0.7407374992706692, "grad_norm": 1.162438744624882, "learning_rate": 1.661585693456237e-06, "loss": 0.1273, "step": 25391 }, { "epoch": 0.7407666725013128, "grad_norm": 0.9169285785607442, "learning_rate": 1.6612340067203968e-06, "loss": 0.1168, "step": 25392 }, { "epoch": 0.7407958457319563, "grad_norm": 0.8393633412940993, "learning_rate": 1.6608823497922626e-06, "loss": 0.1108, "step": 25393 }, { "epoch": 0.7408250189625999, "grad_norm": 0.6990912682692665, "learning_rate": 1.6605307226749757e-06, "loss": 0.1095, "step": 25394 }, { "epoch": 0.7408541921932434, "grad_norm": 0.9879222376101539, "learning_rate": 1.6601791253716725e-06, "loss": 0.1415, "step": 25395 }, { "epoch": 0.740883365423887, "grad_norm": 1.1348303097250978, "learning_rate": 1.6598275578854917e-06, "loss": 0.1076, "step": 25396 }, { "epoch": 0.7409125386545306, "grad_norm": 0.9411588160906919, "learning_rate": 1.6594760202195749e-06, "loss": 0.1228, "step": 25397 }, { "epoch": 0.7409417118851742, "grad_norm": 0.8093755643714603, "learning_rate": 1.6591245123770583e-06, "loss": 0.1162, "step": 25398 }, { "epoch": 0.7409708851158178, "grad_norm": 1.0155090857890545, "learning_rate": 1.6587730343610776e-06, "loss": 0.1288, "step": 25399 }, { "epoch": 0.7410000583464613, "grad_norm": 0.8846906618267024, "learning_rate": 1.6584215861747766e-06, "loss": 0.1101, "step": 25400 }, { "epoch": 0.7410292315771049, "grad_norm": 0.7678375805109864, "learning_rate": 1.65807016782129e-06, "loss": 0.1123, "step": 25401 }, { "epoch": 0.7410584048077484, "grad_norm": 0.7614403773250501, "learning_rate": 1.6577187793037535e-06, "loss": 0.1152, "step": 25402 }, { "epoch": 0.741087578038392, "grad_norm": 0.7680447139282883, "learning_rate": 1.6573674206253077e-06, "loss": 0.1033, "step": 25403 }, { "epoch": 0.7411167512690355, "grad_norm": 0.7959803953095608, "learning_rate": 1.6570160917890876e-06, "loss": 0.1067, "step": 25404 }, { "epoch": 0.7411459244996791, "grad_norm": 0.86153092292329, "learning_rate": 1.6566647927982283e-06, "loss": 0.1366, "step": 25405 }, { "epoch": 0.7411750977303226, "grad_norm": 0.8970047267704228, "learning_rate": 1.6563135236558675e-06, "loss": 0.1234, "step": 25406 }, { "epoch": 0.7412042709609662, "grad_norm": 0.8237904485101333, "learning_rate": 1.6559622843651429e-06, "loss": 0.1032, "step": 25407 }, { "epoch": 0.7412334441916097, "grad_norm": 0.7105008058025912, "learning_rate": 1.6556110749291888e-06, "loss": 0.1236, "step": 25408 }, { "epoch": 0.7412626174222533, "grad_norm": 0.7860115769909971, "learning_rate": 1.655259895351139e-06, "loss": 0.1072, "step": 25409 }, { "epoch": 0.7412917906528969, "grad_norm": 0.8804442229356237, "learning_rate": 1.6549087456341317e-06, "loss": 0.1257, "step": 25410 }, { "epoch": 0.7413209638835405, "grad_norm": 0.8105582031140881, "learning_rate": 1.6545576257812995e-06, "loss": 0.1246, "step": 25411 }, { "epoch": 0.7413501371141841, "grad_norm": 0.789937401807501, "learning_rate": 1.6542065357957793e-06, "loss": 0.1296, "step": 25412 }, { "epoch": 0.7413793103448276, "grad_norm": 1.0434908734170527, "learning_rate": 1.6538554756807035e-06, "loss": 0.151, "step": 25413 }, { "epoch": 0.7414084835754712, "grad_norm": 0.8006444351043412, "learning_rate": 1.653504445439208e-06, "loss": 0.1096, "step": 25414 }, { "epoch": 0.7414376568061147, "grad_norm": 0.838615604229827, "learning_rate": 1.6531534450744268e-06, "loss": 0.1339, "step": 25415 }, { "epoch": 0.7414668300367583, "grad_norm": 0.8090596873840712, "learning_rate": 1.6528024745894904e-06, "loss": 0.1156, "step": 25416 }, { "epoch": 0.7414960032674018, "grad_norm": 1.1246619977451124, "learning_rate": 1.6524515339875346e-06, "loss": 0.1187, "step": 25417 }, { "epoch": 0.7415251764980454, "grad_norm": 0.7490263473434614, "learning_rate": 1.6521006232716941e-06, "loss": 0.1124, "step": 25418 }, { "epoch": 0.7415543497286889, "grad_norm": 0.7352819684613954, "learning_rate": 1.6517497424450985e-06, "loss": 0.15, "step": 25419 }, { "epoch": 0.7415835229593325, "grad_norm": 0.8687632301078961, "learning_rate": 1.6513988915108836e-06, "loss": 0.1202, "step": 25420 }, { "epoch": 0.741612696189976, "grad_norm": 0.7663999610481497, "learning_rate": 1.6510480704721798e-06, "loss": 0.119, "step": 25421 }, { "epoch": 0.7416418694206196, "grad_norm": 0.7455157376620623, "learning_rate": 1.650697279332118e-06, "loss": 0.1533, "step": 25422 }, { "epoch": 0.7416710426512632, "grad_norm": 0.7097885563794665, "learning_rate": 1.650346518093831e-06, "loss": 0.1073, "step": 25423 }, { "epoch": 0.7417002158819067, "grad_norm": 0.8284290309907514, "learning_rate": 1.6499957867604527e-06, "loss": 0.1112, "step": 25424 }, { "epoch": 0.7417293891125504, "grad_norm": 0.7962707894633176, "learning_rate": 1.649645085335112e-06, "loss": 0.1107, "step": 25425 }, { "epoch": 0.7417585623431939, "grad_norm": 0.7968001948841206, "learning_rate": 1.6492944138209382e-06, "loss": 0.1284, "step": 25426 }, { "epoch": 0.7417877355738375, "grad_norm": 1.2010669149237727, "learning_rate": 1.648943772221066e-06, "loss": 0.1276, "step": 25427 }, { "epoch": 0.741816908804481, "grad_norm": 0.7628615958201201, "learning_rate": 1.648593160538624e-06, "loss": 0.0869, "step": 25428 }, { "epoch": 0.7418460820351246, "grad_norm": 0.6660997680511695, "learning_rate": 1.6482425787767392e-06, "loss": 0.0993, "step": 25429 }, { "epoch": 0.7418752552657681, "grad_norm": 0.9096575575987993, "learning_rate": 1.6478920269385472e-06, "loss": 0.1365, "step": 25430 }, { "epoch": 0.7419044284964117, "grad_norm": 1.1179123581038652, "learning_rate": 1.6475415050271754e-06, "loss": 0.1183, "step": 25431 }, { "epoch": 0.7419336017270552, "grad_norm": 0.8847006034413464, "learning_rate": 1.6471910130457508e-06, "loss": 0.1199, "step": 25432 }, { "epoch": 0.7419627749576988, "grad_norm": 0.9165676425464232, "learning_rate": 1.646840550997406e-06, "loss": 0.123, "step": 25433 }, { "epoch": 0.7419919481883424, "grad_norm": 1.2480362274106354, "learning_rate": 1.6464901188852684e-06, "loss": 0.1234, "step": 25434 }, { "epoch": 0.7420211214189859, "grad_norm": 0.7311462670928583, "learning_rate": 1.646139716712465e-06, "loss": 0.0972, "step": 25435 }, { "epoch": 0.7420502946496295, "grad_norm": 1.0648678979346429, "learning_rate": 1.6457893444821255e-06, "loss": 0.1195, "step": 25436 }, { "epoch": 0.742079467880273, "grad_norm": 0.9334327651337558, "learning_rate": 1.6454390021973798e-06, "loss": 0.1074, "step": 25437 }, { "epoch": 0.7421086411109167, "grad_norm": 0.7848374413560427, "learning_rate": 1.6450886898613538e-06, "loss": 0.1266, "step": 25438 }, { "epoch": 0.7421378143415602, "grad_norm": 0.9621393073657919, "learning_rate": 1.6447384074771732e-06, "loss": 0.1237, "step": 25439 }, { "epoch": 0.7421669875722038, "grad_norm": 0.9061550032611203, "learning_rate": 1.644388155047969e-06, "loss": 0.1102, "step": 25440 }, { "epoch": 0.7421961608028473, "grad_norm": 1.186165421768891, "learning_rate": 1.6440379325768646e-06, "loss": 0.1064, "step": 25441 }, { "epoch": 0.7422253340334909, "grad_norm": 1.1411524256563175, "learning_rate": 1.6436877400669904e-06, "loss": 0.1158, "step": 25442 }, { "epoch": 0.7422545072641344, "grad_norm": 0.9063056563284617, "learning_rate": 1.643337577521469e-06, "loss": 0.0974, "step": 25443 }, { "epoch": 0.742283680494778, "grad_norm": 0.9133675605781577, "learning_rate": 1.6429874449434297e-06, "loss": 0.1212, "step": 25444 }, { "epoch": 0.7423128537254216, "grad_norm": 1.1041187593131603, "learning_rate": 1.6426373423359975e-06, "loss": 0.1201, "step": 25445 }, { "epoch": 0.7423420269560651, "grad_norm": 1.051387351379852, "learning_rate": 1.6422872697022958e-06, "loss": 0.1025, "step": 25446 }, { "epoch": 0.7423712001867087, "grad_norm": 0.9285985697730978, "learning_rate": 1.641937227045452e-06, "loss": 0.1158, "step": 25447 }, { "epoch": 0.7424003734173522, "grad_norm": 1.0951366644053775, "learning_rate": 1.6415872143685924e-06, "loss": 0.1423, "step": 25448 }, { "epoch": 0.7424295466479958, "grad_norm": 1.001333933093759, "learning_rate": 1.6412372316748387e-06, "loss": 0.1102, "step": 25449 }, { "epoch": 0.7424587198786393, "grad_norm": 0.8529908056140938, "learning_rate": 1.640887278967319e-06, "loss": 0.1353, "step": 25450 }, { "epoch": 0.7424878931092829, "grad_norm": 0.6451206073086674, "learning_rate": 1.6405373562491562e-06, "loss": 0.1205, "step": 25451 }, { "epoch": 0.7425170663399265, "grad_norm": 0.7512592341763321, "learning_rate": 1.6401874635234716e-06, "loss": 0.1365, "step": 25452 }, { "epoch": 0.7425462395705701, "grad_norm": 0.6750821858785784, "learning_rate": 1.6398376007933914e-06, "loss": 0.1057, "step": 25453 }, { "epoch": 0.7425754128012136, "grad_norm": 0.8788521283989128, "learning_rate": 1.6394877680620407e-06, "loss": 0.1235, "step": 25454 }, { "epoch": 0.7426045860318572, "grad_norm": 0.6389843363690554, "learning_rate": 1.6391379653325412e-06, "loss": 0.1023, "step": 25455 }, { "epoch": 0.7426337592625007, "grad_norm": 0.7921985564656653, "learning_rate": 1.638788192608014e-06, "loss": 0.1208, "step": 25456 }, { "epoch": 0.7426629324931443, "grad_norm": 0.8358770450717694, "learning_rate": 1.6384384498915844e-06, "loss": 0.1136, "step": 25457 }, { "epoch": 0.7426921057237879, "grad_norm": 0.6269270825396777, "learning_rate": 1.6380887371863747e-06, "loss": 0.1143, "step": 25458 }, { "epoch": 0.7427212789544314, "grad_norm": 0.7772857358708563, "learning_rate": 1.6377390544955024e-06, "loss": 0.1121, "step": 25459 }, { "epoch": 0.742750452185075, "grad_norm": 0.8009850369783482, "learning_rate": 1.6373894018220971e-06, "loss": 0.1038, "step": 25460 }, { "epoch": 0.7427796254157185, "grad_norm": 0.9487626611399563, "learning_rate": 1.637039779169276e-06, "loss": 0.1236, "step": 25461 }, { "epoch": 0.7428087986463621, "grad_norm": 0.7575777363808675, "learning_rate": 1.6366901865401592e-06, "loss": 0.1101, "step": 25462 }, { "epoch": 0.7428379718770056, "grad_norm": 0.8093174230273426, "learning_rate": 1.6363406239378715e-06, "loss": 0.1219, "step": 25463 }, { "epoch": 0.7428671451076492, "grad_norm": 0.8523740167678296, "learning_rate": 1.6359910913655314e-06, "loss": 0.1119, "step": 25464 }, { "epoch": 0.7428963183382927, "grad_norm": 0.8158904565755344, "learning_rate": 1.6356415888262583e-06, "loss": 0.1242, "step": 25465 }, { "epoch": 0.7429254915689364, "grad_norm": 0.7557731574314885, "learning_rate": 1.6352921163231738e-06, "loss": 0.1212, "step": 25466 }, { "epoch": 0.74295466479958, "grad_norm": 1.5021850666971923, "learning_rate": 1.6349426738594e-06, "loss": 0.1005, "step": 25467 }, { "epoch": 0.7429838380302235, "grad_norm": 0.8435779966046582, "learning_rate": 1.634593261438055e-06, "loss": 0.1078, "step": 25468 }, { "epoch": 0.743013011260867, "grad_norm": 0.6486910102729132, "learning_rate": 1.6342438790622556e-06, "loss": 0.1117, "step": 25469 }, { "epoch": 0.7430421844915106, "grad_norm": 0.7197599091949821, "learning_rate": 1.6338945267351253e-06, "loss": 0.1015, "step": 25470 }, { "epoch": 0.7430713577221542, "grad_norm": 0.9144436898866133, "learning_rate": 1.6335452044597794e-06, "loss": 0.1023, "step": 25471 }, { "epoch": 0.7431005309527977, "grad_norm": 0.6483596145456543, "learning_rate": 1.6331959122393405e-06, "loss": 0.1117, "step": 25472 }, { "epoch": 0.7431297041834413, "grad_norm": 0.787331992253388, "learning_rate": 1.6328466500769225e-06, "loss": 0.1115, "step": 25473 }, { "epoch": 0.7431588774140848, "grad_norm": 0.9774850073862337, "learning_rate": 1.6324974179756476e-06, "loss": 0.1122, "step": 25474 }, { "epoch": 0.7431880506447284, "grad_norm": 0.7852460671945108, "learning_rate": 1.6321482159386314e-06, "loss": 0.1164, "step": 25475 }, { "epoch": 0.7432172238753719, "grad_norm": 0.8141108891769548, "learning_rate": 1.6317990439689913e-06, "loss": 0.1035, "step": 25476 }, { "epoch": 0.7432463971060155, "grad_norm": 0.8439670860992345, "learning_rate": 1.6314499020698444e-06, "loss": 0.1224, "step": 25477 }, { "epoch": 0.743275570336659, "grad_norm": 0.8902089647834072, "learning_rate": 1.631100790244311e-06, "loss": 0.109, "step": 25478 }, { "epoch": 0.7433047435673027, "grad_norm": 0.9622996545312251, "learning_rate": 1.6307517084955033e-06, "loss": 0.1022, "step": 25479 }, { "epoch": 0.7433339167979462, "grad_norm": 0.8769423630598164, "learning_rate": 1.630402656826542e-06, "loss": 0.1164, "step": 25480 }, { "epoch": 0.7433630900285898, "grad_norm": 1.1854201881829816, "learning_rate": 1.630053635240541e-06, "loss": 0.1146, "step": 25481 }, { "epoch": 0.7433922632592334, "grad_norm": 0.8432987016520562, "learning_rate": 1.6297046437406156e-06, "loss": 0.1022, "step": 25482 }, { "epoch": 0.7434214364898769, "grad_norm": 0.7889299292369527, "learning_rate": 1.6293556823298823e-06, "loss": 0.133, "step": 25483 }, { "epoch": 0.7434506097205205, "grad_norm": 1.0117205907326565, "learning_rate": 1.6290067510114583e-06, "loss": 0.1201, "step": 25484 }, { "epoch": 0.743479782951164, "grad_norm": 1.0412832235517318, "learning_rate": 1.6286578497884575e-06, "loss": 0.098, "step": 25485 }, { "epoch": 0.7435089561818076, "grad_norm": 0.8620017663603526, "learning_rate": 1.6283089786639933e-06, "loss": 0.1286, "step": 25486 }, { "epoch": 0.7435381294124511, "grad_norm": 1.133428864430782, "learning_rate": 1.627960137641183e-06, "loss": 0.1316, "step": 25487 }, { "epoch": 0.7435673026430947, "grad_norm": 0.7748872761268861, "learning_rate": 1.6276113267231392e-06, "loss": 0.1254, "step": 25488 }, { "epoch": 0.7435964758737382, "grad_norm": 0.8168004763587532, "learning_rate": 1.6272625459129737e-06, "loss": 0.1147, "step": 25489 }, { "epoch": 0.7436256491043818, "grad_norm": 0.7334192510394804, "learning_rate": 1.6269137952138064e-06, "loss": 0.1153, "step": 25490 }, { "epoch": 0.7436548223350253, "grad_norm": 1.0478577222605305, "learning_rate": 1.626565074628747e-06, "loss": 0.134, "step": 25491 }, { "epoch": 0.7436839955656689, "grad_norm": 0.8121206573306156, "learning_rate": 1.626216384160908e-06, "loss": 0.1189, "step": 25492 }, { "epoch": 0.7437131687963126, "grad_norm": 0.9527340094945739, "learning_rate": 1.6258677238134052e-06, "loss": 0.1017, "step": 25493 }, { "epoch": 0.7437423420269561, "grad_norm": 0.8191193835618129, "learning_rate": 1.62551909358935e-06, "loss": 0.1368, "step": 25494 }, { "epoch": 0.7437715152575997, "grad_norm": 0.8564678401155142, "learning_rate": 1.6251704934918533e-06, "loss": 0.0997, "step": 25495 }, { "epoch": 0.7438006884882432, "grad_norm": 0.7826307777950089, "learning_rate": 1.6248219235240287e-06, "loss": 0.1035, "step": 25496 }, { "epoch": 0.7438298617188868, "grad_norm": 0.8216581390653788, "learning_rate": 1.6244733836889897e-06, "loss": 0.1015, "step": 25497 }, { "epoch": 0.7438590349495303, "grad_norm": 0.7795015813151074, "learning_rate": 1.6241248739898469e-06, "loss": 0.1241, "step": 25498 }, { "epoch": 0.7438882081801739, "grad_norm": 0.9220823397389473, "learning_rate": 1.623776394429709e-06, "loss": 0.1162, "step": 25499 }, { "epoch": 0.7439173814108174, "grad_norm": 0.8041320740082133, "learning_rate": 1.6234279450116918e-06, "loss": 0.0977, "step": 25500 }, { "epoch": 0.743946554641461, "grad_norm": 0.7047822171205932, "learning_rate": 1.6230795257389021e-06, "loss": 0.1311, "step": 25501 }, { "epoch": 0.7439757278721045, "grad_norm": 0.7572621591783532, "learning_rate": 1.6227311366144538e-06, "loss": 0.1121, "step": 25502 }, { "epoch": 0.7440049011027481, "grad_norm": 1.0272799798139245, "learning_rate": 1.622382777641454e-06, "loss": 0.1148, "step": 25503 }, { "epoch": 0.7440340743333916, "grad_norm": 0.8133294697702453, "learning_rate": 1.622034448823016e-06, "loss": 0.1005, "step": 25504 }, { "epoch": 0.7440632475640352, "grad_norm": 0.8817510751531433, "learning_rate": 1.6216861501622483e-06, "loss": 0.1212, "step": 25505 }, { "epoch": 0.7440924207946789, "grad_norm": 1.0316321980202487, "learning_rate": 1.6213378816622583e-06, "loss": 0.1438, "step": 25506 }, { "epoch": 0.7441215940253224, "grad_norm": 0.7901698333667843, "learning_rate": 1.6209896433261573e-06, "loss": 0.1144, "step": 25507 }, { "epoch": 0.744150767255966, "grad_norm": 0.7283455912549498, "learning_rate": 1.620641435157056e-06, "loss": 0.1242, "step": 25508 }, { "epoch": 0.7441799404866095, "grad_norm": 1.124404937433288, "learning_rate": 1.6202932571580593e-06, "loss": 0.1234, "step": 25509 }, { "epoch": 0.7442091137172531, "grad_norm": 0.741831345451023, "learning_rate": 1.6199451093322794e-06, "loss": 0.09, "step": 25510 }, { "epoch": 0.7442382869478966, "grad_norm": 0.8475788042988227, "learning_rate": 1.6195969916828224e-06, "loss": 0.0986, "step": 25511 }, { "epoch": 0.7442674601785402, "grad_norm": 1.1727542967840356, "learning_rate": 1.619248904212795e-06, "loss": 0.1177, "step": 25512 }, { "epoch": 0.7442966334091837, "grad_norm": 0.963286441570922, "learning_rate": 1.6189008469253064e-06, "loss": 0.1264, "step": 25513 }, { "epoch": 0.7443258066398273, "grad_norm": 1.0430121824220553, "learning_rate": 1.6185528198234656e-06, "loss": 0.1297, "step": 25514 }, { "epoch": 0.7443549798704708, "grad_norm": 0.8351046293034777, "learning_rate": 1.6182048229103774e-06, "loss": 0.1172, "step": 25515 }, { "epoch": 0.7443841531011144, "grad_norm": 0.8808831723939586, "learning_rate": 1.6178568561891484e-06, "loss": 0.1146, "step": 25516 }, { "epoch": 0.7444133263317579, "grad_norm": 1.0297276449556383, "learning_rate": 1.6175089196628874e-06, "loss": 0.1053, "step": 25517 }, { "epoch": 0.7444424995624015, "grad_norm": 0.7246997990728504, "learning_rate": 1.6171610133346992e-06, "loss": 0.1078, "step": 25518 }, { "epoch": 0.744471672793045, "grad_norm": 0.8487178867770659, "learning_rate": 1.6168131372076868e-06, "loss": 0.1335, "step": 25519 }, { "epoch": 0.7445008460236887, "grad_norm": 1.2413324814821267, "learning_rate": 1.616465291284962e-06, "loss": 0.1137, "step": 25520 }, { "epoch": 0.7445300192543323, "grad_norm": 0.8236342336085557, "learning_rate": 1.616117475569628e-06, "loss": 0.1131, "step": 25521 }, { "epoch": 0.7445591924849758, "grad_norm": 0.903432158000206, "learning_rate": 1.6157696900647874e-06, "loss": 0.1234, "step": 25522 }, { "epoch": 0.7445883657156194, "grad_norm": 1.0895261908075224, "learning_rate": 1.6154219347735484e-06, "loss": 0.1453, "step": 25523 }, { "epoch": 0.7446175389462629, "grad_norm": 1.2394685697188887, "learning_rate": 1.6150742096990151e-06, "loss": 0.127, "step": 25524 }, { "epoch": 0.7446467121769065, "grad_norm": 0.7495244324670081, "learning_rate": 1.6147265148442892e-06, "loss": 0.1084, "step": 25525 }, { "epoch": 0.74467588540755, "grad_norm": 0.6213348733752562, "learning_rate": 1.6143788502124768e-06, "loss": 0.1036, "step": 25526 }, { "epoch": 0.7447050586381936, "grad_norm": 0.8996843840594385, "learning_rate": 1.6140312158066834e-06, "loss": 0.1032, "step": 25527 }, { "epoch": 0.7447342318688371, "grad_norm": 1.1597407651426115, "learning_rate": 1.6136836116300109e-06, "loss": 0.1166, "step": 25528 }, { "epoch": 0.7447634050994807, "grad_norm": 0.6980797666475563, "learning_rate": 1.6133360376855616e-06, "loss": 0.1098, "step": 25529 }, { "epoch": 0.7447925783301242, "grad_norm": 0.6870208505510624, "learning_rate": 1.6129884939764396e-06, "loss": 0.0947, "step": 25530 }, { "epoch": 0.7448217515607678, "grad_norm": 0.7926473302234334, "learning_rate": 1.6126409805057492e-06, "loss": 0.1198, "step": 25531 }, { "epoch": 0.7448509247914114, "grad_norm": 0.9145850364045315, "learning_rate": 1.6122934972765914e-06, "loss": 0.1071, "step": 25532 }, { "epoch": 0.744880098022055, "grad_norm": 1.2818810250205608, "learning_rate": 1.611946044292067e-06, "loss": 0.1108, "step": 25533 }, { "epoch": 0.7449092712526986, "grad_norm": 0.8039091127731487, "learning_rate": 1.6115986215552808e-06, "loss": 0.098, "step": 25534 }, { "epoch": 0.7449384444833421, "grad_norm": 0.8527214430764477, "learning_rate": 1.6112512290693338e-06, "loss": 0.1416, "step": 25535 }, { "epoch": 0.7449676177139857, "grad_norm": 0.970098336114009, "learning_rate": 1.6109038668373234e-06, "loss": 0.0991, "step": 25536 }, { "epoch": 0.7449967909446292, "grad_norm": 1.1187396466934256, "learning_rate": 1.6105565348623574e-06, "loss": 0.1327, "step": 25537 }, { "epoch": 0.7450259641752728, "grad_norm": 0.9311686004770586, "learning_rate": 1.6102092331475339e-06, "loss": 0.1037, "step": 25538 }, { "epoch": 0.7450551374059163, "grad_norm": 0.8785398287443141, "learning_rate": 1.609861961695951e-06, "loss": 0.0931, "step": 25539 }, { "epoch": 0.7450843106365599, "grad_norm": 0.8494143320744586, "learning_rate": 1.609514720510713e-06, "loss": 0.106, "step": 25540 }, { "epoch": 0.7451134838672034, "grad_norm": 0.9415105827703377, "learning_rate": 1.6091675095949189e-06, "loss": 0.112, "step": 25541 }, { "epoch": 0.745142657097847, "grad_norm": 0.8763081132105256, "learning_rate": 1.6088203289516652e-06, "loss": 0.1271, "step": 25542 }, { "epoch": 0.7451718303284905, "grad_norm": 0.8929431481682392, "learning_rate": 1.6084731785840547e-06, "loss": 0.116, "step": 25543 }, { "epoch": 0.7452010035591341, "grad_norm": 0.9028324909614509, "learning_rate": 1.6081260584951875e-06, "loss": 0.1381, "step": 25544 }, { "epoch": 0.7452301767897777, "grad_norm": 0.9750611600946603, "learning_rate": 1.6077789686881611e-06, "loss": 0.114, "step": 25545 }, { "epoch": 0.7452593500204212, "grad_norm": 0.8078006404430004, "learning_rate": 1.6074319091660723e-06, "loss": 0.1155, "step": 25546 }, { "epoch": 0.7452885232510649, "grad_norm": 1.018055942403003, "learning_rate": 1.6070848799320237e-06, "loss": 0.1269, "step": 25547 }, { "epoch": 0.7453176964817084, "grad_norm": 0.9232982422290381, "learning_rate": 1.6067378809891094e-06, "loss": 0.1179, "step": 25548 }, { "epoch": 0.745346869712352, "grad_norm": 1.1525596094515442, "learning_rate": 1.6063909123404298e-06, "loss": 0.131, "step": 25549 }, { "epoch": 0.7453760429429955, "grad_norm": 1.0587325693696445, "learning_rate": 1.6060439739890832e-06, "loss": 0.1091, "step": 25550 }, { "epoch": 0.7454052161736391, "grad_norm": 0.8698500911476206, "learning_rate": 1.6056970659381654e-06, "loss": 0.1147, "step": 25551 }, { "epoch": 0.7454343894042826, "grad_norm": 1.3338528907672536, "learning_rate": 1.6053501881907728e-06, "loss": 0.1243, "step": 25552 }, { "epoch": 0.7454635626349262, "grad_norm": 0.9939976643404548, "learning_rate": 1.6050033407500048e-06, "loss": 0.1241, "step": 25553 }, { "epoch": 0.7454927358655697, "grad_norm": 0.9490224930223939, "learning_rate": 1.6046565236189554e-06, "loss": 0.1121, "step": 25554 }, { "epoch": 0.7455219090962133, "grad_norm": 0.7675865964998464, "learning_rate": 1.6043097368007233e-06, "loss": 0.1245, "step": 25555 }, { "epoch": 0.7455510823268569, "grad_norm": 1.0211646104043877, "learning_rate": 1.6039629802984014e-06, "loss": 0.1104, "step": 25556 }, { "epoch": 0.7455802555575004, "grad_norm": 0.8369056220121306, "learning_rate": 1.603616254115089e-06, "loss": 0.1307, "step": 25557 }, { "epoch": 0.745609428788144, "grad_norm": 0.9513548401552328, "learning_rate": 1.6032695582538798e-06, "loss": 0.1043, "step": 25558 }, { "epoch": 0.7456386020187875, "grad_norm": 1.072861385359351, "learning_rate": 1.602922892717868e-06, "loss": 0.1342, "step": 25559 }, { "epoch": 0.7456677752494312, "grad_norm": 0.9539497317221474, "learning_rate": 1.602576257510149e-06, "loss": 0.1468, "step": 25560 }, { "epoch": 0.7456969484800747, "grad_norm": 0.6985725803914287, "learning_rate": 1.6022296526338204e-06, "loss": 0.0915, "step": 25561 }, { "epoch": 0.7457261217107183, "grad_norm": 1.3062122090407413, "learning_rate": 1.6018830780919741e-06, "loss": 0.1317, "step": 25562 }, { "epoch": 0.7457552949413618, "grad_norm": 1.0300371415741765, "learning_rate": 1.6015365338877025e-06, "loss": 0.1248, "step": 25563 }, { "epoch": 0.7457844681720054, "grad_norm": 0.8176422147947936, "learning_rate": 1.6011900200241038e-06, "loss": 0.0952, "step": 25564 }, { "epoch": 0.7458136414026489, "grad_norm": 0.8474100415831087, "learning_rate": 1.6008435365042685e-06, "loss": 0.1243, "step": 25565 }, { "epoch": 0.7458428146332925, "grad_norm": 5.006150667434528, "learning_rate": 1.6004970833312878e-06, "loss": 0.1086, "step": 25566 }, { "epoch": 0.745871987863936, "grad_norm": 0.8356199022025703, "learning_rate": 1.6001506605082605e-06, "loss": 0.1128, "step": 25567 }, { "epoch": 0.7459011610945796, "grad_norm": 0.8197559960530519, "learning_rate": 1.599804268038277e-06, "loss": 0.1076, "step": 25568 }, { "epoch": 0.7459303343252232, "grad_norm": 0.9229738169506888, "learning_rate": 1.5994579059244276e-06, "loss": 0.1304, "step": 25569 }, { "epoch": 0.7459595075558667, "grad_norm": 1.0226081550306922, "learning_rate": 1.5991115741698076e-06, "loss": 0.1314, "step": 25570 }, { "epoch": 0.7459886807865103, "grad_norm": 1.2285554130538987, "learning_rate": 1.5987652727775077e-06, "loss": 0.1086, "step": 25571 }, { "epoch": 0.7460178540171538, "grad_norm": 1.0954519130263571, "learning_rate": 1.598419001750618e-06, "loss": 0.1275, "step": 25572 }, { "epoch": 0.7460470272477974, "grad_norm": 0.8074544591816742, "learning_rate": 1.5980727610922315e-06, "loss": 0.133, "step": 25573 }, { "epoch": 0.746076200478441, "grad_norm": 1.2046636037278546, "learning_rate": 1.5977265508054408e-06, "loss": 0.1346, "step": 25574 }, { "epoch": 0.7461053737090846, "grad_norm": 0.8894811971962874, "learning_rate": 1.5973803708933355e-06, "loss": 0.1246, "step": 25575 }, { "epoch": 0.7461345469397281, "grad_norm": 0.988885625330251, "learning_rate": 1.597034221359004e-06, "loss": 0.1141, "step": 25576 }, { "epoch": 0.7461637201703717, "grad_norm": 0.9923304779624527, "learning_rate": 1.5966881022055403e-06, "loss": 0.1147, "step": 25577 }, { "epoch": 0.7461928934010152, "grad_norm": 1.0562825232150244, "learning_rate": 1.5963420134360313e-06, "loss": 0.1013, "step": 25578 }, { "epoch": 0.7462220666316588, "grad_norm": 0.9596395482609146, "learning_rate": 1.5959959550535682e-06, "loss": 0.119, "step": 25579 }, { "epoch": 0.7462512398623024, "grad_norm": 0.7517602565914026, "learning_rate": 1.595649927061242e-06, "loss": 0.1417, "step": 25580 }, { "epoch": 0.7462804130929459, "grad_norm": 1.026186386798158, "learning_rate": 1.595303929462141e-06, "loss": 0.1385, "step": 25581 }, { "epoch": 0.7463095863235895, "grad_norm": 0.862758481281522, "learning_rate": 1.594957962259352e-06, "loss": 0.119, "step": 25582 }, { "epoch": 0.746338759554233, "grad_norm": 0.7700798815742126, "learning_rate": 1.5946120254559666e-06, "loss": 0.1496, "step": 25583 }, { "epoch": 0.7463679327848766, "grad_norm": 0.8386103680514339, "learning_rate": 1.5942661190550713e-06, "loss": 0.1278, "step": 25584 }, { "epoch": 0.7463971060155201, "grad_norm": 1.0830627699934756, "learning_rate": 1.5939202430597562e-06, "loss": 0.1225, "step": 25585 }, { "epoch": 0.7464262792461637, "grad_norm": 0.9011135878300403, "learning_rate": 1.5935743974731065e-06, "loss": 0.125, "step": 25586 }, { "epoch": 0.7464554524768073, "grad_norm": 0.8177723284523348, "learning_rate": 1.593228582298213e-06, "loss": 0.1296, "step": 25587 }, { "epoch": 0.7464846257074509, "grad_norm": 0.7812770830191983, "learning_rate": 1.5928827975381617e-06, "loss": 0.1113, "step": 25588 }, { "epoch": 0.7465137989380944, "grad_norm": 1.1012562085984332, "learning_rate": 1.5925370431960373e-06, "loss": 0.085, "step": 25589 }, { "epoch": 0.746542972168738, "grad_norm": 0.7157208521702754, "learning_rate": 1.5921913192749288e-06, "loss": 0.1181, "step": 25590 }, { "epoch": 0.7465721453993815, "grad_norm": 0.6344133581036481, "learning_rate": 1.5918456257779248e-06, "loss": 0.1038, "step": 25591 }, { "epoch": 0.7466013186300251, "grad_norm": 0.8730673364442029, "learning_rate": 1.5914999627081096e-06, "loss": 0.1163, "step": 25592 }, { "epoch": 0.7466304918606687, "grad_norm": 1.073997424960767, "learning_rate": 1.5911543300685667e-06, "loss": 0.099, "step": 25593 }, { "epoch": 0.7466596650913122, "grad_norm": 0.8947140763807463, "learning_rate": 1.5908087278623863e-06, "loss": 0.1311, "step": 25594 }, { "epoch": 0.7466888383219558, "grad_norm": 0.6886352924022845, "learning_rate": 1.5904631560926515e-06, "loss": 0.1223, "step": 25595 }, { "epoch": 0.7467180115525993, "grad_norm": 0.8302032427528294, "learning_rate": 1.5901176147624448e-06, "loss": 0.0969, "step": 25596 }, { "epoch": 0.7467471847832429, "grad_norm": 0.7117401002308471, "learning_rate": 1.589772103874857e-06, "loss": 0.1056, "step": 25597 }, { "epoch": 0.7467763580138864, "grad_norm": 0.9721643354323618, "learning_rate": 1.5894266234329697e-06, "loss": 0.1454, "step": 25598 }, { "epoch": 0.74680553124453, "grad_norm": 0.8414748072462078, "learning_rate": 1.5890811734398659e-06, "loss": 0.1261, "step": 25599 }, { "epoch": 0.7468347044751735, "grad_norm": 0.8829117922438163, "learning_rate": 1.588735753898633e-06, "loss": 0.1059, "step": 25600 }, { "epoch": 0.7468638777058172, "grad_norm": 1.0462696314783562, "learning_rate": 1.5883903648123528e-06, "loss": 0.1199, "step": 25601 }, { "epoch": 0.7468930509364607, "grad_norm": 0.6695386273924907, "learning_rate": 1.588045006184107e-06, "loss": 0.1214, "step": 25602 }, { "epoch": 0.7469222241671043, "grad_norm": 1.2591927558180418, "learning_rate": 1.5876996780169813e-06, "loss": 0.1095, "step": 25603 }, { "epoch": 0.7469513973977479, "grad_norm": 0.8860847365222658, "learning_rate": 1.5873543803140594e-06, "loss": 0.1179, "step": 25604 }, { "epoch": 0.7469805706283914, "grad_norm": 0.9533108212734227, "learning_rate": 1.5870091130784237e-06, "loss": 0.128, "step": 25605 }, { "epoch": 0.747009743859035, "grad_norm": 0.9225178779363759, "learning_rate": 1.5866638763131536e-06, "loss": 0.1128, "step": 25606 }, { "epoch": 0.7470389170896785, "grad_norm": 0.8778929331649101, "learning_rate": 1.5863186700213356e-06, "loss": 0.1409, "step": 25607 }, { "epoch": 0.7470680903203221, "grad_norm": 0.8500698613801462, "learning_rate": 1.5859734942060479e-06, "loss": 0.1463, "step": 25608 }, { "epoch": 0.7470972635509656, "grad_norm": 0.916773226968268, "learning_rate": 1.5856283488703738e-06, "loss": 0.1208, "step": 25609 }, { "epoch": 0.7471264367816092, "grad_norm": 1.3881675992267661, "learning_rate": 1.5852832340173962e-06, "loss": 0.108, "step": 25610 }, { "epoch": 0.7471556100122527, "grad_norm": 0.9720055053802819, "learning_rate": 1.5849381496501948e-06, "loss": 0.1109, "step": 25611 }, { "epoch": 0.7471847832428963, "grad_norm": 0.7714399454441949, "learning_rate": 1.5845930957718491e-06, "loss": 0.1191, "step": 25612 }, { "epoch": 0.7472139564735398, "grad_norm": 0.9274973907083488, "learning_rate": 1.584248072385442e-06, "loss": 0.1281, "step": 25613 }, { "epoch": 0.7472431297041835, "grad_norm": 0.853891497627841, "learning_rate": 1.5839030794940513e-06, "loss": 0.1084, "step": 25614 }, { "epoch": 0.747272302934827, "grad_norm": 0.7458355945066203, "learning_rate": 1.5835581171007603e-06, "loss": 0.1211, "step": 25615 }, { "epoch": 0.7473014761654706, "grad_norm": 0.8022379384011737, "learning_rate": 1.5832131852086452e-06, "loss": 0.117, "step": 25616 }, { "epoch": 0.7473306493961142, "grad_norm": 0.8887473459433449, "learning_rate": 1.5828682838207882e-06, "loss": 0.1104, "step": 25617 }, { "epoch": 0.7473598226267577, "grad_norm": 0.7825986872831625, "learning_rate": 1.5825234129402679e-06, "loss": 0.1281, "step": 25618 }, { "epoch": 0.7473889958574013, "grad_norm": 0.7345869986556904, "learning_rate": 1.582178572570161e-06, "loss": 0.1173, "step": 25619 }, { "epoch": 0.7474181690880448, "grad_norm": 0.820942982376986, "learning_rate": 1.5818337627135477e-06, "loss": 0.1277, "step": 25620 }, { "epoch": 0.7474473423186884, "grad_norm": 0.9539420149368407, "learning_rate": 1.5814889833735087e-06, "loss": 0.1124, "step": 25621 }, { "epoch": 0.7474765155493319, "grad_norm": 0.8295040468938645, "learning_rate": 1.5811442345531197e-06, "loss": 0.1097, "step": 25622 }, { "epoch": 0.7475056887799755, "grad_norm": 0.7998331738142896, "learning_rate": 1.5807995162554574e-06, "loss": 0.1343, "step": 25623 }, { "epoch": 0.747534862010619, "grad_norm": 0.9058265781022713, "learning_rate": 1.5804548284836018e-06, "loss": 0.1434, "step": 25624 }, { "epoch": 0.7475640352412626, "grad_norm": 0.77370724980713, "learning_rate": 1.5801101712406296e-06, "loss": 0.1069, "step": 25625 }, { "epoch": 0.7475932084719061, "grad_norm": 0.8491998827047683, "learning_rate": 1.5797655445296146e-06, "loss": 0.1051, "step": 25626 }, { "epoch": 0.7476223817025497, "grad_norm": 0.9163458204771122, "learning_rate": 1.5794209483536388e-06, "loss": 0.1179, "step": 25627 }, { "epoch": 0.7476515549331934, "grad_norm": 0.9323202469232902, "learning_rate": 1.5790763827157769e-06, "loss": 0.1285, "step": 25628 }, { "epoch": 0.7476807281638369, "grad_norm": 0.8657285290055365, "learning_rate": 1.5787318476191021e-06, "loss": 0.1163, "step": 25629 }, { "epoch": 0.7477099013944805, "grad_norm": 0.874499824021466, "learning_rate": 1.5783873430666947e-06, "loss": 0.1033, "step": 25630 }, { "epoch": 0.747739074625124, "grad_norm": 0.8748384166736571, "learning_rate": 1.5780428690616284e-06, "loss": 0.1395, "step": 25631 }, { "epoch": 0.7477682478557676, "grad_norm": 0.7851887220845484, "learning_rate": 1.5776984256069767e-06, "loss": 0.1055, "step": 25632 }, { "epoch": 0.7477974210864111, "grad_norm": 0.9042595817353926, "learning_rate": 1.5773540127058162e-06, "loss": 0.1001, "step": 25633 }, { "epoch": 0.7478265943170547, "grad_norm": 0.8458277829155152, "learning_rate": 1.5770096303612243e-06, "loss": 0.1104, "step": 25634 }, { "epoch": 0.7478557675476982, "grad_norm": 0.8950348281535193, "learning_rate": 1.5766652785762726e-06, "loss": 0.1269, "step": 25635 }, { "epoch": 0.7478849407783418, "grad_norm": 0.7225497063346766, "learning_rate": 1.576320957354035e-06, "loss": 0.1233, "step": 25636 }, { "epoch": 0.7479141140089853, "grad_norm": 0.8940911506486378, "learning_rate": 1.5759766666975878e-06, "loss": 0.1108, "step": 25637 }, { "epoch": 0.7479432872396289, "grad_norm": 0.8380670318959027, "learning_rate": 1.575632406610002e-06, "loss": 0.1129, "step": 25638 }, { "epoch": 0.7479724604702724, "grad_norm": 0.7181155540416648, "learning_rate": 1.5752881770943529e-06, "loss": 0.0956, "step": 25639 }, { "epoch": 0.748001633700916, "grad_norm": 0.8029626984678675, "learning_rate": 1.5749439781537145e-06, "loss": 0.1446, "step": 25640 }, { "epoch": 0.7480308069315597, "grad_norm": 1.0508857609416549, "learning_rate": 1.574599809791159e-06, "loss": 0.1097, "step": 25641 }, { "epoch": 0.7480599801622032, "grad_norm": 0.8246555096083829, "learning_rate": 1.5742556720097574e-06, "loss": 0.0896, "step": 25642 }, { "epoch": 0.7480891533928468, "grad_norm": 0.8795579743206051, "learning_rate": 1.5739115648125846e-06, "loss": 0.107, "step": 25643 }, { "epoch": 0.7481183266234903, "grad_norm": 0.8127137789150932, "learning_rate": 1.5735674882027097e-06, "loss": 0.1179, "step": 25644 }, { "epoch": 0.7481474998541339, "grad_norm": 0.8657455104022886, "learning_rate": 1.5732234421832083e-06, "loss": 0.1151, "step": 25645 }, { "epoch": 0.7481766730847774, "grad_norm": 0.984091213798382, "learning_rate": 1.5728794267571478e-06, "loss": 0.1207, "step": 25646 }, { "epoch": 0.748205846315421, "grad_norm": 0.8555390115961721, "learning_rate": 1.5725354419276039e-06, "loss": 0.1071, "step": 25647 }, { "epoch": 0.7482350195460645, "grad_norm": 1.1362405378720326, "learning_rate": 1.5721914876976452e-06, "loss": 0.1254, "step": 25648 }, { "epoch": 0.7482641927767081, "grad_norm": 0.9078234450730663, "learning_rate": 1.5718475640703407e-06, "loss": 0.1286, "step": 25649 }, { "epoch": 0.7482933660073516, "grad_norm": 1.012314769564317, "learning_rate": 1.571503671048763e-06, "loss": 0.1131, "step": 25650 }, { "epoch": 0.7483225392379952, "grad_norm": 0.8669448799297004, "learning_rate": 1.5711598086359837e-06, "loss": 0.1178, "step": 25651 }, { "epoch": 0.7483517124686387, "grad_norm": 0.9417444921510744, "learning_rate": 1.5708159768350711e-06, "loss": 0.1068, "step": 25652 }, { "epoch": 0.7483808856992823, "grad_norm": 0.9311394405412246, "learning_rate": 1.5704721756490932e-06, "loss": 0.1192, "step": 25653 }, { "epoch": 0.7484100589299258, "grad_norm": 0.7286011907726347, "learning_rate": 1.5701284050811227e-06, "loss": 0.1141, "step": 25654 }, { "epoch": 0.7484392321605695, "grad_norm": 0.988775257715098, "learning_rate": 1.569784665134227e-06, "loss": 0.1033, "step": 25655 }, { "epoch": 0.7484684053912131, "grad_norm": 0.9231658020629819, "learning_rate": 1.5694409558114715e-06, "loss": 0.1128, "step": 25656 }, { "epoch": 0.7484975786218566, "grad_norm": 0.8887093770242535, "learning_rate": 1.5690972771159318e-06, "loss": 0.1257, "step": 25657 }, { "epoch": 0.7485267518525002, "grad_norm": 1.3122384141373964, "learning_rate": 1.5687536290506722e-06, "loss": 0.1231, "step": 25658 }, { "epoch": 0.7485559250831437, "grad_norm": 1.0793584893273471, "learning_rate": 1.56841001161876e-06, "loss": 0.116, "step": 25659 }, { "epoch": 0.7485850983137873, "grad_norm": 0.9791206413428024, "learning_rate": 1.5680664248232652e-06, "loss": 0.1075, "step": 25660 }, { "epoch": 0.7486142715444308, "grad_norm": 0.7584541340528008, "learning_rate": 1.567722868667254e-06, "loss": 0.1076, "step": 25661 }, { "epoch": 0.7486434447750744, "grad_norm": 0.8887738686445517, "learning_rate": 1.5673793431537925e-06, "loss": 0.1331, "step": 25662 }, { "epoch": 0.7486726180057179, "grad_norm": 1.8058045291080915, "learning_rate": 1.5670358482859488e-06, "loss": 0.1393, "step": 25663 }, { "epoch": 0.7487017912363615, "grad_norm": 0.9396069158149576, "learning_rate": 1.5666923840667907e-06, "loss": 0.1128, "step": 25664 }, { "epoch": 0.748730964467005, "grad_norm": 0.852240701152601, "learning_rate": 1.566348950499384e-06, "loss": 0.0937, "step": 25665 }, { "epoch": 0.7487601376976486, "grad_norm": 0.9996120030112183, "learning_rate": 1.5660055475867918e-06, "loss": 0.1355, "step": 25666 }, { "epoch": 0.7487893109282922, "grad_norm": 0.8031682577824013, "learning_rate": 1.5656621753320844e-06, "loss": 0.1069, "step": 25667 }, { "epoch": 0.7488184841589358, "grad_norm": 1.2529655951038805, "learning_rate": 1.5653188337383236e-06, "loss": 0.1258, "step": 25668 }, { "epoch": 0.7488476573895794, "grad_norm": 0.8544354484814337, "learning_rate": 1.5649755228085766e-06, "loss": 0.1299, "step": 25669 }, { "epoch": 0.7488768306202229, "grad_norm": 0.9718943308170146, "learning_rate": 1.5646322425459092e-06, "loss": 0.1348, "step": 25670 }, { "epoch": 0.7489060038508665, "grad_norm": 0.8862346063142209, "learning_rate": 1.5642889929533856e-06, "loss": 0.1291, "step": 25671 }, { "epoch": 0.74893517708151, "grad_norm": 0.9019518537476776, "learning_rate": 1.5639457740340674e-06, "loss": 0.1293, "step": 25672 }, { "epoch": 0.7489643503121536, "grad_norm": 0.7627082783371806, "learning_rate": 1.563602585791023e-06, "loss": 0.1244, "step": 25673 }, { "epoch": 0.7489935235427971, "grad_norm": 0.9860020227765464, "learning_rate": 1.5632594282273129e-06, "loss": 0.1335, "step": 25674 }, { "epoch": 0.7490226967734407, "grad_norm": 1.3573526095780761, "learning_rate": 1.5629163013460041e-06, "loss": 0.1172, "step": 25675 }, { "epoch": 0.7490518700040842, "grad_norm": 0.7911529622078647, "learning_rate": 1.5625732051501558e-06, "loss": 0.1169, "step": 25676 }, { "epoch": 0.7490810432347278, "grad_norm": 0.954631176425323, "learning_rate": 1.5622301396428351e-06, "loss": 0.1208, "step": 25677 }, { "epoch": 0.7491102164653713, "grad_norm": 0.7905542458713434, "learning_rate": 1.5618871048271034e-06, "loss": 0.1021, "step": 25678 }, { "epoch": 0.7491393896960149, "grad_norm": 0.9653716620721325, "learning_rate": 1.5615441007060211e-06, "loss": 0.1327, "step": 25679 }, { "epoch": 0.7491685629266585, "grad_norm": 0.735935900526844, "learning_rate": 1.561201127282652e-06, "loss": 0.1096, "step": 25680 }, { "epoch": 0.749197736157302, "grad_norm": 0.7418271668979863, "learning_rate": 1.5608581845600606e-06, "loss": 0.1068, "step": 25681 }, { "epoch": 0.7492269093879457, "grad_norm": 1.1632935520368335, "learning_rate": 1.5605152725413058e-06, "loss": 0.1155, "step": 25682 }, { "epoch": 0.7492560826185892, "grad_norm": 0.8550446078428814, "learning_rate": 1.5601723912294481e-06, "loss": 0.1392, "step": 25683 }, { "epoch": 0.7492852558492328, "grad_norm": 0.8621473842462445, "learning_rate": 1.5598295406275516e-06, "loss": 0.1305, "step": 25684 }, { "epoch": 0.7493144290798763, "grad_norm": 0.937678709069431, "learning_rate": 1.559486720738676e-06, "loss": 0.1012, "step": 25685 }, { "epoch": 0.7493436023105199, "grad_norm": 0.7971498399200088, "learning_rate": 1.5591439315658786e-06, "loss": 0.13, "step": 25686 }, { "epoch": 0.7493727755411634, "grad_norm": 0.741866315596296, "learning_rate": 1.5588011731122254e-06, "loss": 0.1231, "step": 25687 }, { "epoch": 0.749401948771807, "grad_norm": 1.1522685624220508, "learning_rate": 1.5584584453807738e-06, "loss": 0.1401, "step": 25688 }, { "epoch": 0.7494311220024505, "grad_norm": 0.938849643434187, "learning_rate": 1.5581157483745824e-06, "loss": 0.1219, "step": 25689 }, { "epoch": 0.7494602952330941, "grad_norm": 0.9111289872918175, "learning_rate": 1.5577730820967135e-06, "loss": 0.1136, "step": 25690 }, { "epoch": 0.7494894684637377, "grad_norm": 0.8241711797103661, "learning_rate": 1.557430446550225e-06, "loss": 0.1123, "step": 25691 }, { "epoch": 0.7495186416943812, "grad_norm": 0.7043010184822692, "learning_rate": 1.557087841738174e-06, "loss": 0.1126, "step": 25692 }, { "epoch": 0.7495478149250248, "grad_norm": 0.8381720276354171, "learning_rate": 1.5567452676636207e-06, "loss": 0.1051, "step": 25693 }, { "epoch": 0.7495769881556683, "grad_norm": 1.084801681471802, "learning_rate": 1.5564027243296254e-06, "loss": 0.1097, "step": 25694 }, { "epoch": 0.749606161386312, "grad_norm": 1.0063538727901777, "learning_rate": 1.5560602117392442e-06, "loss": 0.1121, "step": 25695 }, { "epoch": 0.7496353346169555, "grad_norm": 0.8374868438912568, "learning_rate": 1.5557177298955339e-06, "loss": 0.1241, "step": 25696 }, { "epoch": 0.7496645078475991, "grad_norm": 0.9112579423054664, "learning_rate": 1.5553752788015552e-06, "loss": 0.1159, "step": 25697 }, { "epoch": 0.7496936810782426, "grad_norm": 0.8229293561206907, "learning_rate": 1.5550328584603619e-06, "loss": 0.1293, "step": 25698 }, { "epoch": 0.7497228543088862, "grad_norm": 0.991546629013683, "learning_rate": 1.554690468875013e-06, "loss": 0.1296, "step": 25699 }, { "epoch": 0.7497520275395297, "grad_norm": 0.9612341527354766, "learning_rate": 1.5543481100485669e-06, "loss": 0.1025, "step": 25700 }, { "epoch": 0.7497812007701733, "grad_norm": 0.8886960342689915, "learning_rate": 1.5540057819840782e-06, "loss": 0.1311, "step": 25701 }, { "epoch": 0.7498103740008168, "grad_norm": 0.8294574979464563, "learning_rate": 1.5536634846846016e-06, "loss": 0.1189, "step": 25702 }, { "epoch": 0.7498395472314604, "grad_norm": 0.9520830625817014, "learning_rate": 1.553321218153196e-06, "loss": 0.1046, "step": 25703 }, { "epoch": 0.749868720462104, "grad_norm": 0.775330468701103, "learning_rate": 1.5529789823929149e-06, "loss": 0.1211, "step": 25704 }, { "epoch": 0.7498978936927475, "grad_norm": 0.6679047702139579, "learning_rate": 1.5526367774068158e-06, "loss": 0.1128, "step": 25705 }, { "epoch": 0.7499270669233911, "grad_norm": 1.1116795057968818, "learning_rate": 1.5522946031979507e-06, "loss": 0.1336, "step": 25706 }, { "epoch": 0.7499562401540346, "grad_norm": 0.878106243220114, "learning_rate": 1.551952459769378e-06, "loss": 0.1107, "step": 25707 }, { "epoch": 0.7499854133846782, "grad_norm": 0.7496572643796967, "learning_rate": 1.5516103471241512e-06, "loss": 0.115, "step": 25708 }, { "epoch": 0.7500145866153218, "grad_norm": 0.8909588369443133, "learning_rate": 1.5512682652653221e-06, "loss": 0.1185, "step": 25709 }, { "epoch": 0.7500437598459654, "grad_norm": 0.9451354533796887, "learning_rate": 1.5509262141959463e-06, "loss": 0.1342, "step": 25710 }, { "epoch": 0.7500729330766089, "grad_norm": 0.9042477030205243, "learning_rate": 1.5505841939190796e-06, "loss": 0.1175, "step": 25711 }, { "epoch": 0.7501021063072525, "grad_norm": 0.8046544980615903, "learning_rate": 1.5502422044377741e-06, "loss": 0.1111, "step": 25712 }, { "epoch": 0.750131279537896, "grad_norm": 0.6873544487419347, "learning_rate": 1.549900245755081e-06, "loss": 0.1093, "step": 25713 }, { "epoch": 0.7501604527685396, "grad_norm": 0.9247468006234043, "learning_rate": 1.5495583178740563e-06, "loss": 0.1243, "step": 25714 }, { "epoch": 0.7501896259991832, "grad_norm": 0.8559041359883738, "learning_rate": 1.5492164207977517e-06, "loss": 0.1234, "step": 25715 }, { "epoch": 0.7502187992298267, "grad_norm": 0.9897421291896847, "learning_rate": 1.5488745545292155e-06, "loss": 0.1109, "step": 25716 }, { "epoch": 0.7502479724604703, "grad_norm": 0.8266209911558882, "learning_rate": 1.5485327190715066e-06, "loss": 0.1304, "step": 25717 }, { "epoch": 0.7502771456911138, "grad_norm": 0.7300099003378026, "learning_rate": 1.548190914427674e-06, "loss": 0.1014, "step": 25718 }, { "epoch": 0.7503063189217574, "grad_norm": 0.7944459331619189, "learning_rate": 1.5478491406007672e-06, "loss": 0.0927, "step": 25719 }, { "epoch": 0.7503354921524009, "grad_norm": 1.4416304824765536, "learning_rate": 1.5475073975938409e-06, "loss": 0.1291, "step": 25720 }, { "epoch": 0.7503646653830445, "grad_norm": 0.7870133310338016, "learning_rate": 1.5471656854099437e-06, "loss": 0.1003, "step": 25721 }, { "epoch": 0.750393838613688, "grad_norm": 0.770019211641465, "learning_rate": 1.546824004052126e-06, "loss": 0.122, "step": 25722 }, { "epoch": 0.7504230118443317, "grad_norm": 0.8051129295730975, "learning_rate": 1.546482353523439e-06, "loss": 0.1153, "step": 25723 }, { "epoch": 0.7504521850749752, "grad_norm": 0.7747533309883834, "learning_rate": 1.5461407338269351e-06, "loss": 0.1142, "step": 25724 }, { "epoch": 0.7504813583056188, "grad_norm": 0.8472187954208377, "learning_rate": 1.5457991449656618e-06, "loss": 0.1003, "step": 25725 }, { "epoch": 0.7505105315362623, "grad_norm": 1.1004646868941497, "learning_rate": 1.545457586942668e-06, "loss": 0.1133, "step": 25726 }, { "epoch": 0.7505397047669059, "grad_norm": 0.8664687944865257, "learning_rate": 1.5451160597610038e-06, "loss": 0.1044, "step": 25727 }, { "epoch": 0.7505688779975495, "grad_norm": 0.8657255289319268, "learning_rate": 1.5447745634237204e-06, "loss": 0.125, "step": 25728 }, { "epoch": 0.750598051228193, "grad_norm": 0.7561478075010376, "learning_rate": 1.5444330979338634e-06, "loss": 0.1212, "step": 25729 }, { "epoch": 0.7506272244588366, "grad_norm": 0.8906009988923281, "learning_rate": 1.544091663294484e-06, "loss": 0.1494, "step": 25730 }, { "epoch": 0.7506563976894801, "grad_norm": 0.9576505702480632, "learning_rate": 1.5437502595086295e-06, "loss": 0.1094, "step": 25731 }, { "epoch": 0.7506855709201237, "grad_norm": 0.9966472602789167, "learning_rate": 1.5434088865793461e-06, "loss": 0.1083, "step": 25732 }, { "epoch": 0.7507147441507672, "grad_norm": 0.9673156502653935, "learning_rate": 1.5430675445096827e-06, "loss": 0.1268, "step": 25733 }, { "epoch": 0.7507439173814108, "grad_norm": 0.6494572180029597, "learning_rate": 1.5427262333026894e-06, "loss": 0.1242, "step": 25734 }, { "epoch": 0.7507730906120543, "grad_norm": 0.8430400269354585, "learning_rate": 1.5423849529614098e-06, "loss": 0.1096, "step": 25735 }, { "epoch": 0.750802263842698, "grad_norm": 0.8645013085778063, "learning_rate": 1.5420437034888914e-06, "loss": 0.1364, "step": 25736 }, { "epoch": 0.7508314370733415, "grad_norm": 0.688901586949705, "learning_rate": 1.5417024848881823e-06, "loss": 0.0997, "step": 25737 }, { "epoch": 0.7508606103039851, "grad_norm": 0.7500581859235532, "learning_rate": 1.5413612971623282e-06, "loss": 0.117, "step": 25738 }, { "epoch": 0.7508897835346287, "grad_norm": 1.000258172728012, "learning_rate": 1.5410201403143726e-06, "loss": 0.1068, "step": 25739 }, { "epoch": 0.7509189567652722, "grad_norm": 0.7286092268553375, "learning_rate": 1.5406790143473644e-06, "loss": 0.1053, "step": 25740 }, { "epoch": 0.7509481299959158, "grad_norm": 0.744143623975935, "learning_rate": 1.5403379192643491e-06, "loss": 0.1085, "step": 25741 }, { "epoch": 0.7509773032265593, "grad_norm": 0.9229720704294662, "learning_rate": 1.5399968550683708e-06, "loss": 0.1222, "step": 25742 }, { "epoch": 0.7510064764572029, "grad_norm": 1.0041125224633975, "learning_rate": 1.5396558217624734e-06, "loss": 0.1154, "step": 25743 }, { "epoch": 0.7510356496878464, "grad_norm": 0.8483922300350779, "learning_rate": 1.5393148193497042e-06, "loss": 0.1477, "step": 25744 }, { "epoch": 0.75106482291849, "grad_norm": 0.8912519006605253, "learning_rate": 1.538973847833105e-06, "loss": 0.1295, "step": 25745 }, { "epoch": 0.7510939961491335, "grad_norm": 0.8236529472409622, "learning_rate": 1.5386329072157209e-06, "loss": 0.1028, "step": 25746 }, { "epoch": 0.7511231693797771, "grad_norm": 1.0366472976575478, "learning_rate": 1.5382919975005971e-06, "loss": 0.1516, "step": 25747 }, { "epoch": 0.7511523426104206, "grad_norm": 0.8515181377093403, "learning_rate": 1.5379511186907764e-06, "loss": 0.1157, "step": 25748 }, { "epoch": 0.7511815158410642, "grad_norm": 2.0746256570564485, "learning_rate": 1.5376102707893e-06, "loss": 0.1113, "step": 25749 }, { "epoch": 0.7512106890717078, "grad_norm": 0.9266128271380434, "learning_rate": 1.5372694537992138e-06, "loss": 0.1159, "step": 25750 }, { "epoch": 0.7512398623023514, "grad_norm": 1.0543881817697456, "learning_rate": 1.536928667723558e-06, "loss": 0.1202, "step": 25751 }, { "epoch": 0.751269035532995, "grad_norm": 0.7582579627222545, "learning_rate": 1.5365879125653776e-06, "loss": 0.1232, "step": 25752 }, { "epoch": 0.7512982087636385, "grad_norm": 0.9280920830413915, "learning_rate": 1.5362471883277125e-06, "loss": 0.123, "step": 25753 }, { "epoch": 0.7513273819942821, "grad_norm": 0.8118954715549233, "learning_rate": 1.5359064950136065e-06, "loss": 0.1095, "step": 25754 }, { "epoch": 0.7513565552249256, "grad_norm": 0.8382301200004377, "learning_rate": 1.5355658326261008e-06, "loss": 0.1275, "step": 25755 }, { "epoch": 0.7513857284555692, "grad_norm": 0.9030350085480212, "learning_rate": 1.5352252011682351e-06, "loss": 0.1191, "step": 25756 }, { "epoch": 0.7514149016862127, "grad_norm": 0.8534362217001674, "learning_rate": 1.5348846006430513e-06, "loss": 0.1097, "step": 25757 }, { "epoch": 0.7514440749168563, "grad_norm": 1.0283059911674362, "learning_rate": 1.534544031053592e-06, "loss": 0.1158, "step": 25758 }, { "epoch": 0.7514732481474998, "grad_norm": 0.8814878203165918, "learning_rate": 1.5342034924028948e-06, "loss": 0.1497, "step": 25759 }, { "epoch": 0.7515024213781434, "grad_norm": 0.9472910776939061, "learning_rate": 1.5338629846940033e-06, "loss": 0.1358, "step": 25760 }, { "epoch": 0.7515315946087869, "grad_norm": 0.934545581378259, "learning_rate": 1.533522507929956e-06, "loss": 0.1311, "step": 25761 }, { "epoch": 0.7515607678394305, "grad_norm": 3.804019459227602, "learning_rate": 1.53318206211379e-06, "loss": 0.1388, "step": 25762 }, { "epoch": 0.7515899410700742, "grad_norm": 0.9187604451123866, "learning_rate": 1.532841647248547e-06, "loss": 0.114, "step": 25763 }, { "epoch": 0.7516191143007177, "grad_norm": 0.9454141071279012, "learning_rate": 1.5325012633372677e-06, "loss": 0.1076, "step": 25764 }, { "epoch": 0.7516482875313613, "grad_norm": 1.022386653222536, "learning_rate": 1.53216091038299e-06, "loss": 0.1071, "step": 25765 }, { "epoch": 0.7516774607620048, "grad_norm": 0.8616648625056953, "learning_rate": 1.5318205883887494e-06, "loss": 0.1164, "step": 25766 }, { "epoch": 0.7517066339926484, "grad_norm": 0.9621778674013503, "learning_rate": 1.5314802973575888e-06, "loss": 0.1135, "step": 25767 }, { "epoch": 0.7517358072232919, "grad_norm": 0.62209175496305, "learning_rate": 1.531140037292544e-06, "loss": 0.1051, "step": 25768 }, { "epoch": 0.7517649804539355, "grad_norm": 0.8775747135041679, "learning_rate": 1.5307998081966507e-06, "loss": 0.1156, "step": 25769 }, { "epoch": 0.751794153684579, "grad_norm": 1.1942064510018175, "learning_rate": 1.530459610072949e-06, "loss": 0.1232, "step": 25770 }, { "epoch": 0.7518233269152226, "grad_norm": 0.8855733617127285, "learning_rate": 1.5301194429244776e-06, "loss": 0.1081, "step": 25771 }, { "epoch": 0.7518525001458661, "grad_norm": 0.7607391829487901, "learning_rate": 1.529779306754271e-06, "loss": 0.1179, "step": 25772 }, { "epoch": 0.7518816733765097, "grad_norm": 0.8636723525629825, "learning_rate": 1.5294392015653648e-06, "loss": 0.1143, "step": 25773 }, { "epoch": 0.7519108466071532, "grad_norm": 0.8722640448327522, "learning_rate": 1.5290991273607986e-06, "loss": 0.1219, "step": 25774 }, { "epoch": 0.7519400198377968, "grad_norm": 1.0854114211235604, "learning_rate": 1.5287590841436056e-06, "loss": 0.1253, "step": 25775 }, { "epoch": 0.7519691930684403, "grad_norm": 0.793304124044723, "learning_rate": 1.5284190719168224e-06, "loss": 0.1192, "step": 25776 }, { "epoch": 0.751998366299084, "grad_norm": 0.7337154705818196, "learning_rate": 1.5280790906834863e-06, "loss": 0.1179, "step": 25777 }, { "epoch": 0.7520275395297276, "grad_norm": 0.871914108863201, "learning_rate": 1.527739140446632e-06, "loss": 0.1141, "step": 25778 }, { "epoch": 0.7520567127603711, "grad_norm": 0.8699708569000806, "learning_rate": 1.527399221209292e-06, "loss": 0.1256, "step": 25779 }, { "epoch": 0.7520858859910147, "grad_norm": 0.8517358303980148, "learning_rate": 1.5270593329745036e-06, "loss": 0.1174, "step": 25780 }, { "epoch": 0.7521150592216582, "grad_norm": 1.2603983556341987, "learning_rate": 1.5267194757452996e-06, "loss": 0.1224, "step": 25781 }, { "epoch": 0.7521442324523018, "grad_norm": 1.229955663058453, "learning_rate": 1.5263796495247162e-06, "loss": 0.1174, "step": 25782 }, { "epoch": 0.7521734056829453, "grad_norm": 0.8220658004708723, "learning_rate": 1.5260398543157851e-06, "loss": 0.1135, "step": 25783 }, { "epoch": 0.7522025789135889, "grad_norm": 0.8753495724032763, "learning_rate": 1.5257000901215418e-06, "loss": 0.1361, "step": 25784 }, { "epoch": 0.7522317521442324, "grad_norm": 1.0133299363899728, "learning_rate": 1.5253603569450192e-06, "loss": 0.1067, "step": 25785 }, { "epoch": 0.752260925374876, "grad_norm": 1.0725323846787793, "learning_rate": 1.5250206547892477e-06, "loss": 0.1084, "step": 25786 }, { "epoch": 0.7522900986055195, "grad_norm": 0.7877331615597004, "learning_rate": 1.524680983657263e-06, "loss": 0.1073, "step": 25787 }, { "epoch": 0.7523192718361631, "grad_norm": 0.8543230161109976, "learning_rate": 1.5243413435520977e-06, "loss": 0.1302, "step": 25788 }, { "epoch": 0.7523484450668066, "grad_norm": 0.8261126604003302, "learning_rate": 1.5240017344767837e-06, "loss": 0.1082, "step": 25789 }, { "epoch": 0.7523776182974503, "grad_norm": 1.011447016471589, "learning_rate": 1.5236621564343507e-06, "loss": 0.1374, "step": 25790 }, { "epoch": 0.7524067915280939, "grad_norm": 1.0970751413172217, "learning_rate": 1.5233226094278336e-06, "loss": 0.1168, "step": 25791 }, { "epoch": 0.7524359647587374, "grad_norm": 1.154937269989941, "learning_rate": 1.5229830934602608e-06, "loss": 0.1066, "step": 25792 }, { "epoch": 0.752465137989381, "grad_norm": 0.7369734811166199, "learning_rate": 1.5226436085346646e-06, "loss": 0.0936, "step": 25793 }, { "epoch": 0.7524943112200245, "grad_norm": 0.8012708710065065, "learning_rate": 1.5223041546540778e-06, "loss": 0.1172, "step": 25794 }, { "epoch": 0.7525234844506681, "grad_norm": 0.8504496220103561, "learning_rate": 1.5219647318215297e-06, "loss": 0.1302, "step": 25795 }, { "epoch": 0.7525526576813116, "grad_norm": 1.3388398262619507, "learning_rate": 1.5216253400400483e-06, "loss": 0.102, "step": 25796 }, { "epoch": 0.7525818309119552, "grad_norm": 0.8398517099374087, "learning_rate": 1.5212859793126672e-06, "loss": 0.1097, "step": 25797 }, { "epoch": 0.7526110041425987, "grad_norm": 0.7939681099286744, "learning_rate": 1.5209466496424146e-06, "loss": 0.1333, "step": 25798 }, { "epoch": 0.7526401773732423, "grad_norm": 0.7374726875389844, "learning_rate": 1.5206073510323177e-06, "loss": 0.1291, "step": 25799 }, { "epoch": 0.7526693506038858, "grad_norm": 0.8541748450938101, "learning_rate": 1.5202680834854084e-06, "loss": 0.1097, "step": 25800 }, { "epoch": 0.7526985238345294, "grad_norm": 0.7624552554381788, "learning_rate": 1.5199288470047163e-06, "loss": 0.1375, "step": 25801 }, { "epoch": 0.752727697065173, "grad_norm": 0.8605363225941227, "learning_rate": 1.5195896415932687e-06, "loss": 0.1114, "step": 25802 }, { "epoch": 0.7527568702958165, "grad_norm": 0.7502923124699972, "learning_rate": 1.5192504672540919e-06, "loss": 0.0925, "step": 25803 }, { "epoch": 0.7527860435264602, "grad_norm": 0.8045466862669897, "learning_rate": 1.5189113239902182e-06, "loss": 0.1117, "step": 25804 }, { "epoch": 0.7528152167571037, "grad_norm": 1.120178268443537, "learning_rate": 1.5185722118046714e-06, "loss": 0.1147, "step": 25805 }, { "epoch": 0.7528443899877473, "grad_norm": 0.9345635236915236, "learning_rate": 1.518233130700481e-06, "loss": 0.1303, "step": 25806 }, { "epoch": 0.7528735632183908, "grad_norm": 0.7488181350558006, "learning_rate": 1.5178940806806753e-06, "loss": 0.1096, "step": 25807 }, { "epoch": 0.7529027364490344, "grad_norm": 0.7496632544713565, "learning_rate": 1.5175550617482804e-06, "loss": 0.1092, "step": 25808 }, { "epoch": 0.7529319096796779, "grad_norm": 0.7321865320211671, "learning_rate": 1.5172160739063208e-06, "loss": 0.1195, "step": 25809 }, { "epoch": 0.7529610829103215, "grad_norm": 0.9067910773689672, "learning_rate": 1.516877117157826e-06, "loss": 0.1236, "step": 25810 }, { "epoch": 0.752990256140965, "grad_norm": 0.8273544180677533, "learning_rate": 1.5165381915058196e-06, "loss": 0.1087, "step": 25811 }, { "epoch": 0.7530194293716086, "grad_norm": 0.7558130969418564, "learning_rate": 1.51619929695333e-06, "loss": 0.1203, "step": 25812 }, { "epoch": 0.7530486026022521, "grad_norm": 0.8595675943393801, "learning_rate": 1.51586043350338e-06, "loss": 0.1267, "step": 25813 }, { "epoch": 0.7530777758328957, "grad_norm": 0.6977803243866763, "learning_rate": 1.5155216011589979e-06, "loss": 0.0972, "step": 25814 }, { "epoch": 0.7531069490635393, "grad_norm": 0.8183157120963988, "learning_rate": 1.5151827999232071e-06, "loss": 0.1192, "step": 25815 }, { "epoch": 0.7531361222941828, "grad_norm": 0.9025714857647811, "learning_rate": 1.5148440297990308e-06, "loss": 0.1492, "step": 25816 }, { "epoch": 0.7531652955248265, "grad_norm": 2.137719978205146, "learning_rate": 1.5145052907894946e-06, "loss": 0.1206, "step": 25817 }, { "epoch": 0.75319446875547, "grad_norm": 0.7518584121211901, "learning_rate": 1.5141665828976253e-06, "loss": 0.1065, "step": 25818 }, { "epoch": 0.7532236419861136, "grad_norm": 0.8352424914910912, "learning_rate": 1.5138279061264445e-06, "loss": 0.11, "step": 25819 }, { "epoch": 0.7532528152167571, "grad_norm": 0.9322873060983801, "learning_rate": 1.5134892604789743e-06, "loss": 0.108, "step": 25820 }, { "epoch": 0.7532819884474007, "grad_norm": 1.1939548122290728, "learning_rate": 1.5131506459582412e-06, "loss": 0.1193, "step": 25821 }, { "epoch": 0.7533111616780442, "grad_norm": 0.7629191767789621, "learning_rate": 1.5128120625672648e-06, "loss": 0.1042, "step": 25822 }, { "epoch": 0.7533403349086878, "grad_norm": 0.8692873201149913, "learning_rate": 1.5124735103090704e-06, "loss": 0.1052, "step": 25823 }, { "epoch": 0.7533695081393313, "grad_norm": 0.934424415248695, "learning_rate": 1.5121349891866815e-06, "loss": 0.1018, "step": 25824 }, { "epoch": 0.7533986813699749, "grad_norm": 1.0896673051291843, "learning_rate": 1.5117964992031187e-06, "loss": 0.1441, "step": 25825 }, { "epoch": 0.7534278546006185, "grad_norm": 1.206064428837, "learning_rate": 1.5114580403614022e-06, "loss": 0.1235, "step": 25826 }, { "epoch": 0.753457027831262, "grad_norm": 0.8417440204406492, "learning_rate": 1.5111196126645573e-06, "loss": 0.112, "step": 25827 }, { "epoch": 0.7534862010619056, "grad_norm": 0.8429514644846525, "learning_rate": 1.5107812161156037e-06, "loss": 0.1006, "step": 25828 }, { "epoch": 0.7535153742925491, "grad_norm": 0.6798431981677711, "learning_rate": 1.5104428507175612e-06, "loss": 0.125, "step": 25829 }, { "epoch": 0.7535445475231927, "grad_norm": 0.786217281828536, "learning_rate": 1.5101045164734512e-06, "loss": 0.146, "step": 25830 }, { "epoch": 0.7535737207538363, "grad_norm": 0.909765805922423, "learning_rate": 1.5097662133862973e-06, "loss": 0.132, "step": 25831 }, { "epoch": 0.7536028939844799, "grad_norm": 0.8993856071037685, "learning_rate": 1.5094279414591168e-06, "loss": 0.1107, "step": 25832 }, { "epoch": 0.7536320672151234, "grad_norm": 0.8117467983951188, "learning_rate": 1.509089700694929e-06, "loss": 0.1085, "step": 25833 }, { "epoch": 0.753661240445767, "grad_norm": 0.8235728459135495, "learning_rate": 1.5087514910967572e-06, "loss": 0.1259, "step": 25834 }, { "epoch": 0.7536904136764105, "grad_norm": 0.8557785771479315, "learning_rate": 1.508413312667616e-06, "loss": 0.1109, "step": 25835 }, { "epoch": 0.7537195869070541, "grad_norm": 0.8349318552744862, "learning_rate": 1.508075165410528e-06, "loss": 0.116, "step": 25836 }, { "epoch": 0.7537487601376976, "grad_norm": 0.8397457214782929, "learning_rate": 1.5077370493285126e-06, "loss": 0.1194, "step": 25837 }, { "epoch": 0.7537779333683412, "grad_norm": 0.7883389338674813, "learning_rate": 1.5073989644245873e-06, "loss": 0.1275, "step": 25838 }, { "epoch": 0.7538071065989848, "grad_norm": 0.8064327679277885, "learning_rate": 1.5070609107017687e-06, "loss": 0.1026, "step": 25839 }, { "epoch": 0.7538362798296283, "grad_norm": 0.8328954087145269, "learning_rate": 1.506722888163078e-06, "loss": 0.1176, "step": 25840 }, { "epoch": 0.7538654530602719, "grad_norm": 0.918903002541291, "learning_rate": 1.5063848968115297e-06, "loss": 0.1199, "step": 25841 }, { "epoch": 0.7538946262909154, "grad_norm": 0.9218694076987992, "learning_rate": 1.506046936650145e-06, "loss": 0.116, "step": 25842 }, { "epoch": 0.753923799521559, "grad_norm": 0.8553981890892228, "learning_rate": 1.5057090076819375e-06, "loss": 0.0949, "step": 25843 }, { "epoch": 0.7539529727522026, "grad_norm": 0.8016816482886957, "learning_rate": 1.5053711099099272e-06, "loss": 0.1019, "step": 25844 }, { "epoch": 0.7539821459828462, "grad_norm": 0.8313417626973173, "learning_rate": 1.5050332433371295e-06, "loss": 0.1027, "step": 25845 }, { "epoch": 0.7540113192134897, "grad_norm": 0.8996554314666295, "learning_rate": 1.5046954079665588e-06, "loss": 0.0942, "step": 25846 }, { "epoch": 0.7540404924441333, "grad_norm": 0.8516424226755879, "learning_rate": 1.5043576038012337e-06, "loss": 0.1277, "step": 25847 }, { "epoch": 0.7540696656747768, "grad_norm": 0.9941469091678935, "learning_rate": 1.5040198308441707e-06, "loss": 0.1181, "step": 25848 }, { "epoch": 0.7540988389054204, "grad_norm": 0.8230225930256071, "learning_rate": 1.503682089098384e-06, "loss": 0.1065, "step": 25849 }, { "epoch": 0.754128012136064, "grad_norm": 0.9212499227895967, "learning_rate": 1.5033443785668873e-06, "loss": 0.1049, "step": 25850 }, { "epoch": 0.7541571853667075, "grad_norm": 0.9520060923931164, "learning_rate": 1.5030066992526993e-06, "loss": 0.1368, "step": 25851 }, { "epoch": 0.7541863585973511, "grad_norm": 0.8640732809284455, "learning_rate": 1.502669051158831e-06, "loss": 0.1013, "step": 25852 }, { "epoch": 0.7542155318279946, "grad_norm": 0.9303947086138226, "learning_rate": 1.5023314342882984e-06, "loss": 0.1161, "step": 25853 }, { "epoch": 0.7542447050586382, "grad_norm": 0.8011869651117773, "learning_rate": 1.5019938486441172e-06, "loss": 0.1266, "step": 25854 }, { "epoch": 0.7542738782892817, "grad_norm": 0.9567966624041573, "learning_rate": 1.5016562942293e-06, "loss": 0.1017, "step": 25855 }, { "epoch": 0.7543030515199253, "grad_norm": 0.8704921474973745, "learning_rate": 1.5013187710468584e-06, "loss": 0.1069, "step": 25856 }, { "epoch": 0.7543322247505688, "grad_norm": 0.7797878699950223, "learning_rate": 1.5009812790998096e-06, "loss": 0.0988, "step": 25857 }, { "epoch": 0.7543613979812125, "grad_norm": 0.8203057622573032, "learning_rate": 1.500643818391165e-06, "loss": 0.1409, "step": 25858 }, { "epoch": 0.754390571211856, "grad_norm": 0.951587561368629, "learning_rate": 1.500306388923935e-06, "loss": 0.1073, "step": 25859 }, { "epoch": 0.7544197444424996, "grad_norm": 1.061516934732331, "learning_rate": 1.4999689907011338e-06, "loss": 0.1158, "step": 25860 }, { "epoch": 0.7544489176731431, "grad_norm": 0.9483004185178417, "learning_rate": 1.4996316237257758e-06, "loss": 0.11, "step": 25861 }, { "epoch": 0.7544780909037867, "grad_norm": 0.7892898201827739, "learning_rate": 1.4992942880008716e-06, "loss": 0.1019, "step": 25862 }, { "epoch": 0.7545072641344303, "grad_norm": 0.8486407833114757, "learning_rate": 1.4989569835294298e-06, "loss": 0.1114, "step": 25863 }, { "epoch": 0.7545364373650738, "grad_norm": 0.8545793243946734, "learning_rate": 1.4986197103144661e-06, "loss": 0.111, "step": 25864 }, { "epoch": 0.7545656105957174, "grad_norm": 0.9917273192168473, "learning_rate": 1.4982824683589887e-06, "loss": 0.1285, "step": 25865 }, { "epoch": 0.7545947838263609, "grad_norm": 1.0136428433058013, "learning_rate": 1.4979452576660091e-06, "loss": 0.1201, "step": 25866 }, { "epoch": 0.7546239570570045, "grad_norm": 1.142683193062564, "learning_rate": 1.4976080782385399e-06, "loss": 0.0963, "step": 25867 }, { "epoch": 0.754653130287648, "grad_norm": 1.0490554723344994, "learning_rate": 1.4972709300795896e-06, "loss": 0.1179, "step": 25868 }, { "epoch": 0.7546823035182916, "grad_norm": 1.0838232578951599, "learning_rate": 1.4969338131921667e-06, "loss": 0.1063, "step": 25869 }, { "epoch": 0.7547114767489351, "grad_norm": 0.8307847314099764, "learning_rate": 1.4965967275792842e-06, "loss": 0.1102, "step": 25870 }, { "epoch": 0.7547406499795788, "grad_norm": 1.338595340756564, "learning_rate": 1.4962596732439484e-06, "loss": 0.115, "step": 25871 }, { "epoch": 0.7547698232102223, "grad_norm": 1.0822023601561124, "learning_rate": 1.495922650189171e-06, "loss": 0.1166, "step": 25872 }, { "epoch": 0.7547989964408659, "grad_norm": 0.7975328518759391, "learning_rate": 1.4955856584179584e-06, "loss": 0.1385, "step": 25873 }, { "epoch": 0.7548281696715095, "grad_norm": 0.7821845278332362, "learning_rate": 1.495248697933322e-06, "loss": 0.1126, "step": 25874 }, { "epoch": 0.754857342902153, "grad_norm": 0.7082873143847945, "learning_rate": 1.4949117687382686e-06, "loss": 0.101, "step": 25875 }, { "epoch": 0.7548865161327966, "grad_norm": 0.7938271501070359, "learning_rate": 1.4945748708358044e-06, "loss": 0.1053, "step": 25876 }, { "epoch": 0.7549156893634401, "grad_norm": 0.7773730416861138, "learning_rate": 1.4942380042289388e-06, "loss": 0.1171, "step": 25877 }, { "epoch": 0.7549448625940837, "grad_norm": 1.0313441772972234, "learning_rate": 1.4939011689206812e-06, "loss": 0.1002, "step": 25878 }, { "epoch": 0.7549740358247272, "grad_norm": 0.818432201028089, "learning_rate": 1.493564364914037e-06, "loss": 0.1287, "step": 25879 }, { "epoch": 0.7550032090553708, "grad_norm": 0.9182194321868798, "learning_rate": 1.4932275922120116e-06, "loss": 0.1269, "step": 25880 }, { "epoch": 0.7550323822860143, "grad_norm": 0.8024847123486553, "learning_rate": 1.4928908508176148e-06, "loss": 0.1031, "step": 25881 }, { "epoch": 0.7550615555166579, "grad_norm": 1.0148061125312728, "learning_rate": 1.4925541407338511e-06, "loss": 0.1176, "step": 25882 }, { "epoch": 0.7550907287473014, "grad_norm": 0.783349701504457, "learning_rate": 1.4922174619637236e-06, "loss": 0.1023, "step": 25883 }, { "epoch": 0.755119901977945, "grad_norm": 0.7475352753241996, "learning_rate": 1.4918808145102443e-06, "loss": 0.1452, "step": 25884 }, { "epoch": 0.7551490752085886, "grad_norm": 0.8235592086340293, "learning_rate": 1.4915441983764156e-06, "loss": 0.1081, "step": 25885 }, { "epoch": 0.7551782484392322, "grad_norm": 1.0793896475449292, "learning_rate": 1.4912076135652414e-06, "loss": 0.1188, "step": 25886 }, { "epoch": 0.7552074216698758, "grad_norm": 0.8527877192086889, "learning_rate": 1.4908710600797293e-06, "loss": 0.1166, "step": 25887 }, { "epoch": 0.7552365949005193, "grad_norm": 1.025990503369907, "learning_rate": 1.490534537922883e-06, "loss": 0.1279, "step": 25888 }, { "epoch": 0.7552657681311629, "grad_norm": 0.7099068887493515, "learning_rate": 1.4901980470977046e-06, "loss": 0.1035, "step": 25889 }, { "epoch": 0.7552949413618064, "grad_norm": 0.6294984277144369, "learning_rate": 1.4898615876072002e-06, "loss": 0.1263, "step": 25890 }, { "epoch": 0.75532411459245, "grad_norm": 1.0830088936709952, "learning_rate": 1.4895251594543758e-06, "loss": 0.1024, "step": 25891 }, { "epoch": 0.7553532878230935, "grad_norm": 0.879881989210489, "learning_rate": 1.4891887626422324e-06, "loss": 0.108, "step": 25892 }, { "epoch": 0.7553824610537371, "grad_norm": 0.8406366078337004, "learning_rate": 1.4888523971737716e-06, "loss": 0.1211, "step": 25893 }, { "epoch": 0.7554116342843806, "grad_norm": 0.7158975523182622, "learning_rate": 1.4885160630520008e-06, "loss": 0.1065, "step": 25894 }, { "epoch": 0.7554408075150242, "grad_norm": 0.9213164725454261, "learning_rate": 1.488179760279918e-06, "loss": 0.1197, "step": 25895 }, { "epoch": 0.7554699807456677, "grad_norm": 2.0983595262270427, "learning_rate": 1.4878434888605287e-06, "loss": 0.1069, "step": 25896 }, { "epoch": 0.7554991539763113, "grad_norm": 0.9421713784874025, "learning_rate": 1.4875072487968356e-06, "loss": 0.1069, "step": 25897 }, { "epoch": 0.755528327206955, "grad_norm": 1.0693569782422778, "learning_rate": 1.4871710400918388e-06, "loss": 0.1099, "step": 25898 }, { "epoch": 0.7555575004375985, "grad_norm": 0.8511918821329131, "learning_rate": 1.4868348627485397e-06, "loss": 0.1015, "step": 25899 }, { "epoch": 0.7555866736682421, "grad_norm": 0.6910395200974965, "learning_rate": 1.4864987167699414e-06, "loss": 0.0939, "step": 25900 }, { "epoch": 0.7556158468988856, "grad_norm": 1.0408751846096094, "learning_rate": 1.486162602159042e-06, "loss": 0.1056, "step": 25901 }, { "epoch": 0.7556450201295292, "grad_norm": 0.7004601690901944, "learning_rate": 1.485826518918846e-06, "loss": 0.1044, "step": 25902 }, { "epoch": 0.7556741933601727, "grad_norm": 1.0082745657940422, "learning_rate": 1.4854904670523496e-06, "loss": 0.1255, "step": 25903 }, { "epoch": 0.7557033665908163, "grad_norm": 0.8292697853817166, "learning_rate": 1.485154446562558e-06, "loss": 0.1157, "step": 25904 }, { "epoch": 0.7557325398214598, "grad_norm": 0.8205664785554989, "learning_rate": 1.4848184574524677e-06, "loss": 0.1225, "step": 25905 }, { "epoch": 0.7557617130521034, "grad_norm": 1.1649608083028422, "learning_rate": 1.4844824997250779e-06, "loss": 0.1262, "step": 25906 }, { "epoch": 0.7557908862827469, "grad_norm": 1.0107865491889776, "learning_rate": 1.4841465733833887e-06, "loss": 0.1155, "step": 25907 }, { "epoch": 0.7558200595133905, "grad_norm": 0.7543127553830585, "learning_rate": 1.4838106784304012e-06, "loss": 0.1002, "step": 25908 }, { "epoch": 0.755849232744034, "grad_norm": 0.9196363893053334, "learning_rate": 1.483474814869113e-06, "loss": 0.1158, "step": 25909 }, { "epoch": 0.7558784059746776, "grad_norm": 5.341613293993254, "learning_rate": 1.4831389827025206e-06, "loss": 0.1007, "step": 25910 }, { "epoch": 0.7559075792053211, "grad_norm": 0.9742554329408882, "learning_rate": 1.4828031819336254e-06, "loss": 0.12, "step": 25911 }, { "epoch": 0.7559367524359648, "grad_norm": 0.7953343134436859, "learning_rate": 1.4824674125654232e-06, "loss": 0.1218, "step": 25912 }, { "epoch": 0.7559659256666084, "grad_norm": 0.8716245200533763, "learning_rate": 1.4821316746009096e-06, "loss": 0.1056, "step": 25913 }, { "epoch": 0.7559950988972519, "grad_norm": 0.9618030116541052, "learning_rate": 1.4817959680430876e-06, "loss": 0.088, "step": 25914 }, { "epoch": 0.7560242721278955, "grad_norm": 0.8652819971298802, "learning_rate": 1.4814602928949512e-06, "loss": 0.1179, "step": 25915 }, { "epoch": 0.756053445358539, "grad_norm": 0.8854919138178449, "learning_rate": 1.4811246491594961e-06, "loss": 0.1302, "step": 25916 }, { "epoch": 0.7560826185891826, "grad_norm": 0.8830455795101136, "learning_rate": 1.4807890368397215e-06, "loss": 0.0977, "step": 25917 }, { "epoch": 0.7561117918198261, "grad_norm": 0.9216145671200741, "learning_rate": 1.4804534559386208e-06, "loss": 0.1076, "step": 25918 }, { "epoch": 0.7561409650504697, "grad_norm": 0.8000667135237715, "learning_rate": 1.480117906459193e-06, "loss": 0.1256, "step": 25919 }, { "epoch": 0.7561701382811132, "grad_norm": 0.9024547885912507, "learning_rate": 1.4797823884044303e-06, "loss": 0.1058, "step": 25920 }, { "epoch": 0.7561993115117568, "grad_norm": 0.7338654652081568, "learning_rate": 1.4794469017773327e-06, "loss": 0.1022, "step": 25921 }, { "epoch": 0.7562284847424003, "grad_norm": 0.9438720342464741, "learning_rate": 1.479111446580892e-06, "loss": 0.1317, "step": 25922 }, { "epoch": 0.7562576579730439, "grad_norm": 0.8413841232525172, "learning_rate": 1.4787760228181019e-06, "loss": 0.1132, "step": 25923 }, { "epoch": 0.7562868312036874, "grad_norm": 0.9747519063979391, "learning_rate": 1.4784406304919596e-06, "loss": 0.1201, "step": 25924 }, { "epoch": 0.7563160044343311, "grad_norm": 0.9040490081693991, "learning_rate": 1.4781052696054598e-06, "loss": 0.0901, "step": 25925 }, { "epoch": 0.7563451776649747, "grad_norm": 0.8826366885308079, "learning_rate": 1.477769940161594e-06, "loss": 0.1268, "step": 25926 }, { "epoch": 0.7563743508956182, "grad_norm": 0.9184658452872857, "learning_rate": 1.477434642163359e-06, "loss": 0.1015, "step": 25927 }, { "epoch": 0.7564035241262618, "grad_norm": 1.0181008044302222, "learning_rate": 1.4770993756137465e-06, "loss": 0.1212, "step": 25928 }, { "epoch": 0.7564326973569053, "grad_norm": 0.9308072159314915, "learning_rate": 1.4767641405157485e-06, "loss": 0.1025, "step": 25929 }, { "epoch": 0.7564618705875489, "grad_norm": 0.8788407820851127, "learning_rate": 1.476428936872359e-06, "loss": 0.121, "step": 25930 }, { "epoch": 0.7564910438181924, "grad_norm": 1.0523315667493367, "learning_rate": 1.4760937646865718e-06, "loss": 0.1288, "step": 25931 }, { "epoch": 0.756520217048836, "grad_norm": 0.806332663983592, "learning_rate": 1.475758623961379e-06, "loss": 0.1102, "step": 25932 }, { "epoch": 0.7565493902794795, "grad_norm": 0.7671064953869315, "learning_rate": 1.4754235146997704e-06, "loss": 0.1155, "step": 25933 }, { "epoch": 0.7565785635101231, "grad_norm": 1.0927368440905616, "learning_rate": 1.4750884369047403e-06, "loss": 0.1172, "step": 25934 }, { "epoch": 0.7566077367407666, "grad_norm": 0.8445207583002338, "learning_rate": 1.4747533905792794e-06, "loss": 0.1021, "step": 25935 }, { "epoch": 0.7566369099714102, "grad_norm": 0.6645077128838399, "learning_rate": 1.474418375726377e-06, "loss": 0.0965, "step": 25936 }, { "epoch": 0.7566660832020538, "grad_norm": 0.9671532615444046, "learning_rate": 1.4740833923490262e-06, "loss": 0.1181, "step": 25937 }, { "epoch": 0.7566952564326973, "grad_norm": 0.9902850061524677, "learning_rate": 1.4737484404502178e-06, "loss": 0.1432, "step": 25938 }, { "epoch": 0.756724429663341, "grad_norm": 1.0415156810652546, "learning_rate": 1.4734135200329425e-06, "loss": 0.0995, "step": 25939 }, { "epoch": 0.7567536028939845, "grad_norm": 0.8475533043562927, "learning_rate": 1.473078631100187e-06, "loss": 0.1335, "step": 25940 }, { "epoch": 0.7567827761246281, "grad_norm": 0.8573282963470593, "learning_rate": 1.472743773654946e-06, "loss": 0.1102, "step": 25941 }, { "epoch": 0.7568119493552716, "grad_norm": 0.9241080034264542, "learning_rate": 1.4724089477002047e-06, "loss": 0.1218, "step": 25942 }, { "epoch": 0.7568411225859152, "grad_norm": 0.9145944415032274, "learning_rate": 1.4720741532389537e-06, "loss": 0.105, "step": 25943 }, { "epoch": 0.7568702958165587, "grad_norm": 0.8711946635114528, "learning_rate": 1.4717393902741845e-06, "loss": 0.1223, "step": 25944 }, { "epoch": 0.7568994690472023, "grad_norm": 0.9409995913225305, "learning_rate": 1.4714046588088838e-06, "loss": 0.1271, "step": 25945 }, { "epoch": 0.7569286422778458, "grad_norm": 0.75000581011883, "learning_rate": 1.4710699588460382e-06, "loss": 0.115, "step": 25946 }, { "epoch": 0.7569578155084894, "grad_norm": 0.8327671343984938, "learning_rate": 1.4707352903886395e-06, "loss": 0.1, "step": 25947 }, { "epoch": 0.756986988739133, "grad_norm": 0.8913646526156245, "learning_rate": 1.4704006534396714e-06, "loss": 0.1186, "step": 25948 }, { "epoch": 0.7570161619697765, "grad_norm": 1.0481898711688422, "learning_rate": 1.4700660480021263e-06, "loss": 0.1023, "step": 25949 }, { "epoch": 0.75704533520042, "grad_norm": 0.8521890379544377, "learning_rate": 1.4697314740789864e-06, "loss": 0.1009, "step": 25950 }, { "epoch": 0.7570745084310636, "grad_norm": 1.126152932194546, "learning_rate": 1.4693969316732426e-06, "loss": 0.138, "step": 25951 }, { "epoch": 0.7571036816617073, "grad_norm": 0.9845809426788137, "learning_rate": 1.4690624207878807e-06, "loss": 0.1306, "step": 25952 }, { "epoch": 0.7571328548923508, "grad_norm": 0.673575236417537, "learning_rate": 1.4687279414258848e-06, "loss": 0.1197, "step": 25953 }, { "epoch": 0.7571620281229944, "grad_norm": 0.9049576524294846, "learning_rate": 1.4683934935902428e-06, "loss": 0.104, "step": 25954 }, { "epoch": 0.7571912013536379, "grad_norm": 0.7645593047819226, "learning_rate": 1.4680590772839427e-06, "loss": 0.1081, "step": 25955 }, { "epoch": 0.7572203745842815, "grad_norm": 0.8204293226318649, "learning_rate": 1.4677246925099659e-06, "loss": 0.1153, "step": 25956 }, { "epoch": 0.757249547814925, "grad_norm": 0.8188709015858062, "learning_rate": 1.4673903392713018e-06, "loss": 0.1447, "step": 25957 }, { "epoch": 0.7572787210455686, "grad_norm": 1.2373567565441503, "learning_rate": 1.4670560175709331e-06, "loss": 0.1197, "step": 25958 }, { "epoch": 0.7573078942762121, "grad_norm": 0.8767735977746076, "learning_rate": 1.4667217274118433e-06, "loss": 0.1269, "step": 25959 }, { "epoch": 0.7573370675068557, "grad_norm": 0.9925801970461513, "learning_rate": 1.4663874687970187e-06, "loss": 0.1212, "step": 25960 }, { "epoch": 0.7573662407374993, "grad_norm": 0.6686022034013344, "learning_rate": 1.4660532417294448e-06, "loss": 0.108, "step": 25961 }, { "epoch": 0.7573954139681428, "grad_norm": 2.0859127547442027, "learning_rate": 1.4657190462121035e-06, "loss": 0.1171, "step": 25962 }, { "epoch": 0.7574245871987864, "grad_norm": 0.8538955615247369, "learning_rate": 1.4653848822479778e-06, "loss": 0.1372, "step": 25963 }, { "epoch": 0.7574537604294299, "grad_norm": 1.1753289851422752, "learning_rate": 1.4650507498400535e-06, "loss": 0.0862, "step": 25964 }, { "epoch": 0.7574829336600735, "grad_norm": 1.1818555683389145, "learning_rate": 1.4647166489913123e-06, "loss": 0.1168, "step": 25965 }, { "epoch": 0.7575121068907171, "grad_norm": 0.9780284514575832, "learning_rate": 1.4643825797047351e-06, "loss": 0.1097, "step": 25966 }, { "epoch": 0.7575412801213607, "grad_norm": 1.0640706978670855, "learning_rate": 1.4640485419833062e-06, "loss": 0.119, "step": 25967 }, { "epoch": 0.7575704533520042, "grad_norm": 1.1239686902375026, "learning_rate": 1.4637145358300099e-06, "loss": 0.1337, "step": 25968 }, { "epoch": 0.7575996265826478, "grad_norm": 1.3655347668875975, "learning_rate": 1.463380561247826e-06, "loss": 0.1145, "step": 25969 }, { "epoch": 0.7576287998132913, "grad_norm": 1.1733687009818943, "learning_rate": 1.463046618239734e-06, "loss": 0.1179, "step": 25970 }, { "epoch": 0.7576579730439349, "grad_norm": 0.7321282465144936, "learning_rate": 1.4627127068087194e-06, "loss": 0.0879, "step": 25971 }, { "epoch": 0.7576871462745784, "grad_norm": 0.9933471409592995, "learning_rate": 1.4623788269577594e-06, "loss": 0.1125, "step": 25972 }, { "epoch": 0.757716319505222, "grad_norm": 1.2773186342760419, "learning_rate": 1.4620449786898372e-06, "loss": 0.1381, "step": 25973 }, { "epoch": 0.7577454927358656, "grad_norm": 1.1663542132844367, "learning_rate": 1.4617111620079343e-06, "loss": 0.1387, "step": 25974 }, { "epoch": 0.7577746659665091, "grad_norm": 0.9684096729095881, "learning_rate": 1.4613773769150298e-06, "loss": 0.0976, "step": 25975 }, { "epoch": 0.7578038391971527, "grad_norm": 0.7706560538170121, "learning_rate": 1.4610436234141013e-06, "loss": 0.1261, "step": 25976 }, { "epoch": 0.7578330124277962, "grad_norm": 1.1786850203145411, "learning_rate": 1.4607099015081322e-06, "loss": 0.1148, "step": 25977 }, { "epoch": 0.7578621856584398, "grad_norm": 1.230491814231743, "learning_rate": 1.4603762112000986e-06, "loss": 0.1199, "step": 25978 }, { "epoch": 0.7578913588890833, "grad_norm": 1.0535222003526965, "learning_rate": 1.460042552492983e-06, "loss": 0.127, "step": 25979 }, { "epoch": 0.757920532119727, "grad_norm": 1.018164078482079, "learning_rate": 1.4597089253897606e-06, "loss": 0.1464, "step": 25980 }, { "epoch": 0.7579497053503705, "grad_norm": 0.8576248454750078, "learning_rate": 1.4593753298934132e-06, "loss": 0.1143, "step": 25981 }, { "epoch": 0.7579788785810141, "grad_norm": 1.2795461807170105, "learning_rate": 1.4590417660069177e-06, "loss": 0.1107, "step": 25982 }, { "epoch": 0.7580080518116576, "grad_norm": 0.9777373110980944, "learning_rate": 1.4587082337332508e-06, "loss": 0.1176, "step": 25983 }, { "epoch": 0.7580372250423012, "grad_norm": 0.9614068004197822, "learning_rate": 1.458374733075391e-06, "loss": 0.1158, "step": 25984 }, { "epoch": 0.7580663982729448, "grad_norm": 0.8765415290918716, "learning_rate": 1.4580412640363185e-06, "loss": 0.1083, "step": 25985 }, { "epoch": 0.7580955715035883, "grad_norm": 0.8348287583351599, "learning_rate": 1.4577078266190058e-06, "loss": 0.1151, "step": 25986 }, { "epoch": 0.7581247447342319, "grad_norm": 0.950245424025854, "learning_rate": 1.4573744208264335e-06, "loss": 0.117, "step": 25987 }, { "epoch": 0.7581539179648754, "grad_norm": 1.1097815798448516, "learning_rate": 1.457041046661577e-06, "loss": 0.1294, "step": 25988 }, { "epoch": 0.758183091195519, "grad_norm": 1.009146907616732, "learning_rate": 1.4567077041274109e-06, "loss": 0.1116, "step": 25989 }, { "epoch": 0.7582122644261625, "grad_norm": 0.7586790868249903, "learning_rate": 1.456374393226912e-06, "loss": 0.1038, "step": 25990 }, { "epoch": 0.7582414376568061, "grad_norm": 0.7418242568597122, "learning_rate": 1.4560411139630581e-06, "loss": 0.1274, "step": 25991 }, { "epoch": 0.7582706108874496, "grad_norm": 1.084858512902555, "learning_rate": 1.4557078663388236e-06, "loss": 0.0986, "step": 25992 }, { "epoch": 0.7582997841180933, "grad_norm": 0.858504223295019, "learning_rate": 1.4553746503571813e-06, "loss": 0.1144, "step": 25993 }, { "epoch": 0.7583289573487368, "grad_norm": 0.7787597298270245, "learning_rate": 1.4550414660211099e-06, "loss": 0.1033, "step": 25994 }, { "epoch": 0.7583581305793804, "grad_norm": 0.8932275680825315, "learning_rate": 1.4547083133335821e-06, "loss": 0.1131, "step": 25995 }, { "epoch": 0.758387303810024, "grad_norm": 1.0437860675415536, "learning_rate": 1.45437519229757e-06, "loss": 0.0972, "step": 25996 }, { "epoch": 0.7584164770406675, "grad_norm": 0.7378942739734735, "learning_rate": 1.45404210291605e-06, "loss": 0.1121, "step": 25997 }, { "epoch": 0.7584456502713111, "grad_norm": 0.6903590941643238, "learning_rate": 1.4537090451919972e-06, "loss": 0.1082, "step": 25998 }, { "epoch": 0.7584748235019546, "grad_norm": 0.940410991770605, "learning_rate": 1.4533760191283836e-06, "loss": 0.1243, "step": 25999 }, { "epoch": 0.7585039967325982, "grad_norm": 0.8713184786511823, "learning_rate": 1.4530430247281808e-06, "loss": 0.1172, "step": 26000 }, { "epoch": 0.7585331699632417, "grad_norm": 0.7118485771160761, "learning_rate": 1.4527100619943646e-06, "loss": 0.1337, "step": 26001 }, { "epoch": 0.7585623431938853, "grad_norm": 0.8045119603049272, "learning_rate": 1.4523771309299044e-06, "loss": 0.1185, "step": 26002 }, { "epoch": 0.7585915164245288, "grad_norm": 0.8168851311481075, "learning_rate": 1.452044231537774e-06, "loss": 0.1107, "step": 26003 }, { "epoch": 0.7586206896551724, "grad_norm": 0.6164216694250991, "learning_rate": 1.451711363820948e-06, "loss": 0.1195, "step": 26004 }, { "epoch": 0.7586498628858159, "grad_norm": 0.9144872541790582, "learning_rate": 1.4513785277823956e-06, "loss": 0.1109, "step": 26005 }, { "epoch": 0.7586790361164595, "grad_norm": 0.9360306194839095, "learning_rate": 1.4510457234250868e-06, "loss": 0.1358, "step": 26006 }, { "epoch": 0.7587082093471031, "grad_norm": 0.7041663597927578, "learning_rate": 1.4507129507519968e-06, "loss": 0.1001, "step": 26007 }, { "epoch": 0.7587373825777467, "grad_norm": 0.7837777907709031, "learning_rate": 1.4503802097660918e-06, "loss": 0.114, "step": 26008 }, { "epoch": 0.7587665558083903, "grad_norm": 0.7362928846800872, "learning_rate": 1.4500475004703475e-06, "loss": 0.1038, "step": 26009 }, { "epoch": 0.7587957290390338, "grad_norm": 0.8596231315156331, "learning_rate": 1.4497148228677294e-06, "loss": 0.1175, "step": 26010 }, { "epoch": 0.7588249022696774, "grad_norm": 1.0259867411899775, "learning_rate": 1.4493821769612115e-06, "loss": 0.1158, "step": 26011 }, { "epoch": 0.7588540755003209, "grad_norm": 0.9072027808247067, "learning_rate": 1.4490495627537621e-06, "loss": 0.1118, "step": 26012 }, { "epoch": 0.7588832487309645, "grad_norm": 0.8457106723844688, "learning_rate": 1.4487169802483485e-06, "loss": 0.1051, "step": 26013 }, { "epoch": 0.758912421961608, "grad_norm": 1.1286016343179337, "learning_rate": 1.4483844294479427e-06, "loss": 0.1337, "step": 26014 }, { "epoch": 0.7589415951922516, "grad_norm": 0.8435348531995076, "learning_rate": 1.4480519103555141e-06, "loss": 0.1367, "step": 26015 }, { "epoch": 0.7589707684228951, "grad_norm": 0.8806963037281516, "learning_rate": 1.4477194229740282e-06, "loss": 0.1012, "step": 26016 }, { "epoch": 0.7589999416535387, "grad_norm": 1.1095409650591022, "learning_rate": 1.4473869673064573e-06, "loss": 0.1215, "step": 26017 }, { "epoch": 0.7590291148841822, "grad_norm": 0.8571719788990452, "learning_rate": 1.4470545433557676e-06, "loss": 0.1168, "step": 26018 }, { "epoch": 0.7590582881148258, "grad_norm": 0.8549046547853687, "learning_rate": 1.4467221511249247e-06, "loss": 0.1042, "step": 26019 }, { "epoch": 0.7590874613454694, "grad_norm": 0.8946800367869224, "learning_rate": 1.4463897906168984e-06, "loss": 0.1172, "step": 26020 }, { "epoch": 0.759116634576113, "grad_norm": 0.9367240916529788, "learning_rate": 1.4460574618346573e-06, "loss": 0.1137, "step": 26021 }, { "epoch": 0.7591458078067566, "grad_norm": 1.0603338690555317, "learning_rate": 1.445725164781167e-06, "loss": 0.1113, "step": 26022 }, { "epoch": 0.7591749810374001, "grad_norm": 0.7919956411995522, "learning_rate": 1.4453928994593925e-06, "loss": 0.1035, "step": 26023 }, { "epoch": 0.7592041542680437, "grad_norm": 1.0548655536326177, "learning_rate": 1.4450606658723026e-06, "loss": 0.1153, "step": 26024 }, { "epoch": 0.7592333274986872, "grad_norm": 0.8492669261576317, "learning_rate": 1.4447284640228631e-06, "loss": 0.0978, "step": 26025 }, { "epoch": 0.7592625007293308, "grad_norm": 0.8709168049423043, "learning_rate": 1.4443962939140372e-06, "loss": 0.1065, "step": 26026 }, { "epoch": 0.7592916739599743, "grad_norm": 1.3811960156659688, "learning_rate": 1.4440641555487922e-06, "loss": 0.1065, "step": 26027 }, { "epoch": 0.7593208471906179, "grad_norm": 0.7200139465267126, "learning_rate": 1.4437320489300954e-06, "loss": 0.1045, "step": 26028 }, { "epoch": 0.7593500204212614, "grad_norm": 0.7628794682821637, "learning_rate": 1.44339997406091e-06, "loss": 0.1129, "step": 26029 }, { "epoch": 0.759379193651905, "grad_norm": 0.9482328257842885, "learning_rate": 1.4430679309441992e-06, "loss": 0.1548, "step": 26030 }, { "epoch": 0.7594083668825485, "grad_norm": 0.9370533620990203, "learning_rate": 1.44273591958293e-06, "loss": 0.1201, "step": 26031 }, { "epoch": 0.7594375401131921, "grad_norm": 0.9843232443690725, "learning_rate": 1.4424039399800639e-06, "loss": 0.1229, "step": 26032 }, { "epoch": 0.7594667133438356, "grad_norm": 0.891275018099884, "learning_rate": 1.442071992138566e-06, "loss": 0.1253, "step": 26033 }, { "epoch": 0.7594958865744793, "grad_norm": 1.0782847967864844, "learning_rate": 1.441740076061402e-06, "loss": 0.1184, "step": 26034 }, { "epoch": 0.7595250598051229, "grad_norm": 1.1422747124755679, "learning_rate": 1.4414081917515328e-06, "loss": 0.1218, "step": 26035 }, { "epoch": 0.7595542330357664, "grad_norm": 1.0143969155664316, "learning_rate": 1.4410763392119203e-06, "loss": 0.1212, "step": 26036 }, { "epoch": 0.75958340626641, "grad_norm": 1.0085085582756732, "learning_rate": 1.4407445184455304e-06, "loss": 0.113, "step": 26037 }, { "epoch": 0.7596125794970535, "grad_norm": 0.8049804432728066, "learning_rate": 1.4404127294553216e-06, "loss": 0.1162, "step": 26038 }, { "epoch": 0.7596417527276971, "grad_norm": 0.7588235740314043, "learning_rate": 1.4400809722442604e-06, "loss": 0.1194, "step": 26039 }, { "epoch": 0.7596709259583406, "grad_norm": 0.7968265456666715, "learning_rate": 1.4397492468153047e-06, "loss": 0.1227, "step": 26040 }, { "epoch": 0.7597000991889842, "grad_norm": 0.9157507802234769, "learning_rate": 1.4394175531714193e-06, "loss": 0.1268, "step": 26041 }, { "epoch": 0.7597292724196277, "grad_norm": 0.872768238942069, "learning_rate": 1.4390858913155641e-06, "loss": 0.0965, "step": 26042 }, { "epoch": 0.7597584456502713, "grad_norm": 0.8289127647144127, "learning_rate": 1.4387542612506983e-06, "loss": 0.0954, "step": 26043 }, { "epoch": 0.7597876188809148, "grad_norm": 0.7870445360928183, "learning_rate": 1.438422662979785e-06, "loss": 0.1166, "step": 26044 }, { "epoch": 0.7598167921115584, "grad_norm": 0.910924733280388, "learning_rate": 1.4380910965057843e-06, "loss": 0.1299, "step": 26045 }, { "epoch": 0.759845965342202, "grad_norm": 0.8015310873760055, "learning_rate": 1.4377595618316552e-06, "loss": 0.1259, "step": 26046 }, { "epoch": 0.7598751385728456, "grad_norm": 0.7435053511577032, "learning_rate": 1.4374280589603602e-06, "loss": 0.1202, "step": 26047 }, { "epoch": 0.7599043118034892, "grad_norm": 0.9469440455160251, "learning_rate": 1.4370965878948562e-06, "loss": 0.1072, "step": 26048 }, { "epoch": 0.7599334850341327, "grad_norm": 0.8816852366894563, "learning_rate": 1.4367651486381023e-06, "loss": 0.1254, "step": 26049 }, { "epoch": 0.7599626582647763, "grad_norm": 1.0915406489172184, "learning_rate": 1.4364337411930585e-06, "loss": 0.1094, "step": 26050 }, { "epoch": 0.7599918314954198, "grad_norm": 0.7775839955869801, "learning_rate": 1.436102365562685e-06, "loss": 0.1438, "step": 26051 }, { "epoch": 0.7600210047260634, "grad_norm": 0.7853566644503953, "learning_rate": 1.4357710217499387e-06, "loss": 0.1302, "step": 26052 }, { "epoch": 0.7600501779567069, "grad_norm": 0.9019594927138638, "learning_rate": 1.4354397097577766e-06, "loss": 0.1342, "step": 26053 }, { "epoch": 0.7600793511873505, "grad_norm": 0.6598718319418905, "learning_rate": 1.4351084295891593e-06, "loss": 0.1028, "step": 26054 }, { "epoch": 0.760108524417994, "grad_norm": 0.7998989603001021, "learning_rate": 1.4347771812470428e-06, "loss": 0.1326, "step": 26055 }, { "epoch": 0.7601376976486376, "grad_norm": 0.8621933089397833, "learning_rate": 1.4344459647343833e-06, "loss": 0.1265, "step": 26056 }, { "epoch": 0.7601668708792811, "grad_norm": 0.8460505298845999, "learning_rate": 1.4341147800541387e-06, "loss": 0.1094, "step": 26057 }, { "epoch": 0.7601960441099247, "grad_norm": 0.8597389125328023, "learning_rate": 1.4337836272092681e-06, "loss": 0.1244, "step": 26058 }, { "epoch": 0.7602252173405682, "grad_norm": 0.821636293537273, "learning_rate": 1.4334525062027255e-06, "loss": 0.1313, "step": 26059 }, { "epoch": 0.7602543905712118, "grad_norm": 1.0433881299768766, "learning_rate": 1.4331214170374663e-06, "loss": 0.1067, "step": 26060 }, { "epoch": 0.7602835638018555, "grad_norm": 2.0221918569399744, "learning_rate": 1.4327903597164488e-06, "loss": 0.1075, "step": 26061 }, { "epoch": 0.760312737032499, "grad_norm": 0.7565484371987905, "learning_rate": 1.4324593342426264e-06, "loss": 0.1275, "step": 26062 }, { "epoch": 0.7603419102631426, "grad_norm": 0.9417510453568073, "learning_rate": 1.432128340618955e-06, "loss": 0.1323, "step": 26063 }, { "epoch": 0.7603710834937861, "grad_norm": 0.8785801625610209, "learning_rate": 1.4317973788483914e-06, "loss": 0.101, "step": 26064 }, { "epoch": 0.7604002567244297, "grad_norm": 0.7417185558525325, "learning_rate": 1.4314664489338892e-06, "loss": 0.1231, "step": 26065 }, { "epoch": 0.7604294299550732, "grad_norm": 0.8351879040678396, "learning_rate": 1.4311355508784015e-06, "loss": 0.1048, "step": 26066 }, { "epoch": 0.7604586031857168, "grad_norm": 0.8431731966399241, "learning_rate": 1.430804684684885e-06, "loss": 0.1249, "step": 26067 }, { "epoch": 0.7604877764163603, "grad_norm": 1.4833114662917593, "learning_rate": 1.4304738503562903e-06, "loss": 0.1123, "step": 26068 }, { "epoch": 0.7605169496470039, "grad_norm": 0.8581745216150858, "learning_rate": 1.4301430478955748e-06, "loss": 0.129, "step": 26069 }, { "epoch": 0.7605461228776474, "grad_norm": 1.0792194494872442, "learning_rate": 1.4298122773056883e-06, "loss": 0.1123, "step": 26070 }, { "epoch": 0.760575296108291, "grad_norm": 0.8004529914549448, "learning_rate": 1.4294815385895872e-06, "loss": 0.1073, "step": 26071 }, { "epoch": 0.7606044693389346, "grad_norm": 0.6705396840924204, "learning_rate": 1.4291508317502229e-06, "loss": 0.1075, "step": 26072 }, { "epoch": 0.7606336425695781, "grad_norm": 0.9491198281198915, "learning_rate": 1.4288201567905452e-06, "loss": 0.125, "step": 26073 }, { "epoch": 0.7606628158002218, "grad_norm": 0.6802741828152761, "learning_rate": 1.4284895137135091e-06, "loss": 0.1154, "step": 26074 }, { "epoch": 0.7606919890308653, "grad_norm": 0.6897548118106798, "learning_rate": 1.4281589025220676e-06, "loss": 0.0956, "step": 26075 }, { "epoch": 0.7607211622615089, "grad_norm": 0.5562101397590327, "learning_rate": 1.4278283232191692e-06, "loss": 0.1185, "step": 26076 }, { "epoch": 0.7607503354921524, "grad_norm": 0.697138522827796, "learning_rate": 1.4274977758077685e-06, "loss": 0.0986, "step": 26077 }, { "epoch": 0.760779508722796, "grad_norm": 0.7319436734846092, "learning_rate": 1.4271672602908143e-06, "loss": 0.1113, "step": 26078 }, { "epoch": 0.7608086819534395, "grad_norm": 0.7363816138287833, "learning_rate": 1.4268367766712571e-06, "loss": 0.094, "step": 26079 }, { "epoch": 0.7608378551840831, "grad_norm": 0.9144220524949574, "learning_rate": 1.4265063249520478e-06, "loss": 0.1191, "step": 26080 }, { "epoch": 0.7608670284147266, "grad_norm": 0.8609360798205258, "learning_rate": 1.4261759051361378e-06, "loss": 0.1045, "step": 26081 }, { "epoch": 0.7608962016453702, "grad_norm": 0.7758677831773033, "learning_rate": 1.4258455172264774e-06, "loss": 0.1203, "step": 26082 }, { "epoch": 0.7609253748760137, "grad_norm": 0.6705278719730049, "learning_rate": 1.4255151612260127e-06, "loss": 0.1273, "step": 26083 }, { "epoch": 0.7609545481066573, "grad_norm": 0.8855828532746887, "learning_rate": 1.425184837137697e-06, "loss": 0.1223, "step": 26084 }, { "epoch": 0.7609837213373009, "grad_norm": 0.8729962093532602, "learning_rate": 1.4248545449644778e-06, "loss": 0.1048, "step": 26085 }, { "epoch": 0.7610128945679444, "grad_norm": 0.9834331543670914, "learning_rate": 1.424524284709302e-06, "loss": 0.1099, "step": 26086 }, { "epoch": 0.761042067798588, "grad_norm": 1.1227771963032696, "learning_rate": 1.4241940563751205e-06, "loss": 0.1089, "step": 26087 }, { "epoch": 0.7610712410292316, "grad_norm": 0.9042592500315241, "learning_rate": 1.4238638599648818e-06, "loss": 0.1044, "step": 26088 }, { "epoch": 0.7611004142598752, "grad_norm": 0.9398773073336264, "learning_rate": 1.423533695481533e-06, "loss": 0.0945, "step": 26089 }, { "epoch": 0.7611295874905187, "grad_norm": 0.9616544174256167, "learning_rate": 1.4232035629280199e-06, "loss": 0.1295, "step": 26090 }, { "epoch": 0.7611587607211623, "grad_norm": 1.0352716786918337, "learning_rate": 1.4228734623072932e-06, "loss": 0.1224, "step": 26091 }, { "epoch": 0.7611879339518058, "grad_norm": 0.9028573316249922, "learning_rate": 1.422543393622297e-06, "loss": 0.1108, "step": 26092 }, { "epoch": 0.7612171071824494, "grad_norm": 1.06193886836678, "learning_rate": 1.4222133568759793e-06, "loss": 0.1204, "step": 26093 }, { "epoch": 0.761246280413093, "grad_norm": 0.894203663634131, "learning_rate": 1.4218833520712876e-06, "loss": 0.1339, "step": 26094 }, { "epoch": 0.7612754536437365, "grad_norm": 0.9934211483158438, "learning_rate": 1.421553379211168e-06, "loss": 0.1122, "step": 26095 }, { "epoch": 0.76130462687438, "grad_norm": 0.8794428270048518, "learning_rate": 1.4212234382985634e-06, "loss": 0.1033, "step": 26096 }, { "epoch": 0.7613338001050236, "grad_norm": 0.8888271444596492, "learning_rate": 1.420893529336424e-06, "loss": 0.103, "step": 26097 }, { "epoch": 0.7613629733356672, "grad_norm": 0.8720925799092015, "learning_rate": 1.4205636523276907e-06, "loss": 0.1202, "step": 26098 }, { "epoch": 0.7613921465663107, "grad_norm": 0.9711674969153876, "learning_rate": 1.4202338072753119e-06, "loss": 0.1303, "step": 26099 }, { "epoch": 0.7614213197969543, "grad_norm": 1.0092013053378361, "learning_rate": 1.4199039941822296e-06, "loss": 0.1049, "step": 26100 }, { "epoch": 0.7614504930275979, "grad_norm": 1.0326288842659024, "learning_rate": 1.4195742130513917e-06, "loss": 0.1304, "step": 26101 }, { "epoch": 0.7614796662582415, "grad_norm": 1.290525021354441, "learning_rate": 1.4192444638857406e-06, "loss": 0.1092, "step": 26102 }, { "epoch": 0.761508839488885, "grad_norm": 0.7499833600199455, "learning_rate": 1.418914746688218e-06, "loss": 0.1038, "step": 26103 }, { "epoch": 0.7615380127195286, "grad_norm": 0.7940921307230743, "learning_rate": 1.4185850614617702e-06, "loss": 0.1243, "step": 26104 }, { "epoch": 0.7615671859501721, "grad_norm": 0.890027375851292, "learning_rate": 1.4182554082093413e-06, "loss": 0.1245, "step": 26105 }, { "epoch": 0.7615963591808157, "grad_norm": 1.0072383997810619, "learning_rate": 1.417925786933872e-06, "loss": 0.1012, "step": 26106 }, { "epoch": 0.7616255324114592, "grad_norm": 0.8988512233934777, "learning_rate": 1.4175961976383074e-06, "loss": 0.1075, "step": 26107 }, { "epoch": 0.7616547056421028, "grad_norm": 0.7775804860429172, "learning_rate": 1.4172666403255885e-06, "loss": 0.122, "step": 26108 }, { "epoch": 0.7616838788727464, "grad_norm": 0.8146670210731684, "learning_rate": 1.4169371149986566e-06, "loss": 0.1181, "step": 26109 }, { "epoch": 0.7617130521033899, "grad_norm": 0.8593160795739001, "learning_rate": 1.4166076216604546e-06, "loss": 0.1273, "step": 26110 }, { "epoch": 0.7617422253340335, "grad_norm": 0.7258672718667234, "learning_rate": 1.416278160313926e-06, "loss": 0.1052, "step": 26111 }, { "epoch": 0.761771398564677, "grad_norm": 0.8639625016813878, "learning_rate": 1.41594873096201e-06, "loss": 0.129, "step": 26112 }, { "epoch": 0.7618005717953206, "grad_norm": 1.0549937706213535, "learning_rate": 1.4156193336076468e-06, "loss": 0.1188, "step": 26113 }, { "epoch": 0.7618297450259641, "grad_norm": 1.040744735760747, "learning_rate": 1.4152899682537807e-06, "loss": 0.1311, "step": 26114 }, { "epoch": 0.7618589182566078, "grad_norm": 0.800371634445398, "learning_rate": 1.4149606349033479e-06, "loss": 0.1102, "step": 26115 }, { "epoch": 0.7618880914872513, "grad_norm": 0.7326289348566162, "learning_rate": 1.414631333559292e-06, "loss": 0.1107, "step": 26116 }, { "epoch": 0.7619172647178949, "grad_norm": 0.8358216723861759, "learning_rate": 1.4143020642245508e-06, "loss": 0.1244, "step": 26117 }, { "epoch": 0.7619464379485384, "grad_norm": 0.9032544770366628, "learning_rate": 1.4139728269020658e-06, "loss": 0.1041, "step": 26118 }, { "epoch": 0.761975611179182, "grad_norm": 0.6647824916904694, "learning_rate": 1.4136436215947758e-06, "loss": 0.1187, "step": 26119 }, { "epoch": 0.7620047844098256, "grad_norm": 0.7611100393768583, "learning_rate": 1.4133144483056177e-06, "loss": 0.1216, "step": 26120 }, { "epoch": 0.7620339576404691, "grad_norm": 1.0394359126634016, "learning_rate": 1.412985307037532e-06, "loss": 0.1087, "step": 26121 }, { "epoch": 0.7620631308711127, "grad_norm": 0.8332488684432571, "learning_rate": 1.4126561977934588e-06, "loss": 0.1305, "step": 26122 }, { "epoch": 0.7620923041017562, "grad_norm": 0.8175459595351123, "learning_rate": 1.4123271205763328e-06, "loss": 0.1103, "step": 26123 }, { "epoch": 0.7621214773323998, "grad_norm": 1.0622581782031035, "learning_rate": 1.4119980753890961e-06, "loss": 0.0998, "step": 26124 }, { "epoch": 0.7621506505630433, "grad_norm": 1.350676129465357, "learning_rate": 1.4116690622346834e-06, "loss": 0.115, "step": 26125 }, { "epoch": 0.7621798237936869, "grad_norm": 1.1058608444791878, "learning_rate": 1.411340081116031e-06, "loss": 0.1075, "step": 26126 }, { "epoch": 0.7622089970243304, "grad_norm": 0.8742520958084282, "learning_rate": 1.4110111320360782e-06, "loss": 0.1241, "step": 26127 }, { "epoch": 0.7622381702549741, "grad_norm": 0.8807076242303162, "learning_rate": 1.4106822149977628e-06, "loss": 0.1218, "step": 26128 }, { "epoch": 0.7622673434856176, "grad_norm": 0.7028763989526833, "learning_rate": 1.4103533300040196e-06, "loss": 0.1246, "step": 26129 }, { "epoch": 0.7622965167162612, "grad_norm": 0.7250147832036411, "learning_rate": 1.4100244770577831e-06, "loss": 0.1214, "step": 26130 }, { "epoch": 0.7623256899469047, "grad_norm": 0.8747872299612623, "learning_rate": 1.4096956561619929e-06, "loss": 0.1208, "step": 26131 }, { "epoch": 0.7623548631775483, "grad_norm": 0.6920845028611138, "learning_rate": 1.4093668673195832e-06, "loss": 0.0886, "step": 26132 }, { "epoch": 0.7623840364081919, "grad_norm": 0.8411257337544789, "learning_rate": 1.409038110533485e-06, "loss": 0.1176, "step": 26133 }, { "epoch": 0.7624132096388354, "grad_norm": 0.7044509770592748, "learning_rate": 1.408709385806641e-06, "loss": 0.1193, "step": 26134 }, { "epoch": 0.762442382869479, "grad_norm": 0.9000777091653247, "learning_rate": 1.4083806931419825e-06, "loss": 0.1249, "step": 26135 }, { "epoch": 0.7624715561001225, "grad_norm": 0.773039298613744, "learning_rate": 1.4080520325424418e-06, "loss": 0.1076, "step": 26136 }, { "epoch": 0.7625007293307661, "grad_norm": 0.718514893172037, "learning_rate": 1.4077234040109567e-06, "loss": 0.1214, "step": 26137 }, { "epoch": 0.7625299025614096, "grad_norm": 0.7763429287032169, "learning_rate": 1.4073948075504596e-06, "loss": 0.1298, "step": 26138 }, { "epoch": 0.7625590757920532, "grad_norm": 1.1534295868138604, "learning_rate": 1.4070662431638821e-06, "loss": 0.122, "step": 26139 }, { "epoch": 0.7625882490226967, "grad_norm": 0.8624455969386104, "learning_rate": 1.4067377108541597e-06, "loss": 0.1488, "step": 26140 }, { "epoch": 0.7626174222533403, "grad_norm": 0.8243022362839859, "learning_rate": 1.4064092106242272e-06, "loss": 0.1213, "step": 26141 }, { "epoch": 0.762646595483984, "grad_norm": 0.769343615825338, "learning_rate": 1.406080742477015e-06, "loss": 0.1135, "step": 26142 }, { "epoch": 0.7626757687146275, "grad_norm": 0.9565018495201091, "learning_rate": 1.4057523064154544e-06, "loss": 0.1172, "step": 26143 }, { "epoch": 0.762704941945271, "grad_norm": 1.4047610873709226, "learning_rate": 1.405423902442481e-06, "loss": 0.1457, "step": 26144 }, { "epoch": 0.7627341151759146, "grad_norm": 0.9720275556686869, "learning_rate": 1.4050955305610232e-06, "loss": 0.1271, "step": 26145 }, { "epoch": 0.7627632884065582, "grad_norm": 0.7835991442333221, "learning_rate": 1.4047671907740156e-06, "loss": 0.0847, "step": 26146 }, { "epoch": 0.7627924616372017, "grad_norm": 0.9013144996617115, "learning_rate": 1.4044388830843875e-06, "loss": 0.1313, "step": 26147 }, { "epoch": 0.7628216348678453, "grad_norm": 0.8317579576376288, "learning_rate": 1.4041106074950716e-06, "loss": 0.0963, "step": 26148 }, { "epoch": 0.7628508080984888, "grad_norm": 0.9615326176543755, "learning_rate": 1.4037823640089982e-06, "loss": 0.1088, "step": 26149 }, { "epoch": 0.7628799813291324, "grad_norm": 0.9889612993697408, "learning_rate": 1.4034541526290957e-06, "loss": 0.1356, "step": 26150 }, { "epoch": 0.7629091545597759, "grad_norm": 0.8351460427530856, "learning_rate": 1.4031259733582958e-06, "loss": 0.1057, "step": 26151 }, { "epoch": 0.7629383277904195, "grad_norm": 0.897135120743724, "learning_rate": 1.4027978261995301e-06, "loss": 0.1043, "step": 26152 }, { "epoch": 0.762967501021063, "grad_norm": 0.9586903416993737, "learning_rate": 1.4024697111557251e-06, "loss": 0.1102, "step": 26153 }, { "epoch": 0.7629966742517066, "grad_norm": 0.7091006457430841, "learning_rate": 1.4021416282298133e-06, "loss": 0.1033, "step": 26154 }, { "epoch": 0.7630258474823503, "grad_norm": 0.8564627396767824, "learning_rate": 1.401813577424722e-06, "loss": 0.1457, "step": 26155 }, { "epoch": 0.7630550207129938, "grad_norm": 0.8665696067842101, "learning_rate": 1.401485558743379e-06, "loss": 0.1177, "step": 26156 }, { "epoch": 0.7630841939436374, "grad_norm": 1.0068283095317692, "learning_rate": 1.401157572188714e-06, "loss": 0.134, "step": 26157 }, { "epoch": 0.7631133671742809, "grad_norm": 0.8907497856235018, "learning_rate": 1.4008296177636565e-06, "loss": 0.1269, "step": 26158 }, { "epoch": 0.7631425404049245, "grad_norm": 0.9406625381336449, "learning_rate": 1.4005016954711325e-06, "loss": 0.0952, "step": 26159 }, { "epoch": 0.763171713635568, "grad_norm": 1.0340193349784765, "learning_rate": 1.400173805314069e-06, "loss": 0.1187, "step": 26160 }, { "epoch": 0.7632008868662116, "grad_norm": 0.8797648399153376, "learning_rate": 1.3998459472953956e-06, "loss": 0.1209, "step": 26161 }, { "epoch": 0.7632300600968551, "grad_norm": 0.8892336228707008, "learning_rate": 1.3995181214180386e-06, "loss": 0.1182, "step": 26162 }, { "epoch": 0.7632592333274987, "grad_norm": 0.7893949456538327, "learning_rate": 1.399190327684921e-06, "loss": 0.1184, "step": 26163 }, { "epoch": 0.7632884065581422, "grad_norm": 0.9609933664140798, "learning_rate": 1.3988625660989758e-06, "loss": 0.0968, "step": 26164 }, { "epoch": 0.7633175797887858, "grad_norm": 0.7529623815427332, "learning_rate": 1.3985348366631258e-06, "loss": 0.0961, "step": 26165 }, { "epoch": 0.7633467530194293, "grad_norm": 0.8554896954035796, "learning_rate": 1.3982071393802953e-06, "loss": 0.1115, "step": 26166 }, { "epoch": 0.7633759262500729, "grad_norm": 0.9777081127658431, "learning_rate": 1.3978794742534135e-06, "loss": 0.1248, "step": 26167 }, { "epoch": 0.7634050994807164, "grad_norm": 0.7910750670891128, "learning_rate": 1.3975518412854038e-06, "loss": 0.1054, "step": 26168 }, { "epoch": 0.7634342727113601, "grad_norm": 0.8372695287966306, "learning_rate": 1.3972242404791896e-06, "loss": 0.1132, "step": 26169 }, { "epoch": 0.7634634459420037, "grad_norm": 0.7779962076886326, "learning_rate": 1.3968966718376976e-06, "loss": 0.1249, "step": 26170 }, { "epoch": 0.7634926191726472, "grad_norm": 0.9212527853625375, "learning_rate": 1.3965691353638532e-06, "loss": 0.1071, "step": 26171 }, { "epoch": 0.7635217924032908, "grad_norm": 0.9292382048809429, "learning_rate": 1.3962416310605798e-06, "loss": 0.0999, "step": 26172 }, { "epoch": 0.7635509656339343, "grad_norm": 1.134281645203754, "learning_rate": 1.395914158930799e-06, "loss": 0.1005, "step": 26173 }, { "epoch": 0.7635801388645779, "grad_norm": 0.9668929216293283, "learning_rate": 1.395586718977438e-06, "loss": 0.0955, "step": 26174 }, { "epoch": 0.7636093120952214, "grad_norm": 0.806384713606756, "learning_rate": 1.3952593112034163e-06, "loss": 0.1068, "step": 26175 }, { "epoch": 0.763638485325865, "grad_norm": 0.729396128674825, "learning_rate": 1.3949319356116608e-06, "loss": 0.1182, "step": 26176 }, { "epoch": 0.7636676585565085, "grad_norm": 1.0656409502481048, "learning_rate": 1.3946045922050911e-06, "loss": 0.114, "step": 26177 }, { "epoch": 0.7636968317871521, "grad_norm": 0.9233551092026401, "learning_rate": 1.3942772809866317e-06, "loss": 0.1134, "step": 26178 }, { "epoch": 0.7637260050177956, "grad_norm": 0.9580018016534266, "learning_rate": 1.3939500019592046e-06, "loss": 0.1182, "step": 26179 }, { "epoch": 0.7637551782484392, "grad_norm": 0.7582018960150472, "learning_rate": 1.3936227551257293e-06, "loss": 0.1503, "step": 26180 }, { "epoch": 0.7637843514790827, "grad_norm": 0.7594002273528503, "learning_rate": 1.3932955404891295e-06, "loss": 0.0959, "step": 26181 }, { "epoch": 0.7638135247097264, "grad_norm": 0.8320128400983632, "learning_rate": 1.3929683580523274e-06, "loss": 0.1023, "step": 26182 }, { "epoch": 0.76384269794037, "grad_norm": 0.8733870138636963, "learning_rate": 1.3926412078182411e-06, "loss": 0.1258, "step": 26183 }, { "epoch": 0.7638718711710135, "grad_norm": 0.9261463317054388, "learning_rate": 1.392314089789794e-06, "loss": 0.1367, "step": 26184 }, { "epoch": 0.7639010444016571, "grad_norm": 0.9961368062548425, "learning_rate": 1.3919870039699062e-06, "loss": 0.1163, "step": 26185 }, { "epoch": 0.7639302176323006, "grad_norm": 0.6961372193675052, "learning_rate": 1.3916599503614958e-06, "loss": 0.1006, "step": 26186 }, { "epoch": 0.7639593908629442, "grad_norm": 0.7459581187138465, "learning_rate": 1.391332928967483e-06, "loss": 0.114, "step": 26187 }, { "epoch": 0.7639885640935877, "grad_norm": 0.9477156298218982, "learning_rate": 1.391005939790791e-06, "loss": 0.1203, "step": 26188 }, { "epoch": 0.7640177373242313, "grad_norm": 1.0762321539839521, "learning_rate": 1.3906789828343358e-06, "loss": 0.1258, "step": 26189 }, { "epoch": 0.7640469105548748, "grad_norm": 0.9400921453796561, "learning_rate": 1.3903520581010354e-06, "loss": 0.128, "step": 26190 }, { "epoch": 0.7640760837855184, "grad_norm": 0.9850343275209514, "learning_rate": 1.3900251655938118e-06, "loss": 0.1159, "step": 26191 }, { "epoch": 0.7641052570161619, "grad_norm": 0.776003117127375, "learning_rate": 1.3896983053155821e-06, "loss": 0.108, "step": 26192 }, { "epoch": 0.7641344302468055, "grad_norm": 0.8159114840533124, "learning_rate": 1.3893714772692607e-06, "loss": 0.0962, "step": 26193 }, { "epoch": 0.764163603477449, "grad_norm": 0.9462367818043006, "learning_rate": 1.389044681457772e-06, "loss": 0.1176, "step": 26194 }, { "epoch": 0.7641927767080926, "grad_norm": 1.0006720493583758, "learning_rate": 1.3887179178840305e-06, "loss": 0.1124, "step": 26195 }, { "epoch": 0.7642219499387363, "grad_norm": 0.8008535688160848, "learning_rate": 1.3883911865509514e-06, "loss": 0.1061, "step": 26196 }, { "epoch": 0.7642511231693798, "grad_norm": 0.7653365849573978, "learning_rate": 1.3880644874614552e-06, "loss": 0.1058, "step": 26197 }, { "epoch": 0.7642802964000234, "grad_norm": 1.0773741125237393, "learning_rate": 1.3877378206184571e-06, "loss": 0.1298, "step": 26198 }, { "epoch": 0.7643094696306669, "grad_norm": 0.9364940124081743, "learning_rate": 1.3874111860248722e-06, "loss": 0.1141, "step": 26199 }, { "epoch": 0.7643386428613105, "grad_norm": 0.9301490879668555, "learning_rate": 1.3870845836836177e-06, "loss": 0.1126, "step": 26200 }, { "epoch": 0.764367816091954, "grad_norm": 0.8093211906790393, "learning_rate": 1.386758013597611e-06, "loss": 0.1168, "step": 26201 }, { "epoch": 0.7643969893225976, "grad_norm": 0.8698017267897885, "learning_rate": 1.386431475769766e-06, "loss": 0.1174, "step": 26202 }, { "epoch": 0.7644261625532411, "grad_norm": 0.953222352438878, "learning_rate": 1.3861049702029971e-06, "loss": 0.1231, "step": 26203 }, { "epoch": 0.7644553357838847, "grad_norm": 0.8810582395735321, "learning_rate": 1.3857784969002214e-06, "loss": 0.1207, "step": 26204 }, { "epoch": 0.7644845090145282, "grad_norm": 0.7950840377710335, "learning_rate": 1.3854520558643513e-06, "loss": 0.1412, "step": 26205 }, { "epoch": 0.7645136822451718, "grad_norm": 0.8763916156368131, "learning_rate": 1.3851256470983037e-06, "loss": 0.1035, "step": 26206 }, { "epoch": 0.7645428554758154, "grad_norm": 0.8761734058534855, "learning_rate": 1.38479927060499e-06, "loss": 0.127, "step": 26207 }, { "epoch": 0.7645720287064589, "grad_norm": 1.0127740547800734, "learning_rate": 1.3844729263873269e-06, "loss": 0.1309, "step": 26208 }, { "epoch": 0.7646012019371025, "grad_norm": 0.8732384759525622, "learning_rate": 1.3841466144482262e-06, "loss": 0.1312, "step": 26209 }, { "epoch": 0.7646303751677461, "grad_norm": 0.9122354532164116, "learning_rate": 1.3838203347905999e-06, "loss": 0.1104, "step": 26210 }, { "epoch": 0.7646595483983897, "grad_norm": 0.8134407679285944, "learning_rate": 1.3834940874173624e-06, "loss": 0.1152, "step": 26211 }, { "epoch": 0.7646887216290332, "grad_norm": 0.8169273397185864, "learning_rate": 1.383167872331428e-06, "loss": 0.1396, "step": 26212 }, { "epoch": 0.7647178948596768, "grad_norm": 0.9313472989800363, "learning_rate": 1.382841689535706e-06, "loss": 0.1084, "step": 26213 }, { "epoch": 0.7647470680903203, "grad_norm": 0.8093829327740456, "learning_rate": 1.3825155390331114e-06, "loss": 0.1313, "step": 26214 }, { "epoch": 0.7647762413209639, "grad_norm": 0.9013230684690757, "learning_rate": 1.382189420826554e-06, "loss": 0.1357, "step": 26215 }, { "epoch": 0.7648054145516074, "grad_norm": 0.7668506858228894, "learning_rate": 1.3818633349189448e-06, "loss": 0.1242, "step": 26216 }, { "epoch": 0.764834587782251, "grad_norm": 0.926569989866445, "learning_rate": 1.381537281313196e-06, "loss": 0.1132, "step": 26217 }, { "epoch": 0.7648637610128945, "grad_norm": 0.6817679841498191, "learning_rate": 1.3812112600122201e-06, "loss": 0.0901, "step": 26218 }, { "epoch": 0.7648929342435381, "grad_norm": 0.8774705720123143, "learning_rate": 1.3808852710189263e-06, "loss": 0.1268, "step": 26219 }, { "epoch": 0.7649221074741817, "grad_norm": 0.853094144607071, "learning_rate": 1.3805593143362227e-06, "loss": 0.1262, "step": 26220 }, { "epoch": 0.7649512807048252, "grad_norm": 0.8010893325699411, "learning_rate": 1.3802333899670239e-06, "loss": 0.1089, "step": 26221 }, { "epoch": 0.7649804539354688, "grad_norm": 0.8227006921687338, "learning_rate": 1.3799074979142369e-06, "loss": 0.1336, "step": 26222 }, { "epoch": 0.7650096271661124, "grad_norm": 0.7544040557332929, "learning_rate": 1.379581638180768e-06, "loss": 0.1174, "step": 26223 }, { "epoch": 0.765038800396756, "grad_norm": 0.8637354983940975, "learning_rate": 1.3792558107695335e-06, "loss": 0.1453, "step": 26224 }, { "epoch": 0.7650679736273995, "grad_norm": 1.0777165785817178, "learning_rate": 1.3789300156834389e-06, "loss": 0.1152, "step": 26225 }, { "epoch": 0.7650971468580431, "grad_norm": 0.7879738396809659, "learning_rate": 1.3786042529253913e-06, "loss": 0.1084, "step": 26226 }, { "epoch": 0.7651263200886866, "grad_norm": 0.7016310321239007, "learning_rate": 1.3782785224983015e-06, "loss": 0.0875, "step": 26227 }, { "epoch": 0.7651554933193302, "grad_norm": 1.2203319913017596, "learning_rate": 1.3779528244050765e-06, "loss": 0.1087, "step": 26228 }, { "epoch": 0.7651846665499737, "grad_norm": 0.7662474942040508, "learning_rate": 1.3776271586486229e-06, "loss": 0.1388, "step": 26229 }, { "epoch": 0.7652138397806173, "grad_norm": 0.8063930269842505, "learning_rate": 1.3773015252318489e-06, "loss": 0.1176, "step": 26230 }, { "epoch": 0.7652430130112609, "grad_norm": 0.8660898453060176, "learning_rate": 1.3769759241576642e-06, "loss": 0.1126, "step": 26231 }, { "epoch": 0.7652721862419044, "grad_norm": 1.010878535140388, "learning_rate": 1.376650355428973e-06, "loss": 0.0982, "step": 26232 }, { "epoch": 0.765301359472548, "grad_norm": 0.7806800752071383, "learning_rate": 1.376324819048681e-06, "loss": 0.1208, "step": 26233 }, { "epoch": 0.7653305327031915, "grad_norm": 0.938268846177341, "learning_rate": 1.3759993150196975e-06, "loss": 0.1226, "step": 26234 }, { "epoch": 0.7653597059338351, "grad_norm": 0.9861122910139926, "learning_rate": 1.3756738433449257e-06, "loss": 0.125, "step": 26235 }, { "epoch": 0.7653888791644786, "grad_norm": 0.9386610683992159, "learning_rate": 1.375348404027274e-06, "loss": 0.1236, "step": 26236 }, { "epoch": 0.7654180523951223, "grad_norm": 0.8633672654574206, "learning_rate": 1.375022997069645e-06, "loss": 0.1255, "step": 26237 }, { "epoch": 0.7654472256257658, "grad_norm": 0.8935751608128624, "learning_rate": 1.374697622474947e-06, "loss": 0.1252, "step": 26238 }, { "epoch": 0.7654763988564094, "grad_norm": 0.8291465751734923, "learning_rate": 1.374372280246083e-06, "loss": 0.1198, "step": 26239 }, { "epoch": 0.7655055720870529, "grad_norm": 0.6881511252850518, "learning_rate": 1.374046970385956e-06, "loss": 0.0963, "step": 26240 }, { "epoch": 0.7655347453176965, "grad_norm": 0.7701647522818884, "learning_rate": 1.3737216928974723e-06, "loss": 0.1095, "step": 26241 }, { "epoch": 0.76556391854834, "grad_norm": 1.071376330529272, "learning_rate": 1.373396447783537e-06, "loss": 0.1439, "step": 26242 }, { "epoch": 0.7655930917789836, "grad_norm": 1.0701209096809126, "learning_rate": 1.3730712350470516e-06, "loss": 0.1083, "step": 26243 }, { "epoch": 0.7656222650096272, "grad_norm": 0.8551621899125321, "learning_rate": 1.372746054690921e-06, "loss": 0.1212, "step": 26244 }, { "epoch": 0.7656514382402707, "grad_norm": 1.0081329271816202, "learning_rate": 1.3724209067180483e-06, "loss": 0.1297, "step": 26245 }, { "epoch": 0.7656806114709143, "grad_norm": 0.9135838539518528, "learning_rate": 1.3720957911313342e-06, "loss": 0.1246, "step": 26246 }, { "epoch": 0.7657097847015578, "grad_norm": 0.7551891413814291, "learning_rate": 1.3717707079336828e-06, "loss": 0.1087, "step": 26247 }, { "epoch": 0.7657389579322014, "grad_norm": 0.8999379916351353, "learning_rate": 1.3714456571279984e-06, "loss": 0.1187, "step": 26248 }, { "epoch": 0.7657681311628449, "grad_norm": 1.0576561005476992, "learning_rate": 1.3711206387171798e-06, "loss": 0.1294, "step": 26249 }, { "epoch": 0.7657973043934886, "grad_norm": 0.9553902479960511, "learning_rate": 1.3707956527041294e-06, "loss": 0.1214, "step": 26250 }, { "epoch": 0.7658264776241321, "grad_norm": 0.9196799012445692, "learning_rate": 1.37047069909175e-06, "loss": 0.1445, "step": 26251 }, { "epoch": 0.7658556508547757, "grad_norm": 0.7874001995227332, "learning_rate": 1.3701457778829418e-06, "loss": 0.1058, "step": 26252 }, { "epoch": 0.7658848240854192, "grad_norm": 1.0459834083947193, "learning_rate": 1.369820889080603e-06, "loss": 0.1093, "step": 26253 }, { "epoch": 0.7659139973160628, "grad_norm": 0.938649616690978, "learning_rate": 1.3694960326876393e-06, "loss": 0.1384, "step": 26254 }, { "epoch": 0.7659431705467064, "grad_norm": 0.6865196226329512, "learning_rate": 1.3691712087069486e-06, "loss": 0.1058, "step": 26255 }, { "epoch": 0.7659723437773499, "grad_norm": 0.9685957610013003, "learning_rate": 1.368846417141429e-06, "loss": 0.1487, "step": 26256 }, { "epoch": 0.7660015170079935, "grad_norm": 0.7855783301905297, "learning_rate": 1.368521657993983e-06, "loss": 0.1156, "step": 26257 }, { "epoch": 0.766030690238637, "grad_norm": 0.9477634181498675, "learning_rate": 1.3681969312675092e-06, "loss": 0.1107, "step": 26258 }, { "epoch": 0.7660598634692806, "grad_norm": 1.1397961070466422, "learning_rate": 1.3678722369649045e-06, "loss": 0.1233, "step": 26259 }, { "epoch": 0.7660890366999241, "grad_norm": 0.8802910992411307, "learning_rate": 1.3675475750890693e-06, "loss": 0.1173, "step": 26260 }, { "epoch": 0.7661182099305677, "grad_norm": 0.6852959092629128, "learning_rate": 1.3672229456429036e-06, "loss": 0.1033, "step": 26261 }, { "epoch": 0.7661473831612112, "grad_norm": 0.8839510340431758, "learning_rate": 1.3668983486293047e-06, "loss": 0.0959, "step": 26262 }, { "epoch": 0.7661765563918548, "grad_norm": 0.8572553122440686, "learning_rate": 1.3665737840511684e-06, "loss": 0.1088, "step": 26263 }, { "epoch": 0.7662057296224984, "grad_norm": 0.7137228297578125, "learning_rate": 1.3662492519113951e-06, "loss": 0.1143, "step": 26264 }, { "epoch": 0.766234902853142, "grad_norm": 0.8696405135233204, "learning_rate": 1.3659247522128798e-06, "loss": 0.1464, "step": 26265 }, { "epoch": 0.7662640760837856, "grad_norm": 0.7868935310991707, "learning_rate": 1.365600284958522e-06, "loss": 0.1097, "step": 26266 }, { "epoch": 0.7662932493144291, "grad_norm": 0.8974860316812074, "learning_rate": 1.3652758501512165e-06, "loss": 0.1058, "step": 26267 }, { "epoch": 0.7663224225450727, "grad_norm": 0.799519184422163, "learning_rate": 1.3649514477938613e-06, "loss": 0.1056, "step": 26268 }, { "epoch": 0.7663515957757162, "grad_norm": 0.8261566520424768, "learning_rate": 1.3646270778893523e-06, "loss": 0.1151, "step": 26269 }, { "epoch": 0.7663807690063598, "grad_norm": 0.8451196113916615, "learning_rate": 1.364302740440583e-06, "loss": 0.1019, "step": 26270 }, { "epoch": 0.7664099422370033, "grad_norm": 0.727806267640842, "learning_rate": 1.3639784354504509e-06, "loss": 0.0953, "step": 26271 }, { "epoch": 0.7664391154676469, "grad_norm": 0.8105129847942211, "learning_rate": 1.3636541629218536e-06, "loss": 0.136, "step": 26272 }, { "epoch": 0.7664682886982904, "grad_norm": 0.7534847321433793, "learning_rate": 1.363329922857682e-06, "loss": 0.0933, "step": 26273 }, { "epoch": 0.766497461928934, "grad_norm": 0.78367161884003, "learning_rate": 1.3630057152608334e-06, "loss": 0.1095, "step": 26274 }, { "epoch": 0.7665266351595775, "grad_norm": 0.6653689714496968, "learning_rate": 1.3626815401342025e-06, "loss": 0.0926, "step": 26275 }, { "epoch": 0.7665558083902211, "grad_norm": 0.7206363186672688, "learning_rate": 1.3623573974806808e-06, "loss": 0.1182, "step": 26276 }, { "epoch": 0.7665849816208647, "grad_norm": 0.8402632051718905, "learning_rate": 1.3620332873031639e-06, "loss": 0.144, "step": 26277 }, { "epoch": 0.7666141548515083, "grad_norm": 0.9351239698532204, "learning_rate": 1.3617092096045466e-06, "loss": 0.1109, "step": 26278 }, { "epoch": 0.7666433280821519, "grad_norm": 0.7641055166835903, "learning_rate": 1.3613851643877206e-06, "loss": 0.12, "step": 26279 }, { "epoch": 0.7666725013127954, "grad_norm": 0.8797418194600454, "learning_rate": 1.361061151655579e-06, "loss": 0.1249, "step": 26280 }, { "epoch": 0.766701674543439, "grad_norm": 0.7831566990130667, "learning_rate": 1.3607371714110151e-06, "loss": 0.1423, "step": 26281 }, { "epoch": 0.7667308477740825, "grad_norm": 0.9812283302496945, "learning_rate": 1.3604132236569212e-06, "loss": 0.1243, "step": 26282 }, { "epoch": 0.7667600210047261, "grad_norm": 0.7172249047175969, "learning_rate": 1.3600893083961864e-06, "loss": 0.1083, "step": 26283 }, { "epoch": 0.7667891942353696, "grad_norm": 0.8203349237324321, "learning_rate": 1.3597654256317084e-06, "loss": 0.0939, "step": 26284 }, { "epoch": 0.7668183674660132, "grad_norm": 0.9417641017967823, "learning_rate": 1.3594415753663754e-06, "loss": 0.1094, "step": 26285 }, { "epoch": 0.7668475406966567, "grad_norm": 0.8256592426687523, "learning_rate": 1.359117757603078e-06, "loss": 0.1242, "step": 26286 }, { "epoch": 0.7668767139273003, "grad_norm": 0.9015058823509582, "learning_rate": 1.3587939723447091e-06, "loss": 0.1245, "step": 26287 }, { "epoch": 0.7669058871579438, "grad_norm": 1.2808031531899104, "learning_rate": 1.3584702195941585e-06, "loss": 0.12, "step": 26288 }, { "epoch": 0.7669350603885874, "grad_norm": 0.8742758191790518, "learning_rate": 1.3581464993543147e-06, "loss": 0.102, "step": 26289 }, { "epoch": 0.7669642336192309, "grad_norm": 0.7941010130215583, "learning_rate": 1.35782281162807e-06, "loss": 0.1385, "step": 26290 }, { "epoch": 0.7669934068498746, "grad_norm": 0.750154611086974, "learning_rate": 1.3574991564183155e-06, "loss": 0.1038, "step": 26291 }, { "epoch": 0.7670225800805182, "grad_norm": 0.998807815362782, "learning_rate": 1.3571755337279386e-06, "loss": 0.1118, "step": 26292 }, { "epoch": 0.7670517533111617, "grad_norm": 1.2365964096570772, "learning_rate": 1.356851943559827e-06, "loss": 0.1038, "step": 26293 }, { "epoch": 0.7670809265418053, "grad_norm": 1.3332685752500593, "learning_rate": 1.3565283859168738e-06, "loss": 0.1268, "step": 26294 }, { "epoch": 0.7671100997724488, "grad_norm": 0.9024981148315767, "learning_rate": 1.3562048608019635e-06, "loss": 0.1321, "step": 26295 }, { "epoch": 0.7671392730030924, "grad_norm": 0.9515302823841885, "learning_rate": 1.3558813682179884e-06, "loss": 0.1136, "step": 26296 }, { "epoch": 0.7671684462337359, "grad_norm": 0.6359741660583533, "learning_rate": 1.3555579081678321e-06, "loss": 0.1029, "step": 26297 }, { "epoch": 0.7671976194643795, "grad_norm": 0.6716431148909698, "learning_rate": 1.355234480654387e-06, "loss": 0.0962, "step": 26298 }, { "epoch": 0.767226792695023, "grad_norm": 1.0057342710694277, "learning_rate": 1.354911085680538e-06, "loss": 0.1119, "step": 26299 }, { "epoch": 0.7672559659256666, "grad_norm": 1.0221882901163448, "learning_rate": 1.3545877232491716e-06, "loss": 0.1082, "step": 26300 }, { "epoch": 0.7672851391563101, "grad_norm": 0.9537886818232255, "learning_rate": 1.3542643933631755e-06, "loss": 0.1294, "step": 26301 }, { "epoch": 0.7673143123869537, "grad_norm": 1.3430581809932332, "learning_rate": 1.3539410960254384e-06, "loss": 0.1269, "step": 26302 }, { "epoch": 0.7673434856175972, "grad_norm": 0.8514420956156064, "learning_rate": 1.3536178312388432e-06, "loss": 0.1085, "step": 26303 }, { "epoch": 0.7673726588482409, "grad_norm": 0.7326071125051908, "learning_rate": 1.3532945990062784e-06, "loss": 0.1242, "step": 26304 }, { "epoch": 0.7674018320788845, "grad_norm": 0.8487698632811732, "learning_rate": 1.35297139933063e-06, "loss": 0.1427, "step": 26305 }, { "epoch": 0.767431005309528, "grad_norm": 1.1671383181527097, "learning_rate": 1.3526482322147798e-06, "loss": 0.1057, "step": 26306 }, { "epoch": 0.7674601785401716, "grad_norm": 0.7276062023581822, "learning_rate": 1.352325097661616e-06, "loss": 0.1143, "step": 26307 }, { "epoch": 0.7674893517708151, "grad_norm": 0.8089488802652228, "learning_rate": 1.3520019956740244e-06, "loss": 0.1058, "step": 26308 }, { "epoch": 0.7675185250014587, "grad_norm": 0.9404382782265481, "learning_rate": 1.351678926254888e-06, "loss": 0.1184, "step": 26309 }, { "epoch": 0.7675476982321022, "grad_norm": 0.7773568824293435, "learning_rate": 1.35135588940709e-06, "loss": 0.0923, "step": 26310 }, { "epoch": 0.7675768714627458, "grad_norm": 1.1283489551534065, "learning_rate": 1.3510328851335164e-06, "loss": 0.1063, "step": 26311 }, { "epoch": 0.7676060446933893, "grad_norm": 0.9811542149546232, "learning_rate": 1.3507099134370494e-06, "loss": 0.1107, "step": 26312 }, { "epoch": 0.7676352179240329, "grad_norm": 1.0223309748746012, "learning_rate": 1.3503869743205727e-06, "loss": 0.1099, "step": 26313 }, { "epoch": 0.7676643911546764, "grad_norm": 0.7347629024668387, "learning_rate": 1.3500640677869713e-06, "loss": 0.0999, "step": 26314 }, { "epoch": 0.76769356438532, "grad_norm": 0.7341897061573558, "learning_rate": 1.3497411938391276e-06, "loss": 0.1092, "step": 26315 }, { "epoch": 0.7677227376159635, "grad_norm": 0.851833901783838, "learning_rate": 1.3494183524799204e-06, "loss": 0.1246, "step": 26316 }, { "epoch": 0.7677519108466071, "grad_norm": 0.8753818742823339, "learning_rate": 1.3490955437122367e-06, "loss": 0.1059, "step": 26317 }, { "epoch": 0.7677810840772508, "grad_norm": 0.8341477962960666, "learning_rate": 1.348772767538955e-06, "loss": 0.1044, "step": 26318 }, { "epoch": 0.7678102573078943, "grad_norm": 0.7149525553203409, "learning_rate": 1.34845002396296e-06, "loss": 0.111, "step": 26319 }, { "epoch": 0.7678394305385379, "grad_norm": 0.9113556991700084, "learning_rate": 1.3481273129871297e-06, "loss": 0.1126, "step": 26320 }, { "epoch": 0.7678686037691814, "grad_norm": 0.7604835147354264, "learning_rate": 1.3478046346143487e-06, "loss": 0.1024, "step": 26321 }, { "epoch": 0.767897776999825, "grad_norm": 0.7818953082305324, "learning_rate": 1.3474819888474955e-06, "loss": 0.1329, "step": 26322 }, { "epoch": 0.7679269502304685, "grad_norm": 0.7055300546011628, "learning_rate": 1.3471593756894502e-06, "loss": 0.1349, "step": 26323 }, { "epoch": 0.7679561234611121, "grad_norm": 0.801859806573647, "learning_rate": 1.3468367951430939e-06, "loss": 0.1202, "step": 26324 }, { "epoch": 0.7679852966917556, "grad_norm": 0.8591917215223992, "learning_rate": 1.3465142472113085e-06, "loss": 0.1149, "step": 26325 }, { "epoch": 0.7680144699223992, "grad_norm": 0.7905620443673668, "learning_rate": 1.3461917318969714e-06, "loss": 0.1284, "step": 26326 }, { "epoch": 0.7680436431530427, "grad_norm": 0.9012770736109739, "learning_rate": 1.3458692492029608e-06, "loss": 0.1161, "step": 26327 }, { "epoch": 0.7680728163836863, "grad_norm": 0.7719845505449965, "learning_rate": 1.3455467991321586e-06, "loss": 0.093, "step": 26328 }, { "epoch": 0.7681019896143298, "grad_norm": 0.8562658122996444, "learning_rate": 1.3452243816874423e-06, "loss": 0.1207, "step": 26329 }, { "epoch": 0.7681311628449734, "grad_norm": 0.6791845106246308, "learning_rate": 1.344901996871687e-06, "loss": 0.1085, "step": 26330 }, { "epoch": 0.7681603360756171, "grad_norm": 0.8296558353589102, "learning_rate": 1.3445796446877773e-06, "loss": 0.1092, "step": 26331 }, { "epoch": 0.7681895093062606, "grad_norm": 1.1079128073038012, "learning_rate": 1.3442573251385882e-06, "loss": 0.1275, "step": 26332 }, { "epoch": 0.7682186825369042, "grad_norm": 0.7430678761466323, "learning_rate": 1.343935038226995e-06, "loss": 0.1136, "step": 26333 }, { "epoch": 0.7682478557675477, "grad_norm": 0.8437397346011964, "learning_rate": 1.3436127839558788e-06, "loss": 0.1432, "step": 26334 }, { "epoch": 0.7682770289981913, "grad_norm": 0.8250038742054424, "learning_rate": 1.3432905623281151e-06, "loss": 0.121, "step": 26335 }, { "epoch": 0.7683062022288348, "grad_norm": 0.8556139117128854, "learning_rate": 1.3429683733465782e-06, "loss": 0.1114, "step": 26336 }, { "epoch": 0.7683353754594784, "grad_norm": 0.8087185715853028, "learning_rate": 1.3426462170141475e-06, "loss": 0.1042, "step": 26337 }, { "epoch": 0.7683645486901219, "grad_norm": 0.7425615248636965, "learning_rate": 1.3423240933336989e-06, "loss": 0.1096, "step": 26338 }, { "epoch": 0.7683937219207655, "grad_norm": 0.9583128033071, "learning_rate": 1.3420020023081081e-06, "loss": 0.1001, "step": 26339 }, { "epoch": 0.768422895151409, "grad_norm": 0.8593452010260961, "learning_rate": 1.3416799439402483e-06, "loss": 0.1213, "step": 26340 }, { "epoch": 0.7684520683820526, "grad_norm": 0.8879522014577076, "learning_rate": 1.3413579182329989e-06, "loss": 0.1257, "step": 26341 }, { "epoch": 0.7684812416126962, "grad_norm": 0.7333457531120627, "learning_rate": 1.3410359251892307e-06, "loss": 0.1172, "step": 26342 }, { "epoch": 0.7685104148433397, "grad_norm": 1.0015957078579298, "learning_rate": 1.34071396481182e-06, "loss": 0.1241, "step": 26343 }, { "epoch": 0.7685395880739833, "grad_norm": 0.8076648583421359, "learning_rate": 1.3403920371036433e-06, "loss": 0.1337, "step": 26344 }, { "epoch": 0.7685687613046269, "grad_norm": 0.9656849619903707, "learning_rate": 1.3400701420675727e-06, "loss": 0.1345, "step": 26345 }, { "epoch": 0.7685979345352705, "grad_norm": 0.8535953469313441, "learning_rate": 1.339748279706481e-06, "loss": 0.1108, "step": 26346 }, { "epoch": 0.768627107765914, "grad_norm": 0.7917522328083967, "learning_rate": 1.339426450023244e-06, "loss": 0.1257, "step": 26347 }, { "epoch": 0.7686562809965576, "grad_norm": 0.6787829623433255, "learning_rate": 1.3391046530207325e-06, "loss": 0.1197, "step": 26348 }, { "epoch": 0.7686854542272011, "grad_norm": 0.8878994011466506, "learning_rate": 1.3387828887018222e-06, "loss": 0.0988, "step": 26349 }, { "epoch": 0.7687146274578447, "grad_norm": 0.9161149240701941, "learning_rate": 1.3384611570693828e-06, "loss": 0.1173, "step": 26350 }, { "epoch": 0.7687438006884882, "grad_norm": 0.7338299930776192, "learning_rate": 1.3381394581262896e-06, "loss": 0.1169, "step": 26351 }, { "epoch": 0.7687729739191318, "grad_norm": 0.8177897018433256, "learning_rate": 1.3378177918754132e-06, "loss": 0.1167, "step": 26352 }, { "epoch": 0.7688021471497754, "grad_norm": 0.7839406401691176, "learning_rate": 1.3374961583196238e-06, "loss": 0.0966, "step": 26353 }, { "epoch": 0.7688313203804189, "grad_norm": 0.790638859628819, "learning_rate": 1.3371745574617945e-06, "loss": 0.1188, "step": 26354 }, { "epoch": 0.7688604936110625, "grad_norm": 0.9705534971863546, "learning_rate": 1.3368529893047977e-06, "loss": 0.1167, "step": 26355 }, { "epoch": 0.768889666841706, "grad_norm": 1.0390544757812006, "learning_rate": 1.3365314538515028e-06, "loss": 0.0999, "step": 26356 }, { "epoch": 0.7689188400723496, "grad_norm": 0.945480891191583, "learning_rate": 1.3362099511047793e-06, "loss": 0.1177, "step": 26357 }, { "epoch": 0.7689480133029932, "grad_norm": 0.8838098435908222, "learning_rate": 1.3358884810675005e-06, "loss": 0.1454, "step": 26358 }, { "epoch": 0.7689771865336368, "grad_norm": 0.851736956955131, "learning_rate": 1.3355670437425344e-06, "loss": 0.1254, "step": 26359 }, { "epoch": 0.7690063597642803, "grad_norm": 0.7620084892561015, "learning_rate": 1.3352456391327479e-06, "loss": 0.0926, "step": 26360 }, { "epoch": 0.7690355329949239, "grad_norm": 0.7596378487858548, "learning_rate": 1.3349242672410162e-06, "loss": 0.1037, "step": 26361 }, { "epoch": 0.7690647062255674, "grad_norm": 0.7407636176932145, "learning_rate": 1.334602928070206e-06, "loss": 0.1191, "step": 26362 }, { "epoch": 0.769093879456211, "grad_norm": 0.8387156139437207, "learning_rate": 1.3342816216231846e-06, "loss": 0.1341, "step": 26363 }, { "epoch": 0.7691230526868545, "grad_norm": 0.9424023168724408, "learning_rate": 1.3339603479028229e-06, "loss": 0.0995, "step": 26364 }, { "epoch": 0.7691522259174981, "grad_norm": 0.8413175728940638, "learning_rate": 1.333639106911988e-06, "loss": 0.1123, "step": 26365 }, { "epoch": 0.7691813991481417, "grad_norm": 0.7428676524295936, "learning_rate": 1.3333178986535466e-06, "loss": 0.114, "step": 26366 }, { "epoch": 0.7692105723787852, "grad_norm": 0.8633340480866784, "learning_rate": 1.3329967231303682e-06, "loss": 0.101, "step": 26367 }, { "epoch": 0.7692397456094288, "grad_norm": 1.2447757965667121, "learning_rate": 1.3326755803453206e-06, "loss": 0.1074, "step": 26368 }, { "epoch": 0.7692689188400723, "grad_norm": 1.0479824407084282, "learning_rate": 1.3323544703012697e-06, "loss": 0.1117, "step": 26369 }, { "epoch": 0.7692980920707159, "grad_norm": 0.7147793449500534, "learning_rate": 1.3320333930010815e-06, "loss": 0.1121, "step": 26370 }, { "epoch": 0.7693272653013594, "grad_norm": 0.7578252238900758, "learning_rate": 1.3317123484476251e-06, "loss": 0.1103, "step": 26371 }, { "epoch": 0.7693564385320031, "grad_norm": 0.83767739554361, "learning_rate": 1.3313913366437637e-06, "loss": 0.1074, "step": 26372 }, { "epoch": 0.7693856117626466, "grad_norm": 0.9595939537888586, "learning_rate": 1.331070357592364e-06, "loss": 0.1188, "step": 26373 }, { "epoch": 0.7694147849932902, "grad_norm": 0.9503184644901421, "learning_rate": 1.3307494112962943e-06, "loss": 0.1145, "step": 26374 }, { "epoch": 0.7694439582239337, "grad_norm": 0.6936057562434776, "learning_rate": 1.3304284977584182e-06, "loss": 0.0997, "step": 26375 }, { "epoch": 0.7694731314545773, "grad_norm": 0.7660194227454243, "learning_rate": 1.3301076169815986e-06, "loss": 0.1052, "step": 26376 }, { "epoch": 0.7695023046852209, "grad_norm": 0.860131712849057, "learning_rate": 1.3297867689687038e-06, "loss": 0.12, "step": 26377 }, { "epoch": 0.7695314779158644, "grad_norm": 0.8692900744563608, "learning_rate": 1.3294659537225951e-06, "loss": 0.1241, "step": 26378 }, { "epoch": 0.769560651146508, "grad_norm": 0.8160703074895804, "learning_rate": 1.3291451712461395e-06, "loss": 0.0993, "step": 26379 }, { "epoch": 0.7695898243771515, "grad_norm": 0.9095841995953802, "learning_rate": 1.3288244215421981e-06, "loss": 0.1149, "step": 26380 }, { "epoch": 0.7696189976077951, "grad_norm": 0.8708794576204514, "learning_rate": 1.3285037046136372e-06, "loss": 0.1204, "step": 26381 }, { "epoch": 0.7696481708384386, "grad_norm": 0.9675974965172188, "learning_rate": 1.3281830204633188e-06, "loss": 0.1166, "step": 26382 }, { "epoch": 0.7696773440690822, "grad_norm": 0.8515714097596896, "learning_rate": 1.3278623690941045e-06, "loss": 0.1284, "step": 26383 }, { "epoch": 0.7697065172997257, "grad_norm": 0.8043822820085537, "learning_rate": 1.3275417505088585e-06, "loss": 0.1157, "step": 26384 }, { "epoch": 0.7697356905303694, "grad_norm": 0.955162189308029, "learning_rate": 1.3272211647104443e-06, "loss": 0.1225, "step": 26385 }, { "epoch": 0.7697648637610129, "grad_norm": 1.0431847591400947, "learning_rate": 1.3269006117017231e-06, "loss": 0.1048, "step": 26386 }, { "epoch": 0.7697940369916565, "grad_norm": 0.7443241082161194, "learning_rate": 1.3265800914855542e-06, "loss": 0.1118, "step": 26387 }, { "epoch": 0.7698232102223, "grad_norm": 0.8425779974112467, "learning_rate": 1.3262596040648034e-06, "loss": 0.1057, "step": 26388 }, { "epoch": 0.7698523834529436, "grad_norm": 0.768087129628987, "learning_rate": 1.3259391494423296e-06, "loss": 0.1054, "step": 26389 }, { "epoch": 0.7698815566835872, "grad_norm": 1.061329548066553, "learning_rate": 1.3256187276209913e-06, "loss": 0.1188, "step": 26390 }, { "epoch": 0.7699107299142307, "grad_norm": 0.900892773663434, "learning_rate": 1.325298338603655e-06, "loss": 0.1119, "step": 26391 }, { "epoch": 0.7699399031448743, "grad_norm": 1.0821439260157737, "learning_rate": 1.3249779823931774e-06, "loss": 0.1044, "step": 26392 }, { "epoch": 0.7699690763755178, "grad_norm": 0.8665013694211476, "learning_rate": 1.3246576589924176e-06, "loss": 0.0815, "step": 26393 }, { "epoch": 0.7699982496061614, "grad_norm": 1.037289665752422, "learning_rate": 1.3243373684042388e-06, "loss": 0.1176, "step": 26394 }, { "epoch": 0.7700274228368049, "grad_norm": 1.037716672470052, "learning_rate": 1.324017110631498e-06, "loss": 0.137, "step": 26395 }, { "epoch": 0.7700565960674485, "grad_norm": 1.211706994811601, "learning_rate": 1.3236968856770537e-06, "loss": 0.1345, "step": 26396 }, { "epoch": 0.770085769298092, "grad_norm": 1.0750149620536222, "learning_rate": 1.3233766935437665e-06, "loss": 0.1334, "step": 26397 }, { "epoch": 0.7701149425287356, "grad_norm": 0.8909687763686794, "learning_rate": 1.3230565342344953e-06, "loss": 0.1093, "step": 26398 }, { "epoch": 0.7701441157593792, "grad_norm": 0.6824404088662096, "learning_rate": 1.3227364077520976e-06, "loss": 0.1208, "step": 26399 }, { "epoch": 0.7701732889900228, "grad_norm": 0.8293847946531514, "learning_rate": 1.3224163140994302e-06, "loss": 0.1224, "step": 26400 }, { "epoch": 0.7702024622206664, "grad_norm": 0.871079034104856, "learning_rate": 1.3220962532793535e-06, "loss": 0.1119, "step": 26401 }, { "epoch": 0.7702316354513099, "grad_norm": 0.9644347823087496, "learning_rate": 1.321776225294722e-06, "loss": 0.1438, "step": 26402 }, { "epoch": 0.7702608086819535, "grad_norm": 1.1372883534878597, "learning_rate": 1.321456230148394e-06, "loss": 0.1059, "step": 26403 }, { "epoch": 0.770289981912597, "grad_norm": 0.7689123155392992, "learning_rate": 1.3211362678432282e-06, "loss": 0.123, "step": 26404 }, { "epoch": 0.7703191551432406, "grad_norm": 0.7275816622209508, "learning_rate": 1.32081633838208e-06, "loss": 0.1149, "step": 26405 }, { "epoch": 0.7703483283738841, "grad_norm": 0.7988063990793386, "learning_rate": 1.3204964417678034e-06, "loss": 0.1349, "step": 26406 }, { "epoch": 0.7703775016045277, "grad_norm": 1.000820402644795, "learning_rate": 1.3201765780032577e-06, "loss": 0.1164, "step": 26407 }, { "epoch": 0.7704066748351712, "grad_norm": 0.8542802866515816, "learning_rate": 1.3198567470912955e-06, "loss": 0.1155, "step": 26408 }, { "epoch": 0.7704358480658148, "grad_norm": 0.8719979593231254, "learning_rate": 1.3195369490347753e-06, "loss": 0.1033, "step": 26409 }, { "epoch": 0.7704650212964583, "grad_norm": 1.1192189668562742, "learning_rate": 1.3192171838365492e-06, "loss": 0.1246, "step": 26410 }, { "epoch": 0.7704941945271019, "grad_norm": 0.683453341444437, "learning_rate": 1.3188974514994752e-06, "loss": 0.0945, "step": 26411 }, { "epoch": 0.7705233677577455, "grad_norm": 1.1538560322257447, "learning_rate": 1.3185777520264053e-06, "loss": 0.1087, "step": 26412 }, { "epoch": 0.7705525409883891, "grad_norm": 1.123113533059127, "learning_rate": 1.3182580854201938e-06, "loss": 0.124, "step": 26413 }, { "epoch": 0.7705817142190327, "grad_norm": 0.9604330296704243, "learning_rate": 1.3179384516836947e-06, "loss": 0.1192, "step": 26414 }, { "epoch": 0.7706108874496762, "grad_norm": 0.9035893269277159, "learning_rate": 1.3176188508197634e-06, "loss": 0.133, "step": 26415 }, { "epoch": 0.7706400606803198, "grad_norm": 0.85900840253525, "learning_rate": 1.3172992828312519e-06, "loss": 0.1317, "step": 26416 }, { "epoch": 0.7706692339109633, "grad_norm": 1.1163044663463344, "learning_rate": 1.3169797477210122e-06, "loss": 0.1144, "step": 26417 }, { "epoch": 0.7706984071416069, "grad_norm": 1.1446577803877949, "learning_rate": 1.3166602454918997e-06, "loss": 0.0885, "step": 26418 }, { "epoch": 0.7707275803722504, "grad_norm": 0.7461391709802458, "learning_rate": 1.316340776146765e-06, "loss": 0.1162, "step": 26419 }, { "epoch": 0.770756753602894, "grad_norm": 0.8121190406704041, "learning_rate": 1.3160213396884576e-06, "loss": 0.0949, "step": 26420 }, { "epoch": 0.7707859268335375, "grad_norm": 1.1227051978681637, "learning_rate": 1.3157019361198348e-06, "loss": 0.1323, "step": 26421 }, { "epoch": 0.7708151000641811, "grad_norm": 1.3014020476072332, "learning_rate": 1.3153825654437458e-06, "loss": 0.0977, "step": 26422 }, { "epoch": 0.7708442732948246, "grad_norm": 1.3137095697635548, "learning_rate": 1.3150632276630405e-06, "loss": 0.1235, "step": 26423 }, { "epoch": 0.7708734465254682, "grad_norm": 0.9783099395143354, "learning_rate": 1.3147439227805726e-06, "loss": 0.1578, "step": 26424 }, { "epoch": 0.7709026197561117, "grad_norm": 1.6056538931094753, "learning_rate": 1.314424650799191e-06, "loss": 0.1119, "step": 26425 }, { "epoch": 0.7709317929867554, "grad_norm": 0.8491353982017323, "learning_rate": 1.3141054117217444e-06, "loss": 0.1271, "step": 26426 }, { "epoch": 0.770960966217399, "grad_norm": 1.2452005449943448, "learning_rate": 1.3137862055510852e-06, "loss": 0.1122, "step": 26427 }, { "epoch": 0.7709901394480425, "grad_norm": 0.9635875427143986, "learning_rate": 1.3134670322900644e-06, "loss": 0.0943, "step": 26428 }, { "epoch": 0.7710193126786861, "grad_norm": 0.752123689176318, "learning_rate": 1.3131478919415298e-06, "loss": 0.11, "step": 26429 }, { "epoch": 0.7710484859093296, "grad_norm": 0.7198618254965795, "learning_rate": 1.3128287845083288e-06, "loss": 0.1247, "step": 26430 }, { "epoch": 0.7710776591399732, "grad_norm": 1.3233392143387757, "learning_rate": 1.3125097099933144e-06, "loss": 0.1205, "step": 26431 }, { "epoch": 0.7711068323706167, "grad_norm": 1.0919256862175135, "learning_rate": 1.3121906683993307e-06, "loss": 0.1065, "step": 26432 }, { "epoch": 0.7711360056012603, "grad_norm": 0.9580261211845957, "learning_rate": 1.3118716597292292e-06, "loss": 0.1164, "step": 26433 }, { "epoch": 0.7711651788319038, "grad_norm": 0.6467131070332959, "learning_rate": 1.3115526839858583e-06, "loss": 0.1006, "step": 26434 }, { "epoch": 0.7711943520625474, "grad_norm": 0.8629428027626489, "learning_rate": 1.3112337411720643e-06, "loss": 0.1164, "step": 26435 }, { "epoch": 0.7712235252931909, "grad_norm": 0.86144947721426, "learning_rate": 1.3109148312906934e-06, "loss": 0.1205, "step": 26436 }, { "epoch": 0.7712526985238345, "grad_norm": 0.7128145435843275, "learning_rate": 1.3105959543445962e-06, "loss": 0.0983, "step": 26437 }, { "epoch": 0.771281871754478, "grad_norm": 0.9961558734954546, "learning_rate": 1.3102771103366157e-06, "loss": 0.1091, "step": 26438 }, { "epoch": 0.7713110449851217, "grad_norm": 0.9780844718050035, "learning_rate": 1.3099582992696019e-06, "loss": 0.1195, "step": 26439 }, { "epoch": 0.7713402182157653, "grad_norm": 1.0287588646014765, "learning_rate": 1.309639521146398e-06, "loss": 0.1313, "step": 26440 }, { "epoch": 0.7713693914464088, "grad_norm": 1.251658470659023, "learning_rate": 1.309320775969853e-06, "loss": 0.1255, "step": 26441 }, { "epoch": 0.7713985646770524, "grad_norm": 1.196933869166425, "learning_rate": 1.3090020637428109e-06, "loss": 0.1265, "step": 26442 }, { "epoch": 0.7714277379076959, "grad_norm": 0.9939862539712442, "learning_rate": 1.3086833844681163e-06, "loss": 0.1154, "step": 26443 }, { "epoch": 0.7714569111383395, "grad_norm": 0.8493062979362679, "learning_rate": 1.3083647381486147e-06, "loss": 0.1168, "step": 26444 }, { "epoch": 0.771486084368983, "grad_norm": 0.9132476162737213, "learning_rate": 1.3080461247871528e-06, "loss": 0.1091, "step": 26445 }, { "epoch": 0.7715152575996266, "grad_norm": 0.7964659543172595, "learning_rate": 1.3077275443865744e-06, "loss": 0.0903, "step": 26446 }, { "epoch": 0.7715444308302701, "grad_norm": 0.8313801419019907, "learning_rate": 1.307408996949721e-06, "loss": 0.1269, "step": 26447 }, { "epoch": 0.7715736040609137, "grad_norm": 0.8557884059584263, "learning_rate": 1.3070904824794405e-06, "loss": 0.1092, "step": 26448 }, { "epoch": 0.7716027772915572, "grad_norm": 0.9307981977101334, "learning_rate": 1.3067720009785744e-06, "loss": 0.12, "step": 26449 }, { "epoch": 0.7716319505222008, "grad_norm": 0.9161618851245314, "learning_rate": 1.3064535524499638e-06, "loss": 0.1123, "step": 26450 }, { "epoch": 0.7716611237528443, "grad_norm": 0.9973343431939814, "learning_rate": 1.3061351368964565e-06, "loss": 0.1178, "step": 26451 }, { "epoch": 0.7716902969834879, "grad_norm": 0.8219635170300139, "learning_rate": 1.3058167543208932e-06, "loss": 0.1071, "step": 26452 }, { "epoch": 0.7717194702141316, "grad_norm": 0.8308945313301898, "learning_rate": 1.3054984047261143e-06, "loss": 0.1105, "step": 26453 }, { "epoch": 0.7717486434447751, "grad_norm": 0.9276828756661168, "learning_rate": 1.305180088114965e-06, "loss": 0.1267, "step": 26454 }, { "epoch": 0.7717778166754187, "grad_norm": 0.6314204423551208, "learning_rate": 1.3048618044902867e-06, "loss": 0.1004, "step": 26455 }, { "epoch": 0.7718069899060622, "grad_norm": 0.7850854170641326, "learning_rate": 1.3045435538549178e-06, "loss": 0.1083, "step": 26456 }, { "epoch": 0.7718361631367058, "grad_norm": 0.8000894342283231, "learning_rate": 1.3042253362117025e-06, "loss": 0.103, "step": 26457 }, { "epoch": 0.7718653363673493, "grad_norm": 0.9008739004837375, "learning_rate": 1.3039071515634822e-06, "loss": 0.1183, "step": 26458 }, { "epoch": 0.7718945095979929, "grad_norm": 0.8673481606862793, "learning_rate": 1.3035889999130963e-06, "loss": 0.136, "step": 26459 }, { "epoch": 0.7719236828286364, "grad_norm": 0.8850524606452219, "learning_rate": 1.3032708812633843e-06, "loss": 0.1126, "step": 26460 }, { "epoch": 0.77195285605928, "grad_norm": 0.8824738752227391, "learning_rate": 1.302952795617189e-06, "loss": 0.1173, "step": 26461 }, { "epoch": 0.7719820292899235, "grad_norm": 0.9801705753487708, "learning_rate": 1.3026347429773467e-06, "loss": 0.1301, "step": 26462 }, { "epoch": 0.7720112025205671, "grad_norm": 0.869696321205162, "learning_rate": 1.3023167233466988e-06, "loss": 0.1166, "step": 26463 }, { "epoch": 0.7720403757512107, "grad_norm": 0.699863753769055, "learning_rate": 1.3019987367280863e-06, "loss": 0.1208, "step": 26464 }, { "epoch": 0.7720695489818542, "grad_norm": 0.7509956601812794, "learning_rate": 1.3016807831243462e-06, "loss": 0.1334, "step": 26465 }, { "epoch": 0.7720987222124978, "grad_norm": 0.7857488046474499, "learning_rate": 1.3013628625383156e-06, "loss": 0.1117, "step": 26466 }, { "epoch": 0.7721278954431414, "grad_norm": 0.7597992997166758, "learning_rate": 1.301044974972836e-06, "loss": 0.1092, "step": 26467 }, { "epoch": 0.772157068673785, "grad_norm": 0.902079263486036, "learning_rate": 1.3007271204307425e-06, "loss": 0.1109, "step": 26468 }, { "epoch": 0.7721862419044285, "grad_norm": 0.9154434600938871, "learning_rate": 1.3004092989148753e-06, "loss": 0.1356, "step": 26469 }, { "epoch": 0.7722154151350721, "grad_norm": 0.7602294858779245, "learning_rate": 1.3000915104280699e-06, "loss": 0.1209, "step": 26470 }, { "epoch": 0.7722445883657156, "grad_norm": 0.8650595391250381, "learning_rate": 1.2997737549731647e-06, "loss": 0.1037, "step": 26471 }, { "epoch": 0.7722737615963592, "grad_norm": 0.728679407589401, "learning_rate": 1.299456032552997e-06, "loss": 0.116, "step": 26472 }, { "epoch": 0.7723029348270027, "grad_norm": 0.911503442395789, "learning_rate": 1.2991383431704008e-06, "loss": 0.1636, "step": 26473 }, { "epoch": 0.7723321080576463, "grad_norm": 1.119843180670213, "learning_rate": 1.2988206868282138e-06, "loss": 0.1356, "step": 26474 }, { "epoch": 0.7723612812882898, "grad_norm": 0.8635609734114374, "learning_rate": 1.2985030635292733e-06, "loss": 0.0951, "step": 26475 }, { "epoch": 0.7723904545189334, "grad_norm": 0.7894405202081105, "learning_rate": 1.2981854732764142e-06, "loss": 0.1326, "step": 26476 }, { "epoch": 0.772419627749577, "grad_norm": 0.7744503147262438, "learning_rate": 1.2978679160724706e-06, "loss": 0.1196, "step": 26477 }, { "epoch": 0.7724488009802205, "grad_norm": 0.9085344278051577, "learning_rate": 1.2975503919202793e-06, "loss": 0.1343, "step": 26478 }, { "epoch": 0.7724779742108641, "grad_norm": 0.8709450678143941, "learning_rate": 1.2972329008226741e-06, "loss": 0.118, "step": 26479 }, { "epoch": 0.7725071474415077, "grad_norm": 0.6416055095465956, "learning_rate": 1.296915442782487e-06, "loss": 0.0993, "step": 26480 }, { "epoch": 0.7725363206721513, "grad_norm": 0.9495578419459769, "learning_rate": 1.2965980178025577e-06, "loss": 0.1105, "step": 26481 }, { "epoch": 0.7725654939027948, "grad_norm": 0.8520634504504494, "learning_rate": 1.2962806258857175e-06, "loss": 0.1049, "step": 26482 }, { "epoch": 0.7725946671334384, "grad_norm": 0.9039195815906078, "learning_rate": 1.2959632670347976e-06, "loss": 0.117, "step": 26483 }, { "epoch": 0.7726238403640819, "grad_norm": 0.7995310471836102, "learning_rate": 1.2956459412526357e-06, "loss": 0.1187, "step": 26484 }, { "epoch": 0.7726530135947255, "grad_norm": 1.0539171841398818, "learning_rate": 1.2953286485420618e-06, "loss": 0.1098, "step": 26485 }, { "epoch": 0.772682186825369, "grad_norm": 0.9727985169305272, "learning_rate": 1.2950113889059084e-06, "loss": 0.1255, "step": 26486 }, { "epoch": 0.7727113600560126, "grad_norm": 1.1550559656561254, "learning_rate": 1.294694162347009e-06, "loss": 0.1136, "step": 26487 }, { "epoch": 0.7727405332866562, "grad_norm": 0.726800504404567, "learning_rate": 1.2943769688681968e-06, "loss": 0.1141, "step": 26488 }, { "epoch": 0.7727697065172997, "grad_norm": 1.8236590615622714, "learning_rate": 1.294059808472302e-06, "loss": 0.0945, "step": 26489 }, { "epoch": 0.7727988797479433, "grad_norm": 0.8323302104959504, "learning_rate": 1.2937426811621557e-06, "loss": 0.1052, "step": 26490 }, { "epoch": 0.7728280529785868, "grad_norm": 0.7459109385827977, "learning_rate": 1.293425586940591e-06, "loss": 0.1081, "step": 26491 }, { "epoch": 0.7728572262092304, "grad_norm": 1.2150684866793608, "learning_rate": 1.2931085258104365e-06, "loss": 0.1089, "step": 26492 }, { "epoch": 0.7728863994398739, "grad_norm": 0.9975084486954806, "learning_rate": 1.292791497774526e-06, "loss": 0.1237, "step": 26493 }, { "epoch": 0.7729155726705176, "grad_norm": 0.6490156257469298, "learning_rate": 1.292474502835686e-06, "loss": 0.1296, "step": 26494 }, { "epoch": 0.7729447459011611, "grad_norm": 0.7228196405436439, "learning_rate": 1.2921575409967507e-06, "loss": 0.1077, "step": 26495 }, { "epoch": 0.7729739191318047, "grad_norm": 1.0764161726479002, "learning_rate": 1.2918406122605459e-06, "loss": 0.1186, "step": 26496 }, { "epoch": 0.7730030923624482, "grad_norm": 0.7546420173359845, "learning_rate": 1.2915237166299038e-06, "loss": 0.1026, "step": 26497 }, { "epoch": 0.7730322655930918, "grad_norm": 1.01179067039939, "learning_rate": 1.2912068541076523e-06, "loss": 0.1229, "step": 26498 }, { "epoch": 0.7730614388237353, "grad_norm": 0.9371775488421225, "learning_rate": 1.2908900246966215e-06, "loss": 0.1226, "step": 26499 }, { "epoch": 0.7730906120543789, "grad_norm": 1.0982933175170113, "learning_rate": 1.2905732283996374e-06, "loss": 0.1218, "step": 26500 }, { "epoch": 0.7731197852850225, "grad_norm": 1.797577771811171, "learning_rate": 1.290256465219532e-06, "loss": 0.1318, "step": 26501 }, { "epoch": 0.773148958515666, "grad_norm": 0.9185502509125388, "learning_rate": 1.2899397351591308e-06, "loss": 0.1202, "step": 26502 }, { "epoch": 0.7731781317463096, "grad_norm": 0.8406384464510138, "learning_rate": 1.289623038221261e-06, "loss": 0.1, "step": 26503 }, { "epoch": 0.7732073049769531, "grad_norm": 0.8154581556170826, "learning_rate": 1.289306374408751e-06, "loss": 0.1173, "step": 26504 }, { "epoch": 0.7732364782075967, "grad_norm": 0.9123238426515281, "learning_rate": 1.2889897437244292e-06, "loss": 0.1083, "step": 26505 }, { "epoch": 0.7732656514382402, "grad_norm": 1.1856662017489783, "learning_rate": 1.288673146171121e-06, "loss": 0.1092, "step": 26506 }, { "epoch": 0.7732948246688839, "grad_norm": 0.8013534639063158, "learning_rate": 1.2883565817516513e-06, "loss": 0.1159, "step": 26507 }, { "epoch": 0.7733239978995274, "grad_norm": 0.8404164148131812, "learning_rate": 1.2880400504688501e-06, "loss": 0.1189, "step": 26508 }, { "epoch": 0.773353171130171, "grad_norm": 0.9352412053671898, "learning_rate": 1.2877235523255388e-06, "loss": 0.1373, "step": 26509 }, { "epoch": 0.7733823443608145, "grad_norm": 0.8562182761344047, "learning_rate": 1.2874070873245465e-06, "loss": 0.1287, "step": 26510 }, { "epoch": 0.7734115175914581, "grad_norm": 0.7641345416105122, "learning_rate": 1.2870906554686979e-06, "loss": 0.1168, "step": 26511 }, { "epoch": 0.7734406908221017, "grad_norm": 0.9916976973259733, "learning_rate": 1.2867742567608182e-06, "loss": 0.1192, "step": 26512 }, { "epoch": 0.7734698640527452, "grad_norm": 0.8311264741871122, "learning_rate": 1.2864578912037302e-06, "loss": 0.1066, "step": 26513 }, { "epoch": 0.7734990372833888, "grad_norm": 0.7764714206791056, "learning_rate": 1.2861415588002607e-06, "loss": 0.0888, "step": 26514 }, { "epoch": 0.7735282105140323, "grad_norm": 0.8708474570257974, "learning_rate": 1.2858252595532316e-06, "loss": 0.0964, "step": 26515 }, { "epoch": 0.7735573837446759, "grad_norm": 0.9782078589934318, "learning_rate": 1.285508993465469e-06, "loss": 0.1209, "step": 26516 }, { "epoch": 0.7735865569753194, "grad_norm": 0.9509027822156123, "learning_rate": 1.2851927605397946e-06, "loss": 0.108, "step": 26517 }, { "epoch": 0.773615730205963, "grad_norm": 0.7746880580086296, "learning_rate": 1.2848765607790332e-06, "loss": 0.103, "step": 26518 }, { "epoch": 0.7736449034366065, "grad_norm": 0.7650423822361858, "learning_rate": 1.2845603941860074e-06, "loss": 0.1023, "step": 26519 }, { "epoch": 0.7736740766672501, "grad_norm": 1.4347883213488632, "learning_rate": 1.2842442607635381e-06, "loss": 0.1082, "step": 26520 }, { "epoch": 0.7737032498978937, "grad_norm": 1.1284873371552322, "learning_rate": 1.2839281605144488e-06, "loss": 0.1036, "step": 26521 }, { "epoch": 0.7737324231285373, "grad_norm": 0.9096892401280708, "learning_rate": 1.283612093441563e-06, "loss": 0.1099, "step": 26522 }, { "epoch": 0.7737615963591808, "grad_norm": 0.7006154927094754, "learning_rate": 1.2832960595477017e-06, "loss": 0.1174, "step": 26523 }, { "epoch": 0.7737907695898244, "grad_norm": 1.0828107341052888, "learning_rate": 1.2829800588356839e-06, "loss": 0.1021, "step": 26524 }, { "epoch": 0.773819942820468, "grad_norm": 1.0696356690412794, "learning_rate": 1.282664091308335e-06, "loss": 0.1172, "step": 26525 }, { "epoch": 0.7738491160511115, "grad_norm": 0.8497245081942782, "learning_rate": 1.282348156968472e-06, "loss": 0.1297, "step": 26526 }, { "epoch": 0.7738782892817551, "grad_norm": 0.9001105388819419, "learning_rate": 1.282032255818917e-06, "loss": 0.1285, "step": 26527 }, { "epoch": 0.7739074625123986, "grad_norm": 0.8068502221676728, "learning_rate": 1.2817163878624917e-06, "loss": 0.1217, "step": 26528 }, { "epoch": 0.7739366357430422, "grad_norm": 0.8138790681916827, "learning_rate": 1.281400553102015e-06, "loss": 0.1291, "step": 26529 }, { "epoch": 0.7739658089736857, "grad_norm": 0.8778194586781097, "learning_rate": 1.2810847515403058e-06, "loss": 0.141, "step": 26530 }, { "epoch": 0.7739949822043293, "grad_norm": 0.9503925376410518, "learning_rate": 1.2807689831801846e-06, "loss": 0.1069, "step": 26531 }, { "epoch": 0.7740241554349728, "grad_norm": 1.0624130337991038, "learning_rate": 1.2804532480244709e-06, "loss": 0.1157, "step": 26532 }, { "epoch": 0.7740533286656164, "grad_norm": 0.9790620493478033, "learning_rate": 1.2801375460759802e-06, "loss": 0.1371, "step": 26533 }, { "epoch": 0.77408250189626, "grad_norm": 1.1323242419711559, "learning_rate": 1.2798218773375342e-06, "loss": 0.1086, "step": 26534 }, { "epoch": 0.7741116751269036, "grad_norm": 1.0682323746768634, "learning_rate": 1.2795062418119519e-06, "loss": 0.1239, "step": 26535 }, { "epoch": 0.7741408483575472, "grad_norm": 0.8797137663193164, "learning_rate": 1.2791906395020493e-06, "loss": 0.1241, "step": 26536 }, { "epoch": 0.7741700215881907, "grad_norm": 0.9167332230238365, "learning_rate": 1.2788750704106434e-06, "loss": 0.1112, "step": 26537 }, { "epoch": 0.7741991948188343, "grad_norm": 0.7695633151047667, "learning_rate": 1.2785595345405539e-06, "loss": 0.1075, "step": 26538 }, { "epoch": 0.7742283680494778, "grad_norm": 0.8312754128050671, "learning_rate": 1.278244031894595e-06, "loss": 0.1069, "step": 26539 }, { "epoch": 0.7742575412801214, "grad_norm": 0.8945044287668984, "learning_rate": 1.277928562475585e-06, "loss": 0.1333, "step": 26540 }, { "epoch": 0.7742867145107649, "grad_norm": 0.8845537811854449, "learning_rate": 1.2776131262863412e-06, "loss": 0.1226, "step": 26541 }, { "epoch": 0.7743158877414085, "grad_norm": 1.046034095084399, "learning_rate": 1.2772977233296796e-06, "loss": 0.1193, "step": 26542 }, { "epoch": 0.774345060972052, "grad_norm": 0.9048239629282795, "learning_rate": 1.276982353608413e-06, "loss": 0.119, "step": 26543 }, { "epoch": 0.7743742342026956, "grad_norm": 0.8592410590381747, "learning_rate": 1.2766670171253614e-06, "loss": 0.1241, "step": 26544 }, { "epoch": 0.7744034074333391, "grad_norm": 0.9521390371402167, "learning_rate": 1.276351713883336e-06, "loss": 0.1062, "step": 26545 }, { "epoch": 0.7744325806639827, "grad_norm": 0.6922278166740373, "learning_rate": 1.2760364438851553e-06, "loss": 0.1071, "step": 26546 }, { "epoch": 0.7744617538946262, "grad_norm": 0.7868868881989651, "learning_rate": 1.2757212071336301e-06, "loss": 0.1184, "step": 26547 }, { "epoch": 0.7744909271252699, "grad_norm": 0.8375359753984807, "learning_rate": 1.275406003631579e-06, "loss": 0.1082, "step": 26548 }, { "epoch": 0.7745201003559135, "grad_norm": 0.8448427683544858, "learning_rate": 1.275090833381814e-06, "loss": 0.1235, "step": 26549 }, { "epoch": 0.774549273586557, "grad_norm": 0.8780531252526221, "learning_rate": 1.2747756963871472e-06, "loss": 0.1287, "step": 26550 }, { "epoch": 0.7745784468172006, "grad_norm": 0.7180162243325253, "learning_rate": 1.2744605926503934e-06, "loss": 0.1189, "step": 26551 }, { "epoch": 0.7746076200478441, "grad_norm": 0.7395139920355911, "learning_rate": 1.274145522174368e-06, "loss": 0.1208, "step": 26552 }, { "epoch": 0.7746367932784877, "grad_norm": 0.8643409270290434, "learning_rate": 1.2738304849618815e-06, "loss": 0.1495, "step": 26553 }, { "epoch": 0.7746659665091312, "grad_norm": 0.9146671208526945, "learning_rate": 1.2735154810157458e-06, "loss": 0.1055, "step": 26554 }, { "epoch": 0.7746951397397748, "grad_norm": 0.7962582769405433, "learning_rate": 1.2732005103387756e-06, "loss": 0.13, "step": 26555 }, { "epoch": 0.7747243129704183, "grad_norm": 1.033888721443246, "learning_rate": 1.2728855729337802e-06, "loss": 0.1424, "step": 26556 }, { "epoch": 0.7747534862010619, "grad_norm": 0.7861583172360689, "learning_rate": 1.2725706688035728e-06, "loss": 0.0905, "step": 26557 }, { "epoch": 0.7747826594317054, "grad_norm": 0.8478843797782293, "learning_rate": 1.2722557979509664e-06, "loss": 0.1219, "step": 26558 }, { "epoch": 0.774811832662349, "grad_norm": 0.7467407285651229, "learning_rate": 1.2719409603787696e-06, "loss": 0.1055, "step": 26559 }, { "epoch": 0.7748410058929925, "grad_norm": 0.9988871413293041, "learning_rate": 1.271626156089793e-06, "loss": 0.1237, "step": 26560 }, { "epoch": 0.7748701791236362, "grad_norm": 0.7728425459962185, "learning_rate": 1.2713113850868492e-06, "loss": 0.1093, "step": 26561 }, { "epoch": 0.7748993523542798, "grad_norm": 0.7353651798996653, "learning_rate": 1.2709966473727474e-06, "loss": 0.1348, "step": 26562 }, { "epoch": 0.7749285255849233, "grad_norm": 0.6751947559540624, "learning_rate": 1.270681942950296e-06, "loss": 0.112, "step": 26563 }, { "epoch": 0.7749576988155669, "grad_norm": 0.9657854387208041, "learning_rate": 1.2703672718223058e-06, "loss": 0.1178, "step": 26564 }, { "epoch": 0.7749868720462104, "grad_norm": 0.7443473108788434, "learning_rate": 1.2700526339915875e-06, "loss": 0.1035, "step": 26565 }, { "epoch": 0.775016045276854, "grad_norm": 0.8117538167786532, "learning_rate": 1.2697380294609495e-06, "loss": 0.1287, "step": 26566 }, { "epoch": 0.7750452185074975, "grad_norm": 0.9204762012155704, "learning_rate": 1.2694234582331982e-06, "loss": 0.1173, "step": 26567 }, { "epoch": 0.7750743917381411, "grad_norm": 0.810251947875881, "learning_rate": 1.2691089203111444e-06, "loss": 0.108, "step": 26568 }, { "epoch": 0.7751035649687846, "grad_norm": 0.7534612732531387, "learning_rate": 1.2687944156975952e-06, "loss": 0.1341, "step": 26569 }, { "epoch": 0.7751327381994282, "grad_norm": 0.9639774981600991, "learning_rate": 1.2684799443953582e-06, "loss": 0.1113, "step": 26570 }, { "epoch": 0.7751619114300717, "grad_norm": 0.7398740880920021, "learning_rate": 1.2681655064072429e-06, "loss": 0.1056, "step": 26571 }, { "epoch": 0.7751910846607153, "grad_norm": 0.637081908395961, "learning_rate": 1.267851101736055e-06, "loss": 0.1137, "step": 26572 }, { "epoch": 0.7752202578913588, "grad_norm": 0.9941602533204257, "learning_rate": 1.2675367303846004e-06, "loss": 0.1368, "step": 26573 }, { "epoch": 0.7752494311220024, "grad_norm": 1.0444638934507875, "learning_rate": 1.267222392355688e-06, "loss": 0.0949, "step": 26574 }, { "epoch": 0.7752786043526461, "grad_norm": 0.7904267526058851, "learning_rate": 1.2669080876521217e-06, "loss": 0.133, "step": 26575 }, { "epoch": 0.7753077775832896, "grad_norm": 0.8829949795051159, "learning_rate": 1.2665938162767105e-06, "loss": 0.1337, "step": 26576 }, { "epoch": 0.7753369508139332, "grad_norm": 0.9142570146776361, "learning_rate": 1.2662795782322567e-06, "loss": 0.1183, "step": 26577 }, { "epoch": 0.7753661240445767, "grad_norm": 0.7460793058960168, "learning_rate": 1.2659653735215687e-06, "loss": 0.0951, "step": 26578 }, { "epoch": 0.7753952972752203, "grad_norm": 0.8705738509957747, "learning_rate": 1.2656512021474509e-06, "loss": 0.1152, "step": 26579 }, { "epoch": 0.7754244705058638, "grad_norm": 0.8404481239550062, "learning_rate": 1.2653370641127066e-06, "loss": 0.0934, "step": 26580 }, { "epoch": 0.7754536437365074, "grad_norm": 0.8810257364826629, "learning_rate": 1.2650229594201408e-06, "loss": 0.1379, "step": 26581 }, { "epoch": 0.7754828169671509, "grad_norm": 0.6820729342789238, "learning_rate": 1.26470888807256e-06, "loss": 0.1033, "step": 26582 }, { "epoch": 0.7755119901977945, "grad_norm": 0.7775126625130048, "learning_rate": 1.2643948500727666e-06, "loss": 0.1401, "step": 26583 }, { "epoch": 0.775541163428438, "grad_norm": 0.8362570852191665, "learning_rate": 1.264080845423563e-06, "loss": 0.1061, "step": 26584 }, { "epoch": 0.7755703366590816, "grad_norm": 0.8341827908323795, "learning_rate": 1.2637668741277548e-06, "loss": 0.1267, "step": 26585 }, { "epoch": 0.7755995098897251, "grad_norm": 0.8924927693673023, "learning_rate": 1.2634529361881442e-06, "loss": 0.1141, "step": 26586 }, { "epoch": 0.7756286831203687, "grad_norm": 0.8432344529903103, "learning_rate": 1.2631390316075315e-06, "loss": 0.1138, "step": 26587 }, { "epoch": 0.7756578563510124, "grad_norm": 0.8962107175638608, "learning_rate": 1.2628251603887238e-06, "loss": 0.1188, "step": 26588 }, { "epoch": 0.7756870295816559, "grad_norm": 0.8872274234974421, "learning_rate": 1.262511322534521e-06, "loss": 0.0966, "step": 26589 }, { "epoch": 0.7757162028122995, "grad_norm": 0.7627003562541219, "learning_rate": 1.262197518047723e-06, "loss": 0.1208, "step": 26590 }, { "epoch": 0.775745376042943, "grad_norm": 0.7761094661369075, "learning_rate": 1.2618837469311351e-06, "loss": 0.1341, "step": 26591 }, { "epoch": 0.7757745492735866, "grad_norm": 0.8623604643579398, "learning_rate": 1.261570009187557e-06, "loss": 0.1218, "step": 26592 }, { "epoch": 0.7758037225042301, "grad_norm": 0.8593192080900649, "learning_rate": 1.261256304819788e-06, "loss": 0.098, "step": 26593 }, { "epoch": 0.7758328957348737, "grad_norm": 0.6704118061247689, "learning_rate": 1.2609426338306296e-06, "loss": 0.1133, "step": 26594 }, { "epoch": 0.7758620689655172, "grad_norm": 0.8922664771139991, "learning_rate": 1.2606289962228846e-06, "loss": 0.1456, "step": 26595 }, { "epoch": 0.7758912421961608, "grad_norm": 0.6804087787177441, "learning_rate": 1.2603153919993516e-06, "loss": 0.1013, "step": 26596 }, { "epoch": 0.7759204154268043, "grad_norm": 0.7390251390423266, "learning_rate": 1.2600018211628278e-06, "loss": 0.1104, "step": 26597 }, { "epoch": 0.7759495886574479, "grad_norm": 1.2288790597249433, "learning_rate": 1.2596882837161174e-06, "loss": 0.1145, "step": 26598 }, { "epoch": 0.7759787618880915, "grad_norm": 1.1180371656432018, "learning_rate": 1.2593747796620148e-06, "loss": 0.122, "step": 26599 }, { "epoch": 0.776007935118735, "grad_norm": 0.7891802008532125, "learning_rate": 1.2590613090033215e-06, "loss": 0.1047, "step": 26600 }, { "epoch": 0.7760371083493786, "grad_norm": 0.8002358332725346, "learning_rate": 1.2587478717428375e-06, "loss": 0.1241, "step": 26601 }, { "epoch": 0.7760662815800222, "grad_norm": 0.9576015250321538, "learning_rate": 1.2584344678833587e-06, "loss": 0.1268, "step": 26602 }, { "epoch": 0.7760954548106658, "grad_norm": 0.8548690248052143, "learning_rate": 1.258121097427683e-06, "loss": 0.105, "step": 26603 }, { "epoch": 0.7761246280413093, "grad_norm": 0.8483129380130555, "learning_rate": 1.2578077603786104e-06, "loss": 0.1066, "step": 26604 }, { "epoch": 0.7761538012719529, "grad_norm": 0.8726933690240404, "learning_rate": 1.2574944567389346e-06, "loss": 0.1042, "step": 26605 }, { "epoch": 0.7761829745025964, "grad_norm": 0.8512247353198941, "learning_rate": 1.2571811865114569e-06, "loss": 0.1302, "step": 26606 }, { "epoch": 0.77621214773324, "grad_norm": 0.8286348752057278, "learning_rate": 1.2568679496989706e-06, "loss": 0.1083, "step": 26607 }, { "epoch": 0.7762413209638835, "grad_norm": 0.901537042493502, "learning_rate": 1.2565547463042753e-06, "loss": 0.1131, "step": 26608 }, { "epoch": 0.7762704941945271, "grad_norm": 0.8679979136696329, "learning_rate": 1.2562415763301656e-06, "loss": 0.1285, "step": 26609 }, { "epoch": 0.7762996674251706, "grad_norm": 0.7955828353613565, "learning_rate": 1.2559284397794353e-06, "loss": 0.091, "step": 26610 }, { "epoch": 0.7763288406558142, "grad_norm": 0.6771791765096367, "learning_rate": 1.2556153366548823e-06, "loss": 0.1016, "step": 26611 }, { "epoch": 0.7763580138864578, "grad_norm": 0.8774287468611829, "learning_rate": 1.2553022669593034e-06, "loss": 0.1142, "step": 26612 }, { "epoch": 0.7763871871171013, "grad_norm": 0.969730479636771, "learning_rate": 1.254989230695492e-06, "loss": 0.1263, "step": 26613 }, { "epoch": 0.7764163603477449, "grad_norm": 0.7443136930296058, "learning_rate": 1.2546762278662412e-06, "loss": 0.1198, "step": 26614 }, { "epoch": 0.7764455335783885, "grad_norm": 0.7589716149465291, "learning_rate": 1.2543632584743488e-06, "loss": 0.1228, "step": 26615 }, { "epoch": 0.7764747068090321, "grad_norm": 0.7364776568685283, "learning_rate": 1.2540503225226064e-06, "loss": 0.1348, "step": 26616 }, { "epoch": 0.7765038800396756, "grad_norm": 0.8164915778874572, "learning_rate": 1.2537374200138058e-06, "loss": 0.1159, "step": 26617 }, { "epoch": 0.7765330532703192, "grad_norm": 0.7452086785688905, "learning_rate": 1.2534245509507465e-06, "loss": 0.1148, "step": 26618 }, { "epoch": 0.7765622265009627, "grad_norm": 0.8122767630242331, "learning_rate": 1.2531117153362176e-06, "loss": 0.1223, "step": 26619 }, { "epoch": 0.7765913997316063, "grad_norm": 0.9413088095715116, "learning_rate": 1.2527989131730123e-06, "loss": 0.1036, "step": 26620 }, { "epoch": 0.7766205729622498, "grad_norm": 0.8623245561426485, "learning_rate": 1.2524861444639246e-06, "loss": 0.1077, "step": 26621 }, { "epoch": 0.7766497461928934, "grad_norm": 0.825163178735011, "learning_rate": 1.2521734092117466e-06, "loss": 0.1218, "step": 26622 }, { "epoch": 0.776678919423537, "grad_norm": 0.8185792339260545, "learning_rate": 1.2518607074192679e-06, "loss": 0.1176, "step": 26623 }, { "epoch": 0.7767080926541805, "grad_norm": 0.7985641157458386, "learning_rate": 1.251548039089282e-06, "loss": 0.1182, "step": 26624 }, { "epoch": 0.7767372658848241, "grad_norm": 1.024651304113618, "learning_rate": 1.2512354042245818e-06, "loss": 0.1076, "step": 26625 }, { "epoch": 0.7767664391154676, "grad_norm": 0.8758307280649539, "learning_rate": 1.2509228028279568e-06, "loss": 0.1297, "step": 26626 }, { "epoch": 0.7767956123461112, "grad_norm": 0.9491621859658749, "learning_rate": 1.250610234902197e-06, "loss": 0.1407, "step": 26627 }, { "epoch": 0.7768247855767547, "grad_norm": 1.2129012666247923, "learning_rate": 1.2502977004500956e-06, "loss": 0.105, "step": 26628 }, { "epoch": 0.7768539588073984, "grad_norm": 0.8308476317112877, "learning_rate": 1.2499851994744393e-06, "loss": 0.1151, "step": 26629 }, { "epoch": 0.7768831320380419, "grad_norm": 0.99605626968484, "learning_rate": 1.24967273197802e-06, "loss": 0.1186, "step": 26630 }, { "epoch": 0.7769123052686855, "grad_norm": 0.9990949850093903, "learning_rate": 1.2493602979636289e-06, "loss": 0.113, "step": 26631 }, { "epoch": 0.776941478499329, "grad_norm": 0.943995982225375, "learning_rate": 1.2490478974340536e-06, "loss": 0.1174, "step": 26632 }, { "epoch": 0.7769706517299726, "grad_norm": 0.8212802373601578, "learning_rate": 1.2487355303920817e-06, "loss": 0.1186, "step": 26633 }, { "epoch": 0.7769998249606161, "grad_norm": 0.9452085012954069, "learning_rate": 1.2484231968405053e-06, "loss": 0.1618, "step": 26634 }, { "epoch": 0.7770289981912597, "grad_norm": 1.0732195217586151, "learning_rate": 1.2481108967821092e-06, "loss": 0.1225, "step": 26635 }, { "epoch": 0.7770581714219033, "grad_norm": 1.1001290613704633, "learning_rate": 1.2477986302196848e-06, "loss": 0.0999, "step": 26636 }, { "epoch": 0.7770873446525468, "grad_norm": 0.8535470565199292, "learning_rate": 1.2474863971560176e-06, "loss": 0.1289, "step": 26637 }, { "epoch": 0.7771165178831904, "grad_norm": 1.0052434803438746, "learning_rate": 1.2471741975938971e-06, "loss": 0.1245, "step": 26638 }, { "epoch": 0.7771456911138339, "grad_norm": 0.9707925426337226, "learning_rate": 1.2468620315361097e-06, "loss": 0.1126, "step": 26639 }, { "epoch": 0.7771748643444775, "grad_norm": 1.0776174456302299, "learning_rate": 1.2465498989854403e-06, "loss": 0.0935, "step": 26640 }, { "epoch": 0.777204037575121, "grad_norm": 0.849047428327794, "learning_rate": 1.2462377999446772e-06, "loss": 0.1149, "step": 26641 }, { "epoch": 0.7772332108057647, "grad_norm": 0.880854927069741, "learning_rate": 1.2459257344166093e-06, "loss": 0.1347, "step": 26642 }, { "epoch": 0.7772623840364082, "grad_norm": 0.7519132175018172, "learning_rate": 1.2456137024040194e-06, "loss": 0.0963, "step": 26643 }, { "epoch": 0.7772915572670518, "grad_norm": 1.1795045369126658, "learning_rate": 1.2453017039096932e-06, "loss": 0.1205, "step": 26644 }, { "epoch": 0.7773207304976953, "grad_norm": 1.0666707961461077, "learning_rate": 1.244989738936418e-06, "loss": 0.1335, "step": 26645 }, { "epoch": 0.7773499037283389, "grad_norm": 0.7465684775842973, "learning_rate": 1.2446778074869787e-06, "loss": 0.1022, "step": 26646 }, { "epoch": 0.7773790769589825, "grad_norm": 0.9245849369293081, "learning_rate": 1.244365909564156e-06, "loss": 0.1219, "step": 26647 }, { "epoch": 0.777408250189626, "grad_norm": 0.8194364920016455, "learning_rate": 1.2440540451707412e-06, "loss": 0.1095, "step": 26648 }, { "epoch": 0.7774374234202696, "grad_norm": 1.4730538792742582, "learning_rate": 1.2437422143095146e-06, "loss": 0.1159, "step": 26649 }, { "epoch": 0.7774665966509131, "grad_norm": 0.9654301351932375, "learning_rate": 1.24343041698326e-06, "loss": 0.1004, "step": 26650 }, { "epoch": 0.7774957698815567, "grad_norm": 1.0609500260812186, "learning_rate": 1.2431186531947632e-06, "loss": 0.1005, "step": 26651 }, { "epoch": 0.7775249431122002, "grad_norm": 0.9494782063941555, "learning_rate": 1.2428069229468065e-06, "loss": 0.1039, "step": 26652 }, { "epoch": 0.7775541163428438, "grad_norm": 0.6299200428661222, "learning_rate": 1.2424952262421708e-06, "loss": 0.0927, "step": 26653 }, { "epoch": 0.7775832895734873, "grad_norm": 0.9646335130164041, "learning_rate": 1.242183563083641e-06, "loss": 0.1024, "step": 26654 }, { "epoch": 0.7776124628041309, "grad_norm": 0.8931292920192913, "learning_rate": 1.2418719334740003e-06, "loss": 0.1122, "step": 26655 }, { "epoch": 0.7776416360347745, "grad_norm": 1.0655031520186276, "learning_rate": 1.24156033741603e-06, "loss": 0.1009, "step": 26656 }, { "epoch": 0.7776708092654181, "grad_norm": 1.021857634597553, "learning_rate": 1.2412487749125107e-06, "loss": 0.1024, "step": 26657 }, { "epoch": 0.7776999824960616, "grad_norm": 0.8575078998222521, "learning_rate": 1.240937245966226e-06, "loss": 0.124, "step": 26658 }, { "epoch": 0.7777291557267052, "grad_norm": 0.9227808571683745, "learning_rate": 1.2406257505799553e-06, "loss": 0.1243, "step": 26659 }, { "epoch": 0.7777583289573488, "grad_norm": 1.116672167410054, "learning_rate": 1.24031428875648e-06, "loss": 0.1085, "step": 26660 }, { "epoch": 0.7777875021879923, "grad_norm": 0.9178566782027545, "learning_rate": 1.240002860498583e-06, "loss": 0.1331, "step": 26661 }, { "epoch": 0.7778166754186359, "grad_norm": 0.8968965306034115, "learning_rate": 1.2396914658090425e-06, "loss": 0.103, "step": 26662 }, { "epoch": 0.7778458486492794, "grad_norm": 0.9028466221955137, "learning_rate": 1.2393801046906378e-06, "loss": 0.106, "step": 26663 }, { "epoch": 0.777875021879923, "grad_norm": 0.8946521624877845, "learning_rate": 1.2390687771461514e-06, "loss": 0.102, "step": 26664 }, { "epoch": 0.7779041951105665, "grad_norm": 0.8069674068638788, "learning_rate": 1.2387574831783594e-06, "loss": 0.1271, "step": 26665 }, { "epoch": 0.7779333683412101, "grad_norm": 0.8904724566033538, "learning_rate": 1.2384462227900446e-06, "loss": 0.1321, "step": 26666 }, { "epoch": 0.7779625415718536, "grad_norm": 0.8145129592069313, "learning_rate": 1.2381349959839817e-06, "loss": 0.1135, "step": 26667 }, { "epoch": 0.7779917148024972, "grad_norm": 0.9192763556875785, "learning_rate": 1.2378238027629535e-06, "loss": 0.1009, "step": 26668 }, { "epoch": 0.7780208880331408, "grad_norm": 1.0417557445471464, "learning_rate": 1.2375126431297363e-06, "loss": 0.1238, "step": 26669 }, { "epoch": 0.7780500612637844, "grad_norm": 0.7742885384288763, "learning_rate": 1.2372015170871066e-06, "loss": 0.112, "step": 26670 }, { "epoch": 0.778079234494428, "grad_norm": 0.9630538149212657, "learning_rate": 1.2368904246378433e-06, "loss": 0.1024, "step": 26671 }, { "epoch": 0.7781084077250715, "grad_norm": 1.1272673744241541, "learning_rate": 1.2365793657847258e-06, "loss": 0.1315, "step": 26672 }, { "epoch": 0.7781375809557151, "grad_norm": 0.9593502375094775, "learning_rate": 1.2362683405305288e-06, "loss": 0.1067, "step": 26673 }, { "epoch": 0.7781667541863586, "grad_norm": 1.250199771377887, "learning_rate": 1.2359573488780286e-06, "loss": 0.1238, "step": 26674 }, { "epoch": 0.7781959274170022, "grad_norm": 1.1218438201352507, "learning_rate": 1.2356463908300038e-06, "loss": 0.119, "step": 26675 }, { "epoch": 0.7782251006476457, "grad_norm": 1.0016220520139367, "learning_rate": 1.2353354663892292e-06, "loss": 0.1054, "step": 26676 }, { "epoch": 0.7782542738782893, "grad_norm": 1.1907167595340427, "learning_rate": 1.2350245755584784e-06, "loss": 0.1153, "step": 26677 }, { "epoch": 0.7782834471089328, "grad_norm": 0.7287298020032715, "learning_rate": 1.2347137183405322e-06, "loss": 0.0937, "step": 26678 }, { "epoch": 0.7783126203395764, "grad_norm": 0.745480931902787, "learning_rate": 1.234402894738163e-06, "loss": 0.1019, "step": 26679 }, { "epoch": 0.7783417935702199, "grad_norm": 1.0994987771951368, "learning_rate": 1.2340921047541443e-06, "loss": 0.1319, "step": 26680 }, { "epoch": 0.7783709668008635, "grad_norm": 1.0285183921062595, "learning_rate": 1.2337813483912537e-06, "loss": 0.1156, "step": 26681 }, { "epoch": 0.778400140031507, "grad_norm": 0.8084195105996703, "learning_rate": 1.2334706256522645e-06, "loss": 0.1196, "step": 26682 }, { "epoch": 0.7784293132621507, "grad_norm": 0.9120359785443655, "learning_rate": 1.2331599365399488e-06, "loss": 0.1032, "step": 26683 }, { "epoch": 0.7784584864927943, "grad_norm": 0.6817428343949745, "learning_rate": 1.232849281057082e-06, "loss": 0.1221, "step": 26684 }, { "epoch": 0.7784876597234378, "grad_norm": 1.0225743706698278, "learning_rate": 1.2325386592064387e-06, "loss": 0.1441, "step": 26685 }, { "epoch": 0.7785168329540814, "grad_norm": 1.0009657983376468, "learning_rate": 1.2322280709907914e-06, "loss": 0.1362, "step": 26686 }, { "epoch": 0.7785460061847249, "grad_norm": 0.7195015225001173, "learning_rate": 1.2319175164129104e-06, "loss": 0.112, "step": 26687 }, { "epoch": 0.7785751794153685, "grad_norm": 1.0028875804361255, "learning_rate": 1.2316069954755722e-06, "loss": 0.1098, "step": 26688 }, { "epoch": 0.778604352646012, "grad_norm": 1.000659052366997, "learning_rate": 1.2312965081815454e-06, "loss": 0.1161, "step": 26689 }, { "epoch": 0.7786335258766556, "grad_norm": 0.7514182524851203, "learning_rate": 1.2309860545336038e-06, "loss": 0.1108, "step": 26690 }, { "epoch": 0.7786626991072991, "grad_norm": 0.9129436053352685, "learning_rate": 1.2306756345345206e-06, "loss": 0.1346, "step": 26691 }, { "epoch": 0.7786918723379427, "grad_norm": 0.9784802357904911, "learning_rate": 1.2303652481870654e-06, "loss": 0.0991, "step": 26692 }, { "epoch": 0.7787210455685862, "grad_norm": 0.8306120165879555, "learning_rate": 1.2300548954940079e-06, "loss": 0.1103, "step": 26693 }, { "epoch": 0.7787502187992298, "grad_norm": 0.8554702064026037, "learning_rate": 1.2297445764581218e-06, "loss": 0.1405, "step": 26694 }, { "epoch": 0.7787793920298733, "grad_norm": 0.8924164994352132, "learning_rate": 1.2294342910821743e-06, "loss": 0.1205, "step": 26695 }, { "epoch": 0.778808565260517, "grad_norm": 1.0941959692835694, "learning_rate": 1.2291240393689397e-06, "loss": 0.0954, "step": 26696 }, { "epoch": 0.7788377384911606, "grad_norm": 0.8278228119703622, "learning_rate": 1.228813821321183e-06, "loss": 0.1283, "step": 26697 }, { "epoch": 0.7788669117218041, "grad_norm": 0.7989796401063589, "learning_rate": 1.2285036369416785e-06, "loss": 0.1158, "step": 26698 }, { "epoch": 0.7788960849524477, "grad_norm": 0.8799667853157728, "learning_rate": 1.2281934862331929e-06, "loss": 0.1016, "step": 26699 }, { "epoch": 0.7789252581830912, "grad_norm": 0.8323693952223855, "learning_rate": 1.2278833691984938e-06, "loss": 0.1212, "step": 26700 }, { "epoch": 0.7789544314137348, "grad_norm": 0.8224931820100664, "learning_rate": 1.2275732858403516e-06, "loss": 0.1268, "step": 26701 }, { "epoch": 0.7789836046443783, "grad_norm": 1.0095870012942598, "learning_rate": 1.227263236161536e-06, "loss": 0.118, "step": 26702 }, { "epoch": 0.7790127778750219, "grad_norm": 0.9547925257292422, "learning_rate": 1.2269532201648138e-06, "loss": 0.1267, "step": 26703 }, { "epoch": 0.7790419511056654, "grad_norm": 0.8814984158576729, "learning_rate": 1.2266432378529515e-06, "loss": 0.1079, "step": 26704 }, { "epoch": 0.779071124336309, "grad_norm": 0.6721525574171244, "learning_rate": 1.2263332892287183e-06, "loss": 0.1031, "step": 26705 }, { "epoch": 0.7791002975669525, "grad_norm": 0.8500720477022115, "learning_rate": 1.2260233742948796e-06, "loss": 0.1099, "step": 26706 }, { "epoch": 0.7791294707975961, "grad_norm": 0.8083825183320806, "learning_rate": 1.225713493054203e-06, "loss": 0.0952, "step": 26707 }, { "epoch": 0.7791586440282396, "grad_norm": 0.8482078588101409, "learning_rate": 1.225403645509457e-06, "loss": 0.1405, "step": 26708 }, { "epoch": 0.7791878172588832, "grad_norm": 0.8291330958677605, "learning_rate": 1.2250938316634058e-06, "loss": 0.1131, "step": 26709 }, { "epoch": 0.7792169904895269, "grad_norm": 0.749058899827566, "learning_rate": 1.2247840515188148e-06, "loss": 0.1108, "step": 26710 }, { "epoch": 0.7792461637201704, "grad_norm": 0.6851845760589484, "learning_rate": 1.224474305078452e-06, "loss": 0.1075, "step": 26711 }, { "epoch": 0.779275336950814, "grad_norm": 0.8155896657226269, "learning_rate": 1.2241645923450795e-06, "loss": 0.117, "step": 26712 }, { "epoch": 0.7793045101814575, "grad_norm": 0.740677544101561, "learning_rate": 1.2238549133214656e-06, "loss": 0.1254, "step": 26713 }, { "epoch": 0.7793336834121011, "grad_norm": 0.8438046602980243, "learning_rate": 1.2235452680103727e-06, "loss": 0.1072, "step": 26714 }, { "epoch": 0.7793628566427446, "grad_norm": 0.8325713259705939, "learning_rate": 1.2232356564145669e-06, "loss": 0.1098, "step": 26715 }, { "epoch": 0.7793920298733882, "grad_norm": 0.9077832510116061, "learning_rate": 1.222926078536812e-06, "loss": 0.0973, "step": 26716 }, { "epoch": 0.7794212031040317, "grad_norm": 0.9130288259711684, "learning_rate": 1.2226165343798695e-06, "loss": 0.1291, "step": 26717 }, { "epoch": 0.7794503763346753, "grad_norm": 0.9629574235455058, "learning_rate": 1.2223070239465056e-06, "loss": 0.116, "step": 26718 }, { "epoch": 0.7794795495653188, "grad_norm": 0.9530366632288055, "learning_rate": 1.2219975472394835e-06, "loss": 0.1058, "step": 26719 }, { "epoch": 0.7795087227959624, "grad_norm": 0.8913347652453288, "learning_rate": 1.2216881042615648e-06, "loss": 0.1365, "step": 26720 }, { "epoch": 0.779537896026606, "grad_norm": 0.8646455633136173, "learning_rate": 1.2213786950155132e-06, "loss": 0.0949, "step": 26721 }, { "epoch": 0.7795670692572495, "grad_norm": 0.8838438486528315, "learning_rate": 1.2210693195040912e-06, "loss": 0.1227, "step": 26722 }, { "epoch": 0.779596242487893, "grad_norm": 0.794708762125285, "learning_rate": 1.2207599777300588e-06, "loss": 0.127, "step": 26723 }, { "epoch": 0.7796254157185367, "grad_norm": 0.8809471127235253, "learning_rate": 1.2204506696961788e-06, "loss": 0.1344, "step": 26724 }, { "epoch": 0.7796545889491803, "grad_norm": 0.7929277943310479, "learning_rate": 1.220141395405215e-06, "loss": 0.1117, "step": 26725 }, { "epoch": 0.7796837621798238, "grad_norm": 0.7012512896546673, "learning_rate": 1.2198321548599258e-06, "loss": 0.0999, "step": 26726 }, { "epoch": 0.7797129354104674, "grad_norm": 0.7876964499431774, "learning_rate": 1.2195229480630715e-06, "loss": 0.1143, "step": 26727 }, { "epoch": 0.7797421086411109, "grad_norm": 0.9069026507711498, "learning_rate": 1.2192137750174154e-06, "loss": 0.1247, "step": 26728 }, { "epoch": 0.7797712818717545, "grad_norm": 0.7822523918268156, "learning_rate": 1.218904635725716e-06, "loss": 0.1205, "step": 26729 }, { "epoch": 0.779800455102398, "grad_norm": 0.9584496067290577, "learning_rate": 1.218595530190732e-06, "loss": 0.1125, "step": 26730 }, { "epoch": 0.7798296283330416, "grad_norm": 1.5316248666339345, "learning_rate": 1.218286458415225e-06, "loss": 0.1253, "step": 26731 }, { "epoch": 0.7798588015636851, "grad_norm": 0.8843190043115959, "learning_rate": 1.2179774204019545e-06, "loss": 0.1111, "step": 26732 }, { "epoch": 0.7798879747943287, "grad_norm": 0.7014560902717982, "learning_rate": 1.2176684161536789e-06, "loss": 0.1074, "step": 26733 }, { "epoch": 0.7799171480249723, "grad_norm": 0.8405700625956761, "learning_rate": 1.2173594456731552e-06, "loss": 0.1365, "step": 26734 }, { "epoch": 0.7799463212556158, "grad_norm": 0.7313345740138453, "learning_rate": 1.217050508963145e-06, "loss": 0.1196, "step": 26735 }, { "epoch": 0.7799754944862594, "grad_norm": 0.9984755855923472, "learning_rate": 1.2167416060264032e-06, "loss": 0.1107, "step": 26736 }, { "epoch": 0.780004667716903, "grad_norm": 0.8165395911930621, "learning_rate": 1.2164327368656891e-06, "loss": 0.1034, "step": 26737 }, { "epoch": 0.7800338409475466, "grad_norm": 0.8972382791560111, "learning_rate": 1.2161239014837622e-06, "loss": 0.1332, "step": 26738 }, { "epoch": 0.7800630141781901, "grad_norm": 0.8449132336743154, "learning_rate": 1.215815099883378e-06, "loss": 0.1184, "step": 26739 }, { "epoch": 0.7800921874088337, "grad_norm": 0.901871165124545, "learning_rate": 1.215506332067291e-06, "loss": 0.0959, "step": 26740 }, { "epoch": 0.7801213606394772, "grad_norm": 0.8205331596332643, "learning_rate": 1.215197598038262e-06, "loss": 0.1495, "step": 26741 }, { "epoch": 0.7801505338701208, "grad_norm": 0.8982133362430896, "learning_rate": 1.2148888977990435e-06, "loss": 0.1186, "step": 26742 }, { "epoch": 0.7801797071007643, "grad_norm": 0.8739905942822849, "learning_rate": 1.2145802313523953e-06, "loss": 0.1084, "step": 26743 }, { "epoch": 0.7802088803314079, "grad_norm": 0.9793410854378775, "learning_rate": 1.2142715987010695e-06, "loss": 0.1272, "step": 26744 }, { "epoch": 0.7802380535620514, "grad_norm": 0.7237864976502513, "learning_rate": 1.2139629998478242e-06, "loss": 0.1137, "step": 26745 }, { "epoch": 0.780267226792695, "grad_norm": 0.8582898059022251, "learning_rate": 1.2136544347954137e-06, "loss": 0.1064, "step": 26746 }, { "epoch": 0.7802964000233386, "grad_norm": 0.8673267023193789, "learning_rate": 1.213345903546591e-06, "loss": 0.1088, "step": 26747 }, { "epoch": 0.7803255732539821, "grad_norm": 0.8770160523231496, "learning_rate": 1.2130374061041129e-06, "loss": 0.1124, "step": 26748 }, { "epoch": 0.7803547464846257, "grad_norm": 0.9360533571696744, "learning_rate": 1.2127289424707333e-06, "loss": 0.1272, "step": 26749 }, { "epoch": 0.7803839197152692, "grad_norm": 0.9511495848014366, "learning_rate": 1.2124205126492045e-06, "loss": 0.1284, "step": 26750 }, { "epoch": 0.7804130929459129, "grad_norm": 0.6983904983574253, "learning_rate": 1.2121121166422828e-06, "loss": 0.1248, "step": 26751 }, { "epoch": 0.7804422661765564, "grad_norm": 0.80836694000979, "learning_rate": 1.2118037544527195e-06, "loss": 0.1332, "step": 26752 }, { "epoch": 0.7804714394072, "grad_norm": 0.7763670996189822, "learning_rate": 1.2114954260832668e-06, "loss": 0.1042, "step": 26753 }, { "epoch": 0.7805006126378435, "grad_norm": 0.7963575504478867, "learning_rate": 1.2111871315366785e-06, "loss": 0.1133, "step": 26754 }, { "epoch": 0.7805297858684871, "grad_norm": 0.9381122350973579, "learning_rate": 1.2108788708157087e-06, "loss": 0.1133, "step": 26755 }, { "epoch": 0.7805589590991306, "grad_norm": 1.0777249917953655, "learning_rate": 1.2105706439231073e-06, "loss": 0.1191, "step": 26756 }, { "epoch": 0.7805881323297742, "grad_norm": 0.9266597424002107, "learning_rate": 1.2102624508616257e-06, "loss": 0.1087, "step": 26757 }, { "epoch": 0.7806173055604178, "grad_norm": 0.9842706032528625, "learning_rate": 1.2099542916340172e-06, "loss": 0.136, "step": 26758 }, { "epoch": 0.7806464787910613, "grad_norm": 0.9169158453911002, "learning_rate": 1.209646166243032e-06, "loss": 0.133, "step": 26759 }, { "epoch": 0.7806756520217049, "grad_norm": 0.7454080072920237, "learning_rate": 1.2093380746914201e-06, "loss": 0.0961, "step": 26760 }, { "epoch": 0.7807048252523484, "grad_norm": 0.9954352719939636, "learning_rate": 1.2090300169819325e-06, "loss": 0.1224, "step": 26761 }, { "epoch": 0.780733998482992, "grad_norm": 0.6148071655149786, "learning_rate": 1.2087219931173217e-06, "loss": 0.1043, "step": 26762 }, { "epoch": 0.7807631717136355, "grad_norm": 1.0154074792198873, "learning_rate": 1.2084140031003355e-06, "loss": 0.1218, "step": 26763 }, { "epoch": 0.7807923449442792, "grad_norm": 0.9427767581583829, "learning_rate": 1.208106046933723e-06, "loss": 0.1107, "step": 26764 }, { "epoch": 0.7808215181749227, "grad_norm": 0.9415031648731707, "learning_rate": 1.2077981246202353e-06, "loss": 0.1452, "step": 26765 }, { "epoch": 0.7808506914055663, "grad_norm": 0.8756557239714715, "learning_rate": 1.2074902361626196e-06, "loss": 0.1227, "step": 26766 }, { "epoch": 0.7808798646362098, "grad_norm": 0.6957932399657003, "learning_rate": 1.2071823815636257e-06, "loss": 0.0937, "step": 26767 }, { "epoch": 0.7809090378668534, "grad_norm": 0.883933179139988, "learning_rate": 1.2068745608260035e-06, "loss": 0.1041, "step": 26768 }, { "epoch": 0.780938211097497, "grad_norm": 0.7340686565749424, "learning_rate": 1.2065667739525e-06, "loss": 0.1299, "step": 26769 }, { "epoch": 0.7809673843281405, "grad_norm": 1.0386307950050804, "learning_rate": 1.2062590209458614e-06, "loss": 0.109, "step": 26770 }, { "epoch": 0.780996557558784, "grad_norm": 0.8605718743394106, "learning_rate": 1.2059513018088375e-06, "loss": 0.1195, "step": 26771 }, { "epoch": 0.7810257307894276, "grad_norm": 0.7351565609045481, "learning_rate": 1.2056436165441738e-06, "loss": 0.1132, "step": 26772 }, { "epoch": 0.7810549040200712, "grad_norm": 0.804844919474205, "learning_rate": 1.2053359651546193e-06, "loss": 0.144, "step": 26773 }, { "epoch": 0.7810840772507147, "grad_norm": 0.975276817856636, "learning_rate": 1.2050283476429176e-06, "loss": 0.1119, "step": 26774 }, { "epoch": 0.7811132504813583, "grad_norm": 0.6950365481214305, "learning_rate": 1.2047207640118187e-06, "loss": 0.0903, "step": 26775 }, { "epoch": 0.7811424237120018, "grad_norm": 0.7311888139797804, "learning_rate": 1.204413214264067e-06, "loss": 0.1122, "step": 26776 }, { "epoch": 0.7811715969426454, "grad_norm": 0.7739204836649747, "learning_rate": 1.2041056984024063e-06, "loss": 0.1098, "step": 26777 }, { "epoch": 0.781200770173289, "grad_norm": 0.7795367277900777, "learning_rate": 1.2037982164295837e-06, "loss": 0.1089, "step": 26778 }, { "epoch": 0.7812299434039326, "grad_norm": 0.7290185208829019, "learning_rate": 1.203490768348346e-06, "loss": 0.1092, "step": 26779 }, { "epoch": 0.7812591166345761, "grad_norm": 0.8874276508149599, "learning_rate": 1.203183354161435e-06, "loss": 0.1412, "step": 26780 }, { "epoch": 0.7812882898652197, "grad_norm": 0.7654875723898305, "learning_rate": 1.2028759738715983e-06, "loss": 0.1143, "step": 26781 }, { "epoch": 0.7813174630958633, "grad_norm": 0.967778599241166, "learning_rate": 1.2025686274815784e-06, "loss": 0.1149, "step": 26782 }, { "epoch": 0.7813466363265068, "grad_norm": 0.6716975594959714, "learning_rate": 1.2022613149941176e-06, "loss": 0.1046, "step": 26783 }, { "epoch": 0.7813758095571504, "grad_norm": 0.7881468291688234, "learning_rate": 1.2019540364119608e-06, "loss": 0.1139, "step": 26784 }, { "epoch": 0.7814049827877939, "grad_norm": 0.7793891505249078, "learning_rate": 1.2016467917378539e-06, "loss": 0.1383, "step": 26785 }, { "epoch": 0.7814341560184375, "grad_norm": 0.9393049255640398, "learning_rate": 1.201339580974537e-06, "loss": 0.1384, "step": 26786 }, { "epoch": 0.781463329249081, "grad_norm": 0.773114171474124, "learning_rate": 1.201032404124753e-06, "loss": 0.1214, "step": 26787 }, { "epoch": 0.7814925024797246, "grad_norm": 0.9296450342613287, "learning_rate": 1.2007252611912457e-06, "loss": 0.1116, "step": 26788 }, { "epoch": 0.7815216757103681, "grad_norm": 0.8073457606369954, "learning_rate": 1.200418152176756e-06, "loss": 0.1066, "step": 26789 }, { "epoch": 0.7815508489410117, "grad_norm": 1.1966057101530976, "learning_rate": 1.2001110770840253e-06, "loss": 0.1189, "step": 26790 }, { "epoch": 0.7815800221716553, "grad_norm": 0.9165946645517647, "learning_rate": 1.1998040359157954e-06, "loss": 0.1273, "step": 26791 }, { "epoch": 0.7816091954022989, "grad_norm": 1.0271750745608197, "learning_rate": 1.1994970286748093e-06, "loss": 0.1258, "step": 26792 }, { "epoch": 0.7816383686329424, "grad_norm": 0.6935406299393101, "learning_rate": 1.1991900553638065e-06, "loss": 0.1071, "step": 26793 }, { "epoch": 0.781667541863586, "grad_norm": 0.8348271471994329, "learning_rate": 1.1988831159855257e-06, "loss": 0.1212, "step": 26794 }, { "epoch": 0.7816967150942296, "grad_norm": 0.7935682707299909, "learning_rate": 1.1985762105427107e-06, "loss": 0.1183, "step": 26795 }, { "epoch": 0.7817258883248731, "grad_norm": 0.8346079487274879, "learning_rate": 1.198269339038099e-06, "loss": 0.1038, "step": 26796 }, { "epoch": 0.7817550615555167, "grad_norm": 0.8994901482859692, "learning_rate": 1.1979625014744306e-06, "loss": 0.115, "step": 26797 }, { "epoch": 0.7817842347861602, "grad_norm": 0.7649070281646475, "learning_rate": 1.1976556978544467e-06, "loss": 0.0962, "step": 26798 }, { "epoch": 0.7818134080168038, "grad_norm": 0.9454515925374907, "learning_rate": 1.1973489281808854e-06, "loss": 0.1365, "step": 26799 }, { "epoch": 0.7818425812474473, "grad_norm": 0.8893040178251703, "learning_rate": 1.1970421924564835e-06, "loss": 0.1119, "step": 26800 }, { "epoch": 0.7818717544780909, "grad_norm": 0.6299941910648025, "learning_rate": 1.1967354906839824e-06, "loss": 0.1032, "step": 26801 }, { "epoch": 0.7819009277087344, "grad_norm": 0.7194752175170329, "learning_rate": 1.1964288228661175e-06, "loss": 0.1005, "step": 26802 }, { "epoch": 0.781930100939378, "grad_norm": 0.8269650255837503, "learning_rate": 1.1961221890056296e-06, "loss": 0.115, "step": 26803 }, { "epoch": 0.7819592741700215, "grad_norm": 0.6900089824003621, "learning_rate": 1.1958155891052531e-06, "loss": 0.0987, "step": 26804 }, { "epoch": 0.7819884474006652, "grad_norm": 0.7664680136418915, "learning_rate": 1.1955090231677285e-06, "loss": 0.1228, "step": 26805 }, { "epoch": 0.7820176206313088, "grad_norm": 0.8842127872208614, "learning_rate": 1.195202491195791e-06, "loss": 0.1058, "step": 26806 }, { "epoch": 0.7820467938619523, "grad_norm": 0.9171726720943688, "learning_rate": 1.194895993192176e-06, "loss": 0.1179, "step": 26807 }, { "epoch": 0.7820759670925959, "grad_norm": 0.7028869612526993, "learning_rate": 1.1945895291596217e-06, "loss": 0.1299, "step": 26808 }, { "epoch": 0.7821051403232394, "grad_norm": 0.7584500685507514, "learning_rate": 1.1942830991008646e-06, "loss": 0.1273, "step": 26809 }, { "epoch": 0.782134313553883, "grad_norm": 0.870532507451866, "learning_rate": 1.1939767030186377e-06, "loss": 0.1283, "step": 26810 }, { "epoch": 0.7821634867845265, "grad_norm": 0.8621506605643977, "learning_rate": 1.1936703409156802e-06, "loss": 0.1084, "step": 26811 }, { "epoch": 0.7821926600151701, "grad_norm": 0.6880970855532711, "learning_rate": 1.1933640127947255e-06, "loss": 0.1067, "step": 26812 }, { "epoch": 0.7822218332458136, "grad_norm": 0.9526159172576324, "learning_rate": 1.1930577186585063e-06, "loss": 0.1329, "step": 26813 }, { "epoch": 0.7822510064764572, "grad_norm": 0.7638848398201711, "learning_rate": 1.1927514585097594e-06, "loss": 0.0982, "step": 26814 }, { "epoch": 0.7822801797071007, "grad_norm": 0.8123166818427471, "learning_rate": 1.1924452323512192e-06, "loss": 0.1011, "step": 26815 }, { "epoch": 0.7823093529377443, "grad_norm": 0.8692443813015546, "learning_rate": 1.1921390401856198e-06, "loss": 0.1246, "step": 26816 }, { "epoch": 0.7823385261683878, "grad_norm": 0.8254263945250957, "learning_rate": 1.1918328820156928e-06, "loss": 0.104, "step": 26817 }, { "epoch": 0.7823676993990315, "grad_norm": 1.043279921974565, "learning_rate": 1.1915267578441737e-06, "loss": 0.0986, "step": 26818 }, { "epoch": 0.782396872629675, "grad_norm": 0.8854793295232857, "learning_rate": 1.1912206676737942e-06, "loss": 0.1259, "step": 26819 }, { "epoch": 0.7824260458603186, "grad_norm": 0.7688145757132527, "learning_rate": 1.1909146115072866e-06, "loss": 0.1021, "step": 26820 }, { "epoch": 0.7824552190909622, "grad_norm": 0.6315318923356129, "learning_rate": 1.1906085893473835e-06, "loss": 0.0956, "step": 26821 }, { "epoch": 0.7824843923216057, "grad_norm": 0.855182269868678, "learning_rate": 1.1903026011968194e-06, "loss": 0.1018, "step": 26822 }, { "epoch": 0.7825135655522493, "grad_norm": 1.2437032393368883, "learning_rate": 1.189996647058324e-06, "loss": 0.1071, "step": 26823 }, { "epoch": 0.7825427387828928, "grad_norm": 0.7593396867233735, "learning_rate": 1.1896907269346274e-06, "loss": 0.1156, "step": 26824 }, { "epoch": 0.7825719120135364, "grad_norm": 0.7818691260607719, "learning_rate": 1.1893848408284641e-06, "loss": 0.0971, "step": 26825 }, { "epoch": 0.7826010852441799, "grad_norm": 0.8160192005648955, "learning_rate": 1.1890789887425618e-06, "loss": 0.1057, "step": 26826 }, { "epoch": 0.7826302584748235, "grad_norm": 0.7062585494868514, "learning_rate": 1.188773170679653e-06, "loss": 0.0952, "step": 26827 }, { "epoch": 0.782659431705467, "grad_norm": 1.0048072435142412, "learning_rate": 1.1884673866424683e-06, "loss": 0.1078, "step": 26828 }, { "epoch": 0.7826886049361106, "grad_norm": 0.6853913456310932, "learning_rate": 1.1881616366337372e-06, "loss": 0.0987, "step": 26829 }, { "epoch": 0.7827177781667541, "grad_norm": 0.7420160718974594, "learning_rate": 1.1878559206561874e-06, "loss": 0.1291, "step": 26830 }, { "epoch": 0.7827469513973977, "grad_norm": 1.1803984725626182, "learning_rate": 1.1875502387125514e-06, "loss": 0.1307, "step": 26831 }, { "epoch": 0.7827761246280414, "grad_norm": 0.8828823275993483, "learning_rate": 1.1872445908055557e-06, "loss": 0.141, "step": 26832 }, { "epoch": 0.7828052978586849, "grad_norm": 1.0668592512178308, "learning_rate": 1.1869389769379314e-06, "loss": 0.1124, "step": 26833 }, { "epoch": 0.7828344710893285, "grad_norm": 0.8475742344287523, "learning_rate": 1.186633397112404e-06, "loss": 0.1389, "step": 26834 }, { "epoch": 0.782863644319972, "grad_norm": 1.0318833825760718, "learning_rate": 1.1863278513317046e-06, "loss": 0.1361, "step": 26835 }, { "epoch": 0.7828928175506156, "grad_norm": 0.8669605871058667, "learning_rate": 1.1860223395985598e-06, "loss": 0.1176, "step": 26836 }, { "epoch": 0.7829219907812591, "grad_norm": 0.700379017369269, "learning_rate": 1.1857168619156962e-06, "loss": 0.1121, "step": 26837 }, { "epoch": 0.7829511640119027, "grad_norm": 0.8243197277304135, "learning_rate": 1.1854114182858413e-06, "loss": 0.1385, "step": 26838 }, { "epoch": 0.7829803372425462, "grad_norm": 0.8155538368360573, "learning_rate": 1.1851060087117244e-06, "loss": 0.1273, "step": 26839 }, { "epoch": 0.7830095104731898, "grad_norm": 0.7557102775662806, "learning_rate": 1.1848006331960688e-06, "loss": 0.1044, "step": 26840 }, { "epoch": 0.7830386837038333, "grad_norm": 0.8165470558445905, "learning_rate": 1.1844952917416043e-06, "loss": 0.1227, "step": 26841 }, { "epoch": 0.7830678569344769, "grad_norm": 0.9378788158559712, "learning_rate": 1.184189984351054e-06, "loss": 0.1128, "step": 26842 }, { "epoch": 0.7830970301651204, "grad_norm": 0.7393726333565241, "learning_rate": 1.183884711027144e-06, "loss": 0.1074, "step": 26843 }, { "epoch": 0.783126203395764, "grad_norm": 0.7803308518649982, "learning_rate": 1.1835794717726e-06, "loss": 0.1138, "step": 26844 }, { "epoch": 0.7831553766264077, "grad_norm": 1.114408197837132, "learning_rate": 1.1832742665901486e-06, "loss": 0.1222, "step": 26845 }, { "epoch": 0.7831845498570512, "grad_norm": 0.9755866168268343, "learning_rate": 1.182969095482514e-06, "loss": 0.0992, "step": 26846 }, { "epoch": 0.7832137230876948, "grad_norm": 0.7674839105195207, "learning_rate": 1.1826639584524185e-06, "loss": 0.1193, "step": 26847 }, { "epoch": 0.7832428963183383, "grad_norm": 0.7976890375796063, "learning_rate": 1.1823588555025894e-06, "loss": 0.1328, "step": 26848 }, { "epoch": 0.7832720695489819, "grad_norm": 1.0477441113233703, "learning_rate": 1.182053786635749e-06, "loss": 0.1152, "step": 26849 }, { "epoch": 0.7833012427796254, "grad_norm": 0.9130039249914723, "learning_rate": 1.1817487518546194e-06, "loss": 0.1294, "step": 26850 }, { "epoch": 0.783330416010269, "grad_norm": 0.8111684208701135, "learning_rate": 1.1814437511619254e-06, "loss": 0.1347, "step": 26851 }, { "epoch": 0.7833595892409125, "grad_norm": 1.128438059941651, "learning_rate": 1.1811387845603916e-06, "loss": 0.122, "step": 26852 }, { "epoch": 0.7833887624715561, "grad_norm": 0.722683754825944, "learning_rate": 1.180833852052739e-06, "loss": 0.1088, "step": 26853 }, { "epoch": 0.7834179357021996, "grad_norm": 1.05479675217445, "learning_rate": 1.1805289536416887e-06, "loss": 0.1169, "step": 26854 }, { "epoch": 0.7834471089328432, "grad_norm": 0.7803310474664418, "learning_rate": 1.180224089329966e-06, "loss": 0.1138, "step": 26855 }, { "epoch": 0.7834762821634867, "grad_norm": 0.8860024298146697, "learning_rate": 1.1799192591202884e-06, "loss": 0.0925, "step": 26856 }, { "epoch": 0.7835054553941303, "grad_norm": 0.8998043125351297, "learning_rate": 1.1796144630153806e-06, "loss": 0.1147, "step": 26857 }, { "epoch": 0.7835346286247739, "grad_norm": 0.8579436010477661, "learning_rate": 1.1793097010179639e-06, "loss": 0.1173, "step": 26858 }, { "epoch": 0.7835638018554175, "grad_norm": 1.1066410641333093, "learning_rate": 1.179004973130758e-06, "loss": 0.1233, "step": 26859 }, { "epoch": 0.7835929750860611, "grad_norm": 0.8990092908336493, "learning_rate": 1.1787002793564822e-06, "loss": 0.0991, "step": 26860 }, { "epoch": 0.7836221483167046, "grad_norm": 0.9630480007906665, "learning_rate": 1.1783956196978595e-06, "loss": 0.105, "step": 26861 }, { "epoch": 0.7836513215473482, "grad_norm": 0.8582363903078812, "learning_rate": 1.1780909941576074e-06, "loss": 0.1256, "step": 26862 }, { "epoch": 0.7836804947779917, "grad_norm": 0.9205639610287715, "learning_rate": 1.1777864027384478e-06, "loss": 0.1187, "step": 26863 }, { "epoch": 0.7837096680086353, "grad_norm": 1.0210478042715165, "learning_rate": 1.177481845443097e-06, "loss": 0.1108, "step": 26864 }, { "epoch": 0.7837388412392788, "grad_norm": 0.9103747963753535, "learning_rate": 1.1771773222742778e-06, "loss": 0.1126, "step": 26865 }, { "epoch": 0.7837680144699224, "grad_norm": 1.0552119030485607, "learning_rate": 1.1768728332347062e-06, "loss": 0.0976, "step": 26866 }, { "epoch": 0.783797187700566, "grad_norm": 0.9337237343380899, "learning_rate": 1.1765683783271004e-06, "loss": 0.1007, "step": 26867 }, { "epoch": 0.7838263609312095, "grad_norm": 0.9383534581297182, "learning_rate": 1.176263957554179e-06, "loss": 0.0996, "step": 26868 }, { "epoch": 0.783855534161853, "grad_norm": 0.7949921727905045, "learning_rate": 1.1759595709186616e-06, "loss": 0.1263, "step": 26869 }, { "epoch": 0.7838847073924966, "grad_norm": 1.0558144681634218, "learning_rate": 1.1756552184232634e-06, "loss": 0.1313, "step": 26870 }, { "epoch": 0.7839138806231402, "grad_norm": 0.8564336025917788, "learning_rate": 1.175350900070703e-06, "loss": 0.1046, "step": 26871 }, { "epoch": 0.7839430538537838, "grad_norm": 1.1128294106431664, "learning_rate": 1.1750466158636975e-06, "loss": 0.1235, "step": 26872 }, { "epoch": 0.7839722270844274, "grad_norm": 0.7481349097680933, "learning_rate": 1.1747423658049612e-06, "loss": 0.1033, "step": 26873 }, { "epoch": 0.7840014003150709, "grad_norm": 1.0080161958019997, "learning_rate": 1.1744381498972117e-06, "loss": 0.1238, "step": 26874 }, { "epoch": 0.7840305735457145, "grad_norm": 0.9772100033840949, "learning_rate": 1.1741339681431669e-06, "loss": 0.1103, "step": 26875 }, { "epoch": 0.784059746776358, "grad_norm": 0.8285845957630742, "learning_rate": 1.17382982054554e-06, "loss": 0.1268, "step": 26876 }, { "epoch": 0.7840889200070016, "grad_norm": 1.0864471912142566, "learning_rate": 1.1735257071070466e-06, "loss": 0.1228, "step": 26877 }, { "epoch": 0.7841180932376451, "grad_norm": 0.8564488073734594, "learning_rate": 1.1732216278304032e-06, "loss": 0.0952, "step": 26878 }, { "epoch": 0.7841472664682887, "grad_norm": 0.8030651747634504, "learning_rate": 1.1729175827183232e-06, "loss": 0.105, "step": 26879 }, { "epoch": 0.7841764396989322, "grad_norm": 0.7667886353252452, "learning_rate": 1.1726135717735204e-06, "loss": 0.1163, "step": 26880 }, { "epoch": 0.7842056129295758, "grad_norm": 1.0480954538064446, "learning_rate": 1.1723095949987101e-06, "loss": 0.1322, "step": 26881 }, { "epoch": 0.7842347861602194, "grad_norm": 0.7230827016380355, "learning_rate": 1.1720056523966072e-06, "loss": 0.1023, "step": 26882 }, { "epoch": 0.7842639593908629, "grad_norm": 0.7217534438164802, "learning_rate": 1.171701743969924e-06, "loss": 0.1179, "step": 26883 }, { "epoch": 0.7842931326215065, "grad_norm": 0.8114078742560802, "learning_rate": 1.1713978697213723e-06, "loss": 0.1053, "step": 26884 }, { "epoch": 0.78432230585215, "grad_norm": 0.7021766512605949, "learning_rate": 1.1710940296536682e-06, "loss": 0.1154, "step": 26885 }, { "epoch": 0.7843514790827937, "grad_norm": 0.8600017977371976, "learning_rate": 1.1707902237695206e-06, "loss": 0.1204, "step": 26886 }, { "epoch": 0.7843806523134372, "grad_norm": 0.8926396098940488, "learning_rate": 1.1704864520716442e-06, "loss": 0.1081, "step": 26887 }, { "epoch": 0.7844098255440808, "grad_norm": 0.8685521401022998, "learning_rate": 1.170182714562752e-06, "loss": 0.1187, "step": 26888 }, { "epoch": 0.7844389987747243, "grad_norm": 0.8758005932407248, "learning_rate": 1.1698790112455538e-06, "loss": 0.1027, "step": 26889 }, { "epoch": 0.7844681720053679, "grad_norm": 0.8952556006070274, "learning_rate": 1.1695753421227606e-06, "loss": 0.1413, "step": 26890 }, { "epoch": 0.7844973452360114, "grad_norm": 0.8134807017722688, "learning_rate": 1.1692717071970844e-06, "loss": 0.1254, "step": 26891 }, { "epoch": 0.784526518466655, "grad_norm": 1.0174722168432515, "learning_rate": 1.1689681064712367e-06, "loss": 0.115, "step": 26892 }, { "epoch": 0.7845556916972986, "grad_norm": 0.9097203778879775, "learning_rate": 1.1686645399479278e-06, "loss": 0.1077, "step": 26893 }, { "epoch": 0.7845848649279421, "grad_norm": 0.7252747379861907, "learning_rate": 1.1683610076298658e-06, "loss": 0.0995, "step": 26894 }, { "epoch": 0.7846140381585857, "grad_norm": 2.0357071447954014, "learning_rate": 1.1680575095197634e-06, "loss": 0.1343, "step": 26895 }, { "epoch": 0.7846432113892292, "grad_norm": 1.3182450614391934, "learning_rate": 1.1677540456203285e-06, "loss": 0.1078, "step": 26896 }, { "epoch": 0.7846723846198728, "grad_norm": 0.864687794459096, "learning_rate": 1.167450615934268e-06, "loss": 0.1047, "step": 26897 }, { "epoch": 0.7847015578505163, "grad_norm": 0.847906256638758, "learning_rate": 1.1671472204642964e-06, "loss": 0.1189, "step": 26898 }, { "epoch": 0.78473073108116, "grad_norm": 0.8590265179007457, "learning_rate": 1.1668438592131194e-06, "loss": 0.1006, "step": 26899 }, { "epoch": 0.7847599043118035, "grad_norm": 1.0116670667599434, "learning_rate": 1.1665405321834439e-06, "loss": 0.1186, "step": 26900 }, { "epoch": 0.7847890775424471, "grad_norm": 0.7010234245668824, "learning_rate": 1.1662372393779809e-06, "loss": 0.1174, "step": 26901 }, { "epoch": 0.7848182507730906, "grad_norm": 0.9368354596407508, "learning_rate": 1.1659339807994364e-06, "loss": 0.1467, "step": 26902 }, { "epoch": 0.7848474240037342, "grad_norm": 0.771874980587234, "learning_rate": 1.165630756450517e-06, "loss": 0.1168, "step": 26903 }, { "epoch": 0.7848765972343777, "grad_norm": 0.7951679227400112, "learning_rate": 1.1653275663339308e-06, "loss": 0.1099, "step": 26904 }, { "epoch": 0.7849057704650213, "grad_norm": 0.8609451871037228, "learning_rate": 1.1650244104523862e-06, "loss": 0.1168, "step": 26905 }, { "epoch": 0.7849349436956649, "grad_norm": 0.9215836537814548, "learning_rate": 1.164721288808588e-06, "loss": 0.0979, "step": 26906 }, { "epoch": 0.7849641169263084, "grad_norm": 0.8879045100520424, "learning_rate": 1.1644182014052408e-06, "loss": 0.1019, "step": 26907 }, { "epoch": 0.784993290156952, "grad_norm": 0.9620173950761984, "learning_rate": 1.1641151482450541e-06, "loss": 0.1483, "step": 26908 }, { "epoch": 0.7850224633875955, "grad_norm": 0.8099233952877505, "learning_rate": 1.1638121293307302e-06, "loss": 0.0995, "step": 26909 }, { "epoch": 0.7850516366182391, "grad_norm": 0.9758814672664569, "learning_rate": 1.163509144664977e-06, "loss": 0.0979, "step": 26910 }, { "epoch": 0.7850808098488826, "grad_norm": 0.9139112724301753, "learning_rate": 1.1632061942504975e-06, "loss": 0.1155, "step": 26911 }, { "epoch": 0.7851099830795262, "grad_norm": 1.267470682177874, "learning_rate": 1.1629032780899978e-06, "loss": 0.1024, "step": 26912 }, { "epoch": 0.7851391563101698, "grad_norm": 0.9679952516979631, "learning_rate": 1.1626003961861821e-06, "loss": 0.0993, "step": 26913 }, { "epoch": 0.7851683295408134, "grad_norm": 0.7575683204419322, "learning_rate": 1.1622975485417526e-06, "loss": 0.1023, "step": 26914 }, { "epoch": 0.785197502771457, "grad_norm": 0.7378218391439108, "learning_rate": 1.1619947351594147e-06, "loss": 0.1123, "step": 26915 }, { "epoch": 0.7852266760021005, "grad_norm": 0.7338850072246711, "learning_rate": 1.1616919560418727e-06, "loss": 0.105, "step": 26916 }, { "epoch": 0.785255849232744, "grad_norm": 0.8676492666613993, "learning_rate": 1.1613892111918273e-06, "loss": 0.111, "step": 26917 }, { "epoch": 0.7852850224633876, "grad_norm": 1.1011054265315563, "learning_rate": 1.1610865006119838e-06, "loss": 0.1029, "step": 26918 }, { "epoch": 0.7853141956940312, "grad_norm": 0.7005316929548462, "learning_rate": 1.160783824305044e-06, "loss": 0.1152, "step": 26919 }, { "epoch": 0.7853433689246747, "grad_norm": 1.0391599316915296, "learning_rate": 1.1604811822737084e-06, "loss": 0.1308, "step": 26920 }, { "epoch": 0.7853725421553183, "grad_norm": 0.8165121760645124, "learning_rate": 1.1601785745206795e-06, "loss": 0.1019, "step": 26921 }, { "epoch": 0.7854017153859618, "grad_norm": 0.7787105521225467, "learning_rate": 1.1598760010486614e-06, "loss": 0.0917, "step": 26922 }, { "epoch": 0.7854308886166054, "grad_norm": 0.691787327300869, "learning_rate": 1.1595734618603543e-06, "loss": 0.1258, "step": 26923 }, { "epoch": 0.7854600618472489, "grad_norm": 0.982851786473919, "learning_rate": 1.1592709569584565e-06, "loss": 0.1177, "step": 26924 }, { "epoch": 0.7854892350778925, "grad_norm": 0.8147593573127733, "learning_rate": 1.1589684863456723e-06, "loss": 0.1159, "step": 26925 }, { "epoch": 0.7855184083085361, "grad_norm": 0.8159315026056807, "learning_rate": 1.1586660500247004e-06, "loss": 0.1244, "step": 26926 }, { "epoch": 0.7855475815391797, "grad_norm": 0.8041398174115109, "learning_rate": 1.1583636479982384e-06, "loss": 0.1345, "step": 26927 }, { "epoch": 0.7855767547698232, "grad_norm": 0.7807428745584111, "learning_rate": 1.1580612802689911e-06, "loss": 0.0908, "step": 26928 }, { "epoch": 0.7856059280004668, "grad_norm": 0.7520185543281153, "learning_rate": 1.157758946839656e-06, "loss": 0.1256, "step": 26929 }, { "epoch": 0.7856351012311104, "grad_norm": 0.7153975776300564, "learning_rate": 1.1574566477129302e-06, "loss": 0.1114, "step": 26930 }, { "epoch": 0.7856642744617539, "grad_norm": 0.8062988737351608, "learning_rate": 1.1571543828915155e-06, "loss": 0.1145, "step": 26931 }, { "epoch": 0.7856934476923975, "grad_norm": 1.0590870312759066, "learning_rate": 1.1568521523781095e-06, "loss": 0.0981, "step": 26932 }, { "epoch": 0.785722620923041, "grad_norm": 0.8514193187990563, "learning_rate": 1.1565499561754085e-06, "loss": 0.127, "step": 26933 }, { "epoch": 0.7857517941536846, "grad_norm": 0.895190943725986, "learning_rate": 1.1562477942861116e-06, "loss": 0.1207, "step": 26934 }, { "epoch": 0.7857809673843281, "grad_norm": 0.9450988864887407, "learning_rate": 1.1559456667129183e-06, "loss": 0.1045, "step": 26935 }, { "epoch": 0.7858101406149717, "grad_norm": 1.0362955785684302, "learning_rate": 1.1556435734585248e-06, "loss": 0.1157, "step": 26936 }, { "epoch": 0.7858393138456152, "grad_norm": 0.7935279419670792, "learning_rate": 1.1553415145256259e-06, "loss": 0.1101, "step": 26937 }, { "epoch": 0.7858684870762588, "grad_norm": 0.9216892335011704, "learning_rate": 1.155039489916922e-06, "loss": 0.1319, "step": 26938 }, { "epoch": 0.7858976603069023, "grad_norm": 0.8454616031196389, "learning_rate": 1.1547374996351063e-06, "loss": 0.0903, "step": 26939 }, { "epoch": 0.785926833537546, "grad_norm": 0.6737115247528297, "learning_rate": 1.1544355436828769e-06, "loss": 0.1245, "step": 26940 }, { "epoch": 0.7859560067681896, "grad_norm": 0.7819384496496615, "learning_rate": 1.1541336220629285e-06, "loss": 0.1044, "step": 26941 }, { "epoch": 0.7859851799988331, "grad_norm": 1.322076755898772, "learning_rate": 1.1538317347779583e-06, "loss": 0.103, "step": 26942 }, { "epoch": 0.7860143532294767, "grad_norm": 1.382774753900953, "learning_rate": 1.1535298818306595e-06, "loss": 0.1466, "step": 26943 }, { "epoch": 0.7860435264601202, "grad_norm": 0.7743865283279714, "learning_rate": 1.1532280632237269e-06, "loss": 0.1186, "step": 26944 }, { "epoch": 0.7860726996907638, "grad_norm": 0.9518823635543687, "learning_rate": 1.1529262789598554e-06, "loss": 0.1533, "step": 26945 }, { "epoch": 0.7861018729214073, "grad_norm": 0.7107481831528828, "learning_rate": 1.1526245290417415e-06, "loss": 0.0983, "step": 26946 }, { "epoch": 0.7861310461520509, "grad_norm": 1.0304808572182447, "learning_rate": 1.152322813472076e-06, "loss": 0.1048, "step": 26947 }, { "epoch": 0.7861602193826944, "grad_norm": 0.9304661820686831, "learning_rate": 1.1520211322535552e-06, "loss": 0.1142, "step": 26948 }, { "epoch": 0.786189392613338, "grad_norm": 0.9070036935799715, "learning_rate": 1.1517194853888713e-06, "loss": 0.1145, "step": 26949 }, { "epoch": 0.7862185658439815, "grad_norm": 0.8650367570286813, "learning_rate": 1.1514178728807151e-06, "loss": 0.1046, "step": 26950 }, { "epoch": 0.7862477390746251, "grad_norm": 0.7018217070502166, "learning_rate": 1.1511162947317822e-06, "loss": 0.1131, "step": 26951 }, { "epoch": 0.7862769123052686, "grad_norm": 0.7367283180653517, "learning_rate": 1.1508147509447653e-06, "loss": 0.1079, "step": 26952 }, { "epoch": 0.7863060855359122, "grad_norm": 0.8656438834038928, "learning_rate": 1.1505132415223552e-06, "loss": 0.1071, "step": 26953 }, { "epoch": 0.7863352587665559, "grad_norm": 0.8423230424025819, "learning_rate": 1.150211766467243e-06, "loss": 0.1095, "step": 26954 }, { "epoch": 0.7863644319971994, "grad_norm": 0.9235866939573659, "learning_rate": 1.1499103257821226e-06, "loss": 0.1265, "step": 26955 }, { "epoch": 0.786393605227843, "grad_norm": 0.7883921567853773, "learning_rate": 1.149608919469683e-06, "loss": 0.1215, "step": 26956 }, { "epoch": 0.7864227784584865, "grad_norm": 0.7621126622420391, "learning_rate": 1.1493075475326138e-06, "loss": 0.102, "step": 26957 }, { "epoch": 0.7864519516891301, "grad_norm": 0.7508340717348726, "learning_rate": 1.1490062099736098e-06, "loss": 0.1274, "step": 26958 }, { "epoch": 0.7864811249197736, "grad_norm": 1.2107192085602738, "learning_rate": 1.1487049067953592e-06, "loss": 0.1102, "step": 26959 }, { "epoch": 0.7865102981504172, "grad_norm": 0.882740292413829, "learning_rate": 1.1484036380005503e-06, "loss": 0.1143, "step": 26960 }, { "epoch": 0.7865394713810607, "grad_norm": 0.8018073132782089, "learning_rate": 1.1481024035918763e-06, "loss": 0.1205, "step": 26961 }, { "epoch": 0.7865686446117043, "grad_norm": 0.7865662270094895, "learning_rate": 1.1478012035720237e-06, "loss": 0.1422, "step": 26962 }, { "epoch": 0.7865978178423478, "grad_norm": 0.8155312475792762, "learning_rate": 1.1475000379436818e-06, "loss": 0.1238, "step": 26963 }, { "epoch": 0.7866269910729914, "grad_norm": 0.9796092648509211, "learning_rate": 1.147198906709539e-06, "loss": 0.0987, "step": 26964 }, { "epoch": 0.7866561643036349, "grad_norm": 1.0586389811067674, "learning_rate": 1.1468978098722866e-06, "loss": 0.1305, "step": 26965 }, { "epoch": 0.7866853375342785, "grad_norm": 0.7409045100530098, "learning_rate": 1.1465967474346106e-06, "loss": 0.0995, "step": 26966 }, { "epoch": 0.7867145107649222, "grad_norm": 0.7780086121140247, "learning_rate": 1.1462957193991975e-06, "loss": 0.0857, "step": 26967 }, { "epoch": 0.7867436839955657, "grad_norm": 0.837628663682243, "learning_rate": 1.1459947257687376e-06, "loss": 0.1051, "step": 26968 }, { "epoch": 0.7867728572262093, "grad_norm": 0.7236360158338214, "learning_rate": 1.1456937665459156e-06, "loss": 0.121, "step": 26969 }, { "epoch": 0.7868020304568528, "grad_norm": 1.070127536202674, "learning_rate": 1.1453928417334209e-06, "loss": 0.117, "step": 26970 }, { "epoch": 0.7868312036874964, "grad_norm": 0.7734379500031032, "learning_rate": 1.145091951333937e-06, "loss": 0.1167, "step": 26971 }, { "epoch": 0.7868603769181399, "grad_norm": 0.7271054501317741, "learning_rate": 1.144791095350154e-06, "loss": 0.1132, "step": 26972 }, { "epoch": 0.7868895501487835, "grad_norm": 0.7722909978875346, "learning_rate": 1.1444902737847553e-06, "loss": 0.1153, "step": 26973 }, { "epoch": 0.786918723379427, "grad_norm": 1.0529311870098763, "learning_rate": 1.1441894866404257e-06, "loss": 0.1247, "step": 26974 }, { "epoch": 0.7869478966100706, "grad_norm": 1.1310731684381354, "learning_rate": 1.1438887339198518e-06, "loss": 0.1246, "step": 26975 }, { "epoch": 0.7869770698407141, "grad_norm": 0.7147847864563611, "learning_rate": 1.1435880156257206e-06, "loss": 0.1233, "step": 26976 }, { "epoch": 0.7870062430713577, "grad_norm": 0.8413088562452513, "learning_rate": 1.143287331760713e-06, "loss": 0.1041, "step": 26977 }, { "epoch": 0.7870354163020012, "grad_norm": 0.9282174368199931, "learning_rate": 1.142986682327517e-06, "loss": 0.1276, "step": 26978 }, { "epoch": 0.7870645895326448, "grad_norm": 0.8712969838419217, "learning_rate": 1.1426860673288153e-06, "loss": 0.1028, "step": 26979 }, { "epoch": 0.7870937627632884, "grad_norm": 1.1435555554080257, "learning_rate": 1.14238548676729e-06, "loss": 0.1414, "step": 26980 }, { "epoch": 0.787122935993932, "grad_norm": 0.7130729393273794, "learning_rate": 1.1420849406456263e-06, "loss": 0.1013, "step": 26981 }, { "epoch": 0.7871521092245756, "grad_norm": 1.1305725242569322, "learning_rate": 1.1417844289665091e-06, "loss": 0.1216, "step": 26982 }, { "epoch": 0.7871812824552191, "grad_norm": 0.7932486868097797, "learning_rate": 1.1414839517326192e-06, "loss": 0.1122, "step": 26983 }, { "epoch": 0.7872104556858627, "grad_norm": 0.8054079701445868, "learning_rate": 1.1411835089466377e-06, "loss": 0.1119, "step": 26984 }, { "epoch": 0.7872396289165062, "grad_norm": 0.9864994701651784, "learning_rate": 1.1408831006112504e-06, "loss": 0.1151, "step": 26985 }, { "epoch": 0.7872688021471498, "grad_norm": 0.8039319204841343, "learning_rate": 1.1405827267291376e-06, "loss": 0.1227, "step": 26986 }, { "epoch": 0.7872979753777933, "grad_norm": 1.0690657532685257, "learning_rate": 1.1402823873029778e-06, "loss": 0.1473, "step": 26987 }, { "epoch": 0.7873271486084369, "grad_norm": 2.349516417868194, "learning_rate": 1.1399820823354584e-06, "loss": 0.118, "step": 26988 }, { "epoch": 0.7873563218390804, "grad_norm": 0.9688472133294002, "learning_rate": 1.139681811829258e-06, "loss": 0.1093, "step": 26989 }, { "epoch": 0.787385495069724, "grad_norm": 0.8275472845456364, "learning_rate": 1.1393815757870546e-06, "loss": 0.1307, "step": 26990 }, { "epoch": 0.7874146683003675, "grad_norm": 0.9310817924852001, "learning_rate": 1.1390813742115332e-06, "loss": 0.113, "step": 26991 }, { "epoch": 0.7874438415310111, "grad_norm": 0.9557097175977453, "learning_rate": 1.1387812071053706e-06, "loss": 0.1116, "step": 26992 }, { "epoch": 0.7874730147616547, "grad_norm": 0.8246996114442896, "learning_rate": 1.1384810744712471e-06, "loss": 0.105, "step": 26993 }, { "epoch": 0.7875021879922983, "grad_norm": 0.8743406944167271, "learning_rate": 1.1381809763118424e-06, "loss": 0.1361, "step": 26994 }, { "epoch": 0.7875313612229419, "grad_norm": 1.1043558299341243, "learning_rate": 1.1378809126298373e-06, "loss": 0.1231, "step": 26995 }, { "epoch": 0.7875605344535854, "grad_norm": 1.0758464004068735, "learning_rate": 1.1375808834279095e-06, "loss": 0.1121, "step": 26996 }, { "epoch": 0.787589707684229, "grad_norm": 0.931716048355612, "learning_rate": 1.137280888708736e-06, "loss": 0.1036, "step": 26997 }, { "epoch": 0.7876188809148725, "grad_norm": 0.7779312814316556, "learning_rate": 1.1369809284749982e-06, "loss": 0.0965, "step": 26998 }, { "epoch": 0.7876480541455161, "grad_norm": 1.0376133259115237, "learning_rate": 1.1366810027293711e-06, "loss": 0.1458, "step": 26999 }, { "epoch": 0.7876772273761596, "grad_norm": 1.1522640271177464, "learning_rate": 1.1363811114745354e-06, "loss": 0.1313, "step": 27000 }, { "epoch": 0.7877064006068032, "grad_norm": 0.8182592835601398, "learning_rate": 1.1360812547131655e-06, "loss": 0.108, "step": 27001 }, { "epoch": 0.7877355738374467, "grad_norm": 1.152048894714544, "learning_rate": 1.135781432447941e-06, "loss": 0.1443, "step": 27002 }, { "epoch": 0.7877647470680903, "grad_norm": 0.884579195459697, "learning_rate": 1.135481644681537e-06, "loss": 0.1191, "step": 27003 }, { "epoch": 0.7877939202987339, "grad_norm": 0.7527828077355831, "learning_rate": 1.135181891416629e-06, "loss": 0.1037, "step": 27004 }, { "epoch": 0.7878230935293774, "grad_norm": 0.8200120988297046, "learning_rate": 1.1348821726558951e-06, "loss": 0.1116, "step": 27005 }, { "epoch": 0.787852266760021, "grad_norm": 0.8972028511074547, "learning_rate": 1.1345824884020113e-06, "loss": 0.1083, "step": 27006 }, { "epoch": 0.7878814399906645, "grad_norm": 0.855428936458742, "learning_rate": 1.134282838657651e-06, "loss": 0.1122, "step": 27007 }, { "epoch": 0.7879106132213082, "grad_norm": 0.7133366751370145, "learning_rate": 1.133983223425492e-06, "loss": 0.1092, "step": 27008 }, { "epoch": 0.7879397864519517, "grad_norm": 0.8798477211755299, "learning_rate": 1.1336836427082083e-06, "loss": 0.1307, "step": 27009 }, { "epoch": 0.7879689596825953, "grad_norm": 0.8665858198878355, "learning_rate": 1.1333840965084725e-06, "loss": 0.1097, "step": 27010 }, { "epoch": 0.7879981329132388, "grad_norm": 0.7480220082472291, "learning_rate": 1.1330845848289606e-06, "loss": 0.0864, "step": 27011 }, { "epoch": 0.7880273061438824, "grad_norm": 0.7079429673161648, "learning_rate": 1.1327851076723473e-06, "loss": 0.1112, "step": 27012 }, { "epoch": 0.7880564793745259, "grad_norm": 1.383172043736597, "learning_rate": 1.1324856650413057e-06, "loss": 0.1049, "step": 27013 }, { "epoch": 0.7880856526051695, "grad_norm": 0.8373019806538111, "learning_rate": 1.132186256938508e-06, "loss": 0.1247, "step": 27014 }, { "epoch": 0.788114825835813, "grad_norm": 0.771620531410981, "learning_rate": 1.1318868833666286e-06, "loss": 0.1167, "step": 27015 }, { "epoch": 0.7881439990664566, "grad_norm": 0.9316198499289414, "learning_rate": 1.1315875443283396e-06, "loss": 0.1064, "step": 27016 }, { "epoch": 0.7881731722971002, "grad_norm": 0.8558932039206868, "learning_rate": 1.1312882398263114e-06, "loss": 0.1145, "step": 27017 }, { "epoch": 0.7882023455277437, "grad_norm": 0.8262050949069694, "learning_rate": 1.130988969863221e-06, "loss": 0.0945, "step": 27018 }, { "epoch": 0.7882315187583873, "grad_norm": 1.0459089058686855, "learning_rate": 1.1306897344417373e-06, "loss": 0.1219, "step": 27019 }, { "epoch": 0.7882606919890308, "grad_norm": 0.9251468084913778, "learning_rate": 1.1303905335645304e-06, "loss": 0.098, "step": 27020 }, { "epoch": 0.7882898652196745, "grad_norm": 0.7679005529198607, "learning_rate": 1.1300913672342744e-06, "loss": 0.1029, "step": 27021 }, { "epoch": 0.788319038450318, "grad_norm": 0.86234896637721, "learning_rate": 1.1297922354536396e-06, "loss": 0.1487, "step": 27022 }, { "epoch": 0.7883482116809616, "grad_norm": 0.6475395648456037, "learning_rate": 1.1294931382252932e-06, "loss": 0.1192, "step": 27023 }, { "epoch": 0.7883773849116051, "grad_norm": 0.962154914870367, "learning_rate": 1.1291940755519092e-06, "loss": 0.1038, "step": 27024 }, { "epoch": 0.7884065581422487, "grad_norm": 0.8348365296359087, "learning_rate": 1.128895047436157e-06, "loss": 0.1077, "step": 27025 }, { "epoch": 0.7884357313728922, "grad_norm": 0.7840105408076355, "learning_rate": 1.1285960538807066e-06, "loss": 0.1187, "step": 27026 }, { "epoch": 0.7884649046035358, "grad_norm": 0.8347802697343071, "learning_rate": 1.1282970948882243e-06, "loss": 0.1142, "step": 27027 }, { "epoch": 0.7884940778341794, "grad_norm": 0.8759492568057259, "learning_rate": 1.1279981704613828e-06, "loss": 0.1143, "step": 27028 }, { "epoch": 0.7885232510648229, "grad_norm": 0.8110954759711658, "learning_rate": 1.1276992806028485e-06, "loss": 0.1071, "step": 27029 }, { "epoch": 0.7885524242954665, "grad_norm": 0.7398886339637835, "learning_rate": 1.1274004253152914e-06, "loss": 0.1124, "step": 27030 }, { "epoch": 0.78858159752611, "grad_norm": 0.7910840109521887, "learning_rate": 1.1271016046013778e-06, "loss": 0.116, "step": 27031 }, { "epoch": 0.7886107707567536, "grad_norm": 1.0890123327402674, "learning_rate": 1.126802818463778e-06, "loss": 0.1322, "step": 27032 }, { "epoch": 0.7886399439873971, "grad_norm": 0.6688060214778, "learning_rate": 1.1265040669051581e-06, "loss": 0.1241, "step": 27033 }, { "epoch": 0.7886691172180407, "grad_norm": 0.7722276746876664, "learning_rate": 1.1262053499281833e-06, "loss": 0.1193, "step": 27034 }, { "epoch": 0.7886982904486843, "grad_norm": 0.7518688019620501, "learning_rate": 1.1259066675355224e-06, "loss": 0.1218, "step": 27035 }, { "epoch": 0.7887274636793279, "grad_norm": 0.7614123499987674, "learning_rate": 1.1256080197298437e-06, "loss": 0.1408, "step": 27036 }, { "epoch": 0.7887566369099714, "grad_norm": 0.7663154896064904, "learning_rate": 1.1253094065138105e-06, "loss": 0.1338, "step": 27037 }, { "epoch": 0.788785810140615, "grad_norm": 0.8704325046926882, "learning_rate": 1.1250108278900906e-06, "loss": 0.132, "step": 27038 }, { "epoch": 0.7888149833712585, "grad_norm": 0.7366138270196971, "learning_rate": 1.12471228386135e-06, "loss": 0.1017, "step": 27039 }, { "epoch": 0.7888441566019021, "grad_norm": 0.7670380610852506, "learning_rate": 1.1244137744302508e-06, "loss": 0.1171, "step": 27040 }, { "epoch": 0.7888733298325457, "grad_norm": 0.845280063715469, "learning_rate": 1.1241152995994603e-06, "loss": 0.0941, "step": 27041 }, { "epoch": 0.7889025030631892, "grad_norm": 0.7234602302032337, "learning_rate": 1.1238168593716448e-06, "loss": 0.1026, "step": 27042 }, { "epoch": 0.7889316762938328, "grad_norm": 0.755873136293606, "learning_rate": 1.123518453749467e-06, "loss": 0.1067, "step": 27043 }, { "epoch": 0.7889608495244763, "grad_norm": 0.7218157111755862, "learning_rate": 1.12322008273559e-06, "loss": 0.1141, "step": 27044 }, { "epoch": 0.7889900227551199, "grad_norm": 0.9889466128783103, "learning_rate": 1.1229217463326798e-06, "loss": 0.1219, "step": 27045 }, { "epoch": 0.7890191959857634, "grad_norm": 0.7903896519892492, "learning_rate": 1.1226234445433987e-06, "loss": 0.1123, "step": 27046 }, { "epoch": 0.789048369216407, "grad_norm": 0.9271489373101308, "learning_rate": 1.1223251773704069e-06, "loss": 0.1025, "step": 27047 }, { "epoch": 0.7890775424470506, "grad_norm": 0.7319543777838504, "learning_rate": 1.1220269448163735e-06, "loss": 0.1173, "step": 27048 }, { "epoch": 0.7891067156776942, "grad_norm": 0.7077700265266096, "learning_rate": 1.121728746883957e-06, "loss": 0.1124, "step": 27049 }, { "epoch": 0.7891358889083377, "grad_norm": 1.1161121398535105, "learning_rate": 1.1214305835758194e-06, "loss": 0.1071, "step": 27050 }, { "epoch": 0.7891650621389813, "grad_norm": 0.7895281541789846, "learning_rate": 1.1211324548946255e-06, "loss": 0.1144, "step": 27051 }, { "epoch": 0.7891942353696249, "grad_norm": 0.7774085888223303, "learning_rate": 1.1208343608430344e-06, "loss": 0.1022, "step": 27052 }, { "epoch": 0.7892234086002684, "grad_norm": 0.8718471687330965, "learning_rate": 1.1205363014237075e-06, "loss": 0.1278, "step": 27053 }, { "epoch": 0.789252581830912, "grad_norm": 0.8811875020140885, "learning_rate": 1.1202382766393056e-06, "loss": 0.1089, "step": 27054 }, { "epoch": 0.7892817550615555, "grad_norm": 0.8360053810229248, "learning_rate": 1.119940286492492e-06, "loss": 0.1158, "step": 27055 }, { "epoch": 0.7893109282921991, "grad_norm": 0.9734365039401242, "learning_rate": 1.119642330985925e-06, "loss": 0.1254, "step": 27056 }, { "epoch": 0.7893401015228426, "grad_norm": 0.868937374812772, "learning_rate": 1.1193444101222639e-06, "loss": 0.1349, "step": 27057 }, { "epoch": 0.7893692747534862, "grad_norm": 0.9714343484952161, "learning_rate": 1.119046523904171e-06, "loss": 0.1136, "step": 27058 }, { "epoch": 0.7893984479841297, "grad_norm": 1.0384370064192667, "learning_rate": 1.1187486723343027e-06, "loss": 0.1113, "step": 27059 }, { "epoch": 0.7894276212147733, "grad_norm": 0.8904081157750814, "learning_rate": 1.1184508554153207e-06, "loss": 0.1324, "step": 27060 }, { "epoch": 0.7894567944454168, "grad_norm": 0.6980593416909135, "learning_rate": 1.118153073149882e-06, "loss": 0.1195, "step": 27061 }, { "epoch": 0.7894859676760605, "grad_norm": 0.8183181824070184, "learning_rate": 1.1178553255406471e-06, "loss": 0.1266, "step": 27062 }, { "epoch": 0.789515140906704, "grad_norm": 0.7674492847616166, "learning_rate": 1.1175576125902732e-06, "loss": 0.1235, "step": 27063 }, { "epoch": 0.7895443141373476, "grad_norm": 1.0686941219648478, "learning_rate": 1.1172599343014167e-06, "loss": 0.1234, "step": 27064 }, { "epoch": 0.7895734873679912, "grad_norm": 0.9257269730998541, "learning_rate": 1.1169622906767368e-06, "loss": 0.0975, "step": 27065 }, { "epoch": 0.7896026605986347, "grad_norm": 0.7989338259893596, "learning_rate": 1.116664681718892e-06, "loss": 0.1322, "step": 27066 }, { "epoch": 0.7896318338292783, "grad_norm": 0.9501551133211303, "learning_rate": 1.1163671074305365e-06, "loss": 0.0992, "step": 27067 }, { "epoch": 0.7896610070599218, "grad_norm": 0.8220297913394656, "learning_rate": 1.1160695678143297e-06, "loss": 0.0976, "step": 27068 }, { "epoch": 0.7896901802905654, "grad_norm": 0.7420781942638193, "learning_rate": 1.1157720628729264e-06, "loss": 0.0971, "step": 27069 }, { "epoch": 0.7897193535212089, "grad_norm": 0.7748818413562756, "learning_rate": 1.1154745926089816e-06, "loss": 0.1144, "step": 27070 }, { "epoch": 0.7897485267518525, "grad_norm": 0.7086597366847097, "learning_rate": 1.1151771570251524e-06, "loss": 0.0945, "step": 27071 }, { "epoch": 0.789777699982496, "grad_norm": 0.9554744825537503, "learning_rate": 1.1148797561240954e-06, "loss": 0.1112, "step": 27072 }, { "epoch": 0.7898068732131396, "grad_norm": 0.8266113022934952, "learning_rate": 1.1145823899084645e-06, "loss": 0.1034, "step": 27073 }, { "epoch": 0.7898360464437831, "grad_norm": 0.8992658916797747, "learning_rate": 1.1142850583809133e-06, "loss": 0.1184, "step": 27074 }, { "epoch": 0.7898652196744268, "grad_norm": 1.061094711579638, "learning_rate": 1.1139877615440993e-06, "loss": 0.1081, "step": 27075 }, { "epoch": 0.7898943929050704, "grad_norm": 0.7539182591125799, "learning_rate": 1.1136904994006743e-06, "loss": 0.1061, "step": 27076 }, { "epoch": 0.7899235661357139, "grad_norm": 0.7331230192797517, "learning_rate": 1.1133932719532903e-06, "loss": 0.1036, "step": 27077 }, { "epoch": 0.7899527393663575, "grad_norm": 0.8883405371673604, "learning_rate": 1.1130960792046057e-06, "loss": 0.1173, "step": 27078 }, { "epoch": 0.789981912597001, "grad_norm": 0.7454993019681683, "learning_rate": 1.1127989211572715e-06, "loss": 0.1142, "step": 27079 }, { "epoch": 0.7900110858276446, "grad_norm": 1.1582832738734559, "learning_rate": 1.1125017978139396e-06, "loss": 0.1088, "step": 27080 }, { "epoch": 0.7900402590582881, "grad_norm": 0.8510141194896007, "learning_rate": 1.1122047091772647e-06, "loss": 0.1037, "step": 27081 }, { "epoch": 0.7900694322889317, "grad_norm": 0.787896618425818, "learning_rate": 1.111907655249898e-06, "loss": 0.0945, "step": 27082 }, { "epoch": 0.7900986055195752, "grad_norm": 0.7847387202622397, "learning_rate": 1.1116106360344909e-06, "loss": 0.1298, "step": 27083 }, { "epoch": 0.7901277787502188, "grad_norm": 0.8143617847944276, "learning_rate": 1.1113136515336953e-06, "loss": 0.1479, "step": 27084 }, { "epoch": 0.7901569519808623, "grad_norm": 0.7919774903233028, "learning_rate": 1.1110167017501643e-06, "loss": 0.0974, "step": 27085 }, { "epoch": 0.7901861252115059, "grad_norm": 0.8546097168471126, "learning_rate": 1.1107197866865482e-06, "loss": 0.1096, "step": 27086 }, { "epoch": 0.7902152984421494, "grad_norm": 0.8094623375714232, "learning_rate": 1.1104229063454957e-06, "loss": 0.1419, "step": 27087 }, { "epoch": 0.790244471672793, "grad_norm": 1.1065230427955302, "learning_rate": 1.1101260607296588e-06, "loss": 0.1396, "step": 27088 }, { "epoch": 0.7902736449034367, "grad_norm": 0.7493205945004073, "learning_rate": 1.1098292498416895e-06, "loss": 0.0995, "step": 27089 }, { "epoch": 0.7903028181340802, "grad_norm": 0.832246248518706, "learning_rate": 1.109532473684236e-06, "loss": 0.1119, "step": 27090 }, { "epoch": 0.7903319913647238, "grad_norm": 0.8641004671671787, "learning_rate": 1.1092357322599467e-06, "loss": 0.1259, "step": 27091 }, { "epoch": 0.7903611645953673, "grad_norm": 0.8880996219591738, "learning_rate": 1.1089390255714733e-06, "loss": 0.1203, "step": 27092 }, { "epoch": 0.7903903378260109, "grad_norm": 0.9002697296503198, "learning_rate": 1.108642353621463e-06, "loss": 0.1159, "step": 27093 }, { "epoch": 0.7904195110566544, "grad_norm": 0.7692330428831918, "learning_rate": 1.108345716412562e-06, "loss": 0.1095, "step": 27094 }, { "epoch": 0.790448684287298, "grad_norm": 0.8019301129088359, "learning_rate": 1.1080491139474248e-06, "loss": 0.1178, "step": 27095 }, { "epoch": 0.7904778575179415, "grad_norm": 0.72149071079873, "learning_rate": 1.107752546228696e-06, "loss": 0.0918, "step": 27096 }, { "epoch": 0.7905070307485851, "grad_norm": 0.9390267499518805, "learning_rate": 1.1074560132590218e-06, "loss": 0.119, "step": 27097 }, { "epoch": 0.7905362039792286, "grad_norm": 0.9875472469030105, "learning_rate": 1.1071595150410518e-06, "loss": 0.1068, "step": 27098 }, { "epoch": 0.7905653772098722, "grad_norm": 0.9522414094922597, "learning_rate": 1.1068630515774332e-06, "loss": 0.132, "step": 27099 }, { "epoch": 0.7905945504405157, "grad_norm": 0.822154335648392, "learning_rate": 1.10656662287081e-06, "loss": 0.1058, "step": 27100 }, { "epoch": 0.7906237236711593, "grad_norm": 0.7328819702652183, "learning_rate": 1.1062702289238308e-06, "loss": 0.1111, "step": 27101 }, { "epoch": 0.790652896901803, "grad_norm": 0.92680488768724, "learning_rate": 1.105973869739143e-06, "loss": 0.1417, "step": 27102 }, { "epoch": 0.7906820701324465, "grad_norm": 0.7479262159317424, "learning_rate": 1.1056775453193907e-06, "loss": 0.0881, "step": 27103 }, { "epoch": 0.7907112433630901, "grad_norm": 1.290706270914707, "learning_rate": 1.1053812556672183e-06, "loss": 0.1094, "step": 27104 }, { "epoch": 0.7907404165937336, "grad_norm": 0.8930020032148578, "learning_rate": 1.1050850007852737e-06, "loss": 0.1224, "step": 27105 }, { "epoch": 0.7907695898243772, "grad_norm": 1.2154004616883283, "learning_rate": 1.1047887806761993e-06, "loss": 0.1273, "step": 27106 }, { "epoch": 0.7907987630550207, "grad_norm": 0.9299056012719995, "learning_rate": 1.1044925953426406e-06, "loss": 0.112, "step": 27107 }, { "epoch": 0.7908279362856643, "grad_norm": 0.8292919192985915, "learning_rate": 1.1041964447872434e-06, "loss": 0.1214, "step": 27108 }, { "epoch": 0.7908571095163078, "grad_norm": 0.937772791906975, "learning_rate": 1.10390032901265e-06, "loss": 0.1178, "step": 27109 }, { "epoch": 0.7908862827469514, "grad_norm": 0.9054766946410139, "learning_rate": 1.1036042480215037e-06, "loss": 0.1165, "step": 27110 }, { "epoch": 0.7909154559775949, "grad_norm": 0.8453587046069618, "learning_rate": 1.1033082018164492e-06, "loss": 0.152, "step": 27111 }, { "epoch": 0.7909446292082385, "grad_norm": 0.7023114841040197, "learning_rate": 1.1030121904001278e-06, "loss": 0.0991, "step": 27112 }, { "epoch": 0.790973802438882, "grad_norm": 0.9327751718824633, "learning_rate": 1.1027162137751852e-06, "loss": 0.1098, "step": 27113 }, { "epoch": 0.7910029756695256, "grad_norm": 0.7422887851770696, "learning_rate": 1.1024202719442596e-06, "loss": 0.0862, "step": 27114 }, { "epoch": 0.7910321489001692, "grad_norm": 0.7049014329909208, "learning_rate": 1.1021243649099972e-06, "loss": 0.1268, "step": 27115 }, { "epoch": 0.7910613221308128, "grad_norm": 0.8604393699555738, "learning_rate": 1.1018284926750378e-06, "loss": 0.1174, "step": 27116 }, { "epoch": 0.7910904953614564, "grad_norm": 0.9837231514686511, "learning_rate": 1.1015326552420218e-06, "loss": 0.1186, "step": 27117 }, { "epoch": 0.7911196685920999, "grad_norm": 0.7758830117983704, "learning_rate": 1.101236852613592e-06, "loss": 0.1047, "step": 27118 }, { "epoch": 0.7911488418227435, "grad_norm": 0.8812448555955511, "learning_rate": 1.1009410847923897e-06, "loss": 0.1124, "step": 27119 }, { "epoch": 0.791178015053387, "grad_norm": 0.8442310245177006, "learning_rate": 1.100645351781055e-06, "loss": 0.112, "step": 27120 }, { "epoch": 0.7912071882840306, "grad_norm": 0.7845106073583007, "learning_rate": 1.1003496535822262e-06, "loss": 0.0901, "step": 27121 }, { "epoch": 0.7912363615146741, "grad_norm": 0.7196121650906268, "learning_rate": 1.1000539901985458e-06, "loss": 0.1264, "step": 27122 }, { "epoch": 0.7912655347453177, "grad_norm": 0.7425485908378817, "learning_rate": 1.099758361632653e-06, "loss": 0.1078, "step": 27123 }, { "epoch": 0.7912947079759612, "grad_norm": 0.7759085616538031, "learning_rate": 1.0994627678871833e-06, "loss": 0.0996, "step": 27124 }, { "epoch": 0.7913238812066048, "grad_norm": 0.7586204681365918, "learning_rate": 1.0991672089647814e-06, "loss": 0.1002, "step": 27125 }, { "epoch": 0.7913530544372483, "grad_norm": 1.8345522764639859, "learning_rate": 1.0988716848680842e-06, "loss": 0.1019, "step": 27126 }, { "epoch": 0.7913822276678919, "grad_norm": 0.8190523113213574, "learning_rate": 1.0985761955997276e-06, "loss": 0.0987, "step": 27127 }, { "epoch": 0.7914114008985355, "grad_norm": 0.8617946805400212, "learning_rate": 1.0982807411623526e-06, "loss": 0.1144, "step": 27128 }, { "epoch": 0.7914405741291791, "grad_norm": 1.2286630752846879, "learning_rate": 1.0979853215585957e-06, "loss": 0.1042, "step": 27129 }, { "epoch": 0.7914697473598227, "grad_norm": 0.9680502916427776, "learning_rate": 1.0976899367910932e-06, "loss": 0.1325, "step": 27130 }, { "epoch": 0.7914989205904662, "grad_norm": 0.955072518187602, "learning_rate": 1.097394586862483e-06, "loss": 0.1125, "step": 27131 }, { "epoch": 0.7915280938211098, "grad_norm": 0.7425040135781064, "learning_rate": 1.0970992717754043e-06, "loss": 0.1068, "step": 27132 }, { "epoch": 0.7915572670517533, "grad_norm": 0.9104003460204163, "learning_rate": 1.0968039915324913e-06, "loss": 0.115, "step": 27133 }, { "epoch": 0.7915864402823969, "grad_norm": 0.8801343256956771, "learning_rate": 1.0965087461363788e-06, "loss": 0.1315, "step": 27134 }, { "epoch": 0.7916156135130404, "grad_norm": 0.9456310284348848, "learning_rate": 1.0962135355897063e-06, "loss": 0.1108, "step": 27135 }, { "epoch": 0.791644786743684, "grad_norm": 0.6906914642583293, "learning_rate": 1.0959183598951056e-06, "loss": 0.1005, "step": 27136 }, { "epoch": 0.7916739599743275, "grad_norm": 0.9960876587345895, "learning_rate": 1.095623219055214e-06, "loss": 0.1009, "step": 27137 }, { "epoch": 0.7917031332049711, "grad_norm": 0.908515226563882, "learning_rate": 1.095328113072668e-06, "loss": 0.1129, "step": 27138 }, { "epoch": 0.7917323064356147, "grad_norm": 0.771092521850201, "learning_rate": 1.0950330419501003e-06, "loss": 0.1038, "step": 27139 }, { "epoch": 0.7917614796662582, "grad_norm": 0.7236073233684125, "learning_rate": 1.0947380056901436e-06, "loss": 0.105, "step": 27140 }, { "epoch": 0.7917906528969018, "grad_norm": 0.8497883082548906, "learning_rate": 1.0944430042954358e-06, "loss": 0.1182, "step": 27141 }, { "epoch": 0.7918198261275453, "grad_norm": 0.7610705951590233, "learning_rate": 1.0941480377686065e-06, "loss": 0.0972, "step": 27142 }, { "epoch": 0.791848999358189, "grad_norm": 0.72110363440369, "learning_rate": 1.0938531061122926e-06, "loss": 0.1314, "step": 27143 }, { "epoch": 0.7918781725888325, "grad_norm": 0.807041227237705, "learning_rate": 1.0935582093291247e-06, "loss": 0.1338, "step": 27144 }, { "epoch": 0.7919073458194761, "grad_norm": 1.0550105281370603, "learning_rate": 1.0932633474217374e-06, "loss": 0.1163, "step": 27145 }, { "epoch": 0.7919365190501196, "grad_norm": 0.8850146050823224, "learning_rate": 1.0929685203927625e-06, "loss": 0.1513, "step": 27146 }, { "epoch": 0.7919656922807632, "grad_norm": 0.6960212464952853, "learning_rate": 1.0926737282448308e-06, "loss": 0.0995, "step": 27147 }, { "epoch": 0.7919948655114067, "grad_norm": 0.7826302176495409, "learning_rate": 1.0923789709805754e-06, "loss": 0.1196, "step": 27148 }, { "epoch": 0.7920240387420503, "grad_norm": 0.8804146177427541, "learning_rate": 1.092084248602629e-06, "loss": 0.0996, "step": 27149 }, { "epoch": 0.7920532119726938, "grad_norm": 0.7310782519951465, "learning_rate": 1.0917895611136214e-06, "loss": 0.1037, "step": 27150 }, { "epoch": 0.7920823852033374, "grad_norm": 0.9999875152982066, "learning_rate": 1.0914949085161819e-06, "loss": 0.1093, "step": 27151 }, { "epoch": 0.792111558433981, "grad_norm": 0.9923710383217718, "learning_rate": 1.091200290812945e-06, "loss": 0.1355, "step": 27152 }, { "epoch": 0.7921407316646245, "grad_norm": 0.9491778541297881, "learning_rate": 1.0909057080065382e-06, "loss": 0.1365, "step": 27153 }, { "epoch": 0.7921699048952681, "grad_norm": 0.6807099827568043, "learning_rate": 1.0906111600995895e-06, "loss": 0.0983, "step": 27154 }, { "epoch": 0.7921990781259116, "grad_norm": 0.8567725912593552, "learning_rate": 1.090316647094734e-06, "loss": 0.1187, "step": 27155 }, { "epoch": 0.7922282513565553, "grad_norm": 1.108805960470962, "learning_rate": 1.0900221689945978e-06, "loss": 0.1172, "step": 27156 }, { "epoch": 0.7922574245871988, "grad_norm": 0.8158151823512337, "learning_rate": 1.089727725801809e-06, "loss": 0.1112, "step": 27157 }, { "epoch": 0.7922865978178424, "grad_norm": 1.0486515651120698, "learning_rate": 1.0894333175189993e-06, "loss": 0.1294, "step": 27158 }, { "epoch": 0.7923157710484859, "grad_norm": 0.824966210310151, "learning_rate": 1.0891389441487954e-06, "loss": 0.1058, "step": 27159 }, { "epoch": 0.7923449442791295, "grad_norm": 0.9263257222914655, "learning_rate": 1.088844605693824e-06, "loss": 0.1264, "step": 27160 }, { "epoch": 0.792374117509773, "grad_norm": 1.0945050618072207, "learning_rate": 1.088550302156714e-06, "loss": 0.0885, "step": 27161 }, { "epoch": 0.7924032907404166, "grad_norm": 1.3830758417720495, "learning_rate": 1.0882560335400943e-06, "loss": 0.1211, "step": 27162 }, { "epoch": 0.7924324639710602, "grad_norm": 1.211255424690409, "learning_rate": 1.0879617998465912e-06, "loss": 0.1079, "step": 27163 }, { "epoch": 0.7924616372017037, "grad_norm": 0.8134082408658129, "learning_rate": 1.0876676010788307e-06, "loss": 0.0924, "step": 27164 }, { "epoch": 0.7924908104323473, "grad_norm": 0.889315860492122, "learning_rate": 1.0873734372394402e-06, "loss": 0.1309, "step": 27165 }, { "epoch": 0.7925199836629908, "grad_norm": 1.2094765568061059, "learning_rate": 1.0870793083310449e-06, "loss": 0.1157, "step": 27166 }, { "epoch": 0.7925491568936344, "grad_norm": 1.3901104998579217, "learning_rate": 1.0867852143562712e-06, "loss": 0.1479, "step": 27167 }, { "epoch": 0.7925783301242779, "grad_norm": 0.9522795647766157, "learning_rate": 1.0864911553177459e-06, "loss": 0.1106, "step": 27168 }, { "epoch": 0.7926075033549215, "grad_norm": 0.9193753418996613, "learning_rate": 1.0861971312180942e-06, "loss": 0.1218, "step": 27169 }, { "epoch": 0.7926366765855651, "grad_norm": 1.0362991357964215, "learning_rate": 1.085903142059938e-06, "loss": 0.1312, "step": 27170 }, { "epoch": 0.7926658498162087, "grad_norm": 1.1401714533665626, "learning_rate": 1.0856091878459064e-06, "loss": 0.1189, "step": 27171 }, { "epoch": 0.7926950230468522, "grad_norm": 0.917302415006295, "learning_rate": 1.0853152685786196e-06, "loss": 0.0886, "step": 27172 }, { "epoch": 0.7927241962774958, "grad_norm": 1.2631958799446088, "learning_rate": 1.085021384260705e-06, "loss": 0.1123, "step": 27173 }, { "epoch": 0.7927533695081393, "grad_norm": 0.7993442809384943, "learning_rate": 1.0847275348947833e-06, "loss": 0.1209, "step": 27174 }, { "epoch": 0.7927825427387829, "grad_norm": 1.187448320820712, "learning_rate": 1.0844337204834814e-06, "loss": 0.1272, "step": 27175 }, { "epoch": 0.7928117159694265, "grad_norm": 0.8773412349222718, "learning_rate": 1.08413994102942e-06, "loss": 0.1402, "step": 27176 }, { "epoch": 0.79284088920007, "grad_norm": 0.683838764971712, "learning_rate": 1.0838461965352215e-06, "loss": 0.1006, "step": 27177 }, { "epoch": 0.7928700624307136, "grad_norm": 0.8919481410247305, "learning_rate": 1.083552487003509e-06, "loss": 0.0994, "step": 27178 }, { "epoch": 0.7928992356613571, "grad_norm": 0.7318332943450829, "learning_rate": 1.083258812436907e-06, "loss": 0.0918, "step": 27179 }, { "epoch": 0.7929284088920007, "grad_norm": 0.8147479444203601, "learning_rate": 1.0829651728380346e-06, "loss": 0.1257, "step": 27180 }, { "epoch": 0.7929575821226442, "grad_norm": 1.0957109828739, "learning_rate": 1.082671568209513e-06, "loss": 0.1198, "step": 27181 }, { "epoch": 0.7929867553532878, "grad_norm": 0.9268456476495067, "learning_rate": 1.0823779985539657e-06, "loss": 0.0927, "step": 27182 }, { "epoch": 0.7930159285839314, "grad_norm": 0.937669862032185, "learning_rate": 1.0820844638740125e-06, "loss": 0.1095, "step": 27183 }, { "epoch": 0.793045101814575, "grad_norm": 0.8994880085158811, "learning_rate": 1.0817909641722713e-06, "loss": 0.1105, "step": 27184 }, { "epoch": 0.7930742750452185, "grad_norm": 0.8368374731245064, "learning_rate": 1.0814974994513672e-06, "loss": 0.1043, "step": 27185 }, { "epoch": 0.7931034482758621, "grad_norm": 0.7875018884126366, "learning_rate": 1.0812040697139187e-06, "loss": 0.1055, "step": 27186 }, { "epoch": 0.7931326215065057, "grad_norm": 0.9334903899768735, "learning_rate": 1.0809106749625431e-06, "loss": 0.1227, "step": 27187 }, { "epoch": 0.7931617947371492, "grad_norm": 1.1329781338344356, "learning_rate": 1.0806173151998628e-06, "loss": 0.1147, "step": 27188 }, { "epoch": 0.7931909679677928, "grad_norm": 1.4054346205322619, "learning_rate": 1.0803239904284952e-06, "loss": 0.1172, "step": 27189 }, { "epoch": 0.7932201411984363, "grad_norm": 0.8135928422315268, "learning_rate": 1.0800307006510585e-06, "loss": 0.103, "step": 27190 }, { "epoch": 0.7932493144290799, "grad_norm": 1.0014669905524591, "learning_rate": 1.0797374458701716e-06, "loss": 0.119, "step": 27191 }, { "epoch": 0.7932784876597234, "grad_norm": 0.9133195992648436, "learning_rate": 1.079444226088454e-06, "loss": 0.1083, "step": 27192 }, { "epoch": 0.793307660890367, "grad_norm": 0.6612529568331375, "learning_rate": 1.0791510413085232e-06, "loss": 0.1009, "step": 27193 }, { "epoch": 0.7933368341210105, "grad_norm": 1.050006629251399, "learning_rate": 1.078857891532994e-06, "loss": 0.1153, "step": 27194 }, { "epoch": 0.7933660073516541, "grad_norm": 0.9127293559854903, "learning_rate": 1.0785647767644869e-06, "loss": 0.1411, "step": 27195 }, { "epoch": 0.7933951805822976, "grad_norm": 0.8450020135109233, "learning_rate": 1.078271697005616e-06, "loss": 0.119, "step": 27196 }, { "epoch": 0.7934243538129413, "grad_norm": 0.8240192595216537, "learning_rate": 1.0779786522589998e-06, "loss": 0.124, "step": 27197 }, { "epoch": 0.7934535270435848, "grad_norm": 0.8152466647131754, "learning_rate": 1.0776856425272548e-06, "loss": 0.1238, "step": 27198 }, { "epoch": 0.7934827002742284, "grad_norm": 0.8905718914173306, "learning_rate": 1.0773926678129958e-06, "loss": 0.1203, "step": 27199 }, { "epoch": 0.793511873504872, "grad_norm": 0.9610597286522311, "learning_rate": 1.0770997281188378e-06, "loss": 0.1145, "step": 27200 }, { "epoch": 0.7935410467355155, "grad_norm": 0.75896572777544, "learning_rate": 1.0768068234473978e-06, "loss": 0.1246, "step": 27201 }, { "epoch": 0.7935702199661591, "grad_norm": 0.8331296571922095, "learning_rate": 1.0765139538012892e-06, "loss": 0.1304, "step": 27202 }, { "epoch": 0.7935993931968026, "grad_norm": 0.8385479477782422, "learning_rate": 1.0762211191831283e-06, "loss": 0.0939, "step": 27203 }, { "epoch": 0.7936285664274462, "grad_norm": 0.8796501707340214, "learning_rate": 1.0759283195955273e-06, "loss": 0.1069, "step": 27204 }, { "epoch": 0.7936577396580897, "grad_norm": 0.9855807190635272, "learning_rate": 1.075635555041103e-06, "loss": 0.1161, "step": 27205 }, { "epoch": 0.7936869128887333, "grad_norm": 1.0909022563980866, "learning_rate": 1.0753428255224674e-06, "loss": 0.1176, "step": 27206 }, { "epoch": 0.7937160861193768, "grad_norm": 0.9271966413936198, "learning_rate": 1.0750501310422328e-06, "loss": 0.12, "step": 27207 }, { "epoch": 0.7937452593500204, "grad_norm": 0.7606476951413613, "learning_rate": 1.074757471603014e-06, "loss": 0.0915, "step": 27208 }, { "epoch": 0.7937744325806639, "grad_norm": 1.0786965180563166, "learning_rate": 1.074464847207425e-06, "loss": 0.1127, "step": 27209 }, { "epoch": 0.7938036058113075, "grad_norm": 0.9642516925484652, "learning_rate": 1.074172257858076e-06, "loss": 0.0867, "step": 27210 }, { "epoch": 0.7938327790419512, "grad_norm": 0.8178564365082546, "learning_rate": 1.0738797035575787e-06, "loss": 0.116, "step": 27211 }, { "epoch": 0.7938619522725947, "grad_norm": 0.7119798409234761, "learning_rate": 1.0735871843085483e-06, "loss": 0.1112, "step": 27212 }, { "epoch": 0.7938911255032383, "grad_norm": 1.1182402379916003, "learning_rate": 1.0732947001135935e-06, "loss": 0.1263, "step": 27213 }, { "epoch": 0.7939202987338818, "grad_norm": 0.7720191956329254, "learning_rate": 1.0730022509753235e-06, "loss": 0.105, "step": 27214 }, { "epoch": 0.7939494719645254, "grad_norm": 0.606352370124071, "learning_rate": 1.072709836896355e-06, "loss": 0.1111, "step": 27215 }, { "epoch": 0.7939786451951689, "grad_norm": 0.957001421108336, "learning_rate": 1.0724174578792952e-06, "loss": 0.107, "step": 27216 }, { "epoch": 0.7940078184258125, "grad_norm": 0.9266659059031501, "learning_rate": 1.0721251139267536e-06, "loss": 0.1168, "step": 27217 }, { "epoch": 0.794036991656456, "grad_norm": 0.9046915043052184, "learning_rate": 1.071832805041343e-06, "loss": 0.1329, "step": 27218 }, { "epoch": 0.7940661648870996, "grad_norm": 0.8037414910215839, "learning_rate": 1.071540531225671e-06, "loss": 0.1393, "step": 27219 }, { "epoch": 0.7940953381177431, "grad_norm": 1.1688948899651286, "learning_rate": 1.071248292482346e-06, "loss": 0.1211, "step": 27220 }, { "epoch": 0.7941245113483867, "grad_norm": 0.8053109879370193, "learning_rate": 1.0709560888139787e-06, "loss": 0.1219, "step": 27221 }, { "epoch": 0.7941536845790302, "grad_norm": 0.7711154565723309, "learning_rate": 1.0706639202231783e-06, "loss": 0.1088, "step": 27222 }, { "epoch": 0.7941828578096738, "grad_norm": 1.011853271244742, "learning_rate": 1.0703717867125524e-06, "loss": 0.1249, "step": 27223 }, { "epoch": 0.7942120310403175, "grad_norm": 0.8364694502713238, "learning_rate": 1.070079688284708e-06, "loss": 0.1049, "step": 27224 }, { "epoch": 0.794241204270961, "grad_norm": 0.8879913859202871, "learning_rate": 1.0697876249422557e-06, "loss": 0.1262, "step": 27225 }, { "epoch": 0.7942703775016046, "grad_norm": 0.79892463453512, "learning_rate": 1.0694955966877996e-06, "loss": 0.1118, "step": 27226 }, { "epoch": 0.7942995507322481, "grad_norm": 0.7897948536845185, "learning_rate": 1.06920360352395e-06, "loss": 0.1119, "step": 27227 }, { "epoch": 0.7943287239628917, "grad_norm": 0.7850501406887443, "learning_rate": 1.0689116454533105e-06, "loss": 0.1295, "step": 27228 }, { "epoch": 0.7943578971935352, "grad_norm": 1.0072128526934645, "learning_rate": 1.068619722478491e-06, "loss": 0.1069, "step": 27229 }, { "epoch": 0.7943870704241788, "grad_norm": 0.8950120076679392, "learning_rate": 1.0683278346020953e-06, "loss": 0.1318, "step": 27230 }, { "epoch": 0.7944162436548223, "grad_norm": 1.0075836046887874, "learning_rate": 1.068035981826731e-06, "loss": 0.1288, "step": 27231 }, { "epoch": 0.7944454168854659, "grad_norm": 0.8989678170551099, "learning_rate": 1.0677441641550012e-06, "loss": 0.1053, "step": 27232 }, { "epoch": 0.7944745901161094, "grad_norm": 1.0010786692342146, "learning_rate": 1.0674523815895143e-06, "loss": 0.1479, "step": 27233 }, { "epoch": 0.794503763346753, "grad_norm": 0.7515913770216751, "learning_rate": 1.0671606341328728e-06, "loss": 0.1303, "step": 27234 }, { "epoch": 0.7945329365773965, "grad_norm": 0.8256631985755358, "learning_rate": 1.0668689217876832e-06, "loss": 0.1161, "step": 27235 }, { "epoch": 0.7945621098080401, "grad_norm": 0.9734480532915907, "learning_rate": 1.0665772445565493e-06, "loss": 0.1253, "step": 27236 }, { "epoch": 0.7945912830386836, "grad_norm": 0.6859718576154452, "learning_rate": 1.0662856024420732e-06, "loss": 0.0938, "step": 27237 }, { "epoch": 0.7946204562693273, "grad_norm": 1.0410261535803178, "learning_rate": 1.06599399544686e-06, "loss": 0.1151, "step": 27238 }, { "epoch": 0.7946496294999709, "grad_norm": 0.9047278946682342, "learning_rate": 1.0657024235735152e-06, "loss": 0.11, "step": 27239 }, { "epoch": 0.7946788027306144, "grad_norm": 0.9623374158719716, "learning_rate": 1.0654108868246398e-06, "loss": 0.1375, "step": 27240 }, { "epoch": 0.794707975961258, "grad_norm": 0.8649855101651447, "learning_rate": 1.0651193852028353e-06, "loss": 0.1204, "step": 27241 }, { "epoch": 0.7947371491919015, "grad_norm": 0.8117566411642689, "learning_rate": 1.0648279187107068e-06, "loss": 0.1297, "step": 27242 }, { "epoch": 0.7947663224225451, "grad_norm": 0.8079122548166594, "learning_rate": 1.064536487350855e-06, "loss": 0.1118, "step": 27243 }, { "epoch": 0.7947954956531886, "grad_norm": 0.8523914860289792, "learning_rate": 1.06424509112588e-06, "loss": 0.1035, "step": 27244 }, { "epoch": 0.7948246688838322, "grad_norm": 0.9556340736393387, "learning_rate": 1.063953730038388e-06, "loss": 0.1113, "step": 27245 }, { "epoch": 0.7948538421144757, "grad_norm": 0.9754331264843032, "learning_rate": 1.0636624040909765e-06, "loss": 0.1024, "step": 27246 }, { "epoch": 0.7948830153451193, "grad_norm": 0.8163054623079017, "learning_rate": 1.0633711132862467e-06, "loss": 0.1128, "step": 27247 }, { "epoch": 0.7949121885757628, "grad_norm": 0.842210170911893, "learning_rate": 1.0630798576268013e-06, "loss": 0.1115, "step": 27248 }, { "epoch": 0.7949413618064064, "grad_norm": 0.7836559465763133, "learning_rate": 1.062788637115239e-06, "loss": 0.12, "step": 27249 }, { "epoch": 0.79497053503705, "grad_norm": 0.7956972852306032, "learning_rate": 1.0624974517541587e-06, "loss": 0.0986, "step": 27250 }, { "epoch": 0.7949997082676936, "grad_norm": 0.8141726113794314, "learning_rate": 1.0622063015461603e-06, "loss": 0.1198, "step": 27251 }, { "epoch": 0.7950288814983372, "grad_norm": 0.7980079504447605, "learning_rate": 1.0619151864938464e-06, "loss": 0.1072, "step": 27252 }, { "epoch": 0.7950580547289807, "grad_norm": 0.8830622236492937, "learning_rate": 1.0616241065998134e-06, "loss": 0.1124, "step": 27253 }, { "epoch": 0.7950872279596243, "grad_norm": 0.7724068062364485, "learning_rate": 1.0613330618666584e-06, "loss": 0.1292, "step": 27254 }, { "epoch": 0.7951164011902678, "grad_norm": 0.9088773064446449, "learning_rate": 1.0610420522969833e-06, "loss": 0.1105, "step": 27255 }, { "epoch": 0.7951455744209114, "grad_norm": 0.7981120777989499, "learning_rate": 1.0607510778933828e-06, "loss": 0.1221, "step": 27256 }, { "epoch": 0.7951747476515549, "grad_norm": 0.9219924405039507, "learning_rate": 1.0604601386584579e-06, "loss": 0.1015, "step": 27257 }, { "epoch": 0.7952039208821985, "grad_norm": 0.9552342074351698, "learning_rate": 1.0601692345948033e-06, "loss": 0.1263, "step": 27258 }, { "epoch": 0.795233094112842, "grad_norm": 0.9333306679607168, "learning_rate": 1.0598783657050183e-06, "loss": 0.1196, "step": 27259 }, { "epoch": 0.7952622673434856, "grad_norm": 0.7489530231122494, "learning_rate": 1.0595875319916977e-06, "loss": 0.1193, "step": 27260 }, { "epoch": 0.7952914405741291, "grad_norm": 0.867807944720601, "learning_rate": 1.0592967334574394e-06, "loss": 0.1002, "step": 27261 }, { "epoch": 0.7953206138047727, "grad_norm": 1.0469200478889713, "learning_rate": 1.059005970104839e-06, "loss": 0.1106, "step": 27262 }, { "epoch": 0.7953497870354163, "grad_norm": 0.9471704814576529, "learning_rate": 1.0587152419364926e-06, "loss": 0.122, "step": 27263 }, { "epoch": 0.7953789602660598, "grad_norm": 0.9068258596768978, "learning_rate": 1.0584245489549956e-06, "loss": 0.1129, "step": 27264 }, { "epoch": 0.7954081334967035, "grad_norm": 0.8361631280770446, "learning_rate": 1.0581338911629436e-06, "loss": 0.1039, "step": 27265 }, { "epoch": 0.795437306727347, "grad_norm": 1.1017321696575815, "learning_rate": 1.057843268562932e-06, "loss": 0.1398, "step": 27266 }, { "epoch": 0.7954664799579906, "grad_norm": 1.2778504875373855, "learning_rate": 1.0575526811575526e-06, "loss": 0.1375, "step": 27267 }, { "epoch": 0.7954956531886341, "grad_norm": 0.7949750708281008, "learning_rate": 1.0572621289494022e-06, "loss": 0.1123, "step": 27268 }, { "epoch": 0.7955248264192777, "grad_norm": 0.7750850559447942, "learning_rate": 1.0569716119410755e-06, "loss": 0.1003, "step": 27269 }, { "epoch": 0.7955539996499212, "grad_norm": 0.9011085546946347, "learning_rate": 1.0566811301351648e-06, "loss": 0.1483, "step": 27270 }, { "epoch": 0.7955831728805648, "grad_norm": 0.7779645887598234, "learning_rate": 1.0563906835342624e-06, "loss": 0.0983, "step": 27271 }, { "epoch": 0.7956123461112083, "grad_norm": 0.7652944868889955, "learning_rate": 1.0561002721409641e-06, "loss": 0.1049, "step": 27272 }, { "epoch": 0.7956415193418519, "grad_norm": 0.8896689235920401, "learning_rate": 1.0558098959578612e-06, "loss": 0.1393, "step": 27273 }, { "epoch": 0.7956706925724955, "grad_norm": 2.154808598446225, "learning_rate": 1.0555195549875425e-06, "loss": 0.1288, "step": 27274 }, { "epoch": 0.795699865803139, "grad_norm": 0.8105039252933518, "learning_rate": 1.055229249232607e-06, "loss": 0.1057, "step": 27275 }, { "epoch": 0.7957290390337826, "grad_norm": 0.7601961340534943, "learning_rate": 1.0549389786956427e-06, "loss": 0.1068, "step": 27276 }, { "epoch": 0.7957582122644261, "grad_norm": 0.9367799725960024, "learning_rate": 1.05464874337924e-06, "loss": 0.1056, "step": 27277 }, { "epoch": 0.7957873854950698, "grad_norm": 1.3257994814465035, "learning_rate": 1.0543585432859938e-06, "loss": 0.1142, "step": 27278 }, { "epoch": 0.7958165587257133, "grad_norm": 0.8538977594302306, "learning_rate": 1.0540683784184902e-06, "loss": 0.1139, "step": 27279 }, { "epoch": 0.7958457319563569, "grad_norm": 0.8283071662944053, "learning_rate": 1.0537782487793242e-06, "loss": 0.1112, "step": 27280 }, { "epoch": 0.7958749051870004, "grad_norm": 0.8035194992437611, "learning_rate": 1.0534881543710823e-06, "loss": 0.1116, "step": 27281 }, { "epoch": 0.795904078417644, "grad_norm": 0.6971214743367149, "learning_rate": 1.0531980951963572e-06, "loss": 0.0872, "step": 27282 }, { "epoch": 0.7959332516482875, "grad_norm": 0.9076570749974003, "learning_rate": 1.0529080712577378e-06, "loss": 0.1292, "step": 27283 }, { "epoch": 0.7959624248789311, "grad_norm": 0.8532211474920889, "learning_rate": 1.0526180825578108e-06, "loss": 0.1183, "step": 27284 }, { "epoch": 0.7959915981095746, "grad_norm": 0.7560721645327512, "learning_rate": 1.0523281290991678e-06, "loss": 0.1094, "step": 27285 }, { "epoch": 0.7960207713402182, "grad_norm": 0.6863790227198021, "learning_rate": 1.0520382108843979e-06, "loss": 0.1021, "step": 27286 }, { "epoch": 0.7960499445708618, "grad_norm": 0.943530506934025, "learning_rate": 1.0517483279160889e-06, "loss": 0.1089, "step": 27287 }, { "epoch": 0.7960791178015053, "grad_norm": 0.8455719346304803, "learning_rate": 1.051458480196827e-06, "loss": 0.1103, "step": 27288 }, { "epoch": 0.7961082910321489, "grad_norm": 1.0411619034805029, "learning_rate": 1.0511686677292021e-06, "loss": 0.1068, "step": 27289 }, { "epoch": 0.7961374642627924, "grad_norm": 0.7012838742892026, "learning_rate": 1.050878890515799e-06, "loss": 0.1289, "step": 27290 }, { "epoch": 0.796166637493436, "grad_norm": 0.7541020856424244, "learning_rate": 1.0505891485592073e-06, "loss": 0.1221, "step": 27291 }, { "epoch": 0.7961958107240796, "grad_norm": 0.8180727127319456, "learning_rate": 1.050299441862014e-06, "loss": 0.1273, "step": 27292 }, { "epoch": 0.7962249839547232, "grad_norm": 0.8950766712575334, "learning_rate": 1.0500097704268042e-06, "loss": 0.1265, "step": 27293 }, { "epoch": 0.7962541571853667, "grad_norm": 0.7790903717148736, "learning_rate": 1.0497201342561625e-06, "loss": 0.1167, "step": 27294 }, { "epoch": 0.7962833304160103, "grad_norm": 0.7474984820936914, "learning_rate": 1.0494305333526782e-06, "loss": 0.1062, "step": 27295 }, { "epoch": 0.7963125036466538, "grad_norm": 0.9903092008224443, "learning_rate": 1.0491409677189352e-06, "loss": 0.1033, "step": 27296 }, { "epoch": 0.7963416768772974, "grad_norm": 0.779866185498765, "learning_rate": 1.048851437357517e-06, "loss": 0.1112, "step": 27297 }, { "epoch": 0.796370850107941, "grad_norm": 0.8087830263086303, "learning_rate": 1.0485619422710097e-06, "loss": 0.095, "step": 27298 }, { "epoch": 0.7964000233385845, "grad_norm": 1.0298698460761202, "learning_rate": 1.048272482462e-06, "loss": 0.1068, "step": 27299 }, { "epoch": 0.7964291965692281, "grad_norm": 0.8249956681325328, "learning_rate": 1.0479830579330697e-06, "loss": 0.1211, "step": 27300 }, { "epoch": 0.7964583697998716, "grad_norm": 0.7654990462051833, "learning_rate": 1.0476936686868023e-06, "loss": 0.1122, "step": 27301 }, { "epoch": 0.7964875430305152, "grad_norm": 1.2977896988259128, "learning_rate": 1.0474043147257835e-06, "loss": 0.1389, "step": 27302 }, { "epoch": 0.7965167162611587, "grad_norm": 1.008924547308374, "learning_rate": 1.0471149960525938e-06, "loss": 0.1136, "step": 27303 }, { "epoch": 0.7965458894918023, "grad_norm": 0.8084272623989259, "learning_rate": 1.0468257126698177e-06, "loss": 0.1065, "step": 27304 }, { "epoch": 0.7965750627224459, "grad_norm": 0.7677326657619127, "learning_rate": 1.0465364645800397e-06, "loss": 0.1408, "step": 27305 }, { "epoch": 0.7966042359530895, "grad_norm": 0.7261190198599423, "learning_rate": 1.0462472517858401e-06, "loss": 0.1184, "step": 27306 }, { "epoch": 0.796633409183733, "grad_norm": 0.7956539267186988, "learning_rate": 1.0459580742898e-06, "loss": 0.0889, "step": 27307 }, { "epoch": 0.7966625824143766, "grad_norm": 0.8544799910759517, "learning_rate": 1.045668932094504e-06, "loss": 0.0973, "step": 27308 }, { "epoch": 0.7966917556450201, "grad_norm": 0.9441641107906975, "learning_rate": 1.04537982520253e-06, "loss": 0.1101, "step": 27309 }, { "epoch": 0.7967209288756637, "grad_norm": 0.748369684774757, "learning_rate": 1.0450907536164623e-06, "loss": 0.0964, "step": 27310 }, { "epoch": 0.7967501021063073, "grad_norm": 0.7201166621857837, "learning_rate": 1.0448017173388792e-06, "loss": 0.1177, "step": 27311 }, { "epoch": 0.7967792753369508, "grad_norm": 0.9313919002849654, "learning_rate": 1.0445127163723634e-06, "loss": 0.1088, "step": 27312 }, { "epoch": 0.7968084485675944, "grad_norm": 0.7366937516037917, "learning_rate": 1.0442237507194936e-06, "loss": 0.1107, "step": 27313 }, { "epoch": 0.7968376217982379, "grad_norm": 0.9756247991440086, "learning_rate": 1.0439348203828487e-06, "loss": 0.1079, "step": 27314 }, { "epoch": 0.7968667950288815, "grad_norm": 0.7766049100662035, "learning_rate": 1.0436459253650088e-06, "loss": 0.1128, "step": 27315 }, { "epoch": 0.796895968259525, "grad_norm": 0.9086673460545542, "learning_rate": 1.0433570656685555e-06, "loss": 0.128, "step": 27316 }, { "epoch": 0.7969251414901686, "grad_norm": 0.8607224473451294, "learning_rate": 1.0430682412960659e-06, "loss": 0.1162, "step": 27317 }, { "epoch": 0.7969543147208121, "grad_norm": 0.9319137601603329, "learning_rate": 1.0427794522501168e-06, "loss": 0.1116, "step": 27318 }, { "epoch": 0.7969834879514558, "grad_norm": 0.7976591204978623, "learning_rate": 1.0424906985332895e-06, "loss": 0.1297, "step": 27319 }, { "epoch": 0.7970126611820993, "grad_norm": 0.870949844542542, "learning_rate": 1.0422019801481604e-06, "loss": 0.1028, "step": 27320 }, { "epoch": 0.7970418344127429, "grad_norm": 0.8619316749310867, "learning_rate": 1.0419132970973046e-06, "loss": 0.1051, "step": 27321 }, { "epoch": 0.7970710076433865, "grad_norm": 0.7862771472718773, "learning_rate": 1.041624649383305e-06, "loss": 0.1381, "step": 27322 }, { "epoch": 0.79710018087403, "grad_norm": 0.857233247605059, "learning_rate": 1.041336037008735e-06, "loss": 0.1311, "step": 27323 }, { "epoch": 0.7971293541046736, "grad_norm": 0.86679087743703, "learning_rate": 1.0410474599761711e-06, "loss": 0.1036, "step": 27324 }, { "epoch": 0.7971585273353171, "grad_norm": 0.9608810048027391, "learning_rate": 1.0407589182881916e-06, "loss": 0.0985, "step": 27325 }, { "epoch": 0.7971877005659607, "grad_norm": 0.8463746457760372, "learning_rate": 1.0404704119473707e-06, "loss": 0.1235, "step": 27326 }, { "epoch": 0.7972168737966042, "grad_norm": 0.8035519886738591, "learning_rate": 1.040181940956284e-06, "loss": 0.1092, "step": 27327 }, { "epoch": 0.7972460470272478, "grad_norm": 0.9602836819719859, "learning_rate": 1.039893505317508e-06, "loss": 0.1281, "step": 27328 }, { "epoch": 0.7972752202578913, "grad_norm": 0.9643572818748601, "learning_rate": 1.039605105033618e-06, "loss": 0.1184, "step": 27329 }, { "epoch": 0.7973043934885349, "grad_norm": 0.7501368876732976, "learning_rate": 1.0393167401071885e-06, "loss": 0.135, "step": 27330 }, { "epoch": 0.7973335667191784, "grad_norm": 1.222176281836421, "learning_rate": 1.0390284105407927e-06, "loss": 0.1354, "step": 27331 }, { "epoch": 0.7973627399498221, "grad_norm": 1.1490824271254725, "learning_rate": 1.0387401163370064e-06, "loss": 0.1145, "step": 27332 }, { "epoch": 0.7973919131804657, "grad_norm": 0.8264927213388011, "learning_rate": 1.0384518574984014e-06, "loss": 0.1142, "step": 27333 }, { "epoch": 0.7974210864111092, "grad_norm": 1.0103645390764446, "learning_rate": 1.038163634027553e-06, "loss": 0.1297, "step": 27334 }, { "epoch": 0.7974502596417528, "grad_norm": 0.7876174767197305, "learning_rate": 1.0378754459270352e-06, "loss": 0.1158, "step": 27335 }, { "epoch": 0.7974794328723963, "grad_norm": 0.6555318205499635, "learning_rate": 1.037587293199419e-06, "loss": 0.1167, "step": 27336 }, { "epoch": 0.7975086061030399, "grad_norm": 0.9265734139056865, "learning_rate": 1.0372991758472768e-06, "loss": 0.1314, "step": 27337 }, { "epoch": 0.7975377793336834, "grad_norm": 1.1487913808388042, "learning_rate": 1.037011093873183e-06, "loss": 0.1538, "step": 27338 }, { "epoch": 0.797566952564327, "grad_norm": 0.9946993047628627, "learning_rate": 1.0367230472797064e-06, "loss": 0.1128, "step": 27339 }, { "epoch": 0.7975961257949705, "grad_norm": 0.7669284339381132, "learning_rate": 1.036435036069422e-06, "loss": 0.113, "step": 27340 }, { "epoch": 0.7976252990256141, "grad_norm": 0.7329772568054622, "learning_rate": 1.0361470602448975e-06, "loss": 0.11, "step": 27341 }, { "epoch": 0.7976544722562576, "grad_norm": 0.7349377252456173, "learning_rate": 1.0358591198087076e-06, "loss": 0.1071, "step": 27342 }, { "epoch": 0.7976836454869012, "grad_norm": 0.9312266589613643, "learning_rate": 1.0355712147634211e-06, "loss": 0.1076, "step": 27343 }, { "epoch": 0.7977128187175447, "grad_norm": 0.8086047133577277, "learning_rate": 1.0352833451116069e-06, "loss": 0.0947, "step": 27344 }, { "epoch": 0.7977419919481883, "grad_norm": 0.8984617533998107, "learning_rate": 1.0349955108558369e-06, "loss": 0.1043, "step": 27345 }, { "epoch": 0.797771165178832, "grad_norm": 0.6451594497243076, "learning_rate": 1.0347077119986814e-06, "loss": 0.088, "step": 27346 }, { "epoch": 0.7978003384094755, "grad_norm": 0.8004683371416975, "learning_rate": 1.0344199485427086e-06, "loss": 0.1217, "step": 27347 }, { "epoch": 0.7978295116401191, "grad_norm": 1.0035865205869823, "learning_rate": 1.0341322204904875e-06, "loss": 0.1125, "step": 27348 }, { "epoch": 0.7978586848707626, "grad_norm": 0.9000770291014063, "learning_rate": 1.0338445278445874e-06, "loss": 0.1037, "step": 27349 }, { "epoch": 0.7978878581014062, "grad_norm": 0.7279193308205009, "learning_rate": 1.0335568706075771e-06, "loss": 0.1179, "step": 27350 }, { "epoch": 0.7979170313320497, "grad_norm": 0.666165510272029, "learning_rate": 1.0332692487820216e-06, "loss": 0.1037, "step": 27351 }, { "epoch": 0.7979462045626933, "grad_norm": 0.8768612695247942, "learning_rate": 1.0329816623704942e-06, "loss": 0.1185, "step": 27352 }, { "epoch": 0.7979753777933368, "grad_norm": 0.8473419457334794, "learning_rate": 1.032694111375559e-06, "loss": 0.0969, "step": 27353 }, { "epoch": 0.7980045510239804, "grad_norm": 0.80039041600822, "learning_rate": 1.0324065957997824e-06, "loss": 0.106, "step": 27354 }, { "epoch": 0.7980337242546239, "grad_norm": 0.7061535278711579, "learning_rate": 1.0321191156457343e-06, "loss": 0.1086, "step": 27355 }, { "epoch": 0.7980628974852675, "grad_norm": 0.7978768143194749, "learning_rate": 1.0318316709159792e-06, "loss": 0.1223, "step": 27356 }, { "epoch": 0.798092070715911, "grad_norm": 0.7193545422037824, "learning_rate": 1.0315442616130828e-06, "loss": 0.1014, "step": 27357 }, { "epoch": 0.7981212439465546, "grad_norm": 0.709688780305429, "learning_rate": 1.0312568877396111e-06, "loss": 0.1123, "step": 27358 }, { "epoch": 0.7981504171771983, "grad_norm": 0.8245917762991147, "learning_rate": 1.0309695492981324e-06, "loss": 0.1015, "step": 27359 }, { "epoch": 0.7981795904078418, "grad_norm": 1.154410388594645, "learning_rate": 1.0306822462912103e-06, "loss": 0.1255, "step": 27360 }, { "epoch": 0.7982087636384854, "grad_norm": 0.9838972049995711, "learning_rate": 1.030394978721408e-06, "loss": 0.109, "step": 27361 }, { "epoch": 0.7982379368691289, "grad_norm": 0.7685313319232628, "learning_rate": 1.0301077465912928e-06, "loss": 0.114, "step": 27362 }, { "epoch": 0.7982671100997725, "grad_norm": 0.795557683594711, "learning_rate": 1.0298205499034265e-06, "loss": 0.1279, "step": 27363 }, { "epoch": 0.798296283330416, "grad_norm": 0.9100744446084528, "learning_rate": 1.0295333886603749e-06, "loss": 0.0909, "step": 27364 }, { "epoch": 0.7983254565610596, "grad_norm": 0.9533167245822671, "learning_rate": 1.0292462628647026e-06, "loss": 0.1327, "step": 27365 }, { "epoch": 0.7983546297917031, "grad_norm": 0.9543349418398875, "learning_rate": 1.0289591725189717e-06, "loss": 0.1114, "step": 27366 }, { "epoch": 0.7983838030223467, "grad_norm": 0.8412541393310877, "learning_rate": 1.028672117625744e-06, "loss": 0.0793, "step": 27367 }, { "epoch": 0.7984129762529902, "grad_norm": 0.7861122645941854, "learning_rate": 1.0283850981875853e-06, "loss": 0.1165, "step": 27368 }, { "epoch": 0.7984421494836338, "grad_norm": 0.9010736568877544, "learning_rate": 1.0280981142070545e-06, "loss": 0.1175, "step": 27369 }, { "epoch": 0.7984713227142773, "grad_norm": 1.152385746574157, "learning_rate": 1.0278111656867174e-06, "loss": 0.137, "step": 27370 }, { "epoch": 0.7985004959449209, "grad_norm": 0.9707370319950321, "learning_rate": 1.0275242526291324e-06, "loss": 0.1173, "step": 27371 }, { "epoch": 0.7985296691755644, "grad_norm": 0.7163698412723432, "learning_rate": 1.0272373750368635e-06, "loss": 0.1067, "step": 27372 }, { "epoch": 0.7985588424062081, "grad_norm": 0.8788242145510866, "learning_rate": 1.0269505329124713e-06, "loss": 0.106, "step": 27373 }, { "epoch": 0.7985880156368517, "grad_norm": 1.0949977314962551, "learning_rate": 1.026663726258515e-06, "loss": 0.1055, "step": 27374 }, { "epoch": 0.7986171888674952, "grad_norm": 0.8828902644346203, "learning_rate": 1.0263769550775564e-06, "loss": 0.1183, "step": 27375 }, { "epoch": 0.7986463620981388, "grad_norm": 0.7971093754824756, "learning_rate": 1.0260902193721573e-06, "loss": 0.1222, "step": 27376 }, { "epoch": 0.7986755353287823, "grad_norm": 0.8554109268803041, "learning_rate": 1.0258035191448756e-06, "loss": 0.1297, "step": 27377 }, { "epoch": 0.7987047085594259, "grad_norm": 1.4321768780104973, "learning_rate": 1.0255168543982708e-06, "loss": 0.0929, "step": 27378 }, { "epoch": 0.7987338817900694, "grad_norm": 0.781439972212485, "learning_rate": 1.0252302251349033e-06, "loss": 0.1044, "step": 27379 }, { "epoch": 0.798763055020713, "grad_norm": 0.9651303026479694, "learning_rate": 1.024943631357332e-06, "loss": 0.125, "step": 27380 }, { "epoch": 0.7987922282513565, "grad_norm": 0.9186434046135167, "learning_rate": 1.0246570730681122e-06, "loss": 0.1326, "step": 27381 }, { "epoch": 0.7988214014820001, "grad_norm": 1.0266898586051725, "learning_rate": 1.0243705502698075e-06, "loss": 0.0973, "step": 27382 }, { "epoch": 0.7988505747126436, "grad_norm": 1.1227978782006496, "learning_rate": 1.0240840629649735e-06, "loss": 0.1196, "step": 27383 }, { "epoch": 0.7988797479432872, "grad_norm": 0.9718486690452055, "learning_rate": 1.0237976111561666e-06, "loss": 0.1077, "step": 27384 }, { "epoch": 0.7989089211739308, "grad_norm": 0.9010038203770405, "learning_rate": 1.023511194845947e-06, "loss": 0.1176, "step": 27385 }, { "epoch": 0.7989380944045744, "grad_norm": 0.9916957610020355, "learning_rate": 1.02322481403687e-06, "loss": 0.1277, "step": 27386 }, { "epoch": 0.798967267635218, "grad_norm": 0.851873687310045, "learning_rate": 1.0229384687314915e-06, "loss": 0.0998, "step": 27387 }, { "epoch": 0.7989964408658615, "grad_norm": 1.017243669354646, "learning_rate": 1.0226521589323684e-06, "loss": 0.1032, "step": 27388 }, { "epoch": 0.7990256140965051, "grad_norm": 1.6622331720543835, "learning_rate": 1.0223658846420593e-06, "loss": 0.0963, "step": 27389 }, { "epoch": 0.7990547873271486, "grad_norm": 0.9540677558757608, "learning_rate": 1.0220796458631171e-06, "loss": 0.1008, "step": 27390 }, { "epoch": 0.7990839605577922, "grad_norm": 0.974557481035565, "learning_rate": 1.021793442598098e-06, "loss": 0.1097, "step": 27391 }, { "epoch": 0.7991131337884357, "grad_norm": 1.0136333069184431, "learning_rate": 1.021507274849558e-06, "loss": 0.1062, "step": 27392 }, { "epoch": 0.7991423070190793, "grad_norm": 0.9111600102249696, "learning_rate": 1.0212211426200502e-06, "loss": 0.1054, "step": 27393 }, { "epoch": 0.7991714802497228, "grad_norm": 0.8667781071069143, "learning_rate": 1.0209350459121304e-06, "loss": 0.1374, "step": 27394 }, { "epoch": 0.7992006534803664, "grad_norm": 1.1658946403964856, "learning_rate": 1.0206489847283535e-06, "loss": 0.1156, "step": 27395 }, { "epoch": 0.79922982671101, "grad_norm": 1.0612162801070393, "learning_rate": 1.0203629590712727e-06, "loss": 0.1213, "step": 27396 }, { "epoch": 0.7992589999416535, "grad_norm": 0.6733781856203072, "learning_rate": 1.0200769689434404e-06, "loss": 0.1323, "step": 27397 }, { "epoch": 0.7992881731722971, "grad_norm": 0.8627238954451573, "learning_rate": 1.0197910143474116e-06, "loss": 0.1178, "step": 27398 }, { "epoch": 0.7993173464029406, "grad_norm": 1.0186587177239874, "learning_rate": 1.0195050952857378e-06, "loss": 0.1258, "step": 27399 }, { "epoch": 0.7993465196335843, "grad_norm": 1.1352688856390698, "learning_rate": 1.0192192117609727e-06, "loss": 0.0969, "step": 27400 }, { "epoch": 0.7993756928642278, "grad_norm": 0.7959818087463973, "learning_rate": 1.0189333637756676e-06, "loss": 0.1187, "step": 27401 }, { "epoch": 0.7994048660948714, "grad_norm": 0.9637908873423694, "learning_rate": 1.0186475513323762e-06, "loss": 0.1263, "step": 27402 }, { "epoch": 0.7994340393255149, "grad_norm": 0.8707825603159962, "learning_rate": 1.0183617744336494e-06, "loss": 0.097, "step": 27403 }, { "epoch": 0.7994632125561585, "grad_norm": 1.131584600088067, "learning_rate": 1.018076033082036e-06, "loss": 0.1302, "step": 27404 }, { "epoch": 0.799492385786802, "grad_norm": 0.9505786522226987, "learning_rate": 1.0177903272800898e-06, "loss": 0.1137, "step": 27405 }, { "epoch": 0.7995215590174456, "grad_norm": 0.8803258128358723, "learning_rate": 1.0175046570303626e-06, "loss": 0.1277, "step": 27406 }, { "epoch": 0.7995507322480891, "grad_norm": 1.1133269484320216, "learning_rate": 1.0172190223354023e-06, "loss": 0.1314, "step": 27407 }, { "epoch": 0.7995799054787327, "grad_norm": 0.7900603980233648, "learning_rate": 1.016933423197759e-06, "loss": 0.1098, "step": 27408 }, { "epoch": 0.7996090787093763, "grad_norm": 0.7658848977935718, "learning_rate": 1.0166478596199847e-06, "loss": 0.1147, "step": 27409 }, { "epoch": 0.7996382519400198, "grad_norm": 1.0040842211708778, "learning_rate": 1.0163623316046267e-06, "loss": 0.1266, "step": 27410 }, { "epoch": 0.7996674251706634, "grad_norm": 0.9590869503747794, "learning_rate": 1.016076839154233e-06, "loss": 0.1253, "step": 27411 }, { "epoch": 0.7996965984013069, "grad_norm": 0.825789617131499, "learning_rate": 1.0157913822713567e-06, "loss": 0.1141, "step": 27412 }, { "epoch": 0.7997257716319506, "grad_norm": 1.18040239101638, "learning_rate": 1.0155059609585432e-06, "loss": 0.1149, "step": 27413 }, { "epoch": 0.7997549448625941, "grad_norm": 0.8379277379174047, "learning_rate": 1.0152205752183408e-06, "loss": 0.1045, "step": 27414 }, { "epoch": 0.7997841180932377, "grad_norm": 0.7105118907286953, "learning_rate": 1.0149352250532985e-06, "loss": 0.1139, "step": 27415 }, { "epoch": 0.7998132913238812, "grad_norm": 0.9452899581305659, "learning_rate": 1.0146499104659634e-06, "loss": 0.1055, "step": 27416 }, { "epoch": 0.7998424645545248, "grad_norm": 1.0002905838789558, "learning_rate": 1.0143646314588817e-06, "loss": 0.1005, "step": 27417 }, { "epoch": 0.7998716377851683, "grad_norm": 0.8606236401368312, "learning_rate": 1.0140793880346006e-06, "loss": 0.1152, "step": 27418 }, { "epoch": 0.7999008110158119, "grad_norm": 0.8002761524186969, "learning_rate": 1.0137941801956686e-06, "loss": 0.1376, "step": 27419 }, { "epoch": 0.7999299842464554, "grad_norm": 0.8804755466804401, "learning_rate": 1.0135090079446307e-06, "loss": 0.1107, "step": 27420 }, { "epoch": 0.799959157477099, "grad_norm": 0.7130652507536094, "learning_rate": 1.0132238712840315e-06, "loss": 0.1182, "step": 27421 }, { "epoch": 0.7999883307077426, "grad_norm": 0.9118448767102106, "learning_rate": 1.012938770216419e-06, "loss": 0.1176, "step": 27422 }, { "epoch": 0.8000175039383861, "grad_norm": 0.8609004163722702, "learning_rate": 1.0126537047443364e-06, "loss": 0.1111, "step": 27423 }, { "epoch": 0.8000466771690297, "grad_norm": 0.9193491155298933, "learning_rate": 1.0123686748703292e-06, "loss": 0.1212, "step": 27424 }, { "epoch": 0.8000758503996732, "grad_norm": 0.8982582014204211, "learning_rate": 1.0120836805969442e-06, "loss": 0.1125, "step": 27425 }, { "epoch": 0.8001050236303168, "grad_norm": 0.7771863371344789, "learning_rate": 1.0117987219267238e-06, "loss": 0.1081, "step": 27426 }, { "epoch": 0.8001341968609604, "grad_norm": 0.8806857382044603, "learning_rate": 1.011513798862211e-06, "loss": 0.1231, "step": 27427 }, { "epoch": 0.800163370091604, "grad_norm": 0.8949088742009761, "learning_rate": 1.0112289114059525e-06, "loss": 0.1198, "step": 27428 }, { "epoch": 0.8001925433222475, "grad_norm": 0.8702535990284568, "learning_rate": 1.0109440595604887e-06, "loss": 0.1271, "step": 27429 }, { "epoch": 0.8002217165528911, "grad_norm": 0.9267839597026477, "learning_rate": 1.0106592433283652e-06, "loss": 0.117, "step": 27430 }, { "epoch": 0.8002508897835346, "grad_norm": 0.8222063174651958, "learning_rate": 1.0103744627121226e-06, "loss": 0.103, "step": 27431 }, { "epoch": 0.8002800630141782, "grad_norm": 0.7926290344386797, "learning_rate": 1.0100897177143054e-06, "loss": 0.1235, "step": 27432 }, { "epoch": 0.8003092362448218, "grad_norm": 0.8564751019855571, "learning_rate": 1.009805008337455e-06, "loss": 0.1073, "step": 27433 }, { "epoch": 0.8003384094754653, "grad_norm": 0.7440124553336979, "learning_rate": 1.0095203345841115e-06, "loss": 0.123, "step": 27434 }, { "epoch": 0.8003675827061089, "grad_norm": 0.9112097950179906, "learning_rate": 1.009235696456818e-06, "loss": 0.1039, "step": 27435 }, { "epoch": 0.8003967559367524, "grad_norm": 0.7822635495414189, "learning_rate": 1.0089510939581166e-06, "loss": 0.1068, "step": 27436 }, { "epoch": 0.800425929167396, "grad_norm": 0.9361557872530007, "learning_rate": 1.0086665270905472e-06, "loss": 0.1031, "step": 27437 }, { "epoch": 0.8004551023980395, "grad_norm": 0.9948467467756411, "learning_rate": 1.0083819958566489e-06, "loss": 0.1164, "step": 27438 }, { "epoch": 0.8004842756286831, "grad_norm": 0.72331521075041, "learning_rate": 1.0080975002589644e-06, "loss": 0.1096, "step": 27439 }, { "epoch": 0.8005134488593267, "grad_norm": 0.7935637014052944, "learning_rate": 1.007813040300033e-06, "loss": 0.1139, "step": 27440 }, { "epoch": 0.8005426220899703, "grad_norm": 0.9534471465186868, "learning_rate": 1.0075286159823905e-06, "loss": 0.1158, "step": 27441 }, { "epoch": 0.8005717953206138, "grad_norm": 0.9560321220386006, "learning_rate": 1.0072442273085825e-06, "loss": 0.1033, "step": 27442 }, { "epoch": 0.8006009685512574, "grad_norm": 0.8886134616677258, "learning_rate": 1.0069598742811448e-06, "loss": 0.1226, "step": 27443 }, { "epoch": 0.800630141781901, "grad_norm": 0.6454328075690808, "learning_rate": 1.006675556902615e-06, "loss": 0.1228, "step": 27444 }, { "epoch": 0.8006593150125445, "grad_norm": 0.9234637758355148, "learning_rate": 1.0063912751755334e-06, "loss": 0.1496, "step": 27445 }, { "epoch": 0.8006884882431881, "grad_norm": 0.9702854374728535, "learning_rate": 1.0061070291024372e-06, "loss": 0.1101, "step": 27446 }, { "epoch": 0.8007176614738316, "grad_norm": 0.7329279377667942, "learning_rate": 1.0058228186858633e-06, "loss": 0.1057, "step": 27447 }, { "epoch": 0.8007468347044752, "grad_norm": 0.9125468441366694, "learning_rate": 1.0055386439283494e-06, "loss": 0.1214, "step": 27448 }, { "epoch": 0.8007760079351187, "grad_norm": 0.7911482095262947, "learning_rate": 1.0052545048324342e-06, "loss": 0.1218, "step": 27449 }, { "epoch": 0.8008051811657623, "grad_norm": 0.8908001573765704, "learning_rate": 1.0049704014006527e-06, "loss": 0.1012, "step": 27450 }, { "epoch": 0.8008343543964058, "grad_norm": 0.9453363997313438, "learning_rate": 1.004686333635541e-06, "loss": 0.1317, "step": 27451 }, { "epoch": 0.8008635276270494, "grad_norm": 0.870801751568944, "learning_rate": 1.0044023015396375e-06, "loss": 0.1072, "step": 27452 }, { "epoch": 0.8008927008576929, "grad_norm": 0.9291563537905266, "learning_rate": 1.0041183051154746e-06, "loss": 0.1021, "step": 27453 }, { "epoch": 0.8009218740883366, "grad_norm": 0.8064952750070626, "learning_rate": 1.00383434436559e-06, "loss": 0.116, "step": 27454 }, { "epoch": 0.8009510473189801, "grad_norm": 0.9306562986957118, "learning_rate": 1.0035504192925195e-06, "loss": 0.1124, "step": 27455 }, { "epoch": 0.8009802205496237, "grad_norm": 0.9311307517380818, "learning_rate": 1.003266529898797e-06, "loss": 0.1156, "step": 27456 }, { "epoch": 0.8010093937802673, "grad_norm": 0.7746613497416763, "learning_rate": 1.0029826761869554e-06, "loss": 0.1258, "step": 27457 }, { "epoch": 0.8010385670109108, "grad_norm": 0.9255562720236967, "learning_rate": 1.0026988581595315e-06, "loss": 0.1265, "step": 27458 }, { "epoch": 0.8010677402415544, "grad_norm": 0.8488042810775273, "learning_rate": 1.0024150758190566e-06, "loss": 0.1018, "step": 27459 }, { "epoch": 0.8010969134721979, "grad_norm": 0.7533661385301036, "learning_rate": 1.0021313291680674e-06, "loss": 0.1282, "step": 27460 }, { "epoch": 0.8011260867028415, "grad_norm": 0.7168257875771503, "learning_rate": 1.0018476182090935e-06, "loss": 0.1114, "step": 27461 }, { "epoch": 0.801155259933485, "grad_norm": 0.7129530312175197, "learning_rate": 1.001563942944671e-06, "loss": 0.1119, "step": 27462 }, { "epoch": 0.8011844331641286, "grad_norm": 0.6467688600173058, "learning_rate": 1.001280303377331e-06, "loss": 0.1059, "step": 27463 }, { "epoch": 0.8012136063947721, "grad_norm": 0.8250352063102936, "learning_rate": 1.000996699509605e-06, "loss": 0.1349, "step": 27464 }, { "epoch": 0.8012427796254157, "grad_norm": 0.743134392226119, "learning_rate": 1.0007131313440255e-06, "loss": 0.1005, "step": 27465 }, { "epoch": 0.8012719528560592, "grad_norm": 0.7632444025424922, "learning_rate": 1.0004295988831259e-06, "loss": 0.132, "step": 27466 }, { "epoch": 0.8013011260867028, "grad_norm": 0.8975724299906467, "learning_rate": 1.0001461021294363e-06, "loss": 0.1106, "step": 27467 }, { "epoch": 0.8013302993173465, "grad_norm": 0.8529121784209267, "learning_rate": 9.998626410854856e-07, "loss": 0.1083, "step": 27468 }, { "epoch": 0.80135947254799, "grad_norm": 0.9149983717059812, "learning_rate": 9.99579215753808e-07, "loss": 0.1283, "step": 27469 }, { "epoch": 0.8013886457786336, "grad_norm": 0.8506981819837592, "learning_rate": 9.992958261369324e-07, "loss": 0.1109, "step": 27470 }, { "epoch": 0.8014178190092771, "grad_norm": 0.8245174876311564, "learning_rate": 9.990124722373857e-07, "loss": 0.1125, "step": 27471 }, { "epoch": 0.8014469922399207, "grad_norm": 0.7232528396795872, "learning_rate": 9.987291540577026e-07, "loss": 0.1006, "step": 27472 }, { "epoch": 0.8014761654705642, "grad_norm": 0.9982663724992538, "learning_rate": 9.984458716004114e-07, "loss": 0.1357, "step": 27473 }, { "epoch": 0.8015053387012078, "grad_norm": 0.7650235797531183, "learning_rate": 9.98162624868038e-07, "loss": 0.1184, "step": 27474 }, { "epoch": 0.8015345119318513, "grad_norm": 0.7297808765548561, "learning_rate": 9.978794138631153e-07, "loss": 0.124, "step": 27475 }, { "epoch": 0.8015636851624949, "grad_norm": 0.8622994255567406, "learning_rate": 9.975962385881688e-07, "loss": 0.1283, "step": 27476 }, { "epoch": 0.8015928583931384, "grad_norm": 0.8824422420247346, "learning_rate": 9.973130990457285e-07, "loss": 0.123, "step": 27477 }, { "epoch": 0.801622031623782, "grad_norm": 0.8055058362559767, "learning_rate": 9.9702999523832e-07, "loss": 0.1197, "step": 27478 }, { "epoch": 0.8016512048544255, "grad_norm": 0.7574393173391978, "learning_rate": 9.967469271684732e-07, "loss": 0.1043, "step": 27479 }, { "epoch": 0.8016803780850691, "grad_norm": 0.9211481717686488, "learning_rate": 9.964638948387145e-07, "loss": 0.1188, "step": 27480 }, { "epoch": 0.8017095513157128, "grad_norm": 1.547594870813011, "learning_rate": 9.961808982515693e-07, "loss": 0.1512, "step": 27481 }, { "epoch": 0.8017387245463563, "grad_norm": 0.9228661760817329, "learning_rate": 9.95897937409565e-07, "loss": 0.1156, "step": 27482 }, { "epoch": 0.8017678977769999, "grad_norm": 0.7546212666172488, "learning_rate": 9.956150123152291e-07, "loss": 0.1137, "step": 27483 }, { "epoch": 0.8017970710076434, "grad_norm": 1.1521681964485182, "learning_rate": 9.953321229710854e-07, "loss": 0.1259, "step": 27484 }, { "epoch": 0.801826244238287, "grad_norm": 0.9702727280511132, "learning_rate": 9.95049269379662e-07, "loss": 0.1, "step": 27485 }, { "epoch": 0.8018554174689305, "grad_norm": 0.8129101381875943, "learning_rate": 9.947664515434823e-07, "loss": 0.1016, "step": 27486 }, { "epoch": 0.8018845906995741, "grad_norm": 0.7833678546779559, "learning_rate": 9.944836694650706e-07, "loss": 0.1346, "step": 27487 }, { "epoch": 0.8019137639302176, "grad_norm": 0.9835583680742942, "learning_rate": 9.942009231469524e-07, "loss": 0.1318, "step": 27488 }, { "epoch": 0.8019429371608612, "grad_norm": 0.8395465308407088, "learning_rate": 9.939182125916535e-07, "loss": 0.1029, "step": 27489 }, { "epoch": 0.8019721103915047, "grad_norm": 1.2117427699435273, "learning_rate": 9.936355378016965e-07, "loss": 0.13, "step": 27490 }, { "epoch": 0.8020012836221483, "grad_norm": 0.9255749330060353, "learning_rate": 9.933528987796037e-07, "loss": 0.1078, "step": 27491 }, { "epoch": 0.8020304568527918, "grad_norm": 0.8952781065033935, "learning_rate": 9.93070295527901e-07, "loss": 0.1263, "step": 27492 }, { "epoch": 0.8020596300834354, "grad_norm": 0.8174950489610706, "learning_rate": 9.9278772804911e-07, "loss": 0.126, "step": 27493 }, { "epoch": 0.802088803314079, "grad_norm": 0.8633872550982359, "learning_rate": 9.92505196345752e-07, "loss": 0.1304, "step": 27494 }, { "epoch": 0.8021179765447226, "grad_norm": 0.895939107570586, "learning_rate": 9.922227004203517e-07, "loss": 0.1055, "step": 27495 }, { "epoch": 0.8021471497753662, "grad_norm": 0.8810439537896332, "learning_rate": 9.919402402754314e-07, "loss": 0.1054, "step": 27496 }, { "epoch": 0.8021763230060097, "grad_norm": 0.7126229364810918, "learning_rate": 9.916578159135114e-07, "loss": 0.0878, "step": 27497 }, { "epoch": 0.8022054962366533, "grad_norm": 0.7414594488993557, "learning_rate": 9.913754273371128e-07, "loss": 0.1233, "step": 27498 }, { "epoch": 0.8022346694672968, "grad_norm": 0.9218964391377544, "learning_rate": 9.910930745487586e-07, "loss": 0.1364, "step": 27499 }, { "epoch": 0.8022638426979404, "grad_norm": 0.8567387959737994, "learning_rate": 9.908107575509673e-07, "loss": 0.1068, "step": 27500 }, { "epoch": 0.8022930159285839, "grad_norm": 0.7935055703829693, "learning_rate": 9.905284763462603e-07, "loss": 0.1169, "step": 27501 }, { "epoch": 0.8023221891592275, "grad_norm": 0.8315308574392503, "learning_rate": 9.90246230937159e-07, "loss": 0.1273, "step": 27502 }, { "epoch": 0.802351362389871, "grad_norm": 0.7955133952656952, "learning_rate": 9.899640213261823e-07, "loss": 0.1268, "step": 27503 }, { "epoch": 0.8023805356205146, "grad_norm": 0.9488374605414202, "learning_rate": 9.89681847515848e-07, "loss": 0.1217, "step": 27504 }, { "epoch": 0.8024097088511581, "grad_norm": 0.7815366441533715, "learning_rate": 9.893997095086788e-07, "loss": 0.1246, "step": 27505 }, { "epoch": 0.8024388820818017, "grad_norm": 0.9385181131703689, "learning_rate": 9.891176073071896e-07, "loss": 0.1101, "step": 27506 }, { "epoch": 0.8024680553124452, "grad_norm": 0.8404212265666252, "learning_rate": 9.888355409139027e-07, "loss": 0.1133, "step": 27507 }, { "epoch": 0.8024972285430889, "grad_norm": 0.7111770796997006, "learning_rate": 9.88553510331333e-07, "loss": 0.1087, "step": 27508 }, { "epoch": 0.8025264017737325, "grad_norm": 0.85651853202019, "learning_rate": 9.882715155620015e-07, "loss": 0.1061, "step": 27509 }, { "epoch": 0.802555575004376, "grad_norm": 0.9364010872011337, "learning_rate": 9.879895566084241e-07, "loss": 0.142, "step": 27510 }, { "epoch": 0.8025847482350196, "grad_norm": 0.8477630588934852, "learning_rate": 9.877076334731167e-07, "loss": 0.0942, "step": 27511 }, { "epoch": 0.8026139214656631, "grad_norm": 0.9740602613692005, "learning_rate": 9.874257461585979e-07, "loss": 0.135, "step": 27512 }, { "epoch": 0.8026430946963067, "grad_norm": 1.0255415594007768, "learning_rate": 9.871438946673855e-07, "loss": 0.1097, "step": 27513 }, { "epoch": 0.8026722679269502, "grad_norm": 0.867708343555253, "learning_rate": 9.868620790019929e-07, "loss": 0.101, "step": 27514 }, { "epoch": 0.8027014411575938, "grad_norm": 1.0916727839946327, "learning_rate": 9.865802991649393e-07, "loss": 0.1145, "step": 27515 }, { "epoch": 0.8027306143882373, "grad_norm": 0.7076756699990595, "learning_rate": 9.862985551587384e-07, "loss": 0.1057, "step": 27516 }, { "epoch": 0.8027597876188809, "grad_norm": 0.8077215592786581, "learning_rate": 9.86016846985905e-07, "loss": 0.0968, "step": 27517 }, { "epoch": 0.8027889608495244, "grad_norm": 0.7284569848091642, "learning_rate": 9.857351746489546e-07, "loss": 0.1089, "step": 27518 }, { "epoch": 0.802818134080168, "grad_norm": 0.9925048104491448, "learning_rate": 9.854535381504038e-07, "loss": 0.1124, "step": 27519 }, { "epoch": 0.8028473073108116, "grad_norm": 1.082531906295039, "learning_rate": 9.851719374927655e-07, "loss": 0.1149, "step": 27520 }, { "epoch": 0.8028764805414551, "grad_norm": 0.9989830487617188, "learning_rate": 9.848903726785518e-07, "loss": 0.1243, "step": 27521 }, { "epoch": 0.8029056537720988, "grad_norm": 0.6992970445341425, "learning_rate": 9.846088437102802e-07, "loss": 0.1294, "step": 27522 }, { "epoch": 0.8029348270027423, "grad_norm": 0.7919031635809013, "learning_rate": 9.843273505904622e-07, "loss": 0.1084, "step": 27523 }, { "epoch": 0.8029640002333859, "grad_norm": 0.9061890772451463, "learning_rate": 9.840458933216097e-07, "loss": 0.1195, "step": 27524 }, { "epoch": 0.8029931734640294, "grad_norm": 1.0522030653377388, "learning_rate": 9.837644719062367e-07, "loss": 0.083, "step": 27525 }, { "epoch": 0.803022346694673, "grad_norm": 0.6484002636183097, "learning_rate": 9.834830863468575e-07, "loss": 0.1188, "step": 27526 }, { "epoch": 0.8030515199253165, "grad_norm": 0.7819960455186077, "learning_rate": 9.832017366459817e-07, "loss": 0.1042, "step": 27527 }, { "epoch": 0.8030806931559601, "grad_norm": 0.9196624254240777, "learning_rate": 9.829204228061212e-07, "loss": 0.1427, "step": 27528 }, { "epoch": 0.8031098663866036, "grad_norm": 0.7512637588587939, "learning_rate": 9.826391448297895e-07, "loss": 0.1036, "step": 27529 }, { "epoch": 0.8031390396172472, "grad_norm": 0.761399698788126, "learning_rate": 9.82357902719495e-07, "loss": 0.1025, "step": 27530 }, { "epoch": 0.8031682128478908, "grad_norm": 0.7884522314321935, "learning_rate": 9.820766964777501e-07, "loss": 0.1362, "step": 27531 }, { "epoch": 0.8031973860785343, "grad_norm": 0.8762764995908606, "learning_rate": 9.817955261070666e-07, "loss": 0.114, "step": 27532 }, { "epoch": 0.8032265593091779, "grad_norm": 0.7402311609140194, "learning_rate": 9.815143916099533e-07, "loss": 0.0966, "step": 27533 }, { "epoch": 0.8032557325398214, "grad_norm": 0.9790257807777503, "learning_rate": 9.81233292988919e-07, "loss": 0.1055, "step": 27534 }, { "epoch": 0.8032849057704651, "grad_norm": 0.9065201993239207, "learning_rate": 9.809522302464757e-07, "loss": 0.1247, "step": 27535 }, { "epoch": 0.8033140790011086, "grad_norm": 0.9448381142552766, "learning_rate": 9.806712033851307e-07, "loss": 0.1106, "step": 27536 }, { "epoch": 0.8033432522317522, "grad_norm": 0.9193845004981372, "learning_rate": 9.803902124073945e-07, "loss": 0.102, "step": 27537 }, { "epoch": 0.8033724254623957, "grad_norm": 0.8344915088105022, "learning_rate": 9.801092573157734e-07, "loss": 0.1209, "step": 27538 }, { "epoch": 0.8034015986930393, "grad_norm": 0.7923926602430644, "learning_rate": 9.798283381127793e-07, "loss": 0.1067, "step": 27539 }, { "epoch": 0.8034307719236828, "grad_norm": 0.831507834185248, "learning_rate": 9.795474548009176e-07, "loss": 0.1148, "step": 27540 }, { "epoch": 0.8034599451543264, "grad_norm": 0.7731722617722381, "learning_rate": 9.792666073826952e-07, "loss": 0.1399, "step": 27541 }, { "epoch": 0.80348911838497, "grad_norm": 1.0448949677239212, "learning_rate": 9.789857958606207e-07, "loss": 0.086, "step": 27542 }, { "epoch": 0.8035182916156135, "grad_norm": 0.7869590606394334, "learning_rate": 9.787050202372023e-07, "loss": 0.0958, "step": 27543 }, { "epoch": 0.803547464846257, "grad_norm": 1.0754165322432865, "learning_rate": 9.784242805149442e-07, "loss": 0.1138, "step": 27544 }, { "epoch": 0.8035766380769006, "grad_norm": 0.8133889982205859, "learning_rate": 9.78143576696356e-07, "loss": 0.1325, "step": 27545 }, { "epoch": 0.8036058113075442, "grad_norm": 0.8810615591701374, "learning_rate": 9.778629087839414e-07, "loss": 0.0991, "step": 27546 }, { "epoch": 0.8036349845381877, "grad_norm": 1.1282760815960862, "learning_rate": 9.77582276780205e-07, "loss": 0.1087, "step": 27547 }, { "epoch": 0.8036641577688313, "grad_norm": 1.0124644803125806, "learning_rate": 9.77301680687654e-07, "loss": 0.1304, "step": 27548 }, { "epoch": 0.8036933309994749, "grad_norm": 0.8586315951501855, "learning_rate": 9.770211205087948e-07, "loss": 0.1149, "step": 27549 }, { "epoch": 0.8037225042301185, "grad_norm": 1.2215218079973935, "learning_rate": 9.767405962461306e-07, "loss": 0.1087, "step": 27550 }, { "epoch": 0.803751677460762, "grad_norm": 0.757236596094083, "learning_rate": 9.764601079021645e-07, "loss": 0.1246, "step": 27551 }, { "epoch": 0.8037808506914056, "grad_norm": 0.7812827316456944, "learning_rate": 9.761796554794034e-07, "loss": 0.1124, "step": 27552 }, { "epoch": 0.8038100239220491, "grad_norm": 0.9113787930889454, "learning_rate": 9.7589923898035e-07, "loss": 0.1265, "step": 27553 }, { "epoch": 0.8038391971526927, "grad_norm": 0.9763738642644213, "learning_rate": 9.75618858407506e-07, "loss": 0.1233, "step": 27554 }, { "epoch": 0.8038683703833363, "grad_norm": 0.7342411020902224, "learning_rate": 9.753385137633764e-07, "loss": 0.1041, "step": 27555 }, { "epoch": 0.8038975436139798, "grad_norm": 0.8643422526208991, "learning_rate": 9.750582050504648e-07, "loss": 0.1276, "step": 27556 }, { "epoch": 0.8039267168446234, "grad_norm": 0.8031755648302986, "learning_rate": 9.74777932271273e-07, "loss": 0.1224, "step": 27557 }, { "epoch": 0.8039558900752669, "grad_norm": 0.941386872050956, "learning_rate": 9.744976954283013e-07, "loss": 0.132, "step": 27558 }, { "epoch": 0.8039850633059105, "grad_norm": 0.6966302439469197, "learning_rate": 9.742174945240545e-07, "loss": 0.1092, "step": 27559 }, { "epoch": 0.804014236536554, "grad_norm": 0.7677355592201016, "learning_rate": 9.739373295610322e-07, "loss": 0.0967, "step": 27560 }, { "epoch": 0.8040434097671976, "grad_norm": 0.8254770337618673, "learning_rate": 9.736572005417354e-07, "loss": 0.1289, "step": 27561 }, { "epoch": 0.8040725829978412, "grad_norm": 0.7646539644982906, "learning_rate": 9.733771074686681e-07, "loss": 0.1366, "step": 27562 }, { "epoch": 0.8041017562284848, "grad_norm": 0.7624287634823101, "learning_rate": 9.730970503443281e-07, "loss": 0.1034, "step": 27563 }, { "epoch": 0.8041309294591283, "grad_norm": 0.9306076603852309, "learning_rate": 9.728170291712153e-07, "loss": 0.1081, "step": 27564 }, { "epoch": 0.8041601026897719, "grad_norm": 0.7185652973623851, "learning_rate": 9.725370439518323e-07, "loss": 0.1123, "step": 27565 }, { "epoch": 0.8041892759204154, "grad_norm": 1.0339619225291377, "learning_rate": 9.722570946886755e-07, "loss": 0.1281, "step": 27566 }, { "epoch": 0.804218449151059, "grad_norm": 0.8357950887205879, "learning_rate": 9.71977181384247e-07, "loss": 0.126, "step": 27567 }, { "epoch": 0.8042476223817026, "grad_norm": 0.8086783951902399, "learning_rate": 9.716973040410437e-07, "loss": 0.1074, "step": 27568 }, { "epoch": 0.8042767956123461, "grad_norm": 0.7949498671032257, "learning_rate": 9.714174626615664e-07, "loss": 0.1335, "step": 27569 }, { "epoch": 0.8043059688429897, "grad_norm": 1.0117006930422152, "learning_rate": 9.711376572483122e-07, "loss": 0.1377, "step": 27570 }, { "epoch": 0.8043351420736332, "grad_norm": 0.9456639991047978, "learning_rate": 9.708578878037778e-07, "loss": 0.1108, "step": 27571 }, { "epoch": 0.8043643153042768, "grad_norm": 0.915343136462663, "learning_rate": 9.705781543304627e-07, "loss": 0.0948, "step": 27572 }, { "epoch": 0.8043934885349203, "grad_norm": 0.8714827951311132, "learning_rate": 9.702984568308654e-07, "loss": 0.1075, "step": 27573 }, { "epoch": 0.8044226617655639, "grad_norm": 0.884711263380116, "learning_rate": 9.700187953074797e-07, "loss": 0.1061, "step": 27574 }, { "epoch": 0.8044518349962074, "grad_norm": 0.890139260451249, "learning_rate": 9.697391697628056e-07, "loss": 0.1145, "step": 27575 }, { "epoch": 0.8044810082268511, "grad_norm": 0.7615195481589713, "learning_rate": 9.694595801993383e-07, "loss": 0.0963, "step": 27576 }, { "epoch": 0.8045101814574946, "grad_norm": 1.0854808140848085, "learning_rate": 9.691800266195721e-07, "loss": 0.1417, "step": 27577 }, { "epoch": 0.8045393546881382, "grad_norm": 0.874197590124047, "learning_rate": 9.689005090260045e-07, "loss": 0.0975, "step": 27578 }, { "epoch": 0.8045685279187818, "grad_norm": 0.7717465030444081, "learning_rate": 9.686210274211321e-07, "loss": 0.1153, "step": 27579 }, { "epoch": 0.8045977011494253, "grad_norm": 0.9969176410958358, "learning_rate": 9.683415818074493e-07, "loss": 0.0937, "step": 27580 }, { "epoch": 0.8046268743800689, "grad_norm": 0.8402278413277697, "learning_rate": 9.680621721874483e-07, "loss": 0.1073, "step": 27581 }, { "epoch": 0.8046560476107124, "grad_norm": 0.8249377411077903, "learning_rate": 9.67782798563628e-07, "loss": 0.1173, "step": 27582 }, { "epoch": 0.804685220841356, "grad_norm": 0.803192629308676, "learning_rate": 9.675034609384792e-07, "loss": 0.1256, "step": 27583 }, { "epoch": 0.8047143940719995, "grad_norm": 1.1678074185282614, "learning_rate": 9.672241593144965e-07, "loss": 0.1011, "step": 27584 }, { "epoch": 0.8047435673026431, "grad_norm": 1.1864856481601738, "learning_rate": 9.669448936941733e-07, "loss": 0.1217, "step": 27585 }, { "epoch": 0.8047727405332866, "grad_norm": 0.7118350636026991, "learning_rate": 9.666656640800048e-07, "loss": 0.1224, "step": 27586 }, { "epoch": 0.8048019137639302, "grad_norm": 0.7955510891893275, "learning_rate": 9.663864704744825e-07, "loss": 0.1159, "step": 27587 }, { "epoch": 0.8048310869945737, "grad_norm": 0.8232893457522569, "learning_rate": 9.661073128800973e-07, "loss": 0.107, "step": 27588 }, { "epoch": 0.8048602602252174, "grad_norm": 0.8513235400021794, "learning_rate": 9.658281912993444e-07, "loss": 0.115, "step": 27589 }, { "epoch": 0.804889433455861, "grad_norm": 1.1189109386472753, "learning_rate": 9.655491057347133e-07, "loss": 0.1057, "step": 27590 }, { "epoch": 0.8049186066865045, "grad_norm": 0.8443591755701726, "learning_rate": 9.652700561886964e-07, "loss": 0.1183, "step": 27591 }, { "epoch": 0.804947779917148, "grad_norm": 0.9920503892854112, "learning_rate": 9.64991042663787e-07, "loss": 0.1136, "step": 27592 }, { "epoch": 0.8049769531477916, "grad_norm": 0.9103245817667461, "learning_rate": 9.647120651624737e-07, "loss": 0.1125, "step": 27593 }, { "epoch": 0.8050061263784352, "grad_norm": 0.7498952518795737, "learning_rate": 9.644331236872472e-07, "loss": 0.0974, "step": 27594 }, { "epoch": 0.8050352996090787, "grad_norm": 1.1221231264385536, "learning_rate": 9.641542182405995e-07, "loss": 0.1145, "step": 27595 }, { "epoch": 0.8050644728397223, "grad_norm": 1.4629426387282476, "learning_rate": 9.63875348825018e-07, "loss": 0.1165, "step": 27596 }, { "epoch": 0.8050936460703658, "grad_norm": 0.7649073341332615, "learning_rate": 9.63596515442995e-07, "loss": 0.0948, "step": 27597 }, { "epoch": 0.8051228193010094, "grad_norm": 0.9968918576357672, "learning_rate": 9.633177180970177e-07, "loss": 0.0973, "step": 27598 }, { "epoch": 0.8051519925316529, "grad_norm": 1.0691184429048453, "learning_rate": 9.630389567895776e-07, "loss": 0.1086, "step": 27599 }, { "epoch": 0.8051811657622965, "grad_norm": 1.019450539143369, "learning_rate": 9.627602315231616e-07, "loss": 0.1242, "step": 27600 }, { "epoch": 0.80521033899294, "grad_norm": 0.9327498563407185, "learning_rate": 9.624815423002576e-07, "loss": 0.1249, "step": 27601 }, { "epoch": 0.8052395122235836, "grad_norm": 0.7362454293940043, "learning_rate": 9.622028891233543e-07, "loss": 0.0984, "step": 27602 }, { "epoch": 0.8052686854542273, "grad_norm": 0.7822579531266258, "learning_rate": 9.619242719949411e-07, "loss": 0.1144, "step": 27603 }, { "epoch": 0.8052978586848708, "grad_norm": 0.9239614326140464, "learning_rate": 9.616456909175027e-07, "loss": 0.0971, "step": 27604 }, { "epoch": 0.8053270319155144, "grad_norm": 0.8223689071686345, "learning_rate": 9.613671458935287e-07, "loss": 0.1039, "step": 27605 }, { "epoch": 0.8053562051461579, "grad_norm": 0.9177224570765862, "learning_rate": 9.610886369255051e-07, "loss": 0.1166, "step": 27606 }, { "epoch": 0.8053853783768015, "grad_norm": 0.8410811683324743, "learning_rate": 9.608101640159162e-07, "loss": 0.1146, "step": 27607 }, { "epoch": 0.805414551607445, "grad_norm": 1.1448666995095045, "learning_rate": 9.605317271672504e-07, "loss": 0.1121, "step": 27608 }, { "epoch": 0.8054437248380886, "grad_norm": 0.9329725147861546, "learning_rate": 9.60253326381994e-07, "loss": 0.1228, "step": 27609 }, { "epoch": 0.8054728980687321, "grad_norm": 0.7056684039714629, "learning_rate": 9.59974961662632e-07, "loss": 0.092, "step": 27610 }, { "epoch": 0.8055020712993757, "grad_norm": 0.9541838169658238, "learning_rate": 9.596966330116474e-07, "loss": 0.1125, "step": 27611 }, { "epoch": 0.8055312445300192, "grad_norm": 1.1040348580314125, "learning_rate": 9.594183404315283e-07, "loss": 0.1427, "step": 27612 }, { "epoch": 0.8055604177606628, "grad_norm": 0.8768262650652688, "learning_rate": 9.591400839247572e-07, "loss": 0.1166, "step": 27613 }, { "epoch": 0.8055895909913063, "grad_norm": 0.7615113329104115, "learning_rate": 9.58861863493818e-07, "loss": 0.0999, "step": 27614 }, { "epoch": 0.8056187642219499, "grad_norm": 0.912292119000475, "learning_rate": 9.58583679141195e-07, "loss": 0.1082, "step": 27615 }, { "epoch": 0.8056479374525936, "grad_norm": 0.8534327845555917, "learning_rate": 9.58305530869374e-07, "loss": 0.1074, "step": 27616 }, { "epoch": 0.8056771106832371, "grad_norm": 1.302506181373477, "learning_rate": 9.580274186808359e-07, "loss": 0.1009, "step": 27617 }, { "epoch": 0.8057062839138807, "grad_norm": 1.1892135792392173, "learning_rate": 9.577493425780631e-07, "loss": 0.1216, "step": 27618 }, { "epoch": 0.8057354571445242, "grad_norm": 1.0685644096264661, "learning_rate": 9.574713025635401e-07, "loss": 0.1243, "step": 27619 }, { "epoch": 0.8057646303751678, "grad_norm": 1.0613542035055563, "learning_rate": 9.571932986397474e-07, "loss": 0.1064, "step": 27620 }, { "epoch": 0.8057938036058113, "grad_norm": 1.0999879934902057, "learning_rate": 9.569153308091678e-07, "loss": 0.1002, "step": 27621 }, { "epoch": 0.8058229768364549, "grad_norm": 0.9100385971405007, "learning_rate": 9.566373990742845e-07, "loss": 0.1184, "step": 27622 }, { "epoch": 0.8058521500670984, "grad_norm": 0.8974380860024977, "learning_rate": 9.563595034375766e-07, "loss": 0.1017, "step": 27623 }, { "epoch": 0.805881323297742, "grad_norm": 0.8825603888271505, "learning_rate": 9.560816439015247e-07, "loss": 0.1246, "step": 27624 }, { "epoch": 0.8059104965283855, "grad_norm": 0.9686615295957507, "learning_rate": 9.55803820468612e-07, "loss": 0.1226, "step": 27625 }, { "epoch": 0.8059396697590291, "grad_norm": 0.7032030121516174, "learning_rate": 9.555260331413157e-07, "loss": 0.108, "step": 27626 }, { "epoch": 0.8059688429896726, "grad_norm": 0.9914186132599075, "learning_rate": 9.552482819221193e-07, "loss": 0.1107, "step": 27627 }, { "epoch": 0.8059980162203162, "grad_norm": 0.8088929042226313, "learning_rate": 9.54970566813499e-07, "loss": 0.1095, "step": 27628 }, { "epoch": 0.8060271894509597, "grad_norm": 1.0028305047125612, "learning_rate": 9.546928878179374e-07, "loss": 0.1124, "step": 27629 }, { "epoch": 0.8060563626816034, "grad_norm": 1.2467790967603478, "learning_rate": 9.54415244937912e-07, "loss": 0.1062, "step": 27630 }, { "epoch": 0.806085535912247, "grad_norm": 1.0096328635678125, "learning_rate": 9.541376381759004e-07, "loss": 0.1099, "step": 27631 }, { "epoch": 0.8061147091428905, "grad_norm": 0.7248636709813338, "learning_rate": 9.538600675343818e-07, "loss": 0.1066, "step": 27632 }, { "epoch": 0.8061438823735341, "grad_norm": 0.8359399294345164, "learning_rate": 9.53582533015836e-07, "loss": 0.1056, "step": 27633 }, { "epoch": 0.8061730556041776, "grad_norm": 0.9525697005330482, "learning_rate": 9.53305034622738e-07, "loss": 0.1472, "step": 27634 }, { "epoch": 0.8062022288348212, "grad_norm": 1.1082326229718025, "learning_rate": 9.530275723575677e-07, "loss": 0.113, "step": 27635 }, { "epoch": 0.8062314020654647, "grad_norm": 0.8652451949775221, "learning_rate": 9.527501462228018e-07, "loss": 0.1222, "step": 27636 }, { "epoch": 0.8062605752961083, "grad_norm": 1.0307970435761293, "learning_rate": 9.524727562209146e-07, "loss": 0.1402, "step": 27637 }, { "epoch": 0.8062897485267518, "grad_norm": 0.8585871109648557, "learning_rate": 9.521954023543844e-07, "loss": 0.123, "step": 27638 }, { "epoch": 0.8063189217573954, "grad_norm": 0.9834767927940743, "learning_rate": 9.519180846256893e-07, "loss": 0.1173, "step": 27639 }, { "epoch": 0.8063480949880389, "grad_norm": 0.8467804807672528, "learning_rate": 9.516408030373025e-07, "loss": 0.1374, "step": 27640 }, { "epoch": 0.8063772682186825, "grad_norm": 1.092355609652649, "learning_rate": 9.51363557591699e-07, "loss": 0.1256, "step": 27641 }, { "epoch": 0.806406441449326, "grad_norm": 1.1469343584533855, "learning_rate": 9.510863482913568e-07, "loss": 0.1139, "step": 27642 }, { "epoch": 0.8064356146799697, "grad_norm": 0.8730894118637729, "learning_rate": 9.508091751387489e-07, "loss": 0.1178, "step": 27643 }, { "epoch": 0.8064647879106133, "grad_norm": 0.7942972733331833, "learning_rate": 9.505320381363486e-07, "loss": 0.1097, "step": 27644 }, { "epoch": 0.8064939611412568, "grad_norm": 0.8877381070858066, "learning_rate": 9.502549372866321e-07, "loss": 0.1318, "step": 27645 }, { "epoch": 0.8065231343719004, "grad_norm": 0.8931044665754303, "learning_rate": 9.499778725920739e-07, "loss": 0.1133, "step": 27646 }, { "epoch": 0.8065523076025439, "grad_norm": 1.2627948123104578, "learning_rate": 9.497008440551464e-07, "loss": 0.1309, "step": 27647 }, { "epoch": 0.8065814808331875, "grad_norm": 0.9609561848997056, "learning_rate": 9.494238516783211e-07, "loss": 0.1153, "step": 27648 }, { "epoch": 0.806610654063831, "grad_norm": 1.00056393687836, "learning_rate": 9.491468954640742e-07, "loss": 0.1268, "step": 27649 }, { "epoch": 0.8066398272944746, "grad_norm": 0.9582638790325807, "learning_rate": 9.488699754148762e-07, "loss": 0.0995, "step": 27650 }, { "epoch": 0.8066690005251181, "grad_norm": 0.8134779507158203, "learning_rate": 9.485930915331992e-07, "loss": 0.1202, "step": 27651 }, { "epoch": 0.8066981737557617, "grad_norm": 0.7138897674898071, "learning_rate": 9.483162438215177e-07, "loss": 0.1104, "step": 27652 }, { "epoch": 0.8067273469864052, "grad_norm": 0.8479061846663208, "learning_rate": 9.480394322823011e-07, "loss": 0.122, "step": 27653 }, { "epoch": 0.8067565202170488, "grad_norm": 0.8387993100806406, "learning_rate": 9.477626569180198e-07, "loss": 0.114, "step": 27654 }, { "epoch": 0.8067856934476924, "grad_norm": 0.7209054024864514, "learning_rate": 9.474859177311479e-07, "loss": 0.1241, "step": 27655 }, { "epoch": 0.8068148666783359, "grad_norm": 1.036788125164798, "learning_rate": 9.472092147241529e-07, "loss": 0.1453, "step": 27656 }, { "epoch": 0.8068440399089796, "grad_norm": 0.7315252955723138, "learning_rate": 9.469325478995078e-07, "loss": 0.0744, "step": 27657 }, { "epoch": 0.8068732131396231, "grad_norm": 0.842430395675882, "learning_rate": 9.466559172596801e-07, "loss": 0.1302, "step": 27658 }, { "epoch": 0.8069023863702667, "grad_norm": 0.8221287547134344, "learning_rate": 9.463793228071422e-07, "loss": 0.1202, "step": 27659 }, { "epoch": 0.8069315596009102, "grad_norm": 1.0681433003230443, "learning_rate": 9.461027645443616e-07, "loss": 0.1337, "step": 27660 }, { "epoch": 0.8069607328315538, "grad_norm": 0.7929305994184553, "learning_rate": 9.458262424738069e-07, "loss": 0.1005, "step": 27661 }, { "epoch": 0.8069899060621973, "grad_norm": 0.7386760497497545, "learning_rate": 9.455497565979477e-07, "loss": 0.1427, "step": 27662 }, { "epoch": 0.8070190792928409, "grad_norm": 0.9587053256319008, "learning_rate": 9.452733069192532e-07, "loss": 0.117, "step": 27663 }, { "epoch": 0.8070482525234844, "grad_norm": 0.7468410251227554, "learning_rate": 9.449968934401899e-07, "loss": 0.1066, "step": 27664 }, { "epoch": 0.807077425754128, "grad_norm": 0.7986438414699737, "learning_rate": 9.447205161632272e-07, "loss": 0.1117, "step": 27665 }, { "epoch": 0.8071065989847716, "grad_norm": 0.9221184390188412, "learning_rate": 9.444441750908323e-07, "loss": 0.1423, "step": 27666 }, { "epoch": 0.8071357722154151, "grad_norm": 0.8046740951130008, "learning_rate": 9.441678702254697e-07, "loss": 0.1479, "step": 27667 }, { "epoch": 0.8071649454460587, "grad_norm": 0.8078955023180293, "learning_rate": 9.438916015696087e-07, "loss": 0.1273, "step": 27668 }, { "epoch": 0.8071941186767022, "grad_norm": 0.751614568114784, "learning_rate": 9.43615369125716e-07, "loss": 0.1113, "step": 27669 }, { "epoch": 0.8072232919073459, "grad_norm": 0.7839193155711048, "learning_rate": 9.433391728962571e-07, "loss": 0.1099, "step": 27670 }, { "epoch": 0.8072524651379894, "grad_norm": 0.6825041258920004, "learning_rate": 9.430630128836966e-07, "loss": 0.079, "step": 27671 }, { "epoch": 0.807281638368633, "grad_norm": 0.8099100181328582, "learning_rate": 9.427868890905023e-07, "loss": 0.1127, "step": 27672 }, { "epoch": 0.8073108115992765, "grad_norm": 0.790503562504446, "learning_rate": 9.425108015191364e-07, "loss": 0.1151, "step": 27673 }, { "epoch": 0.8073399848299201, "grad_norm": 1.3363533684220408, "learning_rate": 9.422347501720675e-07, "loss": 0.1368, "step": 27674 }, { "epoch": 0.8073691580605636, "grad_norm": 0.8836754545764983, "learning_rate": 9.419587350517562e-07, "loss": 0.1117, "step": 27675 }, { "epoch": 0.8073983312912072, "grad_norm": 0.8294599315489385, "learning_rate": 9.4168275616067e-07, "loss": 0.1263, "step": 27676 }, { "epoch": 0.8074275045218507, "grad_norm": 0.7710846682558097, "learning_rate": 9.414068135012716e-07, "loss": 0.1004, "step": 27677 }, { "epoch": 0.8074566777524943, "grad_norm": 0.9781689552740249, "learning_rate": 9.411309070760228e-07, "loss": 0.1198, "step": 27678 }, { "epoch": 0.8074858509831379, "grad_norm": 0.7372479514081962, "learning_rate": 9.408550368873882e-07, "loss": 0.111, "step": 27679 }, { "epoch": 0.8075150242137814, "grad_norm": 0.8232777820631658, "learning_rate": 9.405792029378324e-07, "loss": 0.1022, "step": 27680 }, { "epoch": 0.807544197444425, "grad_norm": 1.2378355493279536, "learning_rate": 9.403034052298148e-07, "loss": 0.1331, "step": 27681 }, { "epoch": 0.8075733706750685, "grad_norm": 0.8633461785558697, "learning_rate": 9.400276437658007e-07, "loss": 0.1218, "step": 27682 }, { "epoch": 0.8076025439057121, "grad_norm": 0.6479737271777818, "learning_rate": 9.397519185482506e-07, "loss": 0.0994, "step": 27683 }, { "epoch": 0.8076317171363557, "grad_norm": 1.0638117353299963, "learning_rate": 9.39476229579625e-07, "loss": 0.1247, "step": 27684 }, { "epoch": 0.8076608903669993, "grad_norm": 0.9631327341602408, "learning_rate": 9.392005768623863e-07, "loss": 0.1153, "step": 27685 }, { "epoch": 0.8076900635976428, "grad_norm": 0.91266006072369, "learning_rate": 9.389249603989964e-07, "loss": 0.1173, "step": 27686 }, { "epoch": 0.8077192368282864, "grad_norm": 1.076757805086941, "learning_rate": 9.38649380191915e-07, "loss": 0.1317, "step": 27687 }, { "epoch": 0.8077484100589299, "grad_norm": 0.8860938818454016, "learning_rate": 9.383738362436017e-07, "loss": 0.1151, "step": 27688 }, { "epoch": 0.8077775832895735, "grad_norm": 0.8796505794890483, "learning_rate": 9.380983285565182e-07, "loss": 0.0913, "step": 27689 }, { "epoch": 0.807806756520217, "grad_norm": 0.6516729246030777, "learning_rate": 9.378228571331227e-07, "loss": 0.1, "step": 27690 }, { "epoch": 0.8078359297508606, "grad_norm": 0.9564384973263884, "learning_rate": 9.375474219758729e-07, "loss": 0.1136, "step": 27691 }, { "epoch": 0.8078651029815042, "grad_norm": 0.9857789493763088, "learning_rate": 9.372720230872323e-07, "loss": 0.0961, "step": 27692 }, { "epoch": 0.8078942762121477, "grad_norm": 0.8264663643223091, "learning_rate": 9.369966604696573e-07, "loss": 0.0895, "step": 27693 }, { "epoch": 0.8079234494427913, "grad_norm": 0.9302521695413319, "learning_rate": 9.367213341256054e-07, "loss": 0.1204, "step": 27694 }, { "epoch": 0.8079526226734348, "grad_norm": 1.0324593926005705, "learning_rate": 9.364460440575363e-07, "loss": 0.1384, "step": 27695 }, { "epoch": 0.8079817959040784, "grad_norm": 0.7545117449966773, "learning_rate": 9.361707902679068e-07, "loss": 0.1011, "step": 27696 }, { "epoch": 0.8080109691347219, "grad_norm": 0.7125484529146792, "learning_rate": 9.358955727591729e-07, "loss": 0.1268, "step": 27697 }, { "epoch": 0.8080401423653656, "grad_norm": 0.8507698900342994, "learning_rate": 9.356203915337935e-07, "loss": 0.1134, "step": 27698 }, { "epoch": 0.8080693155960091, "grad_norm": 0.9029781710710214, "learning_rate": 9.353452465942264e-07, "loss": 0.1207, "step": 27699 }, { "epoch": 0.8080984888266527, "grad_norm": 0.889847297408012, "learning_rate": 9.350701379429261e-07, "loss": 0.1093, "step": 27700 }, { "epoch": 0.8081276620572962, "grad_norm": 0.7877630553778059, "learning_rate": 9.347950655823484e-07, "loss": 0.1, "step": 27701 }, { "epoch": 0.8081568352879398, "grad_norm": 0.7420402317438628, "learning_rate": 9.34520029514951e-07, "loss": 0.0957, "step": 27702 }, { "epoch": 0.8081860085185834, "grad_norm": 0.8436022398261921, "learning_rate": 9.342450297431871e-07, "loss": 0.1277, "step": 27703 }, { "epoch": 0.8082151817492269, "grad_norm": 0.7753417486698301, "learning_rate": 9.339700662695145e-07, "loss": 0.1069, "step": 27704 }, { "epoch": 0.8082443549798705, "grad_norm": 0.8434091077299489, "learning_rate": 9.336951390963849e-07, "loss": 0.1237, "step": 27705 }, { "epoch": 0.808273528210514, "grad_norm": 1.1354659045094726, "learning_rate": 9.334202482262555e-07, "loss": 0.1381, "step": 27706 }, { "epoch": 0.8083027014411576, "grad_norm": 0.8704295928961485, "learning_rate": 9.331453936615798e-07, "loss": 0.1012, "step": 27707 }, { "epoch": 0.8083318746718011, "grad_norm": 0.9215930248056898, "learning_rate": 9.328705754048095e-07, "loss": 0.1118, "step": 27708 }, { "epoch": 0.8083610479024447, "grad_norm": 0.8713600958684332, "learning_rate": 9.325957934584001e-07, "loss": 0.1059, "step": 27709 }, { "epoch": 0.8083902211330882, "grad_norm": 0.7628119466868182, "learning_rate": 9.323210478248057e-07, "loss": 0.1111, "step": 27710 }, { "epoch": 0.8084193943637319, "grad_norm": 0.9370638454702768, "learning_rate": 9.320463385064766e-07, "loss": 0.1165, "step": 27711 }, { "epoch": 0.8084485675943754, "grad_norm": 0.8615433986827776, "learning_rate": 9.317716655058678e-07, "loss": 0.1085, "step": 27712 }, { "epoch": 0.808477740825019, "grad_norm": 0.8872474163883061, "learning_rate": 9.314970288254304e-07, "loss": 0.1003, "step": 27713 }, { "epoch": 0.8085069140556626, "grad_norm": 0.9321866280096461, "learning_rate": 9.312224284676158e-07, "loss": 0.1269, "step": 27714 }, { "epoch": 0.8085360872863061, "grad_norm": 0.8037436535570405, "learning_rate": 9.309478644348751e-07, "loss": 0.122, "step": 27715 }, { "epoch": 0.8085652605169497, "grad_norm": 0.9128272271350647, "learning_rate": 9.306733367296622e-07, "loss": 0.1243, "step": 27716 }, { "epoch": 0.8085944337475932, "grad_norm": 1.0434697059821711, "learning_rate": 9.303988453544266e-07, "loss": 0.102, "step": 27717 }, { "epoch": 0.8086236069782368, "grad_norm": 0.7839297890649839, "learning_rate": 9.301243903116169e-07, "loss": 0.1151, "step": 27718 }, { "epoch": 0.8086527802088803, "grad_norm": 0.7976192774185791, "learning_rate": 9.298499716036863e-07, "loss": 0.1098, "step": 27719 }, { "epoch": 0.8086819534395239, "grad_norm": 0.9749869644334156, "learning_rate": 9.295755892330838e-07, "loss": 0.1226, "step": 27720 }, { "epoch": 0.8087111266701674, "grad_norm": 0.9294002766208463, "learning_rate": 9.293012432022563e-07, "loss": 0.1096, "step": 27721 }, { "epoch": 0.808740299900811, "grad_norm": 1.0845043734624233, "learning_rate": 9.290269335136576e-07, "loss": 0.1059, "step": 27722 }, { "epoch": 0.8087694731314545, "grad_norm": 0.8523074332648163, "learning_rate": 9.287526601697349e-07, "loss": 0.1204, "step": 27723 }, { "epoch": 0.8087986463620981, "grad_norm": 0.886266788779363, "learning_rate": 9.284784231729355e-07, "loss": 0.1131, "step": 27724 }, { "epoch": 0.8088278195927417, "grad_norm": 1.6201560529506194, "learning_rate": 9.282042225257099e-07, "loss": 0.0941, "step": 27725 }, { "epoch": 0.8088569928233853, "grad_norm": 0.8354550398540652, "learning_rate": 9.279300582305051e-07, "loss": 0.1198, "step": 27726 }, { "epoch": 0.8088861660540289, "grad_norm": 0.992604360467188, "learning_rate": 9.276559302897669e-07, "loss": 0.121, "step": 27727 }, { "epoch": 0.8089153392846724, "grad_norm": 1.0072788773839294, "learning_rate": 9.273818387059452e-07, "loss": 0.1139, "step": 27728 }, { "epoch": 0.808944512515316, "grad_norm": 0.9233711831501684, "learning_rate": 9.271077834814868e-07, "loss": 0.1056, "step": 27729 }, { "epoch": 0.8089736857459595, "grad_norm": 0.904104940376141, "learning_rate": 9.26833764618838e-07, "loss": 0.1421, "step": 27730 }, { "epoch": 0.8090028589766031, "grad_norm": 1.1756509487612572, "learning_rate": 9.265597821204441e-07, "loss": 0.1115, "step": 27731 }, { "epoch": 0.8090320322072466, "grad_norm": 0.9482912710344759, "learning_rate": 9.262858359887528e-07, "loss": 0.1092, "step": 27732 }, { "epoch": 0.8090612054378902, "grad_norm": 0.8394346876055967, "learning_rate": 9.260119262262085e-07, "loss": 0.1242, "step": 27733 }, { "epoch": 0.8090903786685337, "grad_norm": 0.9702058106309435, "learning_rate": 9.257380528352578e-07, "loss": 0.1043, "step": 27734 }, { "epoch": 0.8091195518991773, "grad_norm": 0.8907473523033501, "learning_rate": 9.254642158183441e-07, "loss": 0.0907, "step": 27735 }, { "epoch": 0.8091487251298208, "grad_norm": 0.9795930823804683, "learning_rate": 9.251904151779145e-07, "loss": 0.1045, "step": 27736 }, { "epoch": 0.8091778983604644, "grad_norm": 1.1694958440117467, "learning_rate": 9.24916650916412e-07, "loss": 0.1211, "step": 27737 }, { "epoch": 0.809207071591108, "grad_norm": 0.9396109202494459, "learning_rate": 9.246429230362797e-07, "loss": 0.1233, "step": 27738 }, { "epoch": 0.8092362448217516, "grad_norm": 0.8639898766860498, "learning_rate": 9.243692315399627e-07, "loss": 0.1127, "step": 27739 }, { "epoch": 0.8092654180523952, "grad_norm": 0.9180130668315164, "learning_rate": 9.240955764299053e-07, "loss": 0.1187, "step": 27740 }, { "epoch": 0.8092945912830387, "grad_norm": 1.0764279828707437, "learning_rate": 9.238219577085483e-07, "loss": 0.1268, "step": 27741 }, { "epoch": 0.8093237645136823, "grad_norm": 0.9050155148591006, "learning_rate": 9.235483753783375e-07, "loss": 0.1141, "step": 27742 }, { "epoch": 0.8093529377443258, "grad_norm": 0.8308343890269729, "learning_rate": 9.232748294417132e-07, "loss": 0.1148, "step": 27743 }, { "epoch": 0.8093821109749694, "grad_norm": 0.8282001548967434, "learning_rate": 9.230013199011168e-07, "loss": 0.1227, "step": 27744 }, { "epoch": 0.8094112842056129, "grad_norm": 1.0009096968741495, "learning_rate": 9.227278467589918e-07, "loss": 0.1234, "step": 27745 }, { "epoch": 0.8094404574362565, "grad_norm": 0.7376641217003067, "learning_rate": 9.224544100177801e-07, "loss": 0.0985, "step": 27746 }, { "epoch": 0.8094696306669, "grad_norm": 0.8762697498729055, "learning_rate": 9.221810096799222e-07, "loss": 0.0991, "step": 27747 }, { "epoch": 0.8094988038975436, "grad_norm": 1.0207785273047654, "learning_rate": 9.21907645747857e-07, "loss": 0.1134, "step": 27748 }, { "epoch": 0.8095279771281871, "grad_norm": 0.8337569521718208, "learning_rate": 9.21634318224029e-07, "loss": 0.0921, "step": 27749 }, { "epoch": 0.8095571503588307, "grad_norm": 0.7603370679586554, "learning_rate": 9.213610271108753e-07, "loss": 0.0994, "step": 27750 }, { "epoch": 0.8095863235894742, "grad_norm": 0.9298924413491145, "learning_rate": 9.210877724108347e-07, "loss": 0.1153, "step": 27751 }, { "epoch": 0.8096154968201179, "grad_norm": 0.765337120934524, "learning_rate": 9.208145541263514e-07, "loss": 0.1163, "step": 27752 }, { "epoch": 0.8096446700507615, "grad_norm": 0.9483243862733962, "learning_rate": 9.205413722598616e-07, "loss": 0.1175, "step": 27753 }, { "epoch": 0.809673843281405, "grad_norm": 0.9522069946984685, "learning_rate": 9.202682268138036e-07, "loss": 0.1199, "step": 27754 }, { "epoch": 0.8097030165120486, "grad_norm": 1.0336830645120536, "learning_rate": 9.19995117790618e-07, "loss": 0.1015, "step": 27755 }, { "epoch": 0.8097321897426921, "grad_norm": 0.8547717435982832, "learning_rate": 9.197220451927424e-07, "loss": 0.0967, "step": 27756 }, { "epoch": 0.8097613629733357, "grad_norm": 0.862362092638936, "learning_rate": 9.194490090226127e-07, "loss": 0.1078, "step": 27757 }, { "epoch": 0.8097905362039792, "grad_norm": 0.787151199295326, "learning_rate": 9.191760092826685e-07, "loss": 0.0973, "step": 27758 }, { "epoch": 0.8098197094346228, "grad_norm": 0.9128466409672559, "learning_rate": 9.189030459753473e-07, "loss": 0.1148, "step": 27759 }, { "epoch": 0.8098488826652663, "grad_norm": 0.8140903532431837, "learning_rate": 9.186301191030861e-07, "loss": 0.1199, "step": 27760 }, { "epoch": 0.8098780558959099, "grad_norm": 0.8068983792589345, "learning_rate": 9.183572286683195e-07, "loss": 0.1148, "step": 27761 }, { "epoch": 0.8099072291265534, "grad_norm": 0.9276602676040361, "learning_rate": 9.180843746734863e-07, "loss": 0.1135, "step": 27762 }, { "epoch": 0.809936402357197, "grad_norm": 0.9907750424037756, "learning_rate": 9.178115571210206e-07, "loss": 0.1409, "step": 27763 }, { "epoch": 0.8099655755878405, "grad_norm": 0.8760567874999488, "learning_rate": 9.175387760133591e-07, "loss": 0.1075, "step": 27764 }, { "epoch": 0.8099947488184842, "grad_norm": 1.0320778962467883, "learning_rate": 9.172660313529363e-07, "loss": 0.1209, "step": 27765 }, { "epoch": 0.8100239220491278, "grad_norm": 1.4146373263182843, "learning_rate": 9.16993323142189e-07, "loss": 0.1152, "step": 27766 }, { "epoch": 0.8100530952797713, "grad_norm": 0.8040419550955294, "learning_rate": 9.167206513835508e-07, "loss": 0.1067, "step": 27767 }, { "epoch": 0.8100822685104149, "grad_norm": 0.875952119479627, "learning_rate": 9.164480160794543e-07, "loss": 0.1287, "step": 27768 }, { "epoch": 0.8101114417410584, "grad_norm": 0.912674442463428, "learning_rate": 9.161754172323351e-07, "loss": 0.1207, "step": 27769 }, { "epoch": 0.810140614971702, "grad_norm": 0.941059971365258, "learning_rate": 9.159028548446281e-07, "loss": 0.1162, "step": 27770 }, { "epoch": 0.8101697882023455, "grad_norm": 0.8365171229245679, "learning_rate": 9.15630328918764e-07, "loss": 0.1123, "step": 27771 }, { "epoch": 0.8101989614329891, "grad_norm": 0.8387735287196747, "learning_rate": 9.153578394571788e-07, "loss": 0.1018, "step": 27772 }, { "epoch": 0.8102281346636326, "grad_norm": 0.8553702571973816, "learning_rate": 9.150853864623039e-07, "loss": 0.0986, "step": 27773 }, { "epoch": 0.8102573078942762, "grad_norm": 0.8284252048007986, "learning_rate": 9.148129699365699e-07, "loss": 0.1017, "step": 27774 }, { "epoch": 0.8102864811249197, "grad_norm": 0.9572269906039605, "learning_rate": 9.145405898824106e-07, "loss": 0.0964, "step": 27775 }, { "epoch": 0.8103156543555633, "grad_norm": 0.8696717750444994, "learning_rate": 9.142682463022589e-07, "loss": 0.1072, "step": 27776 }, { "epoch": 0.8103448275862069, "grad_norm": 0.8478827962096758, "learning_rate": 9.139959391985453e-07, "loss": 0.1148, "step": 27777 }, { "epoch": 0.8103740008168504, "grad_norm": 0.9358642936458822, "learning_rate": 9.137236685736988e-07, "loss": 0.1179, "step": 27778 }, { "epoch": 0.8104031740474941, "grad_norm": 0.8906320175570829, "learning_rate": 9.134514344301537e-07, "loss": 0.119, "step": 27779 }, { "epoch": 0.8104323472781376, "grad_norm": 0.9854779651616952, "learning_rate": 9.131792367703385e-07, "loss": 0.1043, "step": 27780 }, { "epoch": 0.8104615205087812, "grad_norm": 0.9770178275001017, "learning_rate": 9.129070755966807e-07, "loss": 0.1219, "step": 27781 }, { "epoch": 0.8104906937394247, "grad_norm": 0.7729644612952208, "learning_rate": 9.126349509116156e-07, "loss": 0.1022, "step": 27782 }, { "epoch": 0.8105198669700683, "grad_norm": 1.0305427076463296, "learning_rate": 9.123628627175696e-07, "loss": 0.1196, "step": 27783 }, { "epoch": 0.8105490402007118, "grad_norm": 1.077065156779928, "learning_rate": 9.120908110169713e-07, "loss": 0.1118, "step": 27784 }, { "epoch": 0.8105782134313554, "grad_norm": 1.1021114832715748, "learning_rate": 9.118187958122515e-07, "loss": 0.1193, "step": 27785 }, { "epoch": 0.8106073866619989, "grad_norm": 0.9724480263050775, "learning_rate": 9.115468171058373e-07, "loss": 0.1157, "step": 27786 }, { "epoch": 0.8106365598926425, "grad_norm": 0.944353015182522, "learning_rate": 9.11274874900156e-07, "loss": 0.0942, "step": 27787 }, { "epoch": 0.810665733123286, "grad_norm": 1.1658182485253474, "learning_rate": 9.110029691976368e-07, "loss": 0.092, "step": 27788 }, { "epoch": 0.8106949063539296, "grad_norm": 0.8876520200068828, "learning_rate": 9.10731100000708e-07, "loss": 0.1068, "step": 27789 }, { "epoch": 0.8107240795845732, "grad_norm": 0.819044641947194, "learning_rate": 9.104592673117956e-07, "loss": 0.1288, "step": 27790 }, { "epoch": 0.8107532528152167, "grad_norm": 0.8926254486360656, "learning_rate": 9.101874711333258e-07, "loss": 0.1207, "step": 27791 }, { "epoch": 0.8107824260458604, "grad_norm": 0.8922334135057118, "learning_rate": 9.09915711467727e-07, "loss": 0.1231, "step": 27792 }, { "epoch": 0.8108115992765039, "grad_norm": 0.815953010826716, "learning_rate": 9.09643988317423e-07, "loss": 0.1105, "step": 27793 }, { "epoch": 0.8108407725071475, "grad_norm": 0.8055476566901355, "learning_rate": 9.093723016848427e-07, "loss": 0.1228, "step": 27794 }, { "epoch": 0.810869945737791, "grad_norm": 0.9657413088559, "learning_rate": 9.091006515724083e-07, "loss": 0.1214, "step": 27795 }, { "epoch": 0.8108991189684346, "grad_norm": 0.9176982476270428, "learning_rate": 9.088290379825481e-07, "loss": 0.1097, "step": 27796 }, { "epoch": 0.8109282921990781, "grad_norm": 1.1033443173486115, "learning_rate": 9.085574609176856e-07, "loss": 0.1253, "step": 27797 }, { "epoch": 0.8109574654297217, "grad_norm": 0.8570696955032819, "learning_rate": 9.082859203802436e-07, "loss": 0.1237, "step": 27798 }, { "epoch": 0.8109866386603652, "grad_norm": 1.1833928866140564, "learning_rate": 9.08014416372649e-07, "loss": 0.1124, "step": 27799 }, { "epoch": 0.8110158118910088, "grad_norm": 0.8803872613469325, "learning_rate": 9.077429488973255e-07, "loss": 0.0946, "step": 27800 }, { "epoch": 0.8110449851216524, "grad_norm": 0.8849863394553857, "learning_rate": 9.07471517956695e-07, "loss": 0.1138, "step": 27801 }, { "epoch": 0.8110741583522959, "grad_norm": 0.9009437140714899, "learning_rate": 9.07200123553183e-07, "loss": 0.1063, "step": 27802 }, { "epoch": 0.8111033315829395, "grad_norm": 0.8611692773242475, "learning_rate": 9.069287656892118e-07, "loss": 0.1176, "step": 27803 }, { "epoch": 0.811132504813583, "grad_norm": 0.714812702387066, "learning_rate": 9.066574443672016e-07, "loss": 0.1183, "step": 27804 }, { "epoch": 0.8111616780442266, "grad_norm": 0.772792801634389, "learning_rate": 9.063861595895767e-07, "loss": 0.1088, "step": 27805 }, { "epoch": 0.8111908512748702, "grad_norm": 1.0948717600218414, "learning_rate": 9.061149113587603e-07, "loss": 0.1112, "step": 27806 }, { "epoch": 0.8112200245055138, "grad_norm": 0.8362913322933143, "learning_rate": 9.058436996771724e-07, "loss": 0.106, "step": 27807 }, { "epoch": 0.8112491977361573, "grad_norm": 1.2474769822718166, "learning_rate": 9.055725245472335e-07, "loss": 0.1213, "step": 27808 }, { "epoch": 0.8112783709668009, "grad_norm": 0.8080131307207777, "learning_rate": 9.053013859713672e-07, "loss": 0.1128, "step": 27809 }, { "epoch": 0.8113075441974444, "grad_norm": 0.744940244018974, "learning_rate": 9.050302839519926e-07, "loss": 0.1084, "step": 27810 }, { "epoch": 0.811336717428088, "grad_norm": 0.7687660014294762, "learning_rate": 9.047592184915272e-07, "loss": 0.0974, "step": 27811 }, { "epoch": 0.8113658906587315, "grad_norm": 0.8384119192449437, "learning_rate": 9.044881895923969e-07, "loss": 0.1269, "step": 27812 }, { "epoch": 0.8113950638893751, "grad_norm": 0.9045942397770038, "learning_rate": 9.042171972570179e-07, "loss": 0.1215, "step": 27813 }, { "epoch": 0.8114242371200187, "grad_norm": 0.8526699918228667, "learning_rate": 9.039462414878092e-07, "loss": 0.1143, "step": 27814 }, { "epoch": 0.8114534103506622, "grad_norm": 0.8874721617085721, "learning_rate": 9.036753222871914e-07, "loss": 0.1192, "step": 27815 }, { "epoch": 0.8114825835813058, "grad_norm": 0.8227247161142672, "learning_rate": 9.034044396575825e-07, "loss": 0.1189, "step": 27816 }, { "epoch": 0.8115117568119493, "grad_norm": 0.9705319413020947, "learning_rate": 9.031335936014001e-07, "loss": 0.1076, "step": 27817 }, { "epoch": 0.8115409300425929, "grad_norm": 1.1221833081400452, "learning_rate": 9.028627841210625e-07, "loss": 0.119, "step": 27818 }, { "epoch": 0.8115701032732365, "grad_norm": 0.9065303454259425, "learning_rate": 9.025920112189895e-07, "loss": 0.1022, "step": 27819 }, { "epoch": 0.8115992765038801, "grad_norm": 0.8308899240624873, "learning_rate": 9.023212748975968e-07, "loss": 0.1304, "step": 27820 }, { "epoch": 0.8116284497345236, "grad_norm": 0.8682517098380476, "learning_rate": 9.020505751593001e-07, "loss": 0.1038, "step": 27821 }, { "epoch": 0.8116576229651672, "grad_norm": 0.8321078365736718, "learning_rate": 9.01779912006519e-07, "loss": 0.1128, "step": 27822 }, { "epoch": 0.8116867961958107, "grad_norm": 0.9589914766280058, "learning_rate": 9.015092854416668e-07, "loss": 0.1025, "step": 27823 }, { "epoch": 0.8117159694264543, "grad_norm": 0.9256429735490215, "learning_rate": 9.012386954671631e-07, "loss": 0.1162, "step": 27824 }, { "epoch": 0.8117451426570979, "grad_norm": 0.9232161626985291, "learning_rate": 9.009681420854205e-07, "loss": 0.1126, "step": 27825 }, { "epoch": 0.8117743158877414, "grad_norm": 0.8141652835612888, "learning_rate": 9.006976252988569e-07, "loss": 0.1282, "step": 27826 }, { "epoch": 0.811803489118385, "grad_norm": 0.8589456492842638, "learning_rate": 9.004271451098867e-07, "loss": 0.1413, "step": 27827 }, { "epoch": 0.8118326623490285, "grad_norm": 0.8635579234007896, "learning_rate": 9.001567015209229e-07, "loss": 0.1083, "step": 27828 }, { "epoch": 0.8118618355796721, "grad_norm": 0.8168511748696249, "learning_rate": 8.998862945343811e-07, "loss": 0.114, "step": 27829 }, { "epoch": 0.8118910088103156, "grad_norm": 1.0173724138335858, "learning_rate": 8.996159241526775e-07, "loss": 0.1285, "step": 27830 }, { "epoch": 0.8119201820409592, "grad_norm": 1.1457423515664586, "learning_rate": 8.993455903782222e-07, "loss": 0.1166, "step": 27831 }, { "epoch": 0.8119493552716027, "grad_norm": 1.0103305109512088, "learning_rate": 8.990752932134322e-07, "loss": 0.0882, "step": 27832 }, { "epoch": 0.8119785285022464, "grad_norm": 0.8465947302811158, "learning_rate": 8.98805032660719e-07, "loss": 0.1103, "step": 27833 }, { "epoch": 0.8120077017328899, "grad_norm": 0.8989232846716325, "learning_rate": 8.985348087224943e-07, "loss": 0.1199, "step": 27834 }, { "epoch": 0.8120368749635335, "grad_norm": 1.0875244874638665, "learning_rate": 8.982646214011715e-07, "loss": 0.1047, "step": 27835 }, { "epoch": 0.812066048194177, "grad_norm": 0.9733932925803976, "learning_rate": 8.979944706991639e-07, "loss": 0.0913, "step": 27836 }, { "epoch": 0.8120952214248206, "grad_norm": 1.05482053175379, "learning_rate": 8.977243566188831e-07, "loss": 0.1282, "step": 27837 }, { "epoch": 0.8121243946554642, "grad_norm": 0.9503921749161258, "learning_rate": 8.974542791627383e-07, "loss": 0.1615, "step": 27838 }, { "epoch": 0.8121535678861077, "grad_norm": 0.9053194371817254, "learning_rate": 8.971842383331436e-07, "loss": 0.123, "step": 27839 }, { "epoch": 0.8121827411167513, "grad_norm": 0.9312336263357013, "learning_rate": 8.969142341325088e-07, "loss": 0.1165, "step": 27840 }, { "epoch": 0.8122119143473948, "grad_norm": 0.8743210919694165, "learning_rate": 8.966442665632418e-07, "loss": 0.1173, "step": 27841 }, { "epoch": 0.8122410875780384, "grad_norm": 1.1143132195729024, "learning_rate": 8.963743356277577e-07, "loss": 0.1225, "step": 27842 }, { "epoch": 0.8122702608086819, "grad_norm": 0.8115040841378355, "learning_rate": 8.961044413284636e-07, "loss": 0.1087, "step": 27843 }, { "epoch": 0.8122994340393255, "grad_norm": 0.9327315347288102, "learning_rate": 8.958345836677684e-07, "loss": 0.1204, "step": 27844 }, { "epoch": 0.812328607269969, "grad_norm": 1.105081627639045, "learning_rate": 8.955647626480835e-07, "loss": 0.0988, "step": 27845 }, { "epoch": 0.8123577805006127, "grad_norm": 0.9425030223238433, "learning_rate": 8.952949782718162e-07, "loss": 0.1166, "step": 27846 }, { "epoch": 0.8123869537312562, "grad_norm": 0.882574285397809, "learning_rate": 8.950252305413748e-07, "loss": 0.1379, "step": 27847 }, { "epoch": 0.8124161269618998, "grad_norm": 0.7754649697258346, "learning_rate": 8.947555194591679e-07, "loss": 0.1188, "step": 27848 }, { "epoch": 0.8124453001925434, "grad_norm": 1.03417462139561, "learning_rate": 8.944858450276051e-07, "loss": 0.1215, "step": 27849 }, { "epoch": 0.8124744734231869, "grad_norm": 0.9348661378267702, "learning_rate": 8.942162072490924e-07, "loss": 0.1114, "step": 27850 }, { "epoch": 0.8125036466538305, "grad_norm": 0.6796537220442412, "learning_rate": 8.93946606126036e-07, "loss": 0.1118, "step": 27851 }, { "epoch": 0.812532819884474, "grad_norm": 1.1326677677469623, "learning_rate": 8.93677041660846e-07, "loss": 0.1392, "step": 27852 }, { "epoch": 0.8125619931151176, "grad_norm": 0.8264266582231248, "learning_rate": 8.93407513855925e-07, "loss": 0.1091, "step": 27853 }, { "epoch": 0.8125911663457611, "grad_norm": 0.7842864713261196, "learning_rate": 8.931380227136832e-07, "loss": 0.0898, "step": 27854 }, { "epoch": 0.8126203395764047, "grad_norm": 1.0625415526642308, "learning_rate": 8.928685682365229e-07, "loss": 0.1081, "step": 27855 }, { "epoch": 0.8126495128070482, "grad_norm": 0.8304302302014871, "learning_rate": 8.925991504268533e-07, "loss": 0.1101, "step": 27856 }, { "epoch": 0.8126786860376918, "grad_norm": 0.8309523917579732, "learning_rate": 8.92329769287078e-07, "loss": 0.1093, "step": 27857 }, { "epoch": 0.8127078592683353, "grad_norm": 1.4803614141624153, "learning_rate": 8.920604248196007e-07, "loss": 0.1201, "step": 27858 }, { "epoch": 0.8127370324989789, "grad_norm": 1.0780219900720958, "learning_rate": 8.917911170268273e-07, "loss": 0.1222, "step": 27859 }, { "epoch": 0.8127662057296225, "grad_norm": 0.7461002323140512, "learning_rate": 8.91521845911163e-07, "loss": 0.0937, "step": 27860 }, { "epoch": 0.8127953789602661, "grad_norm": 0.8102491491294351, "learning_rate": 8.912526114750097e-07, "loss": 0.1078, "step": 27861 }, { "epoch": 0.8128245521909097, "grad_norm": 1.1440873401470806, "learning_rate": 8.90983413720774e-07, "loss": 0.1217, "step": 27862 }, { "epoch": 0.8128537254215532, "grad_norm": 0.7364765684460183, "learning_rate": 8.907142526508572e-07, "loss": 0.12, "step": 27863 }, { "epoch": 0.8128828986521968, "grad_norm": 1.0929421996467108, "learning_rate": 8.904451282676612e-07, "loss": 0.1081, "step": 27864 }, { "epoch": 0.8129120718828403, "grad_norm": 0.8992288094888158, "learning_rate": 8.901760405735898e-07, "loss": 0.1081, "step": 27865 }, { "epoch": 0.8129412451134839, "grad_norm": 1.0922194465089228, "learning_rate": 8.899069895710477e-07, "loss": 0.1267, "step": 27866 }, { "epoch": 0.8129704183441274, "grad_norm": 0.9395317411324351, "learning_rate": 8.89637975262434e-07, "loss": 0.1228, "step": 27867 }, { "epoch": 0.812999591574771, "grad_norm": 0.7290633268101204, "learning_rate": 8.893689976501507e-07, "loss": 0.0967, "step": 27868 }, { "epoch": 0.8130287648054145, "grad_norm": 0.8547884924335176, "learning_rate": 8.891000567366004e-07, "loss": 0.1173, "step": 27869 }, { "epoch": 0.8130579380360581, "grad_norm": 1.0524216083013698, "learning_rate": 8.888311525241822e-07, "loss": 0.1247, "step": 27870 }, { "epoch": 0.8130871112667016, "grad_norm": 0.9536313632947281, "learning_rate": 8.885622850152986e-07, "loss": 0.0942, "step": 27871 }, { "epoch": 0.8131162844973452, "grad_norm": 0.9656085483237548, "learning_rate": 8.8829345421235e-07, "loss": 0.1135, "step": 27872 }, { "epoch": 0.8131454577279889, "grad_norm": 0.788840840057328, "learning_rate": 8.880246601177361e-07, "loss": 0.1236, "step": 27873 }, { "epoch": 0.8131746309586324, "grad_norm": 0.8092387032392127, "learning_rate": 8.877559027338556e-07, "loss": 0.108, "step": 27874 }, { "epoch": 0.813203804189276, "grad_norm": 1.5655596722106258, "learning_rate": 8.874871820631098e-07, "loss": 0.1187, "step": 27875 }, { "epoch": 0.8132329774199195, "grad_norm": 0.7460362896971863, "learning_rate": 8.872184981078952e-07, "loss": 0.1205, "step": 27876 }, { "epoch": 0.8132621506505631, "grad_norm": 0.924290849914, "learning_rate": 8.869498508706137e-07, "loss": 0.1047, "step": 27877 }, { "epoch": 0.8132913238812066, "grad_norm": 0.7165676662127671, "learning_rate": 8.866812403536601e-07, "loss": 0.1213, "step": 27878 }, { "epoch": 0.8133204971118502, "grad_norm": 0.7409949292685086, "learning_rate": 8.864126665594363e-07, "loss": 0.1043, "step": 27879 }, { "epoch": 0.8133496703424937, "grad_norm": 0.825083620554212, "learning_rate": 8.861441294903383e-07, "loss": 0.1412, "step": 27880 }, { "epoch": 0.8133788435731373, "grad_norm": 0.8182300223152272, "learning_rate": 8.858756291487619e-07, "loss": 0.1253, "step": 27881 }, { "epoch": 0.8134080168037808, "grad_norm": 0.7415768586442154, "learning_rate": 8.856071655371057e-07, "loss": 0.1178, "step": 27882 }, { "epoch": 0.8134371900344244, "grad_norm": 0.8896231090110454, "learning_rate": 8.853387386577677e-07, "loss": 0.1163, "step": 27883 }, { "epoch": 0.8134663632650679, "grad_norm": 0.8696533261783911, "learning_rate": 8.85070348513144e-07, "loss": 0.1107, "step": 27884 }, { "epoch": 0.8134955364957115, "grad_norm": 0.775099061652046, "learning_rate": 8.84801995105628e-07, "loss": 0.1231, "step": 27885 }, { "epoch": 0.813524709726355, "grad_norm": 0.7282668608994736, "learning_rate": 8.845336784376185e-07, "loss": 0.1067, "step": 27886 }, { "epoch": 0.8135538829569987, "grad_norm": 0.5516454272813409, "learning_rate": 8.842653985115102e-07, "loss": 0.1039, "step": 27887 }, { "epoch": 0.8135830561876423, "grad_norm": 0.9617941084820119, "learning_rate": 8.839971553296956e-07, "loss": 0.1312, "step": 27888 }, { "epoch": 0.8136122294182858, "grad_norm": 0.84508873568692, "learning_rate": 8.837289488945738e-07, "loss": 0.0992, "step": 27889 }, { "epoch": 0.8136414026489294, "grad_norm": 0.9907097824580179, "learning_rate": 8.834607792085375e-07, "loss": 0.109, "step": 27890 }, { "epoch": 0.8136705758795729, "grad_norm": 0.9375159128166626, "learning_rate": 8.831926462739799e-07, "loss": 0.1248, "step": 27891 }, { "epoch": 0.8136997491102165, "grad_norm": 0.9239841975052013, "learning_rate": 8.829245500932959e-07, "loss": 0.1352, "step": 27892 }, { "epoch": 0.81372892234086, "grad_norm": 0.8556550843005295, "learning_rate": 8.826564906688794e-07, "loss": 0.1083, "step": 27893 }, { "epoch": 0.8137580955715036, "grad_norm": 0.9812198149961999, "learning_rate": 8.823884680031214e-07, "loss": 0.1285, "step": 27894 }, { "epoch": 0.8137872688021471, "grad_norm": 0.6997671115908596, "learning_rate": 8.821204820984164e-07, "loss": 0.122, "step": 27895 }, { "epoch": 0.8138164420327907, "grad_norm": 0.7670904985101262, "learning_rate": 8.818525329571581e-07, "loss": 0.0922, "step": 27896 }, { "epoch": 0.8138456152634342, "grad_norm": 0.9271316138154613, "learning_rate": 8.815846205817369e-07, "loss": 0.0938, "step": 27897 }, { "epoch": 0.8138747884940778, "grad_norm": 0.8567940019276363, "learning_rate": 8.813167449745436e-07, "loss": 0.1027, "step": 27898 }, { "epoch": 0.8139039617247213, "grad_norm": 0.7482419574374475, "learning_rate": 8.810489061379728e-07, "loss": 0.1055, "step": 27899 }, { "epoch": 0.813933134955365, "grad_norm": 0.8392858184176683, "learning_rate": 8.80781104074413e-07, "loss": 0.1094, "step": 27900 }, { "epoch": 0.8139623081860086, "grad_norm": 0.7941550962089481, "learning_rate": 8.805133387862558e-07, "loss": 0.103, "step": 27901 }, { "epoch": 0.8139914814166521, "grad_norm": 0.979570616426688, "learning_rate": 8.802456102758938e-07, "loss": 0.1238, "step": 27902 }, { "epoch": 0.8140206546472957, "grad_norm": 0.7537379325636532, "learning_rate": 8.799779185457153e-07, "loss": 0.0994, "step": 27903 }, { "epoch": 0.8140498278779392, "grad_norm": 0.803492811338486, "learning_rate": 8.797102635981092e-07, "loss": 0.1026, "step": 27904 }, { "epoch": 0.8140790011085828, "grad_norm": 0.9189270594140458, "learning_rate": 8.794426454354671e-07, "loss": 0.1066, "step": 27905 }, { "epoch": 0.8141081743392263, "grad_norm": 0.9376877902881371, "learning_rate": 8.791750640601765e-07, "loss": 0.1217, "step": 27906 }, { "epoch": 0.8141373475698699, "grad_norm": 0.7900601135877897, "learning_rate": 8.789075194746288e-07, "loss": 0.1024, "step": 27907 }, { "epoch": 0.8141665208005134, "grad_norm": 0.7052171852321076, "learning_rate": 8.786400116812093e-07, "loss": 0.1132, "step": 27908 }, { "epoch": 0.814195694031157, "grad_norm": 0.7881679378547637, "learning_rate": 8.783725406823095e-07, "loss": 0.1118, "step": 27909 }, { "epoch": 0.8142248672618005, "grad_norm": 0.8932581135162265, "learning_rate": 8.781051064803153e-07, "loss": 0.1019, "step": 27910 }, { "epoch": 0.8142540404924441, "grad_norm": 0.7752407613929274, "learning_rate": 8.778377090776136e-07, "loss": 0.1169, "step": 27911 }, { "epoch": 0.8142832137230877, "grad_norm": 0.9243660125538735, "learning_rate": 8.775703484765929e-07, "loss": 0.1159, "step": 27912 }, { "epoch": 0.8143123869537312, "grad_norm": 0.8063009476385474, "learning_rate": 8.773030246796416e-07, "loss": 0.1161, "step": 27913 }, { "epoch": 0.8143415601843749, "grad_norm": 0.8555816929791252, "learning_rate": 8.770357376891442e-07, "loss": 0.1098, "step": 27914 }, { "epoch": 0.8143707334150184, "grad_norm": 0.9909714390437983, "learning_rate": 8.767684875074867e-07, "loss": 0.1255, "step": 27915 }, { "epoch": 0.814399906645662, "grad_norm": 0.769080409173358, "learning_rate": 8.765012741370566e-07, "loss": 0.1154, "step": 27916 }, { "epoch": 0.8144290798763055, "grad_norm": 0.9236014058704317, "learning_rate": 8.762340975802392e-07, "loss": 0.0981, "step": 27917 }, { "epoch": 0.8144582531069491, "grad_norm": 0.7957683463367754, "learning_rate": 8.759669578394165e-07, "loss": 0.1003, "step": 27918 }, { "epoch": 0.8144874263375926, "grad_norm": 0.9393352776753457, "learning_rate": 8.756998549169793e-07, "loss": 0.1296, "step": 27919 }, { "epoch": 0.8145165995682362, "grad_norm": 0.9901589837775178, "learning_rate": 8.754327888153085e-07, "loss": 0.1265, "step": 27920 }, { "epoch": 0.8145457727988797, "grad_norm": 0.9328615769706948, "learning_rate": 8.751657595367885e-07, "loss": 0.1105, "step": 27921 }, { "epoch": 0.8145749460295233, "grad_norm": 0.8225611506888997, "learning_rate": 8.74898767083805e-07, "loss": 0.1214, "step": 27922 }, { "epoch": 0.8146041192601668, "grad_norm": 0.7273666981494035, "learning_rate": 8.74631811458741e-07, "loss": 0.0917, "step": 27923 }, { "epoch": 0.8146332924908104, "grad_norm": 0.7965634618209972, "learning_rate": 8.743648926639775e-07, "loss": 0.1348, "step": 27924 }, { "epoch": 0.814662465721454, "grad_norm": 0.9075342283984179, "learning_rate": 8.740980107018998e-07, "loss": 0.1138, "step": 27925 }, { "epoch": 0.8146916389520975, "grad_norm": 0.958721682505962, "learning_rate": 8.73831165574891e-07, "loss": 0.1293, "step": 27926 }, { "epoch": 0.8147208121827412, "grad_norm": 0.8471685867698534, "learning_rate": 8.735643572853325e-07, "loss": 0.0987, "step": 27927 }, { "epoch": 0.8147499854133847, "grad_norm": 0.9322191934295897, "learning_rate": 8.732975858356057e-07, "loss": 0.1086, "step": 27928 }, { "epoch": 0.8147791586440283, "grad_norm": 0.8270460426510711, "learning_rate": 8.730308512280938e-07, "loss": 0.1044, "step": 27929 }, { "epoch": 0.8148083318746718, "grad_norm": 0.7654599046546438, "learning_rate": 8.72764153465176e-07, "loss": 0.1168, "step": 27930 }, { "epoch": 0.8148375051053154, "grad_norm": 0.953571558305935, "learning_rate": 8.724974925492347e-07, "loss": 0.1308, "step": 27931 }, { "epoch": 0.8148666783359589, "grad_norm": 0.8514589093817848, "learning_rate": 8.722308684826514e-07, "loss": 0.1106, "step": 27932 }, { "epoch": 0.8148958515666025, "grad_norm": 0.8506731842034587, "learning_rate": 8.719642812678059e-07, "loss": 0.1331, "step": 27933 }, { "epoch": 0.814925024797246, "grad_norm": 1.485484176016856, "learning_rate": 8.716977309070762e-07, "loss": 0.1125, "step": 27934 }, { "epoch": 0.8149541980278896, "grad_norm": 1.0000498783709355, "learning_rate": 8.714312174028456e-07, "loss": 0.1156, "step": 27935 }, { "epoch": 0.8149833712585332, "grad_norm": 1.086574623093909, "learning_rate": 8.711647407574897e-07, "loss": 0.0874, "step": 27936 }, { "epoch": 0.8150125444891767, "grad_norm": 1.00993218665719, "learning_rate": 8.708983009733906e-07, "loss": 0.1451, "step": 27937 }, { "epoch": 0.8150417177198203, "grad_norm": 1.927800729165628, "learning_rate": 8.706318980529249e-07, "loss": 0.1237, "step": 27938 }, { "epoch": 0.8150708909504638, "grad_norm": 0.7889614899462662, "learning_rate": 8.703655319984728e-07, "loss": 0.1027, "step": 27939 }, { "epoch": 0.8151000641811074, "grad_norm": 1.124823704480609, "learning_rate": 8.700992028124116e-07, "loss": 0.1134, "step": 27940 }, { "epoch": 0.815129237411751, "grad_norm": 1.026337449686936, "learning_rate": 8.698329104971176e-07, "loss": 0.116, "step": 27941 }, { "epoch": 0.8151584106423946, "grad_norm": 0.8714488107477869, "learning_rate": 8.695666550549692e-07, "loss": 0.0998, "step": 27942 }, { "epoch": 0.8151875838730381, "grad_norm": 0.7970873611052459, "learning_rate": 8.693004364883451e-07, "loss": 0.117, "step": 27943 }, { "epoch": 0.8152167571036817, "grad_norm": 0.9040347466080777, "learning_rate": 8.690342547996205e-07, "loss": 0.1015, "step": 27944 }, { "epoch": 0.8152459303343252, "grad_norm": 1.1349387215847295, "learning_rate": 8.687681099911704e-07, "loss": 0.1231, "step": 27945 }, { "epoch": 0.8152751035649688, "grad_norm": 1.0015897725252845, "learning_rate": 8.685020020653745e-07, "loss": 0.1299, "step": 27946 }, { "epoch": 0.8153042767956123, "grad_norm": 1.0853462300215388, "learning_rate": 8.682359310246058e-07, "loss": 0.1263, "step": 27947 }, { "epoch": 0.8153334500262559, "grad_norm": 1.021627090189694, "learning_rate": 8.67969896871238e-07, "loss": 0.1222, "step": 27948 }, { "epoch": 0.8153626232568995, "grad_norm": 0.9893004089744691, "learning_rate": 8.677038996076509e-07, "loss": 0.1376, "step": 27949 }, { "epoch": 0.815391796487543, "grad_norm": 0.677533835465219, "learning_rate": 8.674379392362175e-07, "loss": 0.1018, "step": 27950 }, { "epoch": 0.8154209697181866, "grad_norm": 0.6357550110472617, "learning_rate": 8.671720157593099e-07, "loss": 0.1037, "step": 27951 }, { "epoch": 0.8154501429488301, "grad_norm": 0.8820956278642804, "learning_rate": 8.669061291793051e-07, "loss": 0.129, "step": 27952 }, { "epoch": 0.8154793161794737, "grad_norm": 0.862788822741437, "learning_rate": 8.666402794985762e-07, "loss": 0.1071, "step": 27953 }, { "epoch": 0.8155084894101172, "grad_norm": 0.9375392918641782, "learning_rate": 8.663744667194946e-07, "loss": 0.1031, "step": 27954 }, { "epoch": 0.8155376626407609, "grad_norm": 0.759271459198332, "learning_rate": 8.661086908444349e-07, "loss": 0.1047, "step": 27955 }, { "epoch": 0.8155668358714044, "grad_norm": 1.0672211803055243, "learning_rate": 8.658429518757716e-07, "loss": 0.1075, "step": 27956 }, { "epoch": 0.815596009102048, "grad_norm": 0.835350576889197, "learning_rate": 8.655772498158754e-07, "loss": 0.1138, "step": 27957 }, { "epoch": 0.8156251823326915, "grad_norm": 0.6703112766894321, "learning_rate": 8.653115846671173e-07, "loss": 0.1132, "step": 27958 }, { "epoch": 0.8156543555633351, "grad_norm": 0.8498449228358369, "learning_rate": 8.650459564318714e-07, "loss": 0.1214, "step": 27959 }, { "epoch": 0.8156835287939787, "grad_norm": 1.0296137250965731, "learning_rate": 8.647803651125069e-07, "loss": 0.1206, "step": 27960 }, { "epoch": 0.8157127020246222, "grad_norm": 0.9620023355296395, "learning_rate": 8.645148107113976e-07, "loss": 0.1066, "step": 27961 }, { "epoch": 0.8157418752552658, "grad_norm": 0.8697029912694123, "learning_rate": 8.642492932309116e-07, "loss": 0.1154, "step": 27962 }, { "epoch": 0.8157710484859093, "grad_norm": 0.872224754036844, "learning_rate": 8.639838126734218e-07, "loss": 0.1058, "step": 27963 }, { "epoch": 0.8158002217165529, "grad_norm": 0.7181466266117666, "learning_rate": 8.63718369041296e-07, "loss": 0.1086, "step": 27964 }, { "epoch": 0.8158293949471964, "grad_norm": 0.7093394701015387, "learning_rate": 8.634529623369059e-07, "loss": 0.1303, "step": 27965 }, { "epoch": 0.81585856817784, "grad_norm": 1.0146880919739127, "learning_rate": 8.631875925626193e-07, "loss": 0.1167, "step": 27966 }, { "epoch": 0.8158877414084835, "grad_norm": 0.6976651318338014, "learning_rate": 8.629222597208081e-07, "loss": 0.1152, "step": 27967 }, { "epoch": 0.8159169146391272, "grad_norm": 0.7889779039807315, "learning_rate": 8.626569638138377e-07, "loss": 0.0986, "step": 27968 }, { "epoch": 0.8159460878697707, "grad_norm": 0.7200946525928537, "learning_rate": 8.623917048440794e-07, "loss": 0.1368, "step": 27969 }, { "epoch": 0.8159752611004143, "grad_norm": 0.8065351815209023, "learning_rate": 8.621264828139003e-07, "loss": 0.1251, "step": 27970 }, { "epoch": 0.8160044343310578, "grad_norm": 0.8627913754233999, "learning_rate": 8.618612977256674e-07, "loss": 0.1024, "step": 27971 }, { "epoch": 0.8160336075617014, "grad_norm": 0.7703068451678616, "learning_rate": 8.615961495817482e-07, "loss": 0.1271, "step": 27972 }, { "epoch": 0.816062780792345, "grad_norm": 0.9032384205047582, "learning_rate": 8.613310383845125e-07, "loss": 0.1096, "step": 27973 }, { "epoch": 0.8160919540229885, "grad_norm": 0.9759871484921885, "learning_rate": 8.610659641363251e-07, "loss": 0.1051, "step": 27974 }, { "epoch": 0.8161211272536321, "grad_norm": 0.8059160076351759, "learning_rate": 8.608009268395512e-07, "loss": 0.1085, "step": 27975 }, { "epoch": 0.8161503004842756, "grad_norm": 0.8301027795536169, "learning_rate": 8.605359264965602e-07, "loss": 0.1036, "step": 27976 }, { "epoch": 0.8161794737149192, "grad_norm": 0.777042585692951, "learning_rate": 8.602709631097161e-07, "loss": 0.1043, "step": 27977 }, { "epoch": 0.8162086469455627, "grad_norm": 0.8054281415233515, "learning_rate": 8.600060366813823e-07, "loss": 0.1165, "step": 27978 }, { "epoch": 0.8162378201762063, "grad_norm": 0.9361815134274353, "learning_rate": 8.597411472139288e-07, "loss": 0.1392, "step": 27979 }, { "epoch": 0.8162669934068498, "grad_norm": 0.7066121911380071, "learning_rate": 8.594762947097173e-07, "loss": 0.1209, "step": 27980 }, { "epoch": 0.8162961666374934, "grad_norm": 1.027407194909586, "learning_rate": 8.592114791711126e-07, "loss": 0.1014, "step": 27981 }, { "epoch": 0.816325339868137, "grad_norm": 0.7264163145303579, "learning_rate": 8.589467006004803e-07, "loss": 0.1121, "step": 27982 }, { "epoch": 0.8163545130987806, "grad_norm": 0.7785567537892137, "learning_rate": 8.586819590001833e-07, "loss": 0.1018, "step": 27983 }, { "epoch": 0.8163836863294242, "grad_norm": 0.7948416690473252, "learning_rate": 8.584172543725839e-07, "loss": 0.1166, "step": 27984 }, { "epoch": 0.8164128595600677, "grad_norm": 0.9166306968610213, "learning_rate": 8.581525867200464e-07, "loss": 0.1146, "step": 27985 }, { "epoch": 0.8164420327907113, "grad_norm": 0.826955882779524, "learning_rate": 8.578879560449354e-07, "loss": 0.1108, "step": 27986 }, { "epoch": 0.8164712060213548, "grad_norm": 0.6589801802365504, "learning_rate": 8.576233623496117e-07, "loss": 0.1048, "step": 27987 }, { "epoch": 0.8165003792519984, "grad_norm": 1.039948581199204, "learning_rate": 8.573588056364368e-07, "loss": 0.1344, "step": 27988 }, { "epoch": 0.8165295524826419, "grad_norm": 0.6468888511256666, "learning_rate": 8.570942859077747e-07, "loss": 0.0856, "step": 27989 }, { "epoch": 0.8165587257132855, "grad_norm": 0.7987498188958376, "learning_rate": 8.568298031659844e-07, "loss": 0.121, "step": 27990 }, { "epoch": 0.816587898943929, "grad_norm": 0.6352220061579698, "learning_rate": 8.565653574134297e-07, "loss": 0.1071, "step": 27991 }, { "epoch": 0.8166170721745726, "grad_norm": 0.9729734228360658, "learning_rate": 8.563009486524698e-07, "loss": 0.1183, "step": 27992 }, { "epoch": 0.8166462454052161, "grad_norm": 0.7873868919153967, "learning_rate": 8.560365768854662e-07, "loss": 0.1241, "step": 27993 }, { "epoch": 0.8166754186358597, "grad_norm": 0.6202144193652498, "learning_rate": 8.55772242114778e-07, "loss": 0.0951, "step": 27994 }, { "epoch": 0.8167045918665033, "grad_norm": 0.7052777954048747, "learning_rate": 8.555079443427672e-07, "loss": 0.1106, "step": 27995 }, { "epoch": 0.8167337650971469, "grad_norm": 0.878384548400134, "learning_rate": 8.552436835717909e-07, "loss": 0.0756, "step": 27996 }, { "epoch": 0.8167629383277905, "grad_norm": 0.9384075757366674, "learning_rate": 8.549794598042104e-07, "loss": 0.1041, "step": 27997 }, { "epoch": 0.816792111558434, "grad_norm": 0.7983972119889551, "learning_rate": 8.54715273042383e-07, "loss": 0.1188, "step": 27998 }, { "epoch": 0.8168212847890776, "grad_norm": 0.8255508273022011, "learning_rate": 8.544511232886693e-07, "loss": 0.1267, "step": 27999 }, { "epoch": 0.8168504580197211, "grad_norm": 0.7603824207116087, "learning_rate": 8.541870105454264e-07, "loss": 0.0905, "step": 28000 }, { "epoch": 0.8168796312503647, "grad_norm": 0.8667780567402311, "learning_rate": 8.539229348150107e-07, "loss": 0.1176, "step": 28001 }, { "epoch": 0.8169088044810082, "grad_norm": 1.0316246110869005, "learning_rate": 8.536588960997811e-07, "loss": 0.112, "step": 28002 }, { "epoch": 0.8169379777116518, "grad_norm": 0.7797496813247969, "learning_rate": 8.53394894402097e-07, "loss": 0.1251, "step": 28003 }, { "epoch": 0.8169671509422953, "grad_norm": 0.9794218367100127, "learning_rate": 8.531309297243129e-07, "loss": 0.1226, "step": 28004 }, { "epoch": 0.8169963241729389, "grad_norm": 0.7280829179655549, "learning_rate": 8.528670020687845e-07, "loss": 0.1169, "step": 28005 }, { "epoch": 0.8170254974035824, "grad_norm": 1.0092566018665785, "learning_rate": 8.526031114378713e-07, "loss": 0.1391, "step": 28006 }, { "epoch": 0.817054670634226, "grad_norm": 0.983347000050237, "learning_rate": 8.523392578339268e-07, "loss": 0.0923, "step": 28007 }, { "epoch": 0.8170838438648695, "grad_norm": 0.7769789828232809, "learning_rate": 8.520754412593052e-07, "loss": 0.1325, "step": 28008 }, { "epoch": 0.8171130170955132, "grad_norm": 0.8762249085199176, "learning_rate": 8.518116617163664e-07, "loss": 0.104, "step": 28009 }, { "epoch": 0.8171421903261568, "grad_norm": 0.7477001410736634, "learning_rate": 8.515479192074627e-07, "loss": 0.1061, "step": 28010 }, { "epoch": 0.8171713635568003, "grad_norm": 0.7165997494065797, "learning_rate": 8.512842137349475e-07, "loss": 0.1028, "step": 28011 }, { "epoch": 0.8172005367874439, "grad_norm": 0.7707761993942428, "learning_rate": 8.510205453011783e-07, "loss": 0.124, "step": 28012 }, { "epoch": 0.8172297100180874, "grad_norm": 0.7468551414347658, "learning_rate": 8.507569139085064e-07, "loss": 0.1219, "step": 28013 }, { "epoch": 0.817258883248731, "grad_norm": 1.0180306974051387, "learning_rate": 8.504933195592858e-07, "loss": 0.1092, "step": 28014 }, { "epoch": 0.8172880564793745, "grad_norm": 0.7306808186430649, "learning_rate": 8.502297622558697e-07, "loss": 0.1106, "step": 28015 }, { "epoch": 0.8173172297100181, "grad_norm": 0.813070514036749, "learning_rate": 8.499662420006127e-07, "loss": 0.1224, "step": 28016 }, { "epoch": 0.8173464029406616, "grad_norm": 0.9905332900506604, "learning_rate": 8.497027587958672e-07, "loss": 0.1114, "step": 28017 }, { "epoch": 0.8173755761713052, "grad_norm": 0.8700338934573142, "learning_rate": 8.494393126439831e-07, "loss": 0.1199, "step": 28018 }, { "epoch": 0.8174047494019487, "grad_norm": 0.9352580233423357, "learning_rate": 8.491759035473152e-07, "loss": 0.1441, "step": 28019 }, { "epoch": 0.8174339226325923, "grad_norm": 0.9657119282357742, "learning_rate": 8.489125315082125e-07, "loss": 0.1272, "step": 28020 }, { "epoch": 0.8174630958632358, "grad_norm": 0.8445003314221814, "learning_rate": 8.486491965290294e-07, "loss": 0.1174, "step": 28021 }, { "epoch": 0.8174922690938795, "grad_norm": 0.9901304891181352, "learning_rate": 8.483858986121135e-07, "loss": 0.1183, "step": 28022 }, { "epoch": 0.8175214423245231, "grad_norm": 1.0211973522370925, "learning_rate": 8.48122637759819e-07, "loss": 0.1154, "step": 28023 }, { "epoch": 0.8175506155551666, "grad_norm": 1.0334704825490604, "learning_rate": 8.478594139744928e-07, "loss": 0.1232, "step": 28024 }, { "epoch": 0.8175797887858102, "grad_norm": 1.0091919402124505, "learning_rate": 8.475962272584881e-07, "loss": 0.1213, "step": 28025 }, { "epoch": 0.8176089620164537, "grad_norm": 1.010854824605726, "learning_rate": 8.47333077614152e-07, "loss": 0.1386, "step": 28026 }, { "epoch": 0.8176381352470973, "grad_norm": 0.8002253611829233, "learning_rate": 8.470699650438358e-07, "loss": 0.1223, "step": 28027 }, { "epoch": 0.8176673084777408, "grad_norm": 0.7758489461325259, "learning_rate": 8.468068895498859e-07, "loss": 0.1112, "step": 28028 }, { "epoch": 0.8176964817083844, "grad_norm": 1.0687019772086732, "learning_rate": 8.465438511346546e-07, "loss": 0.1709, "step": 28029 }, { "epoch": 0.8177256549390279, "grad_norm": 0.8801451995809791, "learning_rate": 8.462808498004882e-07, "loss": 0.1212, "step": 28030 }, { "epoch": 0.8177548281696715, "grad_norm": 0.8419010267623476, "learning_rate": 8.460178855497331e-07, "loss": 0.1316, "step": 28031 }, { "epoch": 0.817784001400315, "grad_norm": 0.8620228436395571, "learning_rate": 8.457549583847391e-07, "loss": 0.0872, "step": 28032 }, { "epoch": 0.8178131746309586, "grad_norm": 0.9043170259284783, "learning_rate": 8.454920683078544e-07, "loss": 0.1323, "step": 28033 }, { "epoch": 0.8178423478616021, "grad_norm": 0.7789213372353636, "learning_rate": 8.452292153214242e-07, "loss": 0.1269, "step": 28034 }, { "epoch": 0.8178715210922457, "grad_norm": 0.905291898223673, "learning_rate": 8.449663994277951e-07, "loss": 0.1293, "step": 28035 }, { "epoch": 0.8179006943228894, "grad_norm": 0.9083828888139835, "learning_rate": 8.447036206293152e-07, "loss": 0.125, "step": 28036 }, { "epoch": 0.8179298675535329, "grad_norm": 0.7955954016926723, "learning_rate": 8.444408789283292e-07, "loss": 0.1247, "step": 28037 }, { "epoch": 0.8179590407841765, "grad_norm": 0.9815952664476065, "learning_rate": 8.44178174327181e-07, "loss": 0.1119, "step": 28038 }, { "epoch": 0.81798821401482, "grad_norm": 0.8610262579032661, "learning_rate": 8.439155068282201e-07, "loss": 0.1226, "step": 28039 }, { "epoch": 0.8180173872454636, "grad_norm": 0.8876390810937166, "learning_rate": 8.436528764337892e-07, "loss": 0.1318, "step": 28040 }, { "epoch": 0.8180465604761071, "grad_norm": 1.015407307769133, "learning_rate": 8.43390283146232e-07, "loss": 0.1333, "step": 28041 }, { "epoch": 0.8180757337067507, "grad_norm": 0.9222701906240001, "learning_rate": 8.431277269678961e-07, "loss": 0.1041, "step": 28042 }, { "epoch": 0.8181049069373942, "grad_norm": 0.8136467820095734, "learning_rate": 8.428652079011229e-07, "loss": 0.1135, "step": 28043 }, { "epoch": 0.8181340801680378, "grad_norm": 0.8692521287609517, "learning_rate": 8.426027259482555e-07, "loss": 0.1144, "step": 28044 }, { "epoch": 0.8181632533986813, "grad_norm": 1.0202616954193195, "learning_rate": 8.423402811116388e-07, "loss": 0.1025, "step": 28045 }, { "epoch": 0.8181924266293249, "grad_norm": 0.7730376981748913, "learning_rate": 8.420778733936164e-07, "loss": 0.082, "step": 28046 }, { "epoch": 0.8182215998599685, "grad_norm": 0.9667693727371081, "learning_rate": 8.418155027965302e-07, "loss": 0.1, "step": 28047 }, { "epoch": 0.818250773090612, "grad_norm": 0.8015330055711267, "learning_rate": 8.41553169322722e-07, "loss": 0.1074, "step": 28048 }, { "epoch": 0.8182799463212557, "grad_norm": 0.8360952802038947, "learning_rate": 8.41290872974535e-07, "loss": 0.1423, "step": 28049 }, { "epoch": 0.8183091195518992, "grad_norm": 1.0510750425775541, "learning_rate": 8.410286137543089e-07, "loss": 0.1079, "step": 28050 }, { "epoch": 0.8183382927825428, "grad_norm": 0.7413942925453786, "learning_rate": 8.407663916643882e-07, "loss": 0.1062, "step": 28051 }, { "epoch": 0.8183674660131863, "grad_norm": 0.9235686785184701, "learning_rate": 8.405042067071112e-07, "loss": 0.1146, "step": 28052 }, { "epoch": 0.8183966392438299, "grad_norm": 0.8694952551010822, "learning_rate": 8.402420588848204e-07, "loss": 0.1028, "step": 28053 }, { "epoch": 0.8184258124744734, "grad_norm": 0.8309566220828405, "learning_rate": 8.399799481998555e-07, "loss": 0.0997, "step": 28054 }, { "epoch": 0.818454985705117, "grad_norm": 0.7166765632882739, "learning_rate": 8.397178746545558e-07, "loss": 0.1293, "step": 28055 }, { "epoch": 0.8184841589357605, "grad_norm": 0.7696646028108041, "learning_rate": 8.394558382512613e-07, "loss": 0.1124, "step": 28056 }, { "epoch": 0.8185133321664041, "grad_norm": 0.7930973318720292, "learning_rate": 8.391938389923132e-07, "loss": 0.1048, "step": 28057 }, { "epoch": 0.8185425053970476, "grad_norm": 0.7494214465106429, "learning_rate": 8.389318768800481e-07, "loss": 0.0871, "step": 28058 }, { "epoch": 0.8185716786276912, "grad_norm": 0.7757831673639287, "learning_rate": 8.386699519168074e-07, "loss": 0.1177, "step": 28059 }, { "epoch": 0.8186008518583348, "grad_norm": 0.8360261929111451, "learning_rate": 8.384080641049275e-07, "loss": 0.1219, "step": 28060 }, { "epoch": 0.8186300250889783, "grad_norm": 1.2544170598866653, "learning_rate": 8.38146213446746e-07, "loss": 0.118, "step": 28061 }, { "epoch": 0.8186591983196219, "grad_norm": 0.8124155581438096, "learning_rate": 8.378843999446018e-07, "loss": 0.1273, "step": 28062 }, { "epoch": 0.8186883715502655, "grad_norm": 0.893277986606559, "learning_rate": 8.376226236008328e-07, "loss": 0.1079, "step": 28063 }, { "epoch": 0.8187175447809091, "grad_norm": 0.7547091523477031, "learning_rate": 8.373608844177755e-07, "loss": 0.1154, "step": 28064 }, { "epoch": 0.8187467180115526, "grad_norm": 0.7725148549140893, "learning_rate": 8.370991823977653e-07, "loss": 0.1162, "step": 28065 }, { "epoch": 0.8187758912421962, "grad_norm": 0.7866570268503806, "learning_rate": 8.368375175431415e-07, "loss": 0.1197, "step": 28066 }, { "epoch": 0.8188050644728397, "grad_norm": 0.9351223328356347, "learning_rate": 8.365758898562371e-07, "loss": 0.1261, "step": 28067 }, { "epoch": 0.8188342377034833, "grad_norm": 0.9229732339443021, "learning_rate": 8.363142993393891e-07, "loss": 0.1184, "step": 28068 }, { "epoch": 0.8188634109341268, "grad_norm": 0.8975736632063561, "learning_rate": 8.360527459949341e-07, "loss": 0.1325, "step": 28069 }, { "epoch": 0.8188925841647704, "grad_norm": 0.7978540709436979, "learning_rate": 8.357912298252063e-07, "loss": 0.1086, "step": 28070 }, { "epoch": 0.818921757395414, "grad_norm": 0.8436162053129328, "learning_rate": 8.355297508325394e-07, "loss": 0.0938, "step": 28071 }, { "epoch": 0.8189509306260575, "grad_norm": 0.9512952476076122, "learning_rate": 8.352683090192698e-07, "loss": 0.159, "step": 28072 }, { "epoch": 0.8189801038567011, "grad_norm": 0.8949113859449455, "learning_rate": 8.35006904387729e-07, "loss": 0.1217, "step": 28073 }, { "epoch": 0.8190092770873446, "grad_norm": 0.9513471759480473, "learning_rate": 8.34745536940254e-07, "loss": 0.1079, "step": 28074 }, { "epoch": 0.8190384503179882, "grad_norm": 0.8236986247300286, "learning_rate": 8.344842066791753e-07, "loss": 0.1031, "step": 28075 }, { "epoch": 0.8190676235486318, "grad_norm": 0.8718510701854801, "learning_rate": 8.342229136068281e-07, "loss": 0.1315, "step": 28076 }, { "epoch": 0.8190967967792754, "grad_norm": 0.7291032550965775, "learning_rate": 8.339616577255444e-07, "loss": 0.0978, "step": 28077 }, { "epoch": 0.8191259700099189, "grad_norm": 0.8902461305151809, "learning_rate": 8.337004390376552e-07, "loss": 0.1079, "step": 28078 }, { "epoch": 0.8191551432405625, "grad_norm": 0.9204624887828335, "learning_rate": 8.334392575454941e-07, "loss": 0.106, "step": 28079 }, { "epoch": 0.819184316471206, "grad_norm": 1.0351279618199487, "learning_rate": 8.331781132513939e-07, "loss": 0.1142, "step": 28080 }, { "epoch": 0.8192134897018496, "grad_norm": 0.8509110671830511, "learning_rate": 8.329170061576847e-07, "loss": 0.1323, "step": 28081 }, { "epoch": 0.8192426629324931, "grad_norm": 0.7621287085683462, "learning_rate": 8.326559362666964e-07, "loss": 0.1025, "step": 28082 }, { "epoch": 0.8192718361631367, "grad_norm": 0.7648280607430413, "learning_rate": 8.323949035807621e-07, "loss": 0.117, "step": 28083 }, { "epoch": 0.8193010093937803, "grad_norm": 0.7945796102385012, "learning_rate": 8.321339081022117e-07, "loss": 0.1058, "step": 28084 }, { "epoch": 0.8193301826244238, "grad_norm": 0.8999082911118238, "learning_rate": 8.318729498333722e-07, "loss": 0.0838, "step": 28085 }, { "epoch": 0.8193593558550674, "grad_norm": 0.950698878684458, "learning_rate": 8.316120287765784e-07, "loss": 0.1321, "step": 28086 }, { "epoch": 0.8193885290857109, "grad_norm": 0.8058811638166943, "learning_rate": 8.313511449341572e-07, "loss": 0.1081, "step": 28087 }, { "epoch": 0.8194177023163545, "grad_norm": 0.8291288706638122, "learning_rate": 8.310902983084368e-07, "loss": 0.1225, "step": 28088 }, { "epoch": 0.819446875546998, "grad_norm": 0.718478792515388, "learning_rate": 8.308294889017482e-07, "loss": 0.0867, "step": 28089 }, { "epoch": 0.8194760487776417, "grad_norm": 0.9614708259651105, "learning_rate": 8.305687167164189e-07, "loss": 0.1117, "step": 28090 }, { "epoch": 0.8195052220082852, "grad_norm": 0.7657402132189794, "learning_rate": 8.303079817547749e-07, "loss": 0.1058, "step": 28091 }, { "epoch": 0.8195343952389288, "grad_norm": 0.9436221345772089, "learning_rate": 8.300472840191464e-07, "loss": 0.1092, "step": 28092 }, { "epoch": 0.8195635684695723, "grad_norm": 0.9302532658571502, "learning_rate": 8.297866235118612e-07, "loss": 0.1071, "step": 28093 }, { "epoch": 0.8195927417002159, "grad_norm": 0.744906274912989, "learning_rate": 8.295260002352462e-07, "loss": 0.1177, "step": 28094 }, { "epoch": 0.8196219149308595, "grad_norm": 0.7902433385563541, "learning_rate": 8.292654141916257e-07, "loss": 0.111, "step": 28095 }, { "epoch": 0.819651088161503, "grad_norm": 0.8382154140304817, "learning_rate": 8.290048653833288e-07, "loss": 0.1032, "step": 28096 }, { "epoch": 0.8196802613921466, "grad_norm": 1.0535307215415357, "learning_rate": 8.287443538126805e-07, "loss": 0.1179, "step": 28097 }, { "epoch": 0.8197094346227901, "grad_norm": 0.9495198768864338, "learning_rate": 8.284838794820061e-07, "loss": 0.13, "step": 28098 }, { "epoch": 0.8197386078534337, "grad_norm": 0.7835200583928362, "learning_rate": 8.28223442393633e-07, "loss": 0.1196, "step": 28099 }, { "epoch": 0.8197677810840772, "grad_norm": 1.00873338218089, "learning_rate": 8.279630425498858e-07, "loss": 0.1248, "step": 28100 }, { "epoch": 0.8197969543147208, "grad_norm": 1.0290157010544827, "learning_rate": 8.277026799530869e-07, "loss": 0.1043, "step": 28101 }, { "epoch": 0.8198261275453643, "grad_norm": 0.8396608742587166, "learning_rate": 8.274423546055638e-07, "loss": 0.0975, "step": 28102 }, { "epoch": 0.819855300776008, "grad_norm": 1.0409349443091456, "learning_rate": 8.271820665096381e-07, "loss": 0.1415, "step": 28103 }, { "epoch": 0.8198844740066515, "grad_norm": 0.7578961483672078, "learning_rate": 8.269218156676356e-07, "loss": 0.1113, "step": 28104 }, { "epoch": 0.8199136472372951, "grad_norm": 0.8745063625424268, "learning_rate": 8.266616020818779e-07, "loss": 0.1206, "step": 28105 }, { "epoch": 0.8199428204679386, "grad_norm": 1.0504859406376101, "learning_rate": 8.264014257546909e-07, "loss": 0.1223, "step": 28106 }, { "epoch": 0.8199719936985822, "grad_norm": 0.7646400680302314, "learning_rate": 8.26141286688395e-07, "loss": 0.1032, "step": 28107 }, { "epoch": 0.8200011669292258, "grad_norm": 0.7847866097437022, "learning_rate": 8.258811848853126e-07, "loss": 0.1245, "step": 28108 }, { "epoch": 0.8200303401598693, "grad_norm": 0.8507550174118889, "learning_rate": 8.256211203477659e-07, "loss": 0.1199, "step": 28109 }, { "epoch": 0.8200595133905129, "grad_norm": 1.0639099007405886, "learning_rate": 8.253610930780793e-07, "loss": 0.1216, "step": 28110 }, { "epoch": 0.8200886866211564, "grad_norm": 0.7559782093999984, "learning_rate": 8.251011030785722e-07, "loss": 0.1073, "step": 28111 }, { "epoch": 0.8201178598518, "grad_norm": 0.6794944671157042, "learning_rate": 8.248411503515641e-07, "loss": 0.1069, "step": 28112 }, { "epoch": 0.8201470330824435, "grad_norm": 1.510744603051289, "learning_rate": 8.245812348993793e-07, "loss": 0.1256, "step": 28113 }, { "epoch": 0.8201762063130871, "grad_norm": 0.9243085518108931, "learning_rate": 8.243213567243357e-07, "loss": 0.0988, "step": 28114 }, { "epoch": 0.8202053795437306, "grad_norm": 0.7633260998978164, "learning_rate": 8.240615158287524e-07, "loss": 0.1006, "step": 28115 }, { "epoch": 0.8202345527743742, "grad_norm": 0.9381278677959661, "learning_rate": 8.238017122149533e-07, "loss": 0.1101, "step": 28116 }, { "epoch": 0.8202637260050178, "grad_norm": 0.9691918239872492, "learning_rate": 8.235419458852556e-07, "loss": 0.0933, "step": 28117 }, { "epoch": 0.8202928992356614, "grad_norm": 0.9012611915988304, "learning_rate": 8.232822168419774e-07, "loss": 0.1247, "step": 28118 }, { "epoch": 0.820322072466305, "grad_norm": 0.7544283074306304, "learning_rate": 8.230225250874391e-07, "loss": 0.1091, "step": 28119 }, { "epoch": 0.8203512456969485, "grad_norm": 0.847683240702552, "learning_rate": 8.227628706239593e-07, "loss": 0.111, "step": 28120 }, { "epoch": 0.8203804189275921, "grad_norm": 1.009555390596786, "learning_rate": 8.225032534538535e-07, "loss": 0.107, "step": 28121 }, { "epoch": 0.8204095921582356, "grad_norm": 0.9268798893189278, "learning_rate": 8.22243673579442e-07, "loss": 0.1019, "step": 28122 }, { "epoch": 0.8204387653888792, "grad_norm": 0.8621761592208615, "learning_rate": 8.219841310030424e-07, "loss": 0.1083, "step": 28123 }, { "epoch": 0.8204679386195227, "grad_norm": 1.209421675021601, "learning_rate": 8.217246257269712e-07, "loss": 0.1147, "step": 28124 }, { "epoch": 0.8204971118501663, "grad_norm": 0.8386429567325634, "learning_rate": 8.214651577535442e-07, "loss": 0.0829, "step": 28125 }, { "epoch": 0.8205262850808098, "grad_norm": 0.915009359660299, "learning_rate": 8.212057270850798e-07, "loss": 0.0961, "step": 28126 }, { "epoch": 0.8205554583114534, "grad_norm": 0.8658750313762614, "learning_rate": 8.209463337238921e-07, "loss": 0.1111, "step": 28127 }, { "epoch": 0.8205846315420969, "grad_norm": 1.661013780257039, "learning_rate": 8.206869776722976e-07, "loss": 0.1069, "step": 28128 }, { "epoch": 0.8206138047727405, "grad_norm": 0.9154463517099505, "learning_rate": 8.204276589326132e-07, "loss": 0.104, "step": 28129 }, { "epoch": 0.8206429780033841, "grad_norm": 0.805309686255245, "learning_rate": 8.201683775071534e-07, "loss": 0.116, "step": 28130 }, { "epoch": 0.8206721512340277, "grad_norm": 0.9345352578915128, "learning_rate": 8.199091333982312e-07, "loss": 0.1046, "step": 28131 }, { "epoch": 0.8207013244646713, "grad_norm": 0.8183592785988367, "learning_rate": 8.196499266081631e-07, "loss": 0.089, "step": 28132 }, { "epoch": 0.8207304976953148, "grad_norm": 0.7325455817803519, "learning_rate": 8.193907571392617e-07, "loss": 0.0994, "step": 28133 }, { "epoch": 0.8207596709259584, "grad_norm": 0.8562035668368565, "learning_rate": 8.19131624993843e-07, "loss": 0.1379, "step": 28134 }, { "epoch": 0.8207888441566019, "grad_norm": 1.034877966516031, "learning_rate": 8.188725301742178e-07, "loss": 0.1013, "step": 28135 }, { "epoch": 0.8208180173872455, "grad_norm": 0.7534523804468917, "learning_rate": 8.186134726827016e-07, "loss": 0.1121, "step": 28136 }, { "epoch": 0.820847190617889, "grad_norm": 0.976663392916067, "learning_rate": 8.183544525216059e-07, "loss": 0.1151, "step": 28137 }, { "epoch": 0.8208763638485326, "grad_norm": 0.8498206421645776, "learning_rate": 8.180954696932425e-07, "loss": 0.1111, "step": 28138 }, { "epoch": 0.8209055370791761, "grad_norm": 0.8489290048667434, "learning_rate": 8.178365241999247e-07, "loss": 0.0992, "step": 28139 }, { "epoch": 0.8209347103098197, "grad_norm": 0.7877822691146962, "learning_rate": 8.175776160439646e-07, "loss": 0.1064, "step": 28140 }, { "epoch": 0.8209638835404632, "grad_norm": 0.8335305962650439, "learning_rate": 8.173187452276738e-07, "loss": 0.1234, "step": 28141 }, { "epoch": 0.8209930567711068, "grad_norm": 1.4482998243119232, "learning_rate": 8.170599117533612e-07, "loss": 0.1236, "step": 28142 }, { "epoch": 0.8210222300017503, "grad_norm": 0.8594430263994742, "learning_rate": 8.168011156233402e-07, "loss": 0.0965, "step": 28143 }, { "epoch": 0.821051403232394, "grad_norm": 0.8527821907224292, "learning_rate": 8.165423568399206e-07, "loss": 0.1167, "step": 28144 }, { "epoch": 0.8210805764630376, "grad_norm": 0.9662625297186198, "learning_rate": 8.162836354054093e-07, "loss": 0.0954, "step": 28145 }, { "epoch": 0.8211097496936811, "grad_norm": 0.870731752773125, "learning_rate": 8.160249513221218e-07, "loss": 0.1277, "step": 28146 }, { "epoch": 0.8211389229243247, "grad_norm": 0.8296927225677133, "learning_rate": 8.157663045923647e-07, "loss": 0.1056, "step": 28147 }, { "epoch": 0.8211680961549682, "grad_norm": 1.1020732115297716, "learning_rate": 8.15507695218446e-07, "loss": 0.1275, "step": 28148 }, { "epoch": 0.8211972693856118, "grad_norm": 0.9708489056716605, "learning_rate": 8.152491232026766e-07, "loss": 0.1423, "step": 28149 }, { "epoch": 0.8212264426162553, "grad_norm": 0.8520652340420806, "learning_rate": 8.149905885473641e-07, "loss": 0.1178, "step": 28150 }, { "epoch": 0.8212556158468989, "grad_norm": 0.9514485146187803, "learning_rate": 8.147320912548156e-07, "loss": 0.1154, "step": 28151 }, { "epoch": 0.8212847890775424, "grad_norm": 0.9543060882194034, "learning_rate": 8.1447363132734e-07, "loss": 0.0939, "step": 28152 }, { "epoch": 0.821313962308186, "grad_norm": 0.7780015591522186, "learning_rate": 8.142152087672456e-07, "loss": 0.1069, "step": 28153 }, { "epoch": 0.8213431355388295, "grad_norm": 0.8282556396543697, "learning_rate": 8.139568235768386e-07, "loss": 0.1097, "step": 28154 }, { "epoch": 0.8213723087694731, "grad_norm": 0.9320926407715043, "learning_rate": 8.136984757584243e-07, "loss": 0.1249, "step": 28155 }, { "epoch": 0.8214014820001166, "grad_norm": 0.8407397356659568, "learning_rate": 8.134401653143126e-07, "loss": 0.1363, "step": 28156 }, { "epoch": 0.8214306552307603, "grad_norm": 0.9340377620301075, "learning_rate": 8.13181892246806e-07, "loss": 0.0888, "step": 28157 }, { "epoch": 0.8214598284614039, "grad_norm": 0.688415117109616, "learning_rate": 8.129236565582121e-07, "loss": 0.1216, "step": 28158 }, { "epoch": 0.8214890016920474, "grad_norm": 1.2667062439300032, "learning_rate": 8.12665458250837e-07, "loss": 0.1067, "step": 28159 }, { "epoch": 0.821518174922691, "grad_norm": 0.9749497627623536, "learning_rate": 8.124072973269859e-07, "loss": 0.1164, "step": 28160 }, { "epoch": 0.8215473481533345, "grad_norm": 0.893241137790369, "learning_rate": 8.121491737889609e-07, "loss": 0.1099, "step": 28161 }, { "epoch": 0.8215765213839781, "grad_norm": 0.7536120269677897, "learning_rate": 8.118910876390701e-07, "loss": 0.1173, "step": 28162 }, { "epoch": 0.8216056946146216, "grad_norm": 0.7341619083776675, "learning_rate": 8.116330388796146e-07, "loss": 0.1106, "step": 28163 }, { "epoch": 0.8216348678452652, "grad_norm": 0.7074439101407712, "learning_rate": 8.113750275129001e-07, "loss": 0.1123, "step": 28164 }, { "epoch": 0.8216640410759087, "grad_norm": 0.7957780752170743, "learning_rate": 8.111170535412288e-07, "loss": 0.1048, "step": 28165 }, { "epoch": 0.8216932143065523, "grad_norm": 1.0663470356321327, "learning_rate": 8.108591169669055e-07, "loss": 0.1067, "step": 28166 }, { "epoch": 0.8217223875371958, "grad_norm": 0.8461527373718306, "learning_rate": 8.106012177922323e-07, "loss": 0.1075, "step": 28167 }, { "epoch": 0.8217515607678394, "grad_norm": 0.7391384934802716, "learning_rate": 8.103433560195095e-07, "loss": 0.0978, "step": 28168 }, { "epoch": 0.821780733998483, "grad_norm": 0.9557676295109226, "learning_rate": 8.100855316510414e-07, "loss": 0.1519, "step": 28169 }, { "epoch": 0.8218099072291265, "grad_norm": 0.8545413239568856, "learning_rate": 8.098277446891306e-07, "loss": 0.1335, "step": 28170 }, { "epoch": 0.8218390804597702, "grad_norm": 0.8413890070645333, "learning_rate": 8.095699951360775e-07, "loss": 0.1153, "step": 28171 }, { "epoch": 0.8218682536904137, "grad_norm": 0.8415007361696992, "learning_rate": 8.093122829941824e-07, "loss": 0.1111, "step": 28172 }, { "epoch": 0.8218974269210573, "grad_norm": 0.8162786862022892, "learning_rate": 8.090546082657475e-07, "loss": 0.1084, "step": 28173 }, { "epoch": 0.8219266001517008, "grad_norm": 0.8794433969944587, "learning_rate": 8.087969709530724e-07, "loss": 0.0986, "step": 28174 }, { "epoch": 0.8219557733823444, "grad_norm": 0.6406696687558314, "learning_rate": 8.085393710584555e-07, "loss": 0.1084, "step": 28175 }, { "epoch": 0.8219849466129879, "grad_norm": 0.8138758942418084, "learning_rate": 8.082818085842009e-07, "loss": 0.1007, "step": 28176 }, { "epoch": 0.8220141198436315, "grad_norm": 0.7493458640073105, "learning_rate": 8.080242835326052e-07, "loss": 0.1064, "step": 28177 }, { "epoch": 0.822043293074275, "grad_norm": 0.8126863787995176, "learning_rate": 8.077667959059671e-07, "loss": 0.1132, "step": 28178 }, { "epoch": 0.8220724663049186, "grad_norm": 1.1214392039574879, "learning_rate": 8.075093457065875e-07, "loss": 0.0934, "step": 28179 }, { "epoch": 0.8221016395355621, "grad_norm": 0.7769434290556544, "learning_rate": 8.072519329367634e-07, "loss": 0.1237, "step": 28180 }, { "epoch": 0.8221308127662057, "grad_norm": 1.1882184696955005, "learning_rate": 8.069945575987925e-07, "loss": 0.1196, "step": 28181 }, { "epoch": 0.8221599859968493, "grad_norm": 0.8913308246801798, "learning_rate": 8.06737219694973e-07, "loss": 0.0993, "step": 28182 }, { "epoch": 0.8221891592274928, "grad_norm": 0.7085190988101954, "learning_rate": 8.064799192276035e-07, "loss": 0.1173, "step": 28183 }, { "epoch": 0.8222183324581365, "grad_norm": 0.7946797222797932, "learning_rate": 8.062226561989806e-07, "loss": 0.1224, "step": 28184 }, { "epoch": 0.82224750568878, "grad_norm": 0.9038636203393499, "learning_rate": 8.05965430611399e-07, "loss": 0.0989, "step": 28185 }, { "epoch": 0.8222766789194236, "grad_norm": 0.8029605503880207, "learning_rate": 8.057082424671586e-07, "loss": 0.1353, "step": 28186 }, { "epoch": 0.8223058521500671, "grad_norm": 1.155784660196701, "learning_rate": 8.05451091768552e-07, "loss": 0.1096, "step": 28187 }, { "epoch": 0.8223350253807107, "grad_norm": 0.9439379323722944, "learning_rate": 8.051939785178769e-07, "loss": 0.121, "step": 28188 }, { "epoch": 0.8223641986113542, "grad_norm": 0.8646942937179766, "learning_rate": 8.049369027174303e-07, "loss": 0.1036, "step": 28189 }, { "epoch": 0.8223933718419978, "grad_norm": 0.8208873320004823, "learning_rate": 8.046798643695047e-07, "loss": 0.1146, "step": 28190 }, { "epoch": 0.8224225450726413, "grad_norm": 0.8275727422519764, "learning_rate": 8.044228634763951e-07, "loss": 0.1333, "step": 28191 }, { "epoch": 0.8224517183032849, "grad_norm": 0.9578165442255402, "learning_rate": 8.041659000403979e-07, "loss": 0.0911, "step": 28192 }, { "epoch": 0.8224808915339284, "grad_norm": 0.9774819388114641, "learning_rate": 8.039089740638045e-07, "loss": 0.1077, "step": 28193 }, { "epoch": 0.822510064764572, "grad_norm": 1.1650693290225729, "learning_rate": 8.036520855489116e-07, "loss": 0.1145, "step": 28194 }, { "epoch": 0.8225392379952156, "grad_norm": 0.7307145376854071, "learning_rate": 8.033952344980095e-07, "loss": 0.1182, "step": 28195 }, { "epoch": 0.8225684112258591, "grad_norm": 1.01455580386506, "learning_rate": 8.031384209133941e-07, "loss": 0.1035, "step": 28196 }, { "epoch": 0.8225975844565027, "grad_norm": 0.975275404336792, "learning_rate": 8.028816447973575e-07, "loss": 0.1207, "step": 28197 }, { "epoch": 0.8226267576871463, "grad_norm": 0.8215151553917331, "learning_rate": 8.026249061521901e-07, "loss": 0.1081, "step": 28198 }, { "epoch": 0.8226559309177899, "grad_norm": 0.9120500380292472, "learning_rate": 8.023682049801857e-07, "loss": 0.14, "step": 28199 }, { "epoch": 0.8226851041484334, "grad_norm": 0.932260325372986, "learning_rate": 8.021115412836372e-07, "loss": 0.0803, "step": 28200 }, { "epoch": 0.822714277379077, "grad_norm": 0.7206661322297229, "learning_rate": 8.018549150648342e-07, "loss": 0.1321, "step": 28201 }, { "epoch": 0.8227434506097205, "grad_norm": 0.9936279788538864, "learning_rate": 8.015983263260679e-07, "loss": 0.1029, "step": 28202 }, { "epoch": 0.8227726238403641, "grad_norm": 1.2841456857653297, "learning_rate": 8.013417750696301e-07, "loss": 0.1055, "step": 28203 }, { "epoch": 0.8228017970710076, "grad_norm": 0.9865292061306443, "learning_rate": 8.010852612978109e-07, "loss": 0.1093, "step": 28204 }, { "epoch": 0.8228309703016512, "grad_norm": 0.8830803277538544, "learning_rate": 8.008287850128976e-07, "loss": 0.1041, "step": 28205 }, { "epoch": 0.8228601435322948, "grad_norm": 0.8307317317359434, "learning_rate": 8.005723462171849e-07, "loss": 0.1225, "step": 28206 }, { "epoch": 0.8228893167629383, "grad_norm": 0.8542356041045629, "learning_rate": 8.0031594491296e-07, "loss": 0.0983, "step": 28207 }, { "epoch": 0.8229184899935819, "grad_norm": 1.0179093888808328, "learning_rate": 8.000595811025103e-07, "loss": 0.1079, "step": 28208 }, { "epoch": 0.8229476632242254, "grad_norm": 0.8032717625037453, "learning_rate": 7.998032547881274e-07, "loss": 0.1096, "step": 28209 }, { "epoch": 0.822976836454869, "grad_norm": 1.074792740723042, "learning_rate": 7.995469659720984e-07, "loss": 0.1033, "step": 28210 }, { "epoch": 0.8230060096855125, "grad_norm": 0.8809499433955115, "learning_rate": 7.992907146567103e-07, "loss": 0.1106, "step": 28211 }, { "epoch": 0.8230351829161562, "grad_norm": 1.1853681386942079, "learning_rate": 7.990345008442518e-07, "loss": 0.1166, "step": 28212 }, { "epoch": 0.8230643561467997, "grad_norm": 1.0374941834845035, "learning_rate": 7.987783245370118e-07, "loss": 0.1157, "step": 28213 }, { "epoch": 0.8230935293774433, "grad_norm": 1.0796943034117419, "learning_rate": 7.985221857372754e-07, "loss": 0.12, "step": 28214 }, { "epoch": 0.8231227026080868, "grad_norm": 0.7717978272424924, "learning_rate": 7.982660844473295e-07, "loss": 0.0969, "step": 28215 }, { "epoch": 0.8231518758387304, "grad_norm": 0.8402358338303498, "learning_rate": 7.980100206694613e-07, "loss": 0.1374, "step": 28216 }, { "epoch": 0.823181049069374, "grad_norm": 0.8793454759852466, "learning_rate": 7.977539944059559e-07, "loss": 0.0878, "step": 28217 }, { "epoch": 0.8232102223000175, "grad_norm": 0.9858176212897587, "learning_rate": 7.974980056590997e-07, "loss": 0.1203, "step": 28218 }, { "epoch": 0.8232393955306611, "grad_norm": 0.9134492697793195, "learning_rate": 7.972420544311793e-07, "loss": 0.1054, "step": 28219 }, { "epoch": 0.8232685687613046, "grad_norm": 0.9446679430183189, "learning_rate": 7.969861407244784e-07, "loss": 0.12, "step": 28220 }, { "epoch": 0.8232977419919482, "grad_norm": 0.9735403902127442, "learning_rate": 7.967302645412811e-07, "loss": 0.1044, "step": 28221 }, { "epoch": 0.8233269152225917, "grad_norm": 0.8884458166026682, "learning_rate": 7.96474425883873e-07, "loss": 0.0979, "step": 28222 }, { "epoch": 0.8233560884532353, "grad_norm": 0.7660160862184069, "learning_rate": 7.962186247545373e-07, "loss": 0.117, "step": 28223 }, { "epoch": 0.8233852616838788, "grad_norm": 0.9650815747394327, "learning_rate": 7.959628611555592e-07, "loss": 0.1147, "step": 28224 }, { "epoch": 0.8234144349145225, "grad_norm": 1.0124418689911145, "learning_rate": 7.957071350892198e-07, "loss": 0.1039, "step": 28225 }, { "epoch": 0.823443608145166, "grad_norm": 0.8996514207594016, "learning_rate": 7.954514465578044e-07, "loss": 0.1114, "step": 28226 }, { "epoch": 0.8234727813758096, "grad_norm": 0.8903856104697206, "learning_rate": 7.951957955635953e-07, "loss": 0.1135, "step": 28227 }, { "epoch": 0.8235019546064531, "grad_norm": 0.8601415069949574, "learning_rate": 7.949401821088726e-07, "loss": 0.1242, "step": 28228 }, { "epoch": 0.8235311278370967, "grad_norm": 0.9129479336715849, "learning_rate": 7.946846061959207e-07, "loss": 0.1315, "step": 28229 }, { "epoch": 0.8235603010677403, "grad_norm": 1.0827694069197806, "learning_rate": 7.944290678270216e-07, "loss": 0.1142, "step": 28230 }, { "epoch": 0.8235894742983838, "grad_norm": 0.8177956924191697, "learning_rate": 7.941735670044559e-07, "loss": 0.1044, "step": 28231 }, { "epoch": 0.8236186475290274, "grad_norm": 0.9078116440610156, "learning_rate": 7.939181037305033e-07, "loss": 0.1291, "step": 28232 }, { "epoch": 0.8236478207596709, "grad_norm": 0.7443518641912515, "learning_rate": 7.936626780074475e-07, "loss": 0.0973, "step": 28233 }, { "epoch": 0.8236769939903145, "grad_norm": 0.8551287825765357, "learning_rate": 7.934072898375667e-07, "loss": 0.1121, "step": 28234 }, { "epoch": 0.823706167220958, "grad_norm": 0.8493242321268851, "learning_rate": 7.93151939223139e-07, "loss": 0.1203, "step": 28235 }, { "epoch": 0.8237353404516016, "grad_norm": 0.6591109536062264, "learning_rate": 7.92896626166449e-07, "loss": 0.1245, "step": 28236 }, { "epoch": 0.8237645136822451, "grad_norm": 0.7665058536724771, "learning_rate": 7.926413506697733e-07, "loss": 0.1158, "step": 28237 }, { "epoch": 0.8237936869128887, "grad_norm": 0.8320924718203171, "learning_rate": 7.923861127353905e-07, "loss": 0.0953, "step": 28238 }, { "epoch": 0.8238228601435323, "grad_norm": 1.051981188820652, "learning_rate": 7.921309123655812e-07, "loss": 0.1072, "step": 28239 }, { "epoch": 0.8238520333741759, "grad_norm": 0.7169976623296156, "learning_rate": 7.918757495626228e-07, "loss": 0.1117, "step": 28240 }, { "epoch": 0.8238812066048194, "grad_norm": 0.8116336118984168, "learning_rate": 7.916206243287916e-07, "loss": 0.1129, "step": 28241 }, { "epoch": 0.823910379835463, "grad_norm": 0.9123025125674905, "learning_rate": 7.913655366663669e-07, "loss": 0.0958, "step": 28242 }, { "epoch": 0.8239395530661066, "grad_norm": 0.8207178203459888, "learning_rate": 7.91110486577627e-07, "loss": 0.1152, "step": 28243 }, { "epoch": 0.8239687262967501, "grad_norm": 0.7823137649557989, "learning_rate": 7.908554740648483e-07, "loss": 0.0939, "step": 28244 }, { "epoch": 0.8239978995273937, "grad_norm": 0.9079250673663318, "learning_rate": 7.906004991303057e-07, "loss": 0.1139, "step": 28245 }, { "epoch": 0.8240270727580372, "grad_norm": 0.8535080065364986, "learning_rate": 7.903455617762785e-07, "loss": 0.1037, "step": 28246 }, { "epoch": 0.8240562459886808, "grad_norm": 0.7694194856557461, "learning_rate": 7.900906620050397e-07, "loss": 0.126, "step": 28247 }, { "epoch": 0.8240854192193243, "grad_norm": 1.024056981269128, "learning_rate": 7.898357998188666e-07, "loss": 0.1345, "step": 28248 }, { "epoch": 0.8241145924499679, "grad_norm": 1.157752385255544, "learning_rate": 7.895809752200356e-07, "loss": 0.1222, "step": 28249 }, { "epoch": 0.8241437656806114, "grad_norm": 1.0403615699776128, "learning_rate": 7.893261882108205e-07, "loss": 0.0903, "step": 28250 }, { "epoch": 0.824172938911255, "grad_norm": 0.8346576357266661, "learning_rate": 7.89071438793495e-07, "loss": 0.1283, "step": 28251 }, { "epoch": 0.8242021121418986, "grad_norm": 1.0467219620973778, "learning_rate": 7.888167269703339e-07, "loss": 0.1292, "step": 28252 }, { "epoch": 0.8242312853725422, "grad_norm": 0.877842756180668, "learning_rate": 7.885620527436133e-07, "loss": 0.0997, "step": 28253 }, { "epoch": 0.8242604586031858, "grad_norm": 0.9062617894548268, "learning_rate": 7.883074161156056e-07, "loss": 0.0874, "step": 28254 }, { "epoch": 0.8242896318338293, "grad_norm": 0.9725653219617851, "learning_rate": 7.880528170885826e-07, "loss": 0.095, "step": 28255 }, { "epoch": 0.8243188050644729, "grad_norm": 1.1414445368593333, "learning_rate": 7.877982556648195e-07, "loss": 0.1184, "step": 28256 }, { "epoch": 0.8243479782951164, "grad_norm": 1.1247366079705248, "learning_rate": 7.875437318465884e-07, "loss": 0.105, "step": 28257 }, { "epoch": 0.82437715152576, "grad_norm": 0.7926035231731909, "learning_rate": 7.872892456361597e-07, "loss": 0.111, "step": 28258 }, { "epoch": 0.8244063247564035, "grad_norm": 0.8265197543925429, "learning_rate": 7.870347970358072e-07, "loss": 0.1112, "step": 28259 }, { "epoch": 0.8244354979870471, "grad_norm": 0.977496687454048, "learning_rate": 7.867803860478035e-07, "loss": 0.1006, "step": 28260 }, { "epoch": 0.8244646712176906, "grad_norm": 0.8298812136312068, "learning_rate": 7.86526012674419e-07, "loss": 0.0981, "step": 28261 }, { "epoch": 0.8244938444483342, "grad_norm": 0.923303082482779, "learning_rate": 7.86271676917923e-07, "loss": 0.1067, "step": 28262 }, { "epoch": 0.8245230176789777, "grad_norm": 0.8738865679941992, "learning_rate": 7.860173787805886e-07, "loss": 0.1028, "step": 28263 }, { "epoch": 0.8245521909096213, "grad_norm": 1.0582842785978028, "learning_rate": 7.857631182646835e-07, "loss": 0.1153, "step": 28264 }, { "epoch": 0.8245813641402648, "grad_norm": 0.8562993519550272, "learning_rate": 7.855088953724799e-07, "loss": 0.1219, "step": 28265 }, { "epoch": 0.8246105373709085, "grad_norm": 0.9664005471330721, "learning_rate": 7.85254710106248e-07, "loss": 0.1184, "step": 28266 }, { "epoch": 0.8246397106015521, "grad_norm": 0.9763949671009416, "learning_rate": 7.850005624682555e-07, "loss": 0.0945, "step": 28267 }, { "epoch": 0.8246688838321956, "grad_norm": 0.8966174290579905, "learning_rate": 7.847464524607712e-07, "loss": 0.1308, "step": 28268 }, { "epoch": 0.8246980570628392, "grad_norm": 0.8161974105240922, "learning_rate": 7.844923800860649e-07, "loss": 0.1297, "step": 28269 }, { "epoch": 0.8247272302934827, "grad_norm": 0.817094311582936, "learning_rate": 7.842383453464037e-07, "loss": 0.1091, "step": 28270 }, { "epoch": 0.8247564035241263, "grad_norm": 1.1028670740452124, "learning_rate": 7.839843482440568e-07, "loss": 0.0993, "step": 28271 }, { "epoch": 0.8247855767547698, "grad_norm": 0.9547552172157876, "learning_rate": 7.837303887812903e-07, "loss": 0.1278, "step": 28272 }, { "epoch": 0.8248147499854134, "grad_norm": 0.8519816492596898, "learning_rate": 7.834764669603733e-07, "loss": 0.1466, "step": 28273 }, { "epoch": 0.8248439232160569, "grad_norm": 1.3415020570432128, "learning_rate": 7.832225827835721e-07, "loss": 0.1201, "step": 28274 }, { "epoch": 0.8248730964467005, "grad_norm": 1.135995784586104, "learning_rate": 7.829687362531518e-07, "loss": 0.1079, "step": 28275 }, { "epoch": 0.824902269677344, "grad_norm": 0.7050498967393973, "learning_rate": 7.827149273713797e-07, "loss": 0.0952, "step": 28276 }, { "epoch": 0.8249314429079876, "grad_norm": 0.7523177070831721, "learning_rate": 7.824611561405238e-07, "loss": 0.1256, "step": 28277 }, { "epoch": 0.8249606161386311, "grad_norm": 0.7396224695897348, "learning_rate": 7.822074225628462e-07, "loss": 0.1192, "step": 28278 }, { "epoch": 0.8249897893692748, "grad_norm": 1.1024753633581628, "learning_rate": 7.819537266406152e-07, "loss": 0.1076, "step": 28279 }, { "epoch": 0.8250189625999184, "grad_norm": 0.8183379302789136, "learning_rate": 7.81700068376094e-07, "loss": 0.1262, "step": 28280 }, { "epoch": 0.8250481358305619, "grad_norm": 1.1095814611202723, "learning_rate": 7.814464477715466e-07, "loss": 0.1288, "step": 28281 }, { "epoch": 0.8250773090612055, "grad_norm": 0.8442212664368564, "learning_rate": 7.811928648292389e-07, "loss": 0.1186, "step": 28282 }, { "epoch": 0.825106482291849, "grad_norm": 0.8772915047651296, "learning_rate": 7.809393195514348e-07, "loss": 0.125, "step": 28283 }, { "epoch": 0.8251356555224926, "grad_norm": 0.813365149645741, "learning_rate": 7.806858119403976e-07, "loss": 0.0965, "step": 28284 }, { "epoch": 0.8251648287531361, "grad_norm": 0.9009509604505839, "learning_rate": 7.804323419983884e-07, "loss": 0.0976, "step": 28285 }, { "epoch": 0.8251940019837797, "grad_norm": 0.9275007492531333, "learning_rate": 7.801789097276735e-07, "loss": 0.0937, "step": 28286 }, { "epoch": 0.8252231752144232, "grad_norm": 0.9853359204091033, "learning_rate": 7.799255151305141e-07, "loss": 0.1149, "step": 28287 }, { "epoch": 0.8252523484450668, "grad_norm": 0.8139979900434158, "learning_rate": 7.79672158209171e-07, "loss": 0.1243, "step": 28288 }, { "epoch": 0.8252815216757103, "grad_norm": 1.0410753243598199, "learning_rate": 7.794188389659074e-07, "loss": 0.1094, "step": 28289 }, { "epoch": 0.8253106949063539, "grad_norm": 0.9731979014587019, "learning_rate": 7.791655574029866e-07, "loss": 0.1214, "step": 28290 }, { "epoch": 0.8253398681369974, "grad_norm": 0.7663712521466084, "learning_rate": 7.789123135226672e-07, "loss": 0.102, "step": 28291 }, { "epoch": 0.825369041367641, "grad_norm": 0.9390017754213866, "learning_rate": 7.786591073272104e-07, "loss": 0.1176, "step": 28292 }, { "epoch": 0.8253982145982847, "grad_norm": 0.8612843747806949, "learning_rate": 7.784059388188786e-07, "loss": 0.1048, "step": 28293 }, { "epoch": 0.8254273878289282, "grad_norm": 0.8004162637113328, "learning_rate": 7.78152807999929e-07, "loss": 0.1109, "step": 28294 }, { "epoch": 0.8254565610595718, "grad_norm": 0.9804348695270982, "learning_rate": 7.778997148726236e-07, "loss": 0.1222, "step": 28295 }, { "epoch": 0.8254857342902153, "grad_norm": 1.0311878124273608, "learning_rate": 7.776466594392229e-07, "loss": 0.1052, "step": 28296 }, { "epoch": 0.8255149075208589, "grad_norm": 0.8249509708880214, "learning_rate": 7.773936417019851e-07, "loss": 0.1189, "step": 28297 }, { "epoch": 0.8255440807515024, "grad_norm": 0.8672929515206617, "learning_rate": 7.771406616631677e-07, "loss": 0.1211, "step": 28298 }, { "epoch": 0.825573253982146, "grad_norm": 0.8216202561782774, "learning_rate": 7.768877193250313e-07, "loss": 0.1076, "step": 28299 }, { "epoch": 0.8256024272127895, "grad_norm": 1.0132895677276117, "learning_rate": 7.766348146898317e-07, "loss": 0.0942, "step": 28300 }, { "epoch": 0.8256316004434331, "grad_norm": 0.7401115594171007, "learning_rate": 7.7638194775983e-07, "loss": 0.1026, "step": 28301 }, { "epoch": 0.8256607736740766, "grad_norm": 1.1460493593402705, "learning_rate": 7.761291185372804e-07, "loss": 0.1073, "step": 28302 }, { "epoch": 0.8256899469047202, "grad_norm": 0.93550391849133, "learning_rate": 7.758763270244435e-07, "loss": 0.1057, "step": 28303 }, { "epoch": 0.8257191201353637, "grad_norm": 0.68782910234542, "learning_rate": 7.756235732235739e-07, "loss": 0.1132, "step": 28304 }, { "epoch": 0.8257482933660073, "grad_norm": 0.7713530952904493, "learning_rate": 7.753708571369273e-07, "loss": 0.1097, "step": 28305 }, { "epoch": 0.825777466596651, "grad_norm": 3.2181783992514017, "learning_rate": 7.751181787667616e-07, "loss": 0.1162, "step": 28306 }, { "epoch": 0.8258066398272945, "grad_norm": 0.9646956568179892, "learning_rate": 7.748655381153331e-07, "loss": 0.1285, "step": 28307 }, { "epoch": 0.8258358130579381, "grad_norm": 0.7694065702105926, "learning_rate": 7.746129351848957e-07, "loss": 0.1013, "step": 28308 }, { "epoch": 0.8258649862885816, "grad_norm": 0.9747134014101453, "learning_rate": 7.743603699777064e-07, "loss": 0.0955, "step": 28309 }, { "epoch": 0.8258941595192252, "grad_norm": 0.9093358083483087, "learning_rate": 7.741078424960186e-07, "loss": 0.1085, "step": 28310 }, { "epoch": 0.8259233327498687, "grad_norm": 0.9312594654961549, "learning_rate": 7.738553527420861e-07, "loss": 0.1365, "step": 28311 }, { "epoch": 0.8259525059805123, "grad_norm": 0.9024985437063421, "learning_rate": 7.736029007181644e-07, "loss": 0.1296, "step": 28312 }, { "epoch": 0.8259816792111558, "grad_norm": 0.676235094909886, "learning_rate": 7.733504864265079e-07, "loss": 0.1071, "step": 28313 }, { "epoch": 0.8260108524417994, "grad_norm": 1.0936149343330415, "learning_rate": 7.730981098693696e-07, "loss": 0.1107, "step": 28314 }, { "epoch": 0.826040025672443, "grad_norm": 0.7801232048413431, "learning_rate": 7.728457710490011e-07, "loss": 0.1191, "step": 28315 }, { "epoch": 0.8260691989030865, "grad_norm": 0.8271271730496492, "learning_rate": 7.725934699676574e-07, "loss": 0.1175, "step": 28316 }, { "epoch": 0.82609837213373, "grad_norm": 0.832033601569443, "learning_rate": 7.723412066275904e-07, "loss": 0.114, "step": 28317 }, { "epoch": 0.8261275453643736, "grad_norm": 0.7192394984985441, "learning_rate": 7.720889810310506e-07, "loss": 0.1147, "step": 28318 }, { "epoch": 0.8261567185950172, "grad_norm": 0.8398302146468973, "learning_rate": 7.718367931802906e-07, "loss": 0.1047, "step": 28319 }, { "epoch": 0.8261858918256608, "grad_norm": 0.8094270219011735, "learning_rate": 7.715846430775642e-07, "loss": 0.0997, "step": 28320 }, { "epoch": 0.8262150650563044, "grad_norm": 0.8033581687404708, "learning_rate": 7.713325307251201e-07, "loss": 0.1192, "step": 28321 }, { "epoch": 0.8262442382869479, "grad_norm": 0.7806039180298726, "learning_rate": 7.710804561252089e-07, "loss": 0.1096, "step": 28322 }, { "epoch": 0.8262734115175915, "grad_norm": 0.713031695630493, "learning_rate": 7.70828419280083e-07, "loss": 0.0981, "step": 28323 }, { "epoch": 0.826302584748235, "grad_norm": 0.9534880108445638, "learning_rate": 7.705764201919902e-07, "loss": 0.119, "step": 28324 }, { "epoch": 0.8263317579788786, "grad_norm": 0.9174454231049807, "learning_rate": 7.70324458863182e-07, "loss": 0.1009, "step": 28325 }, { "epoch": 0.8263609312095221, "grad_norm": 0.7142890473135709, "learning_rate": 7.700725352959076e-07, "loss": 0.1174, "step": 28326 }, { "epoch": 0.8263901044401657, "grad_norm": 0.8562145201052048, "learning_rate": 7.698206494924165e-07, "loss": 0.1552, "step": 28327 }, { "epoch": 0.8264192776708092, "grad_norm": 0.7943972321512707, "learning_rate": 7.695688014549552e-07, "loss": 0.1111, "step": 28328 }, { "epoch": 0.8264484509014528, "grad_norm": 0.8320070793705426, "learning_rate": 7.693169911857751e-07, "loss": 0.11, "step": 28329 }, { "epoch": 0.8264776241320964, "grad_norm": 0.6725606630061371, "learning_rate": 7.690652186871217e-07, "loss": 0.101, "step": 28330 }, { "epoch": 0.8265067973627399, "grad_norm": 1.0683871223509236, "learning_rate": 7.688134839612454e-07, "loss": 0.1383, "step": 28331 }, { "epoch": 0.8265359705933835, "grad_norm": 0.8267029761207979, "learning_rate": 7.685617870103912e-07, "loss": 0.0912, "step": 28332 }, { "epoch": 0.8265651438240271, "grad_norm": 0.7858096230308077, "learning_rate": 7.683101278368077e-07, "loss": 0.1146, "step": 28333 }, { "epoch": 0.8265943170546707, "grad_norm": 0.8060557571466852, "learning_rate": 7.68058506442742e-07, "loss": 0.1435, "step": 28334 }, { "epoch": 0.8266234902853142, "grad_norm": 0.9064266915483007, "learning_rate": 7.67806922830438e-07, "loss": 0.0946, "step": 28335 }, { "epoch": 0.8266526635159578, "grad_norm": 1.0676803527842267, "learning_rate": 7.675553770021438e-07, "loss": 0.1296, "step": 28336 }, { "epoch": 0.8266818367466013, "grad_norm": 0.812890709765754, "learning_rate": 7.673038689601059e-07, "loss": 0.1345, "step": 28337 }, { "epoch": 0.8267110099772449, "grad_norm": 0.85359666285905, "learning_rate": 7.670523987065675e-07, "loss": 0.1196, "step": 28338 }, { "epoch": 0.8267401832078884, "grad_norm": 1.0561929947567026, "learning_rate": 7.668009662437759e-07, "loss": 0.0988, "step": 28339 }, { "epoch": 0.826769356438532, "grad_norm": 0.8477233441117852, "learning_rate": 7.665495715739745e-07, "loss": 0.1302, "step": 28340 }, { "epoch": 0.8267985296691756, "grad_norm": 1.1954087286146777, "learning_rate": 7.662982146994074e-07, "loss": 0.1227, "step": 28341 }, { "epoch": 0.8268277028998191, "grad_norm": 0.7946048547454742, "learning_rate": 7.660468956223188e-07, "loss": 0.1163, "step": 28342 }, { "epoch": 0.8268568761304627, "grad_norm": 0.8484589925491341, "learning_rate": 7.657956143449535e-07, "loss": 0.1123, "step": 28343 }, { "epoch": 0.8268860493611062, "grad_norm": 0.9025290487939114, "learning_rate": 7.655443708695548e-07, "loss": 0.1266, "step": 28344 }, { "epoch": 0.8269152225917498, "grad_norm": 1.1840698900904802, "learning_rate": 7.652931651983636e-07, "loss": 0.1127, "step": 28345 }, { "epoch": 0.8269443958223933, "grad_norm": 0.7510775826390401, "learning_rate": 7.650419973336254e-07, "loss": 0.1074, "step": 28346 }, { "epoch": 0.826973569053037, "grad_norm": 1.043439873566343, "learning_rate": 7.647908672775817e-07, "loss": 0.1289, "step": 28347 }, { "epoch": 0.8270027422836805, "grad_norm": 0.8373814056569358, "learning_rate": 7.645397750324723e-07, "loss": 0.115, "step": 28348 }, { "epoch": 0.8270319155143241, "grad_norm": 1.0559775862677445, "learning_rate": 7.642887206005412e-07, "loss": 0.1226, "step": 28349 }, { "epoch": 0.8270610887449676, "grad_norm": 0.7475334772010874, "learning_rate": 7.640377039840302e-07, "loss": 0.0901, "step": 28350 }, { "epoch": 0.8270902619756112, "grad_norm": 1.031190810352983, "learning_rate": 7.637867251851794e-07, "loss": 0.1258, "step": 28351 }, { "epoch": 0.8271194352062547, "grad_norm": 0.7248062253201605, "learning_rate": 7.635357842062279e-07, "loss": 0.0978, "step": 28352 }, { "epoch": 0.8271486084368983, "grad_norm": 0.8071471013482424, "learning_rate": 7.632848810494193e-07, "loss": 0.0878, "step": 28353 }, { "epoch": 0.8271777816675419, "grad_norm": 0.6077534103469545, "learning_rate": 7.630340157169902e-07, "loss": 0.105, "step": 28354 }, { "epoch": 0.8272069548981854, "grad_norm": 0.7467996251164862, "learning_rate": 7.627831882111825e-07, "loss": 0.1185, "step": 28355 }, { "epoch": 0.827236128128829, "grad_norm": 1.3510960449703084, "learning_rate": 7.625323985342359e-07, "loss": 0.1043, "step": 28356 }, { "epoch": 0.8272653013594725, "grad_norm": 0.9039522451332882, "learning_rate": 7.622816466883887e-07, "loss": 0.0866, "step": 28357 }, { "epoch": 0.8272944745901161, "grad_norm": 0.9638217489673516, "learning_rate": 7.620309326758779e-07, "loss": 0.1101, "step": 28358 }, { "epoch": 0.8273236478207596, "grad_norm": 0.8155457194286022, "learning_rate": 7.617802564989446e-07, "loss": 0.1231, "step": 28359 }, { "epoch": 0.8273528210514033, "grad_norm": 0.7700458546478592, "learning_rate": 7.615296181598242e-07, "loss": 0.1051, "step": 28360 }, { "epoch": 0.8273819942820468, "grad_norm": 0.8312943947121507, "learning_rate": 7.612790176607566e-07, "loss": 0.1067, "step": 28361 }, { "epoch": 0.8274111675126904, "grad_norm": 1.020073930862499, "learning_rate": 7.61028455003977e-07, "loss": 0.1432, "step": 28362 }, { "epoch": 0.827440340743334, "grad_norm": 0.814031831138474, "learning_rate": 7.607779301917245e-07, "loss": 0.0982, "step": 28363 }, { "epoch": 0.8274695139739775, "grad_norm": 1.0610711076104806, "learning_rate": 7.60527443226235e-07, "loss": 0.0922, "step": 28364 }, { "epoch": 0.827498687204621, "grad_norm": 0.9477252653121643, "learning_rate": 7.602769941097427e-07, "loss": 0.1193, "step": 28365 }, { "epoch": 0.8275278604352646, "grad_norm": 0.7560137550405617, "learning_rate": 7.600265828444858e-07, "loss": 0.1206, "step": 28366 }, { "epoch": 0.8275570336659082, "grad_norm": 0.706287256192517, "learning_rate": 7.597762094327004e-07, "loss": 0.1111, "step": 28367 }, { "epoch": 0.8275862068965517, "grad_norm": 1.1547158265989943, "learning_rate": 7.595258738766192e-07, "loss": 0.1025, "step": 28368 }, { "epoch": 0.8276153801271953, "grad_norm": 0.6871472421710532, "learning_rate": 7.592755761784803e-07, "loss": 0.1125, "step": 28369 }, { "epoch": 0.8276445533578388, "grad_norm": 0.8584336200980527, "learning_rate": 7.59025316340517e-07, "loss": 0.1152, "step": 28370 }, { "epoch": 0.8276737265884824, "grad_norm": 1.0571173083816197, "learning_rate": 7.587750943649618e-07, "loss": 0.118, "step": 28371 }, { "epoch": 0.8277028998191259, "grad_norm": 0.8932959152501599, "learning_rate": 7.585249102540498e-07, "loss": 0.1206, "step": 28372 }, { "epoch": 0.8277320730497695, "grad_norm": 0.6940551219872615, "learning_rate": 7.582747640100168e-07, "loss": 0.1036, "step": 28373 }, { "epoch": 0.8277612462804131, "grad_norm": 0.7990764587298337, "learning_rate": 7.580246556350934e-07, "loss": 0.1088, "step": 28374 }, { "epoch": 0.8277904195110567, "grad_norm": 0.7090968228632207, "learning_rate": 7.577745851315127e-07, "loss": 0.0993, "step": 28375 }, { "epoch": 0.8278195927417002, "grad_norm": 0.689448746294295, "learning_rate": 7.575245525015085e-07, "loss": 0.0948, "step": 28376 }, { "epoch": 0.8278487659723438, "grad_norm": 0.8383856371408687, "learning_rate": 7.572745577473123e-07, "loss": 0.128, "step": 28377 }, { "epoch": 0.8278779392029874, "grad_norm": 1.0460968061717753, "learning_rate": 7.570246008711552e-07, "loss": 0.1143, "step": 28378 }, { "epoch": 0.8279071124336309, "grad_norm": 0.8701828394876752, "learning_rate": 7.567746818752692e-07, "loss": 0.1155, "step": 28379 }, { "epoch": 0.8279362856642745, "grad_norm": 0.7056535521867008, "learning_rate": 7.565248007618875e-07, "loss": 0.1285, "step": 28380 }, { "epoch": 0.827965458894918, "grad_norm": 1.0685582939700338, "learning_rate": 7.56274957533239e-07, "loss": 0.0988, "step": 28381 }, { "epoch": 0.8279946321255616, "grad_norm": 0.8933234757039856, "learning_rate": 7.560251521915534e-07, "loss": 0.1244, "step": 28382 }, { "epoch": 0.8280238053562051, "grad_norm": 0.7656982208089511, "learning_rate": 7.557753847390637e-07, "loss": 0.1076, "step": 28383 }, { "epoch": 0.8280529785868487, "grad_norm": 0.7442566120946866, "learning_rate": 7.555256551779966e-07, "loss": 0.1127, "step": 28384 }, { "epoch": 0.8280821518174922, "grad_norm": 0.9755368554092401, "learning_rate": 7.552759635105832e-07, "loss": 0.1139, "step": 28385 }, { "epoch": 0.8281113250481358, "grad_norm": 0.8705401918820534, "learning_rate": 7.550263097390543e-07, "loss": 0.1158, "step": 28386 }, { "epoch": 0.8281404982787794, "grad_norm": 0.7506131579224751, "learning_rate": 7.54776693865637e-07, "loss": 0.1143, "step": 28387 }, { "epoch": 0.828169671509423, "grad_norm": 0.8008476601379567, "learning_rate": 7.545271158925588e-07, "loss": 0.1052, "step": 28388 }, { "epoch": 0.8281988447400666, "grad_norm": 0.8488696894521165, "learning_rate": 7.542775758220499e-07, "loss": 0.1243, "step": 28389 }, { "epoch": 0.8282280179707101, "grad_norm": 0.7289801411330107, "learning_rate": 7.540280736563366e-07, "loss": 0.1159, "step": 28390 }, { "epoch": 0.8282571912013537, "grad_norm": 0.7703778206123986, "learning_rate": 7.537786093976479e-07, "loss": 0.124, "step": 28391 }, { "epoch": 0.8282863644319972, "grad_norm": 0.9645049119459579, "learning_rate": 7.535291830482088e-07, "loss": 0.1437, "step": 28392 }, { "epoch": 0.8283155376626408, "grad_norm": 0.8649477174827838, "learning_rate": 7.532797946102488e-07, "loss": 0.1335, "step": 28393 }, { "epoch": 0.8283447108932843, "grad_norm": 0.7159959822166435, "learning_rate": 7.530304440859932e-07, "loss": 0.1023, "step": 28394 }, { "epoch": 0.8283738841239279, "grad_norm": 0.8603737686192554, "learning_rate": 7.527811314776667e-07, "loss": 0.1196, "step": 28395 }, { "epoch": 0.8284030573545714, "grad_norm": 0.9851910441865979, "learning_rate": 7.525318567874962e-07, "loss": 0.1158, "step": 28396 }, { "epoch": 0.828432230585215, "grad_norm": 0.8980323382883142, "learning_rate": 7.522826200177085e-07, "loss": 0.122, "step": 28397 }, { "epoch": 0.8284614038158585, "grad_norm": 0.7215558665966592, "learning_rate": 7.520334211705265e-07, "loss": 0.1099, "step": 28398 }, { "epoch": 0.8284905770465021, "grad_norm": 0.6656369779731551, "learning_rate": 7.517842602481773e-07, "loss": 0.0843, "step": 28399 }, { "epoch": 0.8285197502771456, "grad_norm": 1.0008539603737099, "learning_rate": 7.515351372528839e-07, "loss": 0.1196, "step": 28400 }, { "epoch": 0.8285489235077893, "grad_norm": 0.8997140369278976, "learning_rate": 7.512860521868693e-07, "loss": 0.1255, "step": 28401 }, { "epoch": 0.8285780967384329, "grad_norm": 0.8186369579234746, "learning_rate": 7.510370050523591e-07, "loss": 0.1262, "step": 28402 }, { "epoch": 0.8286072699690764, "grad_norm": 0.9062082953019484, "learning_rate": 7.507879958515768e-07, "loss": 0.1302, "step": 28403 }, { "epoch": 0.82863644319972, "grad_norm": 0.7423631168798381, "learning_rate": 7.505390245867455e-07, "loss": 0.1106, "step": 28404 }, { "epoch": 0.8286656164303635, "grad_norm": 0.8827426244144843, "learning_rate": 7.502900912600858e-07, "loss": 0.1131, "step": 28405 }, { "epoch": 0.8286947896610071, "grad_norm": 0.8395354430181127, "learning_rate": 7.50041195873823e-07, "loss": 0.1241, "step": 28406 }, { "epoch": 0.8287239628916506, "grad_norm": 0.946227806949324, "learning_rate": 7.497923384301775e-07, "loss": 0.0881, "step": 28407 }, { "epoch": 0.8287531361222942, "grad_norm": 0.8016717106451814, "learning_rate": 7.495435189313704e-07, "loss": 0.1367, "step": 28408 }, { "epoch": 0.8287823093529377, "grad_norm": 0.8166243097367427, "learning_rate": 7.492947373796244e-07, "loss": 0.1323, "step": 28409 }, { "epoch": 0.8288114825835813, "grad_norm": 1.3640015216564052, "learning_rate": 7.49045993777161e-07, "loss": 0.1214, "step": 28410 }, { "epoch": 0.8288406558142248, "grad_norm": 0.8039236960400502, "learning_rate": 7.487972881262006e-07, "loss": 0.1399, "step": 28411 }, { "epoch": 0.8288698290448684, "grad_norm": 0.7848311756178701, "learning_rate": 7.485486204289616e-07, "loss": 0.1247, "step": 28412 }, { "epoch": 0.8288990022755119, "grad_norm": 0.7767831778621073, "learning_rate": 7.48299990687667e-07, "loss": 0.1125, "step": 28413 }, { "epoch": 0.8289281755061556, "grad_norm": 0.758910254961333, "learning_rate": 7.480513989045341e-07, "loss": 0.1005, "step": 28414 }, { "epoch": 0.8289573487367992, "grad_norm": 0.7087555708668387, "learning_rate": 7.478028450817832e-07, "loss": 0.1061, "step": 28415 }, { "epoch": 0.8289865219674427, "grad_norm": 1.0047912122330391, "learning_rate": 7.475543292216347e-07, "loss": 0.1199, "step": 28416 }, { "epoch": 0.8290156951980863, "grad_norm": 1.527544903022475, "learning_rate": 7.473058513263054e-07, "loss": 0.1127, "step": 28417 }, { "epoch": 0.8290448684287298, "grad_norm": 0.9787763908199145, "learning_rate": 7.470574113980139e-07, "loss": 0.1093, "step": 28418 }, { "epoch": 0.8290740416593734, "grad_norm": 0.7679047763089842, "learning_rate": 7.46809009438979e-07, "loss": 0.1215, "step": 28419 }, { "epoch": 0.8291032148900169, "grad_norm": 0.9601933188428016, "learning_rate": 7.465606454514174e-07, "loss": 0.1043, "step": 28420 }, { "epoch": 0.8291323881206605, "grad_norm": 0.7876307216321129, "learning_rate": 7.463123194375476e-07, "loss": 0.1112, "step": 28421 }, { "epoch": 0.829161561351304, "grad_norm": 0.7313640996354795, "learning_rate": 7.460640313995854e-07, "loss": 0.1209, "step": 28422 }, { "epoch": 0.8291907345819476, "grad_norm": 0.7396376101369221, "learning_rate": 7.458157813397487e-07, "loss": 0.105, "step": 28423 }, { "epoch": 0.8292199078125911, "grad_norm": 0.8886354246380967, "learning_rate": 7.455675692602532e-07, "loss": 0.1176, "step": 28424 }, { "epoch": 0.8292490810432347, "grad_norm": 1.0904938595718519, "learning_rate": 7.453193951633142e-07, "loss": 0.1045, "step": 28425 }, { "epoch": 0.8292782542738782, "grad_norm": 0.7608969060337386, "learning_rate": 7.450712590511472e-07, "loss": 0.1128, "step": 28426 }, { "epoch": 0.8293074275045218, "grad_norm": 0.7658006455646321, "learning_rate": 7.448231609259699e-07, "loss": 0.0987, "step": 28427 }, { "epoch": 0.8293366007351655, "grad_norm": 0.8597849271304524, "learning_rate": 7.445751007899943e-07, "loss": 0.1226, "step": 28428 }, { "epoch": 0.829365773965809, "grad_norm": 0.6598982366190425, "learning_rate": 7.443270786454376e-07, "loss": 0.1004, "step": 28429 }, { "epoch": 0.8293949471964526, "grad_norm": 0.9080284371942519, "learning_rate": 7.440790944945131e-07, "loss": 0.1058, "step": 28430 }, { "epoch": 0.8294241204270961, "grad_norm": 1.4034088887775558, "learning_rate": 7.438311483394328e-07, "loss": 0.1247, "step": 28431 }, { "epoch": 0.8294532936577397, "grad_norm": 0.7383696981856201, "learning_rate": 7.435832401824122e-07, "loss": 0.096, "step": 28432 }, { "epoch": 0.8294824668883832, "grad_norm": 0.7879472713030384, "learning_rate": 7.433353700256651e-07, "loss": 0.1043, "step": 28433 }, { "epoch": 0.8295116401190268, "grad_norm": 0.784470214110543, "learning_rate": 7.430875378714042e-07, "loss": 0.115, "step": 28434 }, { "epoch": 0.8295408133496703, "grad_norm": 0.7232544424523933, "learning_rate": 7.428397437218404e-07, "loss": 0.1079, "step": 28435 }, { "epoch": 0.8295699865803139, "grad_norm": 0.7159335182674081, "learning_rate": 7.425919875791881e-07, "loss": 0.104, "step": 28436 }, { "epoch": 0.8295991598109574, "grad_norm": 0.7607774667796043, "learning_rate": 7.423442694456584e-07, "loss": 0.114, "step": 28437 }, { "epoch": 0.829628333041601, "grad_norm": 0.8795922124661933, "learning_rate": 7.420965893234611e-07, "loss": 0.1102, "step": 28438 }, { "epoch": 0.8296575062722445, "grad_norm": 0.8882395773551321, "learning_rate": 7.418489472148094e-07, "loss": 0.121, "step": 28439 }, { "epoch": 0.8296866795028881, "grad_norm": 0.907455358333316, "learning_rate": 7.416013431219149e-07, "loss": 0.1074, "step": 28440 }, { "epoch": 0.8297158527335317, "grad_norm": 0.7848234078597283, "learning_rate": 7.41353777046987e-07, "loss": 0.112, "step": 28441 }, { "epoch": 0.8297450259641753, "grad_norm": 0.9077419057850704, "learning_rate": 7.411062489922344e-07, "loss": 0.1009, "step": 28442 }, { "epoch": 0.8297741991948189, "grad_norm": 0.863099550723619, "learning_rate": 7.408587589598704e-07, "loss": 0.125, "step": 28443 }, { "epoch": 0.8298033724254624, "grad_norm": 0.8234409787337859, "learning_rate": 7.406113069521009e-07, "loss": 0.1115, "step": 28444 }, { "epoch": 0.829832545656106, "grad_norm": 0.9249257401348212, "learning_rate": 7.403638929711371e-07, "loss": 0.1137, "step": 28445 }, { "epoch": 0.8298617188867495, "grad_norm": 0.9947568607836801, "learning_rate": 7.401165170191887e-07, "loss": 0.1254, "step": 28446 }, { "epoch": 0.8298908921173931, "grad_norm": 0.7455723298912932, "learning_rate": 7.39869179098463e-07, "loss": 0.0924, "step": 28447 }, { "epoch": 0.8299200653480366, "grad_norm": 0.7798383077405091, "learning_rate": 7.396218792111676e-07, "loss": 0.1023, "step": 28448 }, { "epoch": 0.8299492385786802, "grad_norm": 0.8745388904463481, "learning_rate": 7.393746173595106e-07, "loss": 0.1106, "step": 28449 }, { "epoch": 0.8299784118093237, "grad_norm": 1.0526718887968298, "learning_rate": 7.391273935457016e-07, "loss": 0.1324, "step": 28450 }, { "epoch": 0.8300075850399673, "grad_norm": 0.716904351557592, "learning_rate": 7.388802077719454e-07, "loss": 0.1236, "step": 28451 }, { "epoch": 0.8300367582706109, "grad_norm": 0.9323271799953796, "learning_rate": 7.386330600404484e-07, "loss": 0.1218, "step": 28452 }, { "epoch": 0.8300659315012544, "grad_norm": 1.0813662179731656, "learning_rate": 7.383859503534197e-07, "loss": 0.113, "step": 28453 }, { "epoch": 0.830095104731898, "grad_norm": 0.787413754439722, "learning_rate": 7.381388787130639e-07, "loss": 0.1015, "step": 28454 }, { "epoch": 0.8301242779625416, "grad_norm": 0.7645144026430417, "learning_rate": 7.378918451215844e-07, "loss": 0.1067, "step": 28455 }, { "epoch": 0.8301534511931852, "grad_norm": 0.9540995963289592, "learning_rate": 7.376448495811911e-07, "loss": 0.108, "step": 28456 }, { "epoch": 0.8301826244238287, "grad_norm": 1.043364353745594, "learning_rate": 7.373978920940878e-07, "loss": 0.1079, "step": 28457 }, { "epoch": 0.8302117976544723, "grad_norm": 0.9012451293447941, "learning_rate": 7.371509726624765e-07, "loss": 0.1326, "step": 28458 }, { "epoch": 0.8302409708851158, "grad_norm": 0.8397749882451837, "learning_rate": 7.369040912885656e-07, "loss": 0.1267, "step": 28459 }, { "epoch": 0.8302701441157594, "grad_norm": 0.7710792796993645, "learning_rate": 7.366572479745565e-07, "loss": 0.1121, "step": 28460 }, { "epoch": 0.8302993173464029, "grad_norm": 1.0625839752446422, "learning_rate": 7.364104427226532e-07, "loss": 0.1063, "step": 28461 }, { "epoch": 0.8303284905770465, "grad_norm": 0.8124520740403229, "learning_rate": 7.361636755350593e-07, "loss": 0.1366, "step": 28462 }, { "epoch": 0.83035766380769, "grad_norm": 1.019349418711587, "learning_rate": 7.359169464139798e-07, "loss": 0.1123, "step": 28463 }, { "epoch": 0.8303868370383336, "grad_norm": 0.6286083303873796, "learning_rate": 7.356702553616157e-07, "loss": 0.1103, "step": 28464 }, { "epoch": 0.8304160102689772, "grad_norm": 0.7864544122825847, "learning_rate": 7.354236023801687e-07, "loss": 0.1179, "step": 28465 }, { "epoch": 0.8304451834996207, "grad_norm": 0.8116342155865951, "learning_rate": 7.351769874718423e-07, "loss": 0.1124, "step": 28466 }, { "epoch": 0.8304743567302643, "grad_norm": 1.110231707576032, "learning_rate": 7.349304106388366e-07, "loss": 0.1167, "step": 28467 }, { "epoch": 0.8305035299609078, "grad_norm": 0.7610493832294706, "learning_rate": 7.34683871883356e-07, "loss": 0.1048, "step": 28468 }, { "epoch": 0.8305327031915515, "grad_norm": 0.769739157904961, "learning_rate": 7.344373712075976e-07, "loss": 0.1166, "step": 28469 }, { "epoch": 0.830561876422195, "grad_norm": 0.9835055826127647, "learning_rate": 7.341909086137655e-07, "loss": 0.1197, "step": 28470 }, { "epoch": 0.8305910496528386, "grad_norm": 0.6848325801545342, "learning_rate": 7.339444841040583e-07, "loss": 0.1031, "step": 28471 }, { "epoch": 0.8306202228834821, "grad_norm": 0.8539488257851424, "learning_rate": 7.336980976806757e-07, "loss": 0.1098, "step": 28472 }, { "epoch": 0.8306493961141257, "grad_norm": 0.9767131282538098, "learning_rate": 7.334517493458176e-07, "loss": 0.1091, "step": 28473 }, { "epoch": 0.8306785693447692, "grad_norm": 0.8913909267721453, "learning_rate": 7.332054391016852e-07, "loss": 0.1029, "step": 28474 }, { "epoch": 0.8307077425754128, "grad_norm": 0.9429812227510052, "learning_rate": 7.329591669504748e-07, "loss": 0.1085, "step": 28475 }, { "epoch": 0.8307369158060564, "grad_norm": 0.7317002307282251, "learning_rate": 7.327129328943877e-07, "loss": 0.1227, "step": 28476 }, { "epoch": 0.8307660890366999, "grad_norm": 0.8530850221167933, "learning_rate": 7.324667369356209e-07, "loss": 0.1042, "step": 28477 }, { "epoch": 0.8307952622673435, "grad_norm": 1.2876696314117573, "learning_rate": 7.322205790763709e-07, "loss": 0.1223, "step": 28478 }, { "epoch": 0.830824435497987, "grad_norm": 0.9535450140788281, "learning_rate": 7.319744593188371e-07, "loss": 0.0964, "step": 28479 }, { "epoch": 0.8308536087286306, "grad_norm": 0.9078480178805935, "learning_rate": 7.317283776652173e-07, "loss": 0.105, "step": 28480 }, { "epoch": 0.8308827819592741, "grad_norm": 1.0122084769082274, "learning_rate": 7.31482334117708e-07, "loss": 0.1058, "step": 28481 }, { "epoch": 0.8309119551899178, "grad_norm": 1.1380085633167951, "learning_rate": 7.31236328678504e-07, "loss": 0.0953, "step": 28482 }, { "epoch": 0.8309411284205613, "grad_norm": 1.2484178967668609, "learning_rate": 7.309903613498037e-07, "loss": 0.1333, "step": 28483 }, { "epoch": 0.8309703016512049, "grad_norm": 0.9646207965847657, "learning_rate": 7.307444321338031e-07, "loss": 0.1051, "step": 28484 }, { "epoch": 0.8309994748818484, "grad_norm": 1.0211502583035859, "learning_rate": 7.304985410326942e-07, "loss": 0.1078, "step": 28485 }, { "epoch": 0.831028648112492, "grad_norm": 0.6957929901096648, "learning_rate": 7.302526880486782e-07, "loss": 0.0964, "step": 28486 }, { "epoch": 0.8310578213431355, "grad_norm": 0.7550524268357655, "learning_rate": 7.300068731839461e-07, "loss": 0.1153, "step": 28487 }, { "epoch": 0.8310869945737791, "grad_norm": 0.754587778508101, "learning_rate": 7.297610964406926e-07, "loss": 0.1045, "step": 28488 }, { "epoch": 0.8311161678044227, "grad_norm": 0.7728547608988162, "learning_rate": 7.295153578211139e-07, "loss": 0.1108, "step": 28489 }, { "epoch": 0.8311453410350662, "grad_norm": 0.8260731903976712, "learning_rate": 7.292696573274022e-07, "loss": 0.1014, "step": 28490 }, { "epoch": 0.8311745142657098, "grad_norm": 0.776247445470421, "learning_rate": 7.290239949617506e-07, "loss": 0.1014, "step": 28491 }, { "epoch": 0.8312036874963533, "grad_norm": 0.960356033116326, "learning_rate": 7.287783707263535e-07, "loss": 0.1173, "step": 28492 }, { "epoch": 0.8312328607269969, "grad_norm": 0.6693668087549213, "learning_rate": 7.285327846234042e-07, "loss": 0.1096, "step": 28493 }, { "epoch": 0.8312620339576404, "grad_norm": 0.7847598719804603, "learning_rate": 7.282872366550947e-07, "loss": 0.1076, "step": 28494 }, { "epoch": 0.831291207188284, "grad_norm": 0.8124027714812048, "learning_rate": 7.280417268236157e-07, "loss": 0.1241, "step": 28495 }, { "epoch": 0.8313203804189276, "grad_norm": 0.8112382677692418, "learning_rate": 7.277962551311613e-07, "loss": 0.0886, "step": 28496 }, { "epoch": 0.8313495536495712, "grad_norm": 0.8637708261310176, "learning_rate": 7.275508215799216e-07, "loss": 0.1072, "step": 28497 }, { "epoch": 0.8313787268802147, "grad_norm": 0.8126661809103095, "learning_rate": 7.273054261720891e-07, "loss": 0.0964, "step": 28498 }, { "epoch": 0.8314079001108583, "grad_norm": 1.1081717731744656, "learning_rate": 7.270600689098523e-07, "loss": 0.1114, "step": 28499 }, { "epoch": 0.8314370733415019, "grad_norm": 1.0169366316178285, "learning_rate": 7.268147497954048e-07, "loss": 0.1013, "step": 28500 }, { "epoch": 0.8314662465721454, "grad_norm": 0.8697696658821369, "learning_rate": 7.265694688309349e-07, "loss": 0.1141, "step": 28501 }, { "epoch": 0.831495419802789, "grad_norm": 0.8830334202234175, "learning_rate": 7.263242260186315e-07, "loss": 0.1095, "step": 28502 }, { "epoch": 0.8315245930334325, "grad_norm": 0.9855130207278656, "learning_rate": 7.26079021360685e-07, "loss": 0.1219, "step": 28503 }, { "epoch": 0.8315537662640761, "grad_norm": 0.6709105986819079, "learning_rate": 7.258338548592858e-07, "loss": 0.0972, "step": 28504 }, { "epoch": 0.8315829394947196, "grad_norm": 0.7081321681791498, "learning_rate": 7.255887265166211e-07, "loss": 0.113, "step": 28505 }, { "epoch": 0.8316121127253632, "grad_norm": 1.1133234397478888, "learning_rate": 7.253436363348804e-07, "loss": 0.1133, "step": 28506 }, { "epoch": 0.8316412859560067, "grad_norm": 1.0341565117238025, "learning_rate": 7.250985843162517e-07, "loss": 0.1146, "step": 28507 }, { "epoch": 0.8316704591866503, "grad_norm": 1.0097485894293217, "learning_rate": 7.248535704629211e-07, "loss": 0.1001, "step": 28508 }, { "epoch": 0.8316996324172939, "grad_norm": 0.8673098107296484, "learning_rate": 7.24608594777077e-07, "loss": 0.1183, "step": 28509 }, { "epoch": 0.8317288056479375, "grad_norm": 0.8743674749189494, "learning_rate": 7.24363657260908e-07, "loss": 0.1216, "step": 28510 }, { "epoch": 0.831757978878581, "grad_norm": 0.821606492559704, "learning_rate": 7.241187579165998e-07, "loss": 0.1215, "step": 28511 }, { "epoch": 0.8317871521092246, "grad_norm": 1.0138135264361696, "learning_rate": 7.238738967463372e-07, "loss": 0.1017, "step": 28512 }, { "epoch": 0.8318163253398682, "grad_norm": 0.9482798737416205, "learning_rate": 7.236290737523089e-07, "loss": 0.1171, "step": 28513 }, { "epoch": 0.8318454985705117, "grad_norm": 0.8324205475097127, "learning_rate": 7.233842889366993e-07, "loss": 0.0775, "step": 28514 }, { "epoch": 0.8318746718011553, "grad_norm": 0.8960529727392906, "learning_rate": 7.231395423016918e-07, "loss": 0.1208, "step": 28515 }, { "epoch": 0.8319038450317988, "grad_norm": 0.9782130382983932, "learning_rate": 7.228948338494757e-07, "loss": 0.1251, "step": 28516 }, { "epoch": 0.8319330182624424, "grad_norm": 0.9436371391172321, "learning_rate": 7.226501635822337e-07, "loss": 0.1176, "step": 28517 }, { "epoch": 0.8319621914930859, "grad_norm": 1.0025269470149112, "learning_rate": 7.224055315021484e-07, "loss": 0.1165, "step": 28518 }, { "epoch": 0.8319913647237295, "grad_norm": 1.2910502736752922, "learning_rate": 7.221609376114069e-07, "loss": 0.1367, "step": 28519 }, { "epoch": 0.832020537954373, "grad_norm": 0.7938050409417399, "learning_rate": 7.21916381912191e-07, "loss": 0.1186, "step": 28520 }, { "epoch": 0.8320497111850166, "grad_norm": 0.8765039578690622, "learning_rate": 7.216718644066834e-07, "loss": 0.1342, "step": 28521 }, { "epoch": 0.8320788844156601, "grad_norm": 0.7935005075097074, "learning_rate": 7.214273850970677e-07, "loss": 0.1083, "step": 28522 }, { "epoch": 0.8321080576463038, "grad_norm": 0.8121681822286977, "learning_rate": 7.211829439855284e-07, "loss": 0.1123, "step": 28523 }, { "epoch": 0.8321372308769474, "grad_norm": 0.7256000871820524, "learning_rate": 7.209385410742465e-07, "loss": 0.1016, "step": 28524 }, { "epoch": 0.8321664041075909, "grad_norm": 0.918670339112401, "learning_rate": 7.206941763654024e-07, "loss": 0.0928, "step": 28525 }, { "epoch": 0.8321955773382345, "grad_norm": 0.7244585269610889, "learning_rate": 7.204498498611806e-07, "loss": 0.1038, "step": 28526 }, { "epoch": 0.832224750568878, "grad_norm": 1.2410483979623603, "learning_rate": 7.202055615637594e-07, "loss": 0.1294, "step": 28527 }, { "epoch": 0.8322539237995216, "grad_norm": 1.0056864448028837, "learning_rate": 7.199613114753228e-07, "loss": 0.1168, "step": 28528 }, { "epoch": 0.8322830970301651, "grad_norm": 0.914242684557372, "learning_rate": 7.197170995980485e-07, "loss": 0.1154, "step": 28529 }, { "epoch": 0.8323122702608087, "grad_norm": 0.7913292705219948, "learning_rate": 7.194729259341194e-07, "loss": 0.1113, "step": 28530 }, { "epoch": 0.8323414434914522, "grad_norm": 1.0144165012259367, "learning_rate": 7.192287904857138e-07, "loss": 0.0951, "step": 28531 }, { "epoch": 0.8323706167220958, "grad_norm": 0.6879136500285986, "learning_rate": 7.18984693255011e-07, "loss": 0.0979, "step": 28532 }, { "epoch": 0.8323997899527393, "grad_norm": 0.7006082843911513, "learning_rate": 7.187406342441905e-07, "loss": 0.0871, "step": 28533 }, { "epoch": 0.8324289631833829, "grad_norm": 0.815068687722015, "learning_rate": 7.184966134554333e-07, "loss": 0.1152, "step": 28534 }, { "epoch": 0.8324581364140264, "grad_norm": 0.8554049222639807, "learning_rate": 7.182526308909149e-07, "loss": 0.1244, "step": 28535 }, { "epoch": 0.8324873096446701, "grad_norm": 0.7513868656783036, "learning_rate": 7.180086865528157e-07, "loss": 0.1048, "step": 28536 }, { "epoch": 0.8325164828753137, "grad_norm": 0.6375296876128865, "learning_rate": 7.17764780443313e-07, "loss": 0.1205, "step": 28537 }, { "epoch": 0.8325456561059572, "grad_norm": 0.8367052356876804, "learning_rate": 7.175209125645827e-07, "loss": 0.0938, "step": 28538 }, { "epoch": 0.8325748293366008, "grad_norm": 0.7349705041397777, "learning_rate": 7.172770829188036e-07, "loss": 0.0966, "step": 28539 }, { "epoch": 0.8326040025672443, "grad_norm": 1.0130061093738332, "learning_rate": 7.170332915081535e-07, "loss": 0.1151, "step": 28540 }, { "epoch": 0.8326331757978879, "grad_norm": 1.0458129887242262, "learning_rate": 7.167895383348078e-07, "loss": 0.1209, "step": 28541 }, { "epoch": 0.8326623490285314, "grad_norm": 0.9933350286807865, "learning_rate": 7.165458234009415e-07, "loss": 0.1053, "step": 28542 }, { "epoch": 0.832691522259175, "grad_norm": 0.7640996831201017, "learning_rate": 7.163021467087322e-07, "loss": 0.0778, "step": 28543 }, { "epoch": 0.8327206954898185, "grad_norm": 0.9615357386913754, "learning_rate": 7.160585082603549e-07, "loss": 0.1144, "step": 28544 }, { "epoch": 0.8327498687204621, "grad_norm": 0.7987610593492653, "learning_rate": 7.15814908057983e-07, "loss": 0.1051, "step": 28545 }, { "epoch": 0.8327790419511056, "grad_norm": 1.015639203436034, "learning_rate": 7.155713461037944e-07, "loss": 0.0999, "step": 28546 }, { "epoch": 0.8328082151817492, "grad_norm": 0.7743864515395077, "learning_rate": 7.153278223999622e-07, "loss": 0.1014, "step": 28547 }, { "epoch": 0.8328373884123927, "grad_norm": 0.8477173807154128, "learning_rate": 7.150843369486593e-07, "loss": 0.1058, "step": 28548 }, { "epoch": 0.8328665616430363, "grad_norm": 0.8275212261359371, "learning_rate": 7.14840889752062e-07, "loss": 0.1286, "step": 28549 }, { "epoch": 0.83289573487368, "grad_norm": 0.6775216173110621, "learning_rate": 7.145974808123418e-07, "loss": 0.0743, "step": 28550 }, { "epoch": 0.8329249081043235, "grad_norm": 0.8947868673066094, "learning_rate": 7.143541101316715e-07, "loss": 0.1197, "step": 28551 }, { "epoch": 0.8329540813349671, "grad_norm": 1.1815004802146287, "learning_rate": 7.141107777122242e-07, "loss": 0.1286, "step": 28552 }, { "epoch": 0.8329832545656106, "grad_norm": 0.8162810130464536, "learning_rate": 7.138674835561743e-07, "loss": 0.1211, "step": 28553 }, { "epoch": 0.8330124277962542, "grad_norm": 0.731954929166307, "learning_rate": 7.136242276656924e-07, "loss": 0.1077, "step": 28554 }, { "epoch": 0.8330416010268977, "grad_norm": 0.6193620227743445, "learning_rate": 7.133810100429489e-07, "loss": 0.1164, "step": 28555 }, { "epoch": 0.8330707742575413, "grad_norm": 1.0063182105702355, "learning_rate": 7.131378306901171e-07, "loss": 0.1186, "step": 28556 }, { "epoch": 0.8330999474881848, "grad_norm": 1.1640924578124354, "learning_rate": 7.128946896093669e-07, "loss": 0.1063, "step": 28557 }, { "epoch": 0.8331291207188284, "grad_norm": 0.7617651428155251, "learning_rate": 7.126515868028705e-07, "loss": 0.1291, "step": 28558 }, { "epoch": 0.8331582939494719, "grad_norm": 0.7278757571787543, "learning_rate": 7.124085222727956e-07, "loss": 0.1187, "step": 28559 }, { "epoch": 0.8331874671801155, "grad_norm": 0.7965604064954516, "learning_rate": 7.121654960213159e-07, "loss": 0.1071, "step": 28560 }, { "epoch": 0.833216640410759, "grad_norm": 0.908660796583881, "learning_rate": 7.119225080505982e-07, "loss": 0.115, "step": 28561 }, { "epoch": 0.8332458136414026, "grad_norm": 0.9092177506653578, "learning_rate": 7.116795583628122e-07, "loss": 0.0974, "step": 28562 }, { "epoch": 0.8332749868720463, "grad_norm": 0.8069928837933115, "learning_rate": 7.114366469601269e-07, "loss": 0.1212, "step": 28563 }, { "epoch": 0.8333041601026898, "grad_norm": 0.8121585361316815, "learning_rate": 7.111937738447127e-07, "loss": 0.0945, "step": 28564 }, { "epoch": 0.8333333333333334, "grad_norm": 0.7402716149894177, "learning_rate": 7.109509390187358e-07, "loss": 0.0928, "step": 28565 }, { "epoch": 0.8333625065639769, "grad_norm": 0.8233873005469028, "learning_rate": 7.107081424843665e-07, "loss": 0.1142, "step": 28566 }, { "epoch": 0.8333916797946205, "grad_norm": 0.8461371407414695, "learning_rate": 7.104653842437703e-07, "loss": 0.1124, "step": 28567 }, { "epoch": 0.833420853025264, "grad_norm": 0.8776924496588524, "learning_rate": 7.10222664299114e-07, "loss": 0.1131, "step": 28568 }, { "epoch": 0.8334500262559076, "grad_norm": 0.7461887739325114, "learning_rate": 7.09979982652566e-07, "loss": 0.1015, "step": 28569 }, { "epoch": 0.8334791994865511, "grad_norm": 1.1116031844994403, "learning_rate": 7.09737339306294e-07, "loss": 0.1304, "step": 28570 }, { "epoch": 0.8335083727171947, "grad_norm": 0.8107043254877853, "learning_rate": 7.094947342624625e-07, "loss": 0.089, "step": 28571 }, { "epoch": 0.8335375459478382, "grad_norm": 0.7352173617171506, "learning_rate": 7.092521675232367e-07, "loss": 0.1032, "step": 28572 }, { "epoch": 0.8335667191784818, "grad_norm": 1.0507661539210595, "learning_rate": 7.090096390907842e-07, "loss": 0.1445, "step": 28573 }, { "epoch": 0.8335958924091253, "grad_norm": 1.0960354019228602, "learning_rate": 7.087671489672693e-07, "loss": 0.1124, "step": 28574 }, { "epoch": 0.8336250656397689, "grad_norm": 0.9175672214080708, "learning_rate": 7.085246971548549e-07, "loss": 0.1031, "step": 28575 }, { "epoch": 0.8336542388704125, "grad_norm": 0.8436657160071062, "learning_rate": 7.082822836557097e-07, "loss": 0.1119, "step": 28576 }, { "epoch": 0.8336834121010561, "grad_norm": 0.9998008421766619, "learning_rate": 7.080399084719957e-07, "loss": 0.1209, "step": 28577 }, { "epoch": 0.8337125853316997, "grad_norm": 0.8883943602055342, "learning_rate": 7.07797571605876e-07, "loss": 0.1448, "step": 28578 }, { "epoch": 0.8337417585623432, "grad_norm": 0.6888493874528057, "learning_rate": 7.075552730595159e-07, "loss": 0.124, "step": 28579 }, { "epoch": 0.8337709317929868, "grad_norm": 0.7837341038410314, "learning_rate": 7.073130128350775e-07, "loss": 0.1377, "step": 28580 }, { "epoch": 0.8338001050236303, "grad_norm": 0.928583318848546, "learning_rate": 7.07070790934723e-07, "loss": 0.1233, "step": 28581 }, { "epoch": 0.8338292782542739, "grad_norm": 0.9859291086138892, "learning_rate": 7.068286073606151e-07, "loss": 0.0833, "step": 28582 }, { "epoch": 0.8338584514849174, "grad_norm": 0.7622521249094621, "learning_rate": 7.065864621149182e-07, "loss": 0.1083, "step": 28583 }, { "epoch": 0.833887624715561, "grad_norm": 0.8917953768939532, "learning_rate": 7.063443551997923e-07, "loss": 0.1309, "step": 28584 }, { "epoch": 0.8339167979462045, "grad_norm": 0.7811004491707488, "learning_rate": 7.061022866173978e-07, "loss": 0.1166, "step": 28585 }, { "epoch": 0.8339459711768481, "grad_norm": 1.0898986487645377, "learning_rate": 7.058602563698979e-07, "loss": 0.1164, "step": 28586 }, { "epoch": 0.8339751444074917, "grad_norm": 0.9799969989463303, "learning_rate": 7.056182644594517e-07, "loss": 0.109, "step": 28587 }, { "epoch": 0.8340043176381352, "grad_norm": 0.8211650187716997, "learning_rate": 7.053763108882217e-07, "loss": 0.117, "step": 28588 }, { "epoch": 0.8340334908687788, "grad_norm": 1.0052000997715311, "learning_rate": 7.051343956583656e-07, "loss": 0.1026, "step": 28589 }, { "epoch": 0.8340626640994224, "grad_norm": 1.0879712078598416, "learning_rate": 7.048925187720451e-07, "loss": 0.1214, "step": 28590 }, { "epoch": 0.834091837330066, "grad_norm": 0.975316319835064, "learning_rate": 7.046506802314196e-07, "loss": 0.1092, "step": 28591 }, { "epoch": 0.8341210105607095, "grad_norm": 1.0447357396549664, "learning_rate": 7.044088800386456e-07, "loss": 0.1008, "step": 28592 }, { "epoch": 0.8341501837913531, "grad_norm": 0.8410676282452567, "learning_rate": 7.041671181958842e-07, "loss": 0.1031, "step": 28593 }, { "epoch": 0.8341793570219966, "grad_norm": 0.7683735529820956, "learning_rate": 7.039253947052943e-07, "loss": 0.137, "step": 28594 }, { "epoch": 0.8342085302526402, "grad_norm": 0.792190140443723, "learning_rate": 7.036837095690314e-07, "loss": 0.1138, "step": 28595 }, { "epoch": 0.8342377034832837, "grad_norm": 0.8358270075550457, "learning_rate": 7.034420627892563e-07, "loss": 0.1176, "step": 28596 }, { "epoch": 0.8342668767139273, "grad_norm": 0.8197322268176289, "learning_rate": 7.032004543681248e-07, "loss": 0.108, "step": 28597 }, { "epoch": 0.8342960499445708, "grad_norm": 1.1150473509834575, "learning_rate": 7.029588843077922e-07, "loss": 0.1358, "step": 28598 }, { "epoch": 0.8343252231752144, "grad_norm": 0.7481916598519731, "learning_rate": 7.027173526104175e-07, "loss": 0.1005, "step": 28599 }, { "epoch": 0.834354396405858, "grad_norm": 0.6620424226777286, "learning_rate": 7.024758592781577e-07, "loss": 0.0926, "step": 28600 }, { "epoch": 0.8343835696365015, "grad_norm": 0.7548615752952624, "learning_rate": 7.022344043131668e-07, "loss": 0.1049, "step": 28601 }, { "epoch": 0.8344127428671451, "grad_norm": 0.672330819651534, "learning_rate": 7.019929877176007e-07, "loss": 0.1168, "step": 28602 }, { "epoch": 0.8344419160977886, "grad_norm": 0.8648402334944325, "learning_rate": 7.017516094936161e-07, "loss": 0.0999, "step": 28603 }, { "epoch": 0.8344710893284323, "grad_norm": 0.8599387318696812, "learning_rate": 7.015102696433668e-07, "loss": 0.116, "step": 28604 }, { "epoch": 0.8345002625590758, "grad_norm": 0.866340663535028, "learning_rate": 7.01268968169006e-07, "loss": 0.1092, "step": 28605 }, { "epoch": 0.8345294357897194, "grad_norm": 0.9427175962869279, "learning_rate": 7.010277050726916e-07, "loss": 0.131, "step": 28606 }, { "epoch": 0.8345586090203629, "grad_norm": 0.9318603541137698, "learning_rate": 7.007864803565756e-07, "loss": 0.1418, "step": 28607 }, { "epoch": 0.8345877822510065, "grad_norm": 0.8435687070765879, "learning_rate": 7.005452940228103e-07, "loss": 0.1241, "step": 28608 }, { "epoch": 0.83461695548165, "grad_norm": 0.9562018194829276, "learning_rate": 7.003041460735516e-07, "loss": 0.1257, "step": 28609 }, { "epoch": 0.8346461287122936, "grad_norm": 0.9332906271173597, "learning_rate": 7.000630365109506e-07, "loss": 0.0974, "step": 28610 }, { "epoch": 0.8346753019429372, "grad_norm": 0.9706392694115391, "learning_rate": 6.998219653371597e-07, "loss": 0.1262, "step": 28611 }, { "epoch": 0.8347044751735807, "grad_norm": 0.7587883368411257, "learning_rate": 6.995809325543318e-07, "loss": 0.1283, "step": 28612 }, { "epoch": 0.8347336484042243, "grad_norm": 1.0124911720343897, "learning_rate": 6.993399381646198e-07, "loss": 0.1336, "step": 28613 }, { "epoch": 0.8347628216348678, "grad_norm": 0.990090424597686, "learning_rate": 6.990989821701738e-07, "loss": 0.0891, "step": 28614 }, { "epoch": 0.8347919948655114, "grad_norm": 0.8324856108831794, "learning_rate": 6.988580645731446e-07, "loss": 0.1154, "step": 28615 }, { "epoch": 0.8348211680961549, "grad_norm": 0.8441041372041478, "learning_rate": 6.986171853756851e-07, "loss": 0.1084, "step": 28616 }, { "epoch": 0.8348503413267986, "grad_norm": 0.9744669453409137, "learning_rate": 6.983763445799429e-07, "loss": 0.1214, "step": 28617 }, { "epoch": 0.8348795145574421, "grad_norm": 0.8003730853774177, "learning_rate": 6.981355421880715e-07, "loss": 0.0847, "step": 28618 }, { "epoch": 0.8349086877880857, "grad_norm": 0.919042214983713, "learning_rate": 6.978947782022177e-07, "loss": 0.1117, "step": 28619 }, { "epoch": 0.8349378610187292, "grad_norm": 1.0268314494193647, "learning_rate": 6.976540526245335e-07, "loss": 0.1355, "step": 28620 }, { "epoch": 0.8349670342493728, "grad_norm": 1.0784022321210935, "learning_rate": 6.974133654571668e-07, "loss": 0.1227, "step": 28621 }, { "epoch": 0.8349962074800164, "grad_norm": 0.8930931507470183, "learning_rate": 6.971727167022652e-07, "loss": 0.0935, "step": 28622 }, { "epoch": 0.8350253807106599, "grad_norm": 0.8160656435612694, "learning_rate": 6.969321063619788e-07, "loss": 0.1242, "step": 28623 }, { "epoch": 0.8350545539413035, "grad_norm": 0.6302078749308491, "learning_rate": 6.966915344384562e-07, "loss": 0.0843, "step": 28624 }, { "epoch": 0.835083727171947, "grad_norm": 1.0298737572591399, "learning_rate": 6.964510009338432e-07, "loss": 0.1219, "step": 28625 }, { "epoch": 0.8351129004025906, "grad_norm": 0.8240990607958188, "learning_rate": 6.962105058502894e-07, "loss": 0.1127, "step": 28626 }, { "epoch": 0.8351420736332341, "grad_norm": 0.7479982485384717, "learning_rate": 6.959700491899408e-07, "loss": 0.1204, "step": 28627 }, { "epoch": 0.8351712468638777, "grad_norm": 0.8899750430844123, "learning_rate": 6.957296309549432e-07, "loss": 0.0914, "step": 28628 }, { "epoch": 0.8352004200945212, "grad_norm": 0.659858287058075, "learning_rate": 6.954892511474437e-07, "loss": 0.114, "step": 28629 }, { "epoch": 0.8352295933251648, "grad_norm": 0.9106006986190525, "learning_rate": 6.952489097695897e-07, "loss": 0.1175, "step": 28630 }, { "epoch": 0.8352587665558084, "grad_norm": 0.9145590676697298, "learning_rate": 6.950086068235262e-07, "loss": 0.1229, "step": 28631 }, { "epoch": 0.835287939786452, "grad_norm": 0.6906119242767612, "learning_rate": 6.947683423113966e-07, "loss": 0.0883, "step": 28632 }, { "epoch": 0.8353171130170955, "grad_norm": 0.8851635895069407, "learning_rate": 6.94528116235349e-07, "loss": 0.1021, "step": 28633 }, { "epoch": 0.8353462862477391, "grad_norm": 0.8354837908061795, "learning_rate": 6.942879285975263e-07, "loss": 0.0986, "step": 28634 }, { "epoch": 0.8353754594783827, "grad_norm": 0.8622551654046755, "learning_rate": 6.940477794000711e-07, "loss": 0.1011, "step": 28635 }, { "epoch": 0.8354046327090262, "grad_norm": 1.2199028664512792, "learning_rate": 6.938076686451312e-07, "loss": 0.1133, "step": 28636 }, { "epoch": 0.8354338059396698, "grad_norm": 0.7976686518802866, "learning_rate": 6.935675963348487e-07, "loss": 0.1042, "step": 28637 }, { "epoch": 0.8354629791703133, "grad_norm": 0.8755527093589413, "learning_rate": 6.933275624713659e-07, "loss": 0.116, "step": 28638 }, { "epoch": 0.8354921524009569, "grad_norm": 0.8567428163946965, "learning_rate": 6.930875670568271e-07, "loss": 0.1204, "step": 28639 }, { "epoch": 0.8355213256316004, "grad_norm": 0.7874076456759808, "learning_rate": 6.92847610093374e-07, "loss": 0.1327, "step": 28640 }, { "epoch": 0.835550498862244, "grad_norm": 0.8660864449366423, "learning_rate": 6.926076915831498e-07, "loss": 0.1299, "step": 28641 }, { "epoch": 0.8355796720928875, "grad_norm": 0.8542416222610314, "learning_rate": 6.923678115282945e-07, "loss": 0.1121, "step": 28642 }, { "epoch": 0.8356088453235311, "grad_norm": 0.9936849820137602, "learning_rate": 6.921279699309525e-07, "loss": 0.1304, "step": 28643 }, { "epoch": 0.8356380185541747, "grad_norm": 0.8727545532687586, "learning_rate": 6.918881667932637e-07, "loss": 0.0936, "step": 28644 }, { "epoch": 0.8356671917848183, "grad_norm": 0.7728314756224676, "learning_rate": 6.916484021173681e-07, "loss": 0.1354, "step": 28645 }, { "epoch": 0.8356963650154619, "grad_norm": 0.8778921020468223, "learning_rate": 6.914086759054062e-07, "loss": 0.1112, "step": 28646 }, { "epoch": 0.8357255382461054, "grad_norm": 0.6652813785176614, "learning_rate": 6.911689881595208e-07, "loss": 0.0941, "step": 28647 }, { "epoch": 0.835754711476749, "grad_norm": 0.7999509435973636, "learning_rate": 6.9092933888185e-07, "loss": 0.1247, "step": 28648 }, { "epoch": 0.8357838847073925, "grad_norm": 0.8654250706204483, "learning_rate": 6.906897280745322e-07, "loss": 0.1163, "step": 28649 }, { "epoch": 0.8358130579380361, "grad_norm": 0.9017734917226445, "learning_rate": 6.904501557397092e-07, "loss": 0.0887, "step": 28650 }, { "epoch": 0.8358422311686796, "grad_norm": 0.7807807142597256, "learning_rate": 6.902106218795185e-07, "loss": 0.1143, "step": 28651 }, { "epoch": 0.8358714043993232, "grad_norm": 0.9637840937523848, "learning_rate": 6.899711264960957e-07, "loss": 0.1182, "step": 28652 }, { "epoch": 0.8359005776299667, "grad_norm": 0.8176700269805882, "learning_rate": 6.897316695915846e-07, "loss": 0.0901, "step": 28653 }, { "epoch": 0.8359297508606103, "grad_norm": 0.9653718107085448, "learning_rate": 6.894922511681196e-07, "loss": 0.1326, "step": 28654 }, { "epoch": 0.8359589240912538, "grad_norm": 0.7124793179035528, "learning_rate": 6.892528712278385e-07, "loss": 0.1109, "step": 28655 }, { "epoch": 0.8359880973218974, "grad_norm": 1.3319571241127004, "learning_rate": 6.89013529772879e-07, "loss": 0.1338, "step": 28656 }, { "epoch": 0.8360172705525409, "grad_norm": 0.9256128810957058, "learning_rate": 6.887742268053782e-07, "loss": 0.1128, "step": 28657 }, { "epoch": 0.8360464437831846, "grad_norm": 0.7285361643222902, "learning_rate": 6.885349623274706e-07, "loss": 0.1223, "step": 28658 }, { "epoch": 0.8360756170138282, "grad_norm": 1.2547232560492405, "learning_rate": 6.882957363412934e-07, "loss": 0.1158, "step": 28659 }, { "epoch": 0.8361047902444717, "grad_norm": 0.950298002722007, "learning_rate": 6.880565488489837e-07, "loss": 0.1141, "step": 28660 }, { "epoch": 0.8361339634751153, "grad_norm": 0.7211726500922024, "learning_rate": 6.87817399852676e-07, "loss": 0.1033, "step": 28661 }, { "epoch": 0.8361631367057588, "grad_norm": 0.9019731638988802, "learning_rate": 6.875782893545042e-07, "loss": 0.1038, "step": 28662 }, { "epoch": 0.8361923099364024, "grad_norm": 0.9445192804347247, "learning_rate": 6.873392173566051e-07, "loss": 0.126, "step": 28663 }, { "epoch": 0.8362214831670459, "grad_norm": 1.0122184251294524, "learning_rate": 6.871001838611102e-07, "loss": 0.1162, "step": 28664 }, { "epoch": 0.8362506563976895, "grad_norm": 0.777162636737857, "learning_rate": 6.86861188870156e-07, "loss": 0.1099, "step": 28665 }, { "epoch": 0.836279829628333, "grad_norm": 0.8608607181598449, "learning_rate": 6.866222323858762e-07, "loss": 0.1315, "step": 28666 }, { "epoch": 0.8363090028589766, "grad_norm": 0.8781240607716954, "learning_rate": 6.863833144104037e-07, "loss": 0.1152, "step": 28667 }, { "epoch": 0.8363381760896201, "grad_norm": 0.6626272101503468, "learning_rate": 6.861444349458702e-07, "loss": 0.0938, "step": 28668 }, { "epoch": 0.8363673493202637, "grad_norm": 0.7128834338638651, "learning_rate": 6.859055939944098e-07, "loss": 0.1088, "step": 28669 }, { "epoch": 0.8363965225509072, "grad_norm": 0.6701056151114528, "learning_rate": 6.856667915581538e-07, "loss": 0.1087, "step": 28670 }, { "epoch": 0.8364256957815509, "grad_norm": 0.7778893549588168, "learning_rate": 6.854280276392361e-07, "loss": 0.1027, "step": 28671 }, { "epoch": 0.8364548690121945, "grad_norm": 0.9109236468173831, "learning_rate": 6.851893022397855e-07, "loss": 0.1345, "step": 28672 }, { "epoch": 0.836484042242838, "grad_norm": 0.9414579629667669, "learning_rate": 6.849506153619356e-07, "loss": 0.1168, "step": 28673 }, { "epoch": 0.8365132154734816, "grad_norm": 0.9713870553892409, "learning_rate": 6.847119670078173e-07, "loss": 0.1127, "step": 28674 }, { "epoch": 0.8365423887041251, "grad_norm": 0.8368938544227067, "learning_rate": 6.844733571795587e-07, "loss": 0.1246, "step": 28675 }, { "epoch": 0.8365715619347687, "grad_norm": 0.7175759144395739, "learning_rate": 6.842347858792919e-07, "loss": 0.1108, "step": 28676 }, { "epoch": 0.8366007351654122, "grad_norm": 0.9315659849847924, "learning_rate": 6.839962531091482e-07, "loss": 0.1034, "step": 28677 }, { "epoch": 0.8366299083960558, "grad_norm": 0.9265283512957927, "learning_rate": 6.837577588712551e-07, "loss": 0.1021, "step": 28678 }, { "epoch": 0.8366590816266993, "grad_norm": 0.7840001819232951, "learning_rate": 6.835193031677418e-07, "loss": 0.1124, "step": 28679 }, { "epoch": 0.8366882548573429, "grad_norm": 0.7422903185418308, "learning_rate": 6.832808860007384e-07, "loss": 0.0994, "step": 28680 }, { "epoch": 0.8367174280879864, "grad_norm": 0.8006926436386917, "learning_rate": 6.830425073723728e-07, "loss": 0.1197, "step": 28681 }, { "epoch": 0.83674660131863, "grad_norm": 0.7505425422294162, "learning_rate": 6.828041672847707e-07, "loss": 0.0911, "step": 28682 }, { "epoch": 0.8367757745492735, "grad_norm": 0.7827003981639024, "learning_rate": 6.825658657400653e-07, "loss": 0.1224, "step": 28683 }, { "epoch": 0.8368049477799171, "grad_norm": 0.806682899445697, "learning_rate": 6.823276027403808e-07, "loss": 0.1044, "step": 28684 }, { "epoch": 0.8368341210105608, "grad_norm": 1.0913688915087802, "learning_rate": 6.820893782878435e-07, "loss": 0.119, "step": 28685 }, { "epoch": 0.8368632942412043, "grad_norm": 0.7646187400611799, "learning_rate": 6.818511923845828e-07, "loss": 0.0914, "step": 28686 }, { "epoch": 0.8368924674718479, "grad_norm": 0.9484808691312467, "learning_rate": 6.816130450327235e-07, "loss": 0.1129, "step": 28687 }, { "epoch": 0.8369216407024914, "grad_norm": 1.0347871767578014, "learning_rate": 6.813749362343914e-07, "loss": 0.1108, "step": 28688 }, { "epoch": 0.836950813933135, "grad_norm": 0.9270575124098639, "learning_rate": 6.811368659917128e-07, "loss": 0.1118, "step": 28689 }, { "epoch": 0.8369799871637785, "grad_norm": 0.9329141279665811, "learning_rate": 6.808988343068146e-07, "loss": 0.132, "step": 28690 }, { "epoch": 0.8370091603944221, "grad_norm": 1.13431048449298, "learning_rate": 6.8066084118182e-07, "loss": 0.1086, "step": 28691 }, { "epoch": 0.8370383336250656, "grad_norm": 0.9504017432662675, "learning_rate": 6.804228866188534e-07, "loss": 0.0996, "step": 28692 }, { "epoch": 0.8370675068557092, "grad_norm": 0.7452738299748206, "learning_rate": 6.801849706200414e-07, "loss": 0.103, "step": 28693 }, { "epoch": 0.8370966800863527, "grad_norm": 0.7461686281640708, "learning_rate": 6.799470931875051e-07, "loss": 0.1085, "step": 28694 }, { "epoch": 0.8371258533169963, "grad_norm": 0.8425623946505637, "learning_rate": 6.797092543233719e-07, "loss": 0.1244, "step": 28695 }, { "epoch": 0.8371550265476398, "grad_norm": 0.9280261705744823, "learning_rate": 6.794714540297615e-07, "loss": 0.1075, "step": 28696 }, { "epoch": 0.8371841997782834, "grad_norm": 0.853209451411559, "learning_rate": 6.792336923087994e-07, "loss": 0.1131, "step": 28697 }, { "epoch": 0.837213373008927, "grad_norm": 0.8637432778009454, "learning_rate": 6.789959691626069e-07, "loss": 0.1373, "step": 28698 }, { "epoch": 0.8372425462395706, "grad_norm": 0.8197077682181532, "learning_rate": 6.787582845933078e-07, "loss": 0.101, "step": 28699 }, { "epoch": 0.8372717194702142, "grad_norm": 1.0369316022870985, "learning_rate": 6.785206386030219e-07, "loss": 0.0987, "step": 28700 }, { "epoch": 0.8373008927008577, "grad_norm": 0.7420193447575757, "learning_rate": 6.782830311938731e-07, "loss": 0.1012, "step": 28701 }, { "epoch": 0.8373300659315013, "grad_norm": 1.0916979403699227, "learning_rate": 6.78045462367981e-07, "loss": 0.1111, "step": 28702 }, { "epoch": 0.8373592391621448, "grad_norm": 0.8667817266571434, "learning_rate": 6.778079321274683e-07, "loss": 0.1054, "step": 28703 }, { "epoch": 0.8373884123927884, "grad_norm": 0.8127985451597882, "learning_rate": 6.775704404744543e-07, "loss": 0.0948, "step": 28704 }, { "epoch": 0.8374175856234319, "grad_norm": 0.945295115772799, "learning_rate": 6.77332987411059e-07, "loss": 0.1382, "step": 28705 }, { "epoch": 0.8374467588540755, "grad_norm": 0.9591986280141048, "learning_rate": 6.770955729394024e-07, "loss": 0.1178, "step": 28706 }, { "epoch": 0.837475932084719, "grad_norm": 0.9707874551440682, "learning_rate": 6.768581970616056e-07, "loss": 0.0897, "step": 28707 }, { "epoch": 0.8375051053153626, "grad_norm": 0.5864343240786279, "learning_rate": 6.766208597797874e-07, "loss": 0.1081, "step": 28708 }, { "epoch": 0.8375342785460062, "grad_norm": 0.929317101844393, "learning_rate": 6.763835610960645e-07, "loss": 0.1371, "step": 28709 }, { "epoch": 0.8375634517766497, "grad_norm": 0.7194146484129015, "learning_rate": 6.76146301012558e-07, "loss": 0.1138, "step": 28710 }, { "epoch": 0.8375926250072933, "grad_norm": 0.8593105085498757, "learning_rate": 6.759090795313856e-07, "loss": 0.1377, "step": 28711 }, { "epoch": 0.8376217982379369, "grad_norm": 0.8303128010172225, "learning_rate": 6.756718966546622e-07, "loss": 0.1122, "step": 28712 }, { "epoch": 0.8376509714685805, "grad_norm": 0.9188633021514161, "learning_rate": 6.754347523845101e-07, "loss": 0.1058, "step": 28713 }, { "epoch": 0.837680144699224, "grad_norm": 0.8497070208353793, "learning_rate": 6.751976467230442e-07, "loss": 0.1181, "step": 28714 }, { "epoch": 0.8377093179298676, "grad_norm": 0.9130469832679831, "learning_rate": 6.749605796723802e-07, "loss": 0.1136, "step": 28715 }, { "epoch": 0.8377384911605111, "grad_norm": 0.8934997775976855, "learning_rate": 6.747235512346368e-07, "loss": 0.1219, "step": 28716 }, { "epoch": 0.8377676643911547, "grad_norm": 0.8038434375759682, "learning_rate": 6.744865614119289e-07, "loss": 0.1038, "step": 28717 }, { "epoch": 0.8377968376217982, "grad_norm": 0.8213997320272479, "learning_rate": 6.742496102063711e-07, "loss": 0.1127, "step": 28718 }, { "epoch": 0.8378260108524418, "grad_norm": 0.7663259946596382, "learning_rate": 6.740126976200806e-07, "loss": 0.1193, "step": 28719 }, { "epoch": 0.8378551840830853, "grad_norm": 0.7842153966592031, "learning_rate": 6.737758236551728e-07, "loss": 0.0954, "step": 28720 }, { "epoch": 0.8378843573137289, "grad_norm": 0.9729076057399066, "learning_rate": 6.735389883137616e-07, "loss": 0.0939, "step": 28721 }, { "epoch": 0.8379135305443725, "grad_norm": 0.909854706876548, "learning_rate": 6.73302191597961e-07, "loss": 0.1236, "step": 28722 }, { "epoch": 0.837942703775016, "grad_norm": 0.8856700792705716, "learning_rate": 6.730654335098857e-07, "loss": 0.1431, "step": 28723 }, { "epoch": 0.8379718770056596, "grad_norm": 1.1578674171675276, "learning_rate": 6.728287140516487e-07, "loss": 0.1113, "step": 28724 }, { "epoch": 0.8380010502363031, "grad_norm": 0.9210415753413871, "learning_rate": 6.725920332253654e-07, "loss": 0.1201, "step": 28725 }, { "epoch": 0.8380302234669468, "grad_norm": 0.8147646490799825, "learning_rate": 6.72355391033146e-07, "loss": 0.1157, "step": 28726 }, { "epoch": 0.8380593966975903, "grad_norm": 0.8097996550992647, "learning_rate": 6.721187874771057e-07, "loss": 0.1085, "step": 28727 }, { "epoch": 0.8380885699282339, "grad_norm": 0.7883422579529276, "learning_rate": 6.718822225593547e-07, "loss": 0.1182, "step": 28728 }, { "epoch": 0.8381177431588774, "grad_norm": 0.8299692523240949, "learning_rate": 6.716456962820067e-07, "loss": 0.1399, "step": 28729 }, { "epoch": 0.838146916389521, "grad_norm": 0.6813307692782673, "learning_rate": 6.714092086471718e-07, "loss": 0.1046, "step": 28730 }, { "epoch": 0.8381760896201645, "grad_norm": 0.9338365931502542, "learning_rate": 6.711727596569639e-07, "loss": 0.1224, "step": 28731 }, { "epoch": 0.8382052628508081, "grad_norm": 0.8818751665529719, "learning_rate": 6.709363493134902e-07, "loss": 0.1017, "step": 28732 }, { "epoch": 0.8382344360814517, "grad_norm": 0.7867298737998444, "learning_rate": 6.706999776188649e-07, "loss": 0.1068, "step": 28733 }, { "epoch": 0.8382636093120952, "grad_norm": 0.8124559165017454, "learning_rate": 6.704636445751966e-07, "loss": 0.1138, "step": 28734 }, { "epoch": 0.8382927825427388, "grad_norm": 0.8695400580267945, "learning_rate": 6.702273501845946e-07, "loss": 0.103, "step": 28735 }, { "epoch": 0.8383219557733823, "grad_norm": 0.9273491181602084, "learning_rate": 6.699910944491689e-07, "loss": 0.1379, "step": 28736 }, { "epoch": 0.8383511290040259, "grad_norm": 0.7142323570772079, "learning_rate": 6.6975487737103e-07, "loss": 0.1187, "step": 28737 }, { "epoch": 0.8383803022346694, "grad_norm": 0.768054204273176, "learning_rate": 6.695186989522856e-07, "loss": 0.1004, "step": 28738 }, { "epoch": 0.8384094754653131, "grad_norm": 0.9379413389553742, "learning_rate": 6.692825591950441e-07, "loss": 0.1177, "step": 28739 }, { "epoch": 0.8384386486959566, "grad_norm": 0.8190572260259622, "learning_rate": 6.69046458101415e-07, "loss": 0.118, "step": 28740 }, { "epoch": 0.8384678219266002, "grad_norm": 0.8160379824822651, "learning_rate": 6.688103956735048e-07, "loss": 0.1163, "step": 28741 }, { "epoch": 0.8384969951572437, "grad_norm": 1.0185656110838428, "learning_rate": 6.685743719134197e-07, "loss": 0.1283, "step": 28742 }, { "epoch": 0.8385261683878873, "grad_norm": 1.1103838702868496, "learning_rate": 6.683383868232706e-07, "loss": 0.0913, "step": 28743 }, { "epoch": 0.8385553416185308, "grad_norm": 0.7070828225170954, "learning_rate": 6.681024404051623e-07, "loss": 0.0981, "step": 28744 }, { "epoch": 0.8385845148491744, "grad_norm": 0.9366319569932889, "learning_rate": 6.678665326612005e-07, "loss": 0.1358, "step": 28745 }, { "epoch": 0.838613688079818, "grad_norm": 0.9124516318112963, "learning_rate": 6.676306635934926e-07, "loss": 0.1194, "step": 28746 }, { "epoch": 0.8386428613104615, "grad_norm": 0.8135006791237972, "learning_rate": 6.673948332041446e-07, "loss": 0.107, "step": 28747 }, { "epoch": 0.8386720345411051, "grad_norm": 0.6885575632321572, "learning_rate": 6.6715904149526e-07, "loss": 0.1078, "step": 28748 }, { "epoch": 0.8387012077717486, "grad_norm": 0.7077370536667261, "learning_rate": 6.669232884689448e-07, "loss": 0.0977, "step": 28749 }, { "epoch": 0.8387303810023922, "grad_norm": 0.8249751267352112, "learning_rate": 6.666875741273055e-07, "loss": 0.0833, "step": 28750 }, { "epoch": 0.8387595542330357, "grad_norm": 0.8238758026084099, "learning_rate": 6.66451898472445e-07, "loss": 0.1151, "step": 28751 }, { "epoch": 0.8387887274636793, "grad_norm": 0.7147478588553577, "learning_rate": 6.662162615064666e-07, "loss": 0.093, "step": 28752 }, { "epoch": 0.8388179006943229, "grad_norm": 0.7429986693398549, "learning_rate": 6.659806632314753e-07, "loss": 0.0861, "step": 28753 }, { "epoch": 0.8388470739249665, "grad_norm": 0.7751296716191228, "learning_rate": 6.657451036495738e-07, "loss": 0.1197, "step": 28754 }, { "epoch": 0.83887624715561, "grad_norm": 0.7673811335053209, "learning_rate": 6.65509582762866e-07, "loss": 0.1165, "step": 28755 }, { "epoch": 0.8389054203862536, "grad_norm": 1.0133916581262266, "learning_rate": 6.652741005734525e-07, "loss": 0.1292, "step": 28756 }, { "epoch": 0.8389345936168972, "grad_norm": 0.8180041015398647, "learning_rate": 6.650386570834383e-07, "loss": 0.0914, "step": 28757 }, { "epoch": 0.8389637668475407, "grad_norm": 0.7476366684534999, "learning_rate": 6.648032522949232e-07, "loss": 0.1249, "step": 28758 }, { "epoch": 0.8389929400781843, "grad_norm": 0.8546995788144813, "learning_rate": 6.645678862100114e-07, "loss": 0.1323, "step": 28759 }, { "epoch": 0.8390221133088278, "grad_norm": 0.7531425851554054, "learning_rate": 6.643325588308008e-07, "loss": 0.1014, "step": 28760 }, { "epoch": 0.8390512865394714, "grad_norm": 0.9611365207426606, "learning_rate": 6.64097270159395e-07, "loss": 0.097, "step": 28761 }, { "epoch": 0.8390804597701149, "grad_norm": 0.6552977587311081, "learning_rate": 6.638620201978929e-07, "loss": 0.1244, "step": 28762 }, { "epoch": 0.8391096330007585, "grad_norm": 0.7306251649323, "learning_rate": 6.636268089483971e-07, "loss": 0.1107, "step": 28763 }, { "epoch": 0.839138806231402, "grad_norm": 0.8586736099696834, "learning_rate": 6.633916364130056e-07, "loss": 0.1058, "step": 28764 }, { "epoch": 0.8391679794620456, "grad_norm": 1.0360708242033867, "learning_rate": 6.631565025938169e-07, "loss": 0.1135, "step": 28765 }, { "epoch": 0.8391971526926892, "grad_norm": 0.948705596794399, "learning_rate": 6.629214074929319e-07, "loss": 0.1187, "step": 28766 }, { "epoch": 0.8392263259233328, "grad_norm": 0.8846009342703027, "learning_rate": 6.626863511124504e-07, "loss": 0.1144, "step": 28767 }, { "epoch": 0.8392554991539763, "grad_norm": 0.8417190637982709, "learning_rate": 6.624513334544697e-07, "loss": 0.1015, "step": 28768 }, { "epoch": 0.8392846723846199, "grad_norm": 0.7031646678553474, "learning_rate": 6.622163545210875e-07, "loss": 0.1209, "step": 28769 }, { "epoch": 0.8393138456152635, "grad_norm": 0.8554940765743669, "learning_rate": 6.619814143144026e-07, "loss": 0.1413, "step": 28770 }, { "epoch": 0.839343018845907, "grad_norm": 0.8921323870672512, "learning_rate": 6.61746512836512e-07, "loss": 0.0912, "step": 28771 }, { "epoch": 0.8393721920765506, "grad_norm": 0.7582561652777967, "learning_rate": 6.615116500895113e-07, "loss": 0.1007, "step": 28772 }, { "epoch": 0.8394013653071941, "grad_norm": 0.9667372846011172, "learning_rate": 6.612768260755004e-07, "loss": 0.1223, "step": 28773 }, { "epoch": 0.8394305385378377, "grad_norm": 0.8493696642065195, "learning_rate": 6.610420407965745e-07, "loss": 0.0737, "step": 28774 }, { "epoch": 0.8394597117684812, "grad_norm": 0.9896193161742983, "learning_rate": 6.608072942548288e-07, "loss": 0.1174, "step": 28775 }, { "epoch": 0.8394888849991248, "grad_norm": 0.8209049064261055, "learning_rate": 6.605725864523604e-07, "loss": 0.1345, "step": 28776 }, { "epoch": 0.8395180582297683, "grad_norm": 0.8505099019023185, "learning_rate": 6.603379173912644e-07, "loss": 0.1197, "step": 28777 }, { "epoch": 0.8395472314604119, "grad_norm": 0.8435687476471607, "learning_rate": 6.601032870736341e-07, "loss": 0.095, "step": 28778 }, { "epoch": 0.8395764046910554, "grad_norm": 0.8049885646169743, "learning_rate": 6.598686955015654e-07, "loss": 0.1135, "step": 28779 }, { "epoch": 0.8396055779216991, "grad_norm": 0.8305185722291034, "learning_rate": 6.596341426771546e-07, "loss": 0.1203, "step": 28780 }, { "epoch": 0.8396347511523427, "grad_norm": 1.6308803784404653, "learning_rate": 6.593996286024934e-07, "loss": 0.1166, "step": 28781 }, { "epoch": 0.8396639243829862, "grad_norm": 0.8362606748811074, "learning_rate": 6.591651532796755e-07, "loss": 0.1029, "step": 28782 }, { "epoch": 0.8396930976136298, "grad_norm": 0.8043625644606235, "learning_rate": 6.589307167107962e-07, "loss": 0.1072, "step": 28783 }, { "epoch": 0.8397222708442733, "grad_norm": 0.7346860174215263, "learning_rate": 6.586963188979456e-07, "loss": 0.1369, "step": 28784 }, { "epoch": 0.8397514440749169, "grad_norm": 0.8034760711491574, "learning_rate": 6.584619598432191e-07, "loss": 0.1306, "step": 28785 }, { "epoch": 0.8397806173055604, "grad_norm": 0.9681865502098288, "learning_rate": 6.58227639548707e-07, "loss": 0.1322, "step": 28786 }, { "epoch": 0.839809790536204, "grad_norm": 0.8605951304570972, "learning_rate": 6.579933580165027e-07, "loss": 0.1088, "step": 28787 }, { "epoch": 0.8398389637668475, "grad_norm": 0.9754889755454179, "learning_rate": 6.577591152486972e-07, "loss": 0.1163, "step": 28788 }, { "epoch": 0.8398681369974911, "grad_norm": 0.9073355830069386, "learning_rate": 6.575249112473808e-07, "loss": 0.1017, "step": 28789 }, { "epoch": 0.8398973102281346, "grad_norm": 0.7126419293482791, "learning_rate": 6.572907460146454e-07, "loss": 0.1243, "step": 28790 }, { "epoch": 0.8399264834587782, "grad_norm": 0.6861012492374159, "learning_rate": 6.570566195525829e-07, "loss": 0.1029, "step": 28791 }, { "epoch": 0.8399556566894217, "grad_norm": 0.9680994117995753, "learning_rate": 6.568225318632804e-07, "loss": 0.1054, "step": 28792 }, { "epoch": 0.8399848299200654, "grad_norm": 0.7902755960454412, "learning_rate": 6.565884829488312e-07, "loss": 0.1102, "step": 28793 }, { "epoch": 0.840014003150709, "grad_norm": 0.7962644914448704, "learning_rate": 6.56354472811323e-07, "loss": 0.119, "step": 28794 }, { "epoch": 0.8400431763813525, "grad_norm": 0.9050618458773342, "learning_rate": 6.561205014528443e-07, "loss": 0.0997, "step": 28795 }, { "epoch": 0.8400723496119961, "grad_norm": 0.7665868849262667, "learning_rate": 6.558865688754845e-07, "loss": 0.0844, "step": 28796 }, { "epoch": 0.8401015228426396, "grad_norm": 0.822595988242091, "learning_rate": 6.556526750813336e-07, "loss": 0.1064, "step": 28797 }, { "epoch": 0.8401306960732832, "grad_norm": 0.8578890853877902, "learning_rate": 6.554188200724782e-07, "loss": 0.1158, "step": 28798 }, { "epoch": 0.8401598693039267, "grad_norm": 0.9404242245978279, "learning_rate": 6.551850038510054e-07, "loss": 0.114, "step": 28799 }, { "epoch": 0.8401890425345703, "grad_norm": 0.8845685891665658, "learning_rate": 6.54951226419005e-07, "loss": 0.1284, "step": 28800 }, { "epoch": 0.8402182157652138, "grad_norm": 0.8490759394963238, "learning_rate": 6.547174877785628e-07, "loss": 0.1224, "step": 28801 }, { "epoch": 0.8402473889958574, "grad_norm": 0.8678591029861535, "learning_rate": 6.54483787931764e-07, "loss": 0.1058, "step": 28802 }, { "epoch": 0.8402765622265009, "grad_norm": 0.9107735500306756, "learning_rate": 6.542501268806978e-07, "loss": 0.1079, "step": 28803 }, { "epoch": 0.8403057354571445, "grad_norm": 0.7782338705327391, "learning_rate": 6.540165046274493e-07, "loss": 0.1166, "step": 28804 }, { "epoch": 0.840334908687788, "grad_norm": 0.7252490773874591, "learning_rate": 6.537829211741032e-07, "loss": 0.1214, "step": 28805 }, { "epoch": 0.8403640819184316, "grad_norm": 0.8941322386607158, "learning_rate": 6.535493765227463e-07, "loss": 0.1138, "step": 28806 }, { "epoch": 0.8403932551490753, "grad_norm": 0.8353832920115603, "learning_rate": 6.533158706754633e-07, "loss": 0.1018, "step": 28807 }, { "epoch": 0.8404224283797188, "grad_norm": 0.7030860241272633, "learning_rate": 6.530824036343375e-07, "loss": 0.1051, "step": 28808 }, { "epoch": 0.8404516016103624, "grad_norm": 0.8681428224464733, "learning_rate": 6.528489754014545e-07, "loss": 0.0976, "step": 28809 }, { "epoch": 0.8404807748410059, "grad_norm": 0.9715518332274773, "learning_rate": 6.526155859788985e-07, "loss": 0.1011, "step": 28810 }, { "epoch": 0.8405099480716495, "grad_norm": 0.7589110212252026, "learning_rate": 6.523822353687531e-07, "loss": 0.1148, "step": 28811 }, { "epoch": 0.840539121302293, "grad_norm": 0.8907199418658908, "learning_rate": 6.521489235731005e-07, "loss": 0.1172, "step": 28812 }, { "epoch": 0.8405682945329366, "grad_norm": 0.8646787870284771, "learning_rate": 6.519156505940249e-07, "loss": 0.115, "step": 28813 }, { "epoch": 0.8405974677635801, "grad_norm": 0.8748720508629039, "learning_rate": 6.516824164336077e-07, "loss": 0.1362, "step": 28814 }, { "epoch": 0.8406266409942237, "grad_norm": 0.7613177884739198, "learning_rate": 6.514492210939327e-07, "loss": 0.0978, "step": 28815 }, { "epoch": 0.8406558142248672, "grad_norm": 0.96189654180933, "learning_rate": 6.512160645770799e-07, "loss": 0.1513, "step": 28816 }, { "epoch": 0.8406849874555108, "grad_norm": 0.8913120494635219, "learning_rate": 6.509829468851336e-07, "loss": 0.1031, "step": 28817 }, { "epoch": 0.8407141606861543, "grad_norm": 0.6909259859319695, "learning_rate": 6.50749868020173e-07, "loss": 0.0985, "step": 28818 }, { "epoch": 0.8407433339167979, "grad_norm": 0.6436536944911209, "learning_rate": 6.505168279842777e-07, "loss": 0.1226, "step": 28819 }, { "epoch": 0.8407725071474416, "grad_norm": 0.7144179993522757, "learning_rate": 6.502838267795303e-07, "loss": 0.1128, "step": 28820 }, { "epoch": 0.8408016803780851, "grad_norm": 0.8841992410911953, "learning_rate": 6.500508644080117e-07, "loss": 0.1044, "step": 28821 }, { "epoch": 0.8408308536087287, "grad_norm": 0.7712873026638355, "learning_rate": 6.498179408717992e-07, "loss": 0.1162, "step": 28822 }, { "epoch": 0.8408600268393722, "grad_norm": 0.7415406444945843, "learning_rate": 6.495850561729749e-07, "loss": 0.1158, "step": 28823 }, { "epoch": 0.8408892000700158, "grad_norm": 0.8610246489611074, "learning_rate": 6.493522103136169e-07, "loss": 0.102, "step": 28824 }, { "epoch": 0.8409183733006593, "grad_norm": 0.6609714171145477, "learning_rate": 6.491194032958026e-07, "loss": 0.0973, "step": 28825 }, { "epoch": 0.8409475465313029, "grad_norm": 0.7704983458435415, "learning_rate": 6.488866351216116e-07, "loss": 0.1128, "step": 28826 }, { "epoch": 0.8409767197619464, "grad_norm": 0.7011720107836845, "learning_rate": 6.486539057931229e-07, "loss": 0.1119, "step": 28827 }, { "epoch": 0.84100589299259, "grad_norm": 0.8783664330634164, "learning_rate": 6.484212153124137e-07, "loss": 0.1091, "step": 28828 }, { "epoch": 0.8410350662232335, "grad_norm": 0.9538451986776406, "learning_rate": 6.481885636815599e-07, "loss": 0.1047, "step": 28829 }, { "epoch": 0.8410642394538771, "grad_norm": 0.811201426363696, "learning_rate": 6.479559509026406e-07, "loss": 0.1258, "step": 28830 }, { "epoch": 0.8410934126845206, "grad_norm": 0.9946951198396704, "learning_rate": 6.477233769777319e-07, "loss": 0.1382, "step": 28831 }, { "epoch": 0.8411225859151642, "grad_norm": 0.8552353285760814, "learning_rate": 6.474908419089076e-07, "loss": 0.1221, "step": 28832 }, { "epoch": 0.8411517591458078, "grad_norm": 0.7219144875927064, "learning_rate": 6.472583456982485e-07, "loss": 0.0966, "step": 28833 }, { "epoch": 0.8411809323764514, "grad_norm": 1.2242165187967142, "learning_rate": 6.470258883478275e-07, "loss": 0.12, "step": 28834 }, { "epoch": 0.841210105607095, "grad_norm": 1.1793211548741807, "learning_rate": 6.467934698597189e-07, "loss": 0.1456, "step": 28835 }, { "epoch": 0.8412392788377385, "grad_norm": 0.9663827119742197, "learning_rate": 6.465610902360009e-07, "loss": 0.1191, "step": 28836 }, { "epoch": 0.8412684520683821, "grad_norm": 0.8042727150124604, "learning_rate": 6.463287494787446e-07, "loss": 0.1278, "step": 28837 }, { "epoch": 0.8412976252990256, "grad_norm": 1.083381842465957, "learning_rate": 6.460964475900266e-07, "loss": 0.1202, "step": 28838 }, { "epoch": 0.8413267985296692, "grad_norm": 0.9834436853846289, "learning_rate": 6.4586418457192e-07, "loss": 0.098, "step": 28839 }, { "epoch": 0.8413559717603127, "grad_norm": 0.6787730454878973, "learning_rate": 6.456319604264988e-07, "loss": 0.1179, "step": 28840 }, { "epoch": 0.8413851449909563, "grad_norm": 0.8495841239183446, "learning_rate": 6.453997751558366e-07, "loss": 0.1191, "step": 28841 }, { "epoch": 0.8414143182215998, "grad_norm": 1.1290615715573953, "learning_rate": 6.451676287620046e-07, "loss": 0.1132, "step": 28842 }, { "epoch": 0.8414434914522434, "grad_norm": 0.8453385437174072, "learning_rate": 6.44935521247076e-07, "loss": 0.114, "step": 28843 }, { "epoch": 0.841472664682887, "grad_norm": 0.9923323922290838, "learning_rate": 6.447034526131247e-07, "loss": 0.1179, "step": 28844 }, { "epoch": 0.8415018379135305, "grad_norm": 0.8784270460169328, "learning_rate": 6.444714228622212e-07, "loss": 0.1197, "step": 28845 }, { "epoch": 0.8415310111441741, "grad_norm": 0.8813807687323639, "learning_rate": 6.442394319964362e-07, "loss": 0.1099, "step": 28846 }, { "epoch": 0.8415601843748177, "grad_norm": 0.7550142799237555, "learning_rate": 6.440074800178426e-07, "loss": 0.1135, "step": 28847 }, { "epoch": 0.8415893576054613, "grad_norm": 0.7698178913705608, "learning_rate": 6.437755669285106e-07, "loss": 0.1148, "step": 28848 }, { "epoch": 0.8416185308361048, "grad_norm": 0.7689855145974284, "learning_rate": 6.435436927305077e-07, "loss": 0.1112, "step": 28849 }, { "epoch": 0.8416477040667484, "grad_norm": 0.8660549598806329, "learning_rate": 6.433118574259095e-07, "loss": 0.1002, "step": 28850 }, { "epoch": 0.8416768772973919, "grad_norm": 0.8977184197766986, "learning_rate": 6.430800610167831e-07, "loss": 0.1298, "step": 28851 }, { "epoch": 0.8417060505280355, "grad_norm": 0.9849880656802578, "learning_rate": 6.428483035051963e-07, "loss": 0.1067, "step": 28852 }, { "epoch": 0.841735223758679, "grad_norm": 0.8153342556747524, "learning_rate": 6.426165848932208e-07, "loss": 0.1035, "step": 28853 }, { "epoch": 0.8417643969893226, "grad_norm": 0.9065290418267716, "learning_rate": 6.423849051829246e-07, "loss": 0.0998, "step": 28854 }, { "epoch": 0.8417935702199661, "grad_norm": 0.7417628350177068, "learning_rate": 6.421532643763745e-07, "loss": 0.0989, "step": 28855 }, { "epoch": 0.8418227434506097, "grad_norm": 0.8666948957920616, "learning_rate": 6.419216624756397e-07, "loss": 0.1439, "step": 28856 }, { "epoch": 0.8418519166812533, "grad_norm": 0.8978597665981048, "learning_rate": 6.41690099482789e-07, "loss": 0.103, "step": 28857 }, { "epoch": 0.8418810899118968, "grad_norm": 0.7343306993336536, "learning_rate": 6.414585753998887e-07, "loss": 0.0889, "step": 28858 }, { "epoch": 0.8419102631425404, "grad_norm": 0.8811943569068373, "learning_rate": 6.412270902290047e-07, "loss": 0.1351, "step": 28859 }, { "epoch": 0.8419394363731839, "grad_norm": 0.9439589139156926, "learning_rate": 6.40995643972206e-07, "loss": 0.1295, "step": 28860 }, { "epoch": 0.8419686096038276, "grad_norm": 0.789718850268482, "learning_rate": 6.407642366315564e-07, "loss": 0.1274, "step": 28861 }, { "epoch": 0.8419977828344711, "grad_norm": 0.7178593481303421, "learning_rate": 6.405328682091228e-07, "loss": 0.1152, "step": 28862 }, { "epoch": 0.8420269560651147, "grad_norm": 0.9482712353650073, "learning_rate": 6.403015387069722e-07, "loss": 0.12, "step": 28863 }, { "epoch": 0.8420561292957582, "grad_norm": 0.8814999547315627, "learning_rate": 6.400702481271692e-07, "loss": 0.0973, "step": 28864 }, { "epoch": 0.8420853025264018, "grad_norm": 0.9815439228739841, "learning_rate": 6.398389964717766e-07, "loss": 0.1252, "step": 28865 }, { "epoch": 0.8421144757570453, "grad_norm": 0.9033488606404223, "learning_rate": 6.396077837428621e-07, "loss": 0.1454, "step": 28866 }, { "epoch": 0.8421436489876889, "grad_norm": 0.7170638165342558, "learning_rate": 6.393766099424869e-07, "loss": 0.1095, "step": 28867 }, { "epoch": 0.8421728222183325, "grad_norm": 1.0315142844876628, "learning_rate": 6.391454750727177e-07, "loss": 0.1149, "step": 28868 }, { "epoch": 0.842201995448976, "grad_norm": 0.7692115385358217, "learning_rate": 6.389143791356156e-07, "loss": 0.1185, "step": 28869 }, { "epoch": 0.8422311686796196, "grad_norm": 0.8417598144992364, "learning_rate": 6.386833221332456e-07, "loss": 0.1369, "step": 28870 }, { "epoch": 0.8422603419102631, "grad_norm": 0.8156129188232211, "learning_rate": 6.384523040676704e-07, "loss": 0.1071, "step": 28871 }, { "epoch": 0.8422895151409067, "grad_norm": 0.7775657771437443, "learning_rate": 6.382213249409502e-07, "loss": 0.1181, "step": 28872 }, { "epoch": 0.8423186883715502, "grad_norm": 0.9616514026711401, "learning_rate": 6.379903847551489e-07, "loss": 0.1025, "step": 28873 }, { "epoch": 0.8423478616021939, "grad_norm": 0.9075184500056969, "learning_rate": 6.377594835123296e-07, "loss": 0.1177, "step": 28874 }, { "epoch": 0.8423770348328374, "grad_norm": 1.0023575745946811, "learning_rate": 6.375286212145521e-07, "loss": 0.0963, "step": 28875 }, { "epoch": 0.842406208063481, "grad_norm": 0.8121770301739846, "learning_rate": 6.372977978638762e-07, "loss": 0.1237, "step": 28876 }, { "epoch": 0.8424353812941245, "grad_norm": 0.8369541291810983, "learning_rate": 6.370670134623652e-07, "loss": 0.0942, "step": 28877 }, { "epoch": 0.8424645545247681, "grad_norm": 1.0474218154312502, "learning_rate": 6.368362680120787e-07, "loss": 0.0904, "step": 28878 }, { "epoch": 0.8424937277554116, "grad_norm": 0.8552151482340444, "learning_rate": 6.366055615150746e-07, "loss": 0.0898, "step": 28879 }, { "epoch": 0.8425229009860552, "grad_norm": 0.6530577140130184, "learning_rate": 6.36374893973416e-07, "loss": 0.1051, "step": 28880 }, { "epoch": 0.8425520742166988, "grad_norm": 1.2912320210230699, "learning_rate": 6.361442653891608e-07, "loss": 0.0954, "step": 28881 }, { "epoch": 0.8425812474473423, "grad_norm": 0.8506607381697783, "learning_rate": 6.35913675764367e-07, "loss": 0.0853, "step": 28882 }, { "epoch": 0.8426104206779859, "grad_norm": 0.8377732292276981, "learning_rate": 6.356831251010948e-07, "loss": 0.0926, "step": 28883 }, { "epoch": 0.8426395939086294, "grad_norm": 0.8006864847043814, "learning_rate": 6.354526134014022e-07, "loss": 0.12, "step": 28884 }, { "epoch": 0.842668767139273, "grad_norm": 0.8652466306530592, "learning_rate": 6.352221406673453e-07, "loss": 0.1278, "step": 28885 }, { "epoch": 0.8426979403699165, "grad_norm": 0.8295095284993502, "learning_rate": 6.349917069009837e-07, "loss": 0.1077, "step": 28886 }, { "epoch": 0.8427271136005601, "grad_norm": 0.8357083084260215, "learning_rate": 6.347613121043745e-07, "loss": 0.1279, "step": 28887 }, { "epoch": 0.8427562868312037, "grad_norm": 0.8238125863301013, "learning_rate": 6.345309562795748e-07, "loss": 0.1479, "step": 28888 }, { "epoch": 0.8427854600618473, "grad_norm": 0.9650659380382625, "learning_rate": 6.343006394286394e-07, "loss": 0.1171, "step": 28889 }, { "epoch": 0.8428146332924908, "grad_norm": 0.71697884241401, "learning_rate": 6.340703615536264e-07, "loss": 0.1078, "step": 28890 }, { "epoch": 0.8428438065231344, "grad_norm": 0.8489867200630906, "learning_rate": 6.338401226565904e-07, "loss": 0.1126, "step": 28891 }, { "epoch": 0.842872979753778, "grad_norm": 0.9940494807831788, "learning_rate": 6.336099227395875e-07, "loss": 0.1168, "step": 28892 }, { "epoch": 0.8429021529844215, "grad_norm": 0.9758747411540146, "learning_rate": 6.333797618046739e-07, "loss": 0.0921, "step": 28893 }, { "epoch": 0.8429313262150651, "grad_norm": 0.7884703811383246, "learning_rate": 6.331496398539033e-07, "loss": 0.0993, "step": 28894 }, { "epoch": 0.8429604994457086, "grad_norm": 0.8741558895470134, "learning_rate": 6.329195568893292e-07, "loss": 0.1189, "step": 28895 }, { "epoch": 0.8429896726763522, "grad_norm": 0.784373689885044, "learning_rate": 6.326895129130079e-07, "loss": 0.1095, "step": 28896 }, { "epoch": 0.8430188459069957, "grad_norm": 0.9079845061376804, "learning_rate": 6.324595079269907e-07, "loss": 0.1144, "step": 28897 }, { "epoch": 0.8430480191376393, "grad_norm": 0.773522031670242, "learning_rate": 6.322295419333335e-07, "loss": 0.1287, "step": 28898 }, { "epoch": 0.8430771923682828, "grad_norm": 1.0060439764611728, "learning_rate": 6.319996149340873e-07, "loss": 0.1115, "step": 28899 }, { "epoch": 0.8431063655989264, "grad_norm": 0.849708583536126, "learning_rate": 6.317697269313072e-07, "loss": 0.0998, "step": 28900 }, { "epoch": 0.84313553882957, "grad_norm": 0.6597539800178907, "learning_rate": 6.315398779270443e-07, "loss": 0.116, "step": 28901 }, { "epoch": 0.8431647120602136, "grad_norm": 0.7636676781210086, "learning_rate": 6.313100679233491e-07, "loss": 0.1026, "step": 28902 }, { "epoch": 0.8431938852908571, "grad_norm": 0.9582051504730216, "learning_rate": 6.310802969222745e-07, "loss": 0.1268, "step": 28903 }, { "epoch": 0.8432230585215007, "grad_norm": 0.802010833128608, "learning_rate": 6.308505649258734e-07, "loss": 0.1331, "step": 28904 }, { "epoch": 0.8432522317521443, "grad_norm": 0.6908653461538637, "learning_rate": 6.306208719361956e-07, "loss": 0.1057, "step": 28905 }, { "epoch": 0.8432814049827878, "grad_norm": 0.9270209884296832, "learning_rate": 6.303912179552902e-07, "loss": 0.1208, "step": 28906 }, { "epoch": 0.8433105782134314, "grad_norm": 0.821447517787425, "learning_rate": 6.301616029852103e-07, "loss": 0.1123, "step": 28907 }, { "epoch": 0.8433397514440749, "grad_norm": 0.7620011929699494, "learning_rate": 6.299320270280046e-07, "loss": 0.1154, "step": 28908 }, { "epoch": 0.8433689246747185, "grad_norm": 0.7979724597079998, "learning_rate": 6.297024900857196e-07, "loss": 0.1114, "step": 28909 }, { "epoch": 0.843398097905362, "grad_norm": 0.912165101893905, "learning_rate": 6.294729921604104e-07, "loss": 0.1277, "step": 28910 }, { "epoch": 0.8434272711360056, "grad_norm": 0.8670551328968326, "learning_rate": 6.29243533254123e-07, "loss": 0.102, "step": 28911 }, { "epoch": 0.8434564443666491, "grad_norm": 0.9780130220486077, "learning_rate": 6.290141133689043e-07, "loss": 0.0969, "step": 28912 }, { "epoch": 0.8434856175972927, "grad_norm": 1.0302144398091853, "learning_rate": 6.287847325068059e-07, "loss": 0.1178, "step": 28913 }, { "epoch": 0.8435147908279362, "grad_norm": 0.7133364137578695, "learning_rate": 6.285553906698732e-07, "loss": 0.1008, "step": 28914 }, { "epoch": 0.8435439640585799, "grad_norm": 0.9786751119061714, "learning_rate": 6.283260878601538e-07, "loss": 0.0976, "step": 28915 }, { "epoch": 0.8435731372892235, "grad_norm": 0.9970447061315006, "learning_rate": 6.280968240796953e-07, "loss": 0.1148, "step": 28916 }, { "epoch": 0.843602310519867, "grad_norm": 0.7532731327396806, "learning_rate": 6.278675993305461e-07, "loss": 0.1119, "step": 28917 }, { "epoch": 0.8436314837505106, "grad_norm": 0.9077873625714724, "learning_rate": 6.276384136147512e-07, "loss": 0.1159, "step": 28918 }, { "epoch": 0.8436606569811541, "grad_norm": 0.8612141675562002, "learning_rate": 6.274092669343551e-07, "loss": 0.0998, "step": 28919 }, { "epoch": 0.8436898302117977, "grad_norm": 1.2176983928374796, "learning_rate": 6.271801592914068e-07, "loss": 0.1136, "step": 28920 }, { "epoch": 0.8437190034424412, "grad_norm": 0.9229084585460244, "learning_rate": 6.269510906879489e-07, "loss": 0.1134, "step": 28921 }, { "epoch": 0.8437481766730848, "grad_norm": 0.8544153087523163, "learning_rate": 6.267220611260283e-07, "loss": 0.1189, "step": 28922 }, { "epoch": 0.8437773499037283, "grad_norm": 1.08151919459781, "learning_rate": 6.264930706076894e-07, "loss": 0.1027, "step": 28923 }, { "epoch": 0.8438065231343719, "grad_norm": 0.8685751850216777, "learning_rate": 6.262641191349773e-07, "loss": 0.1052, "step": 28924 }, { "epoch": 0.8438356963650154, "grad_norm": 0.9392648152313195, "learning_rate": 6.260352067099329e-07, "loss": 0.1011, "step": 28925 }, { "epoch": 0.843864869595659, "grad_norm": 0.678042428996098, "learning_rate": 6.258063333346037e-07, "loss": 0.1097, "step": 28926 }, { "epoch": 0.8438940428263025, "grad_norm": 0.9806532946104165, "learning_rate": 6.255774990110303e-07, "loss": 0.1035, "step": 28927 }, { "epoch": 0.8439232160569462, "grad_norm": 0.9535940013267304, "learning_rate": 6.253487037412575e-07, "loss": 0.0961, "step": 28928 }, { "epoch": 0.8439523892875898, "grad_norm": 0.9213404933200429, "learning_rate": 6.251199475273262e-07, "loss": 0.0942, "step": 28929 }, { "epoch": 0.8439815625182333, "grad_norm": 0.7984942682764017, "learning_rate": 6.248912303712812e-07, "loss": 0.1244, "step": 28930 }, { "epoch": 0.8440107357488769, "grad_norm": 0.8499461483793341, "learning_rate": 6.246625522751621e-07, "loss": 0.1154, "step": 28931 }, { "epoch": 0.8440399089795204, "grad_norm": 0.7866133750070303, "learning_rate": 6.244339132410104e-07, "loss": 0.0921, "step": 28932 }, { "epoch": 0.844069082210164, "grad_norm": 0.742497463167279, "learning_rate": 6.242053132708686e-07, "loss": 0.1212, "step": 28933 }, { "epoch": 0.8440982554408075, "grad_norm": 0.8884821717707002, "learning_rate": 6.239767523667778e-07, "loss": 0.101, "step": 28934 }, { "epoch": 0.8441274286714511, "grad_norm": 0.9183989534593693, "learning_rate": 6.237482305307785e-07, "loss": 0.0791, "step": 28935 }, { "epoch": 0.8441566019020946, "grad_norm": 0.946605435463055, "learning_rate": 6.235197477649085e-07, "loss": 0.1147, "step": 28936 }, { "epoch": 0.8441857751327382, "grad_norm": 0.7782264256580208, "learning_rate": 6.232913040712107e-07, "loss": 0.1029, "step": 28937 }, { "epoch": 0.8442149483633817, "grad_norm": 0.9038559295240802, "learning_rate": 6.230628994517235e-07, "loss": 0.1046, "step": 28938 }, { "epoch": 0.8442441215940253, "grad_norm": 1.011085336469972, "learning_rate": 6.22834533908484e-07, "loss": 0.1062, "step": 28939 }, { "epoch": 0.8442732948246688, "grad_norm": 0.898068911669527, "learning_rate": 6.226062074435347e-07, "loss": 0.1139, "step": 28940 }, { "epoch": 0.8443024680553124, "grad_norm": 0.7582485393852866, "learning_rate": 6.22377920058912e-07, "loss": 0.1064, "step": 28941 }, { "epoch": 0.8443316412859561, "grad_norm": 0.8052222974506925, "learning_rate": 6.221496717566533e-07, "loss": 0.1142, "step": 28942 }, { "epoch": 0.8443608145165996, "grad_norm": 0.8742033596095347, "learning_rate": 6.219214625387987e-07, "loss": 0.0949, "step": 28943 }, { "epoch": 0.8443899877472432, "grad_norm": 1.0294737368038258, "learning_rate": 6.216932924073837e-07, "loss": 0.1574, "step": 28944 }, { "epoch": 0.8444191609778867, "grad_norm": 0.8464921950502365, "learning_rate": 6.214651613644445e-07, "loss": 0.0981, "step": 28945 }, { "epoch": 0.8444483342085303, "grad_norm": 1.0762773562063659, "learning_rate": 6.212370694120196e-07, "loss": 0.0903, "step": 28946 }, { "epoch": 0.8444775074391738, "grad_norm": 0.7914913901208562, "learning_rate": 6.21009016552146e-07, "loss": 0.1177, "step": 28947 }, { "epoch": 0.8445066806698174, "grad_norm": 0.7304359791593851, "learning_rate": 6.207810027868583e-07, "loss": 0.0968, "step": 28948 }, { "epoch": 0.8445358539004609, "grad_norm": 0.887361182023445, "learning_rate": 6.205530281181915e-07, "loss": 0.1259, "step": 28949 }, { "epoch": 0.8445650271311045, "grad_norm": 0.8771738497094493, "learning_rate": 6.203250925481824e-07, "loss": 0.1092, "step": 28950 }, { "epoch": 0.844594200361748, "grad_norm": 0.9470336523330438, "learning_rate": 6.200971960788649e-07, "loss": 0.1156, "step": 28951 }, { "epoch": 0.8446233735923916, "grad_norm": 0.949541291182677, "learning_rate": 6.19869338712274e-07, "loss": 0.1268, "step": 28952 }, { "epoch": 0.8446525468230351, "grad_norm": 0.8543379562142469, "learning_rate": 6.196415204504447e-07, "loss": 0.1099, "step": 28953 }, { "epoch": 0.8446817200536787, "grad_norm": 0.852058767861762, "learning_rate": 6.194137412954104e-07, "loss": 0.1063, "step": 28954 }, { "epoch": 0.8447108932843223, "grad_norm": 0.7605570777580096, "learning_rate": 6.191860012492034e-07, "loss": 0.1026, "step": 28955 }, { "epoch": 0.8447400665149659, "grad_norm": 0.9689081783921539, "learning_rate": 6.189583003138588e-07, "loss": 0.1123, "step": 28956 }, { "epoch": 0.8447692397456095, "grad_norm": 0.7616158465788677, "learning_rate": 6.187306384914082e-07, "loss": 0.1023, "step": 28957 }, { "epoch": 0.844798412976253, "grad_norm": 0.8094604344402836, "learning_rate": 6.185030157838851e-07, "loss": 0.1169, "step": 28958 }, { "epoch": 0.8448275862068966, "grad_norm": 0.7705378157013021, "learning_rate": 6.182754321933204e-07, "loss": 0.1298, "step": 28959 }, { "epoch": 0.8448567594375401, "grad_norm": 1.1339312308593168, "learning_rate": 6.180478877217477e-07, "loss": 0.0925, "step": 28960 }, { "epoch": 0.8448859326681837, "grad_norm": 0.9206239070075416, "learning_rate": 6.17820382371197e-07, "loss": 0.1178, "step": 28961 }, { "epoch": 0.8449151058988272, "grad_norm": 0.8088148687671737, "learning_rate": 6.175929161436994e-07, "loss": 0.108, "step": 28962 }, { "epoch": 0.8449442791294708, "grad_norm": 0.8366321837183356, "learning_rate": 6.173654890412855e-07, "loss": 0.1118, "step": 28963 }, { "epoch": 0.8449734523601143, "grad_norm": 0.7771326612400864, "learning_rate": 6.171381010659877e-07, "loss": 0.1103, "step": 28964 }, { "epoch": 0.8450026255907579, "grad_norm": 0.95233919865386, "learning_rate": 6.169107522198348e-07, "loss": 0.0969, "step": 28965 }, { "epoch": 0.8450317988214014, "grad_norm": 0.8560933825205206, "learning_rate": 6.166834425048545e-07, "loss": 0.1113, "step": 28966 }, { "epoch": 0.845060972052045, "grad_norm": 0.9462886365787525, "learning_rate": 6.1645617192308e-07, "loss": 0.1134, "step": 28967 }, { "epoch": 0.8450901452826886, "grad_norm": 0.8744992932669856, "learning_rate": 6.162289404765382e-07, "loss": 0.1297, "step": 28968 }, { "epoch": 0.8451193185133322, "grad_norm": 1.0338832755218144, "learning_rate": 6.160017481672553e-07, "loss": 0.1231, "step": 28969 }, { "epoch": 0.8451484917439758, "grad_norm": 1.1384718410664043, "learning_rate": 6.157745949972649e-07, "loss": 0.1184, "step": 28970 }, { "epoch": 0.8451776649746193, "grad_norm": 0.8257966487801969, "learning_rate": 6.155474809685919e-07, "loss": 0.1207, "step": 28971 }, { "epoch": 0.8452068382052629, "grad_norm": 1.1902644605351222, "learning_rate": 6.153204060832635e-07, "loss": 0.126, "step": 28972 }, { "epoch": 0.8452360114359064, "grad_norm": 0.9941656567051275, "learning_rate": 6.150933703433087e-07, "loss": 0.1029, "step": 28973 }, { "epoch": 0.84526518466655, "grad_norm": 0.8267997592841663, "learning_rate": 6.148663737507537e-07, "loss": 0.1198, "step": 28974 }, { "epoch": 0.8452943578971935, "grad_norm": 0.8514516111687394, "learning_rate": 6.146394163076241e-07, "loss": 0.1205, "step": 28975 }, { "epoch": 0.8453235311278371, "grad_norm": 0.794370338561396, "learning_rate": 6.144124980159466e-07, "loss": 0.1073, "step": 28976 }, { "epoch": 0.8453527043584806, "grad_norm": 0.8941573073639782, "learning_rate": 6.141856188777484e-07, "loss": 0.115, "step": 28977 }, { "epoch": 0.8453818775891242, "grad_norm": 0.9754947754227669, "learning_rate": 6.13958778895054e-07, "loss": 0.0951, "step": 28978 }, { "epoch": 0.8454110508197678, "grad_norm": 0.8238535281379181, "learning_rate": 6.137319780698881e-07, "loss": 0.1168, "step": 28979 }, { "epoch": 0.8454402240504113, "grad_norm": 0.6944915606910066, "learning_rate": 6.135052164042765e-07, "loss": 0.0883, "step": 28980 }, { "epoch": 0.8454693972810549, "grad_norm": 0.9429286371602528, "learning_rate": 6.132784939002423e-07, "loss": 0.0976, "step": 28981 }, { "epoch": 0.8454985705116984, "grad_norm": 0.7588408904627072, "learning_rate": 6.130518105598104e-07, "loss": 0.1065, "step": 28982 }, { "epoch": 0.8455277437423421, "grad_norm": 0.8980012058379951, "learning_rate": 6.128251663850055e-07, "loss": 0.0994, "step": 28983 }, { "epoch": 0.8455569169729856, "grad_norm": 1.234274059590637, "learning_rate": 6.125985613778506e-07, "loss": 0.1242, "step": 28984 }, { "epoch": 0.8455860902036292, "grad_norm": 0.9430980757964642, "learning_rate": 6.123719955403673e-07, "loss": 0.1086, "step": 28985 }, { "epoch": 0.8456152634342727, "grad_norm": 0.8886403987846055, "learning_rate": 6.121454688745804e-07, "loss": 0.0884, "step": 28986 }, { "epoch": 0.8456444366649163, "grad_norm": 0.7905198450456818, "learning_rate": 6.119189813825105e-07, "loss": 0.1259, "step": 28987 }, { "epoch": 0.8456736098955598, "grad_norm": 0.8787226332974925, "learning_rate": 6.11692533066181e-07, "loss": 0.1236, "step": 28988 }, { "epoch": 0.8457027831262034, "grad_norm": 0.9199138971677424, "learning_rate": 6.114661239276121e-07, "loss": 0.1005, "step": 28989 }, { "epoch": 0.845731956356847, "grad_norm": 1.2170396444377143, "learning_rate": 6.112397539688269e-07, "loss": 0.107, "step": 28990 }, { "epoch": 0.8457611295874905, "grad_norm": 0.9045354533634172, "learning_rate": 6.110134231918458e-07, "loss": 0.1182, "step": 28991 }, { "epoch": 0.845790302818134, "grad_norm": 0.7436760800768687, "learning_rate": 6.107871315986879e-07, "loss": 0.101, "step": 28992 }, { "epoch": 0.8458194760487776, "grad_norm": 0.7470511240234238, "learning_rate": 6.105608791913747e-07, "loss": 0.097, "step": 28993 }, { "epoch": 0.8458486492794212, "grad_norm": 0.9281311415140583, "learning_rate": 6.103346659719278e-07, "loss": 0.105, "step": 28994 }, { "epoch": 0.8458778225100647, "grad_norm": 0.8024697346946731, "learning_rate": 6.101084919423645e-07, "loss": 0.1032, "step": 28995 }, { "epoch": 0.8459069957407084, "grad_norm": 0.772975732299244, "learning_rate": 6.098823571047036e-07, "loss": 0.1081, "step": 28996 }, { "epoch": 0.8459361689713519, "grad_norm": 0.9238825247852168, "learning_rate": 6.096562614609658e-07, "loss": 0.0956, "step": 28997 }, { "epoch": 0.8459653422019955, "grad_norm": 0.898659153580409, "learning_rate": 6.094302050131695e-07, "loss": 0.1478, "step": 28998 }, { "epoch": 0.845994515432639, "grad_norm": 1.5643563022734468, "learning_rate": 6.092041877633298e-07, "loss": 0.1151, "step": 28999 }, { "epoch": 0.8460236886632826, "grad_norm": 0.8831666935557358, "learning_rate": 6.089782097134689e-07, "loss": 0.1099, "step": 29000 }, { "epoch": 0.8460528618939261, "grad_norm": 0.8419068549251498, "learning_rate": 6.087522708656024e-07, "loss": 0.1242, "step": 29001 }, { "epoch": 0.8460820351245697, "grad_norm": 0.6877823378891237, "learning_rate": 6.085263712217465e-07, "loss": 0.1057, "step": 29002 }, { "epoch": 0.8461112083552133, "grad_norm": 0.9322222929739505, "learning_rate": 6.083005107839196e-07, "loss": 0.0968, "step": 29003 }, { "epoch": 0.8461403815858568, "grad_norm": 1.1745189573711559, "learning_rate": 6.080746895541372e-07, "loss": 0.1306, "step": 29004 }, { "epoch": 0.8461695548165004, "grad_norm": 0.9126592759386749, "learning_rate": 6.078489075344152e-07, "loss": 0.116, "step": 29005 }, { "epoch": 0.8461987280471439, "grad_norm": 0.919260997371977, "learning_rate": 6.076231647267689e-07, "loss": 0.1192, "step": 29006 }, { "epoch": 0.8462279012777875, "grad_norm": 0.888339511523776, "learning_rate": 6.073974611332156e-07, "loss": 0.1072, "step": 29007 }, { "epoch": 0.846257074508431, "grad_norm": 0.9077946045990275, "learning_rate": 6.071717967557694e-07, "loss": 0.1007, "step": 29008 }, { "epoch": 0.8462862477390746, "grad_norm": 0.8680779547275776, "learning_rate": 6.069461715964436e-07, "loss": 0.1135, "step": 29009 }, { "epoch": 0.8463154209697182, "grad_norm": 1.0610882156130454, "learning_rate": 6.06720585657255e-07, "loss": 0.1028, "step": 29010 }, { "epoch": 0.8463445942003618, "grad_norm": 1.1003124603593972, "learning_rate": 6.064950389402152e-07, "loss": 0.0991, "step": 29011 }, { "epoch": 0.8463737674310053, "grad_norm": 1.0180243383169565, "learning_rate": 6.062695314473383e-07, "loss": 0.1175, "step": 29012 }, { "epoch": 0.8464029406616489, "grad_norm": 0.9826360830307576, "learning_rate": 6.060440631806397e-07, "loss": 0.1146, "step": 29013 }, { "epoch": 0.8464321138922924, "grad_norm": 0.8642952642862485, "learning_rate": 6.058186341421307e-07, "loss": 0.112, "step": 29014 }, { "epoch": 0.846461287122936, "grad_norm": 0.9622097652971888, "learning_rate": 6.05593244333823e-07, "loss": 0.1192, "step": 29015 }, { "epoch": 0.8464904603535796, "grad_norm": 0.7831867065941874, "learning_rate": 6.053678937577306e-07, "loss": 0.1062, "step": 29016 }, { "epoch": 0.8465196335842231, "grad_norm": 0.9394779054339935, "learning_rate": 6.051425824158636e-07, "loss": 0.0972, "step": 29017 }, { "epoch": 0.8465488068148667, "grad_norm": 1.0162801455659933, "learning_rate": 6.049173103102357e-07, "loss": 0.1355, "step": 29018 }, { "epoch": 0.8465779800455102, "grad_norm": 0.8091046858537951, "learning_rate": 6.046920774428555e-07, "loss": 0.1249, "step": 29019 }, { "epoch": 0.8466071532761538, "grad_norm": 0.9962912216737734, "learning_rate": 6.044668838157364e-07, "loss": 0.1096, "step": 29020 }, { "epoch": 0.8466363265067973, "grad_norm": 0.965340154386159, "learning_rate": 6.042417294308878e-07, "loss": 0.1179, "step": 29021 }, { "epoch": 0.8466654997374409, "grad_norm": 0.7149561802564454, "learning_rate": 6.040166142903186e-07, "loss": 0.0928, "step": 29022 }, { "epoch": 0.8466946729680845, "grad_norm": 0.8466089184322997, "learning_rate": 6.037915383960391e-07, "loss": 0.1121, "step": 29023 }, { "epoch": 0.8467238461987281, "grad_norm": 1.3270229358566914, "learning_rate": 6.035665017500609e-07, "loss": 0.134, "step": 29024 }, { "epoch": 0.8467530194293716, "grad_norm": 1.099903479306349, "learning_rate": 6.033415043543916e-07, "loss": 0.1099, "step": 29025 }, { "epoch": 0.8467821926600152, "grad_norm": 0.7118010932488361, "learning_rate": 6.031165462110383e-07, "loss": 0.1156, "step": 29026 }, { "epoch": 0.8468113658906588, "grad_norm": 1.0062081837952301, "learning_rate": 6.02891627322012e-07, "loss": 0.1156, "step": 29027 }, { "epoch": 0.8468405391213023, "grad_norm": 0.9127783006903045, "learning_rate": 6.0266674768932e-07, "loss": 0.1033, "step": 29028 }, { "epoch": 0.8468697123519459, "grad_norm": 0.9014088640034168, "learning_rate": 6.024419073149668e-07, "loss": 0.1145, "step": 29029 }, { "epoch": 0.8468988855825894, "grad_norm": 1.0071058171151983, "learning_rate": 6.022171062009652e-07, "loss": 0.1179, "step": 29030 }, { "epoch": 0.846928058813233, "grad_norm": 0.9446048324157715, "learning_rate": 6.019923443493192e-07, "loss": 0.1213, "step": 29031 }, { "epoch": 0.8469572320438765, "grad_norm": 0.8361849720046881, "learning_rate": 6.017676217620344e-07, "loss": 0.1067, "step": 29032 }, { "epoch": 0.8469864052745201, "grad_norm": 0.9667808011358108, "learning_rate": 6.015429384411192e-07, "loss": 0.1115, "step": 29033 }, { "epoch": 0.8470155785051636, "grad_norm": 1.0742950683483192, "learning_rate": 6.013182943885781e-07, "loss": 0.1079, "step": 29034 }, { "epoch": 0.8470447517358072, "grad_norm": 1.1633351695493122, "learning_rate": 6.010936896064184e-07, "loss": 0.1199, "step": 29035 }, { "epoch": 0.8470739249664507, "grad_norm": 0.8533109669312983, "learning_rate": 6.008691240966425e-07, "loss": 0.108, "step": 29036 }, { "epoch": 0.8471030981970944, "grad_norm": 0.8061642141837491, "learning_rate": 6.006445978612585e-07, "loss": 0.1057, "step": 29037 }, { "epoch": 0.847132271427738, "grad_norm": 0.8820922330996973, "learning_rate": 6.004201109022689e-07, "loss": 0.0857, "step": 29038 }, { "epoch": 0.8471614446583815, "grad_norm": 0.9703849497398688, "learning_rate": 6.001956632216771e-07, "loss": 0.1074, "step": 29039 }, { "epoch": 0.847190617889025, "grad_norm": 0.8490460378689196, "learning_rate": 5.999712548214886e-07, "loss": 0.1062, "step": 29040 }, { "epoch": 0.8472197911196686, "grad_norm": 0.9537004209596786, "learning_rate": 5.99746885703707e-07, "loss": 0.1173, "step": 29041 }, { "epoch": 0.8472489643503122, "grad_norm": 0.9381791552270514, "learning_rate": 5.995225558703344e-07, "loss": 0.1304, "step": 29042 }, { "epoch": 0.8472781375809557, "grad_norm": 1.2164745413556348, "learning_rate": 5.992982653233742e-07, "loss": 0.1075, "step": 29043 }, { "epoch": 0.8473073108115993, "grad_norm": 0.7229966815710739, "learning_rate": 5.990740140648288e-07, "loss": 0.1095, "step": 29044 }, { "epoch": 0.8473364840422428, "grad_norm": 0.8851650160487878, "learning_rate": 5.988498020966993e-07, "loss": 0.1282, "step": 29045 }, { "epoch": 0.8473656572728864, "grad_norm": 3.132183776134045, "learning_rate": 5.986256294209874e-07, "loss": 0.1191, "step": 29046 }, { "epoch": 0.8473948305035299, "grad_norm": 0.8087015027414471, "learning_rate": 5.984014960396972e-07, "loss": 0.1082, "step": 29047 }, { "epoch": 0.8474240037341735, "grad_norm": 0.9006617659695598, "learning_rate": 5.98177401954827e-07, "loss": 0.1113, "step": 29048 }, { "epoch": 0.847453176964817, "grad_norm": 0.8753993150754519, "learning_rate": 5.979533471683773e-07, "loss": 0.1255, "step": 29049 }, { "epoch": 0.8474823501954607, "grad_norm": 0.8116099502956838, "learning_rate": 5.977293316823502e-07, "loss": 0.1078, "step": 29050 }, { "epoch": 0.8475115234261043, "grad_norm": 0.8751003965449491, "learning_rate": 5.975053554987448e-07, "loss": 0.1115, "step": 29051 }, { "epoch": 0.8475406966567478, "grad_norm": 0.9368127147652712, "learning_rate": 5.972814186195597e-07, "loss": 0.116, "step": 29052 }, { "epoch": 0.8475698698873914, "grad_norm": 0.847932109256919, "learning_rate": 5.970575210467949e-07, "loss": 0.0994, "step": 29053 }, { "epoch": 0.8475990431180349, "grad_norm": 0.7030066060440572, "learning_rate": 5.968336627824506e-07, "loss": 0.1091, "step": 29054 }, { "epoch": 0.8476282163486785, "grad_norm": 0.660917535734426, "learning_rate": 5.966098438285245e-07, "loss": 0.1247, "step": 29055 }, { "epoch": 0.847657389579322, "grad_norm": 0.7533538106123413, "learning_rate": 5.963860641870134e-07, "loss": 0.1049, "step": 29056 }, { "epoch": 0.8476865628099656, "grad_norm": 0.9245893197000575, "learning_rate": 5.961623238599168e-07, "loss": 0.0943, "step": 29057 }, { "epoch": 0.8477157360406091, "grad_norm": 0.7611611264482083, "learning_rate": 5.959386228492314e-07, "loss": 0.1128, "step": 29058 }, { "epoch": 0.8477449092712527, "grad_norm": 0.9054708090581463, "learning_rate": 5.957149611569541e-07, "loss": 0.1033, "step": 29059 }, { "epoch": 0.8477740825018962, "grad_norm": 0.7867854167182577, "learning_rate": 5.954913387850836e-07, "loss": 0.1185, "step": 29060 }, { "epoch": 0.8478032557325398, "grad_norm": 0.9561588506171079, "learning_rate": 5.952677557356146e-07, "loss": 0.1009, "step": 29061 }, { "epoch": 0.8478324289631833, "grad_norm": 0.8367745657832105, "learning_rate": 5.950442120105432e-07, "loss": 0.1215, "step": 29062 }, { "epoch": 0.8478616021938269, "grad_norm": 0.7966824773289977, "learning_rate": 5.948207076118662e-07, "loss": 0.1464, "step": 29063 }, { "epoch": 0.8478907754244706, "grad_norm": 0.9122979652791569, "learning_rate": 5.945972425415769e-07, "loss": 0.1128, "step": 29064 }, { "epoch": 0.8479199486551141, "grad_norm": 0.7476498141301914, "learning_rate": 5.943738168016732e-07, "loss": 0.1098, "step": 29065 }, { "epoch": 0.8479491218857577, "grad_norm": 1.0757619620389016, "learning_rate": 5.941504303941475e-07, "loss": 0.1206, "step": 29066 }, { "epoch": 0.8479782951164012, "grad_norm": 1.0973587077063143, "learning_rate": 5.939270833209959e-07, "loss": 0.1108, "step": 29067 }, { "epoch": 0.8480074683470448, "grad_norm": 0.8395425331530202, "learning_rate": 5.937037755842112e-07, "loss": 0.0973, "step": 29068 }, { "epoch": 0.8480366415776883, "grad_norm": 0.8268829295462345, "learning_rate": 5.934805071857863e-07, "loss": 0.1319, "step": 29069 }, { "epoch": 0.8480658148083319, "grad_norm": 0.7921914210874075, "learning_rate": 5.932572781277158e-07, "loss": 0.1068, "step": 29070 }, { "epoch": 0.8480949880389754, "grad_norm": 0.8860279687171584, "learning_rate": 5.930340884119934e-07, "loss": 0.1045, "step": 29071 }, { "epoch": 0.848124161269619, "grad_norm": 1.0539568396441068, "learning_rate": 5.928109380406094e-07, "loss": 0.1192, "step": 29072 }, { "epoch": 0.8481533345002625, "grad_norm": 0.776371118999152, "learning_rate": 5.925878270155582e-07, "loss": 0.1015, "step": 29073 }, { "epoch": 0.8481825077309061, "grad_norm": 0.8651158857965905, "learning_rate": 5.923647553388312e-07, "loss": 0.1196, "step": 29074 }, { "epoch": 0.8482116809615496, "grad_norm": 0.8464081537327086, "learning_rate": 5.921417230124177e-07, "loss": 0.1063, "step": 29075 }, { "epoch": 0.8482408541921932, "grad_norm": 0.9301961433279016, "learning_rate": 5.919187300383112e-07, "loss": 0.1133, "step": 29076 }, { "epoch": 0.8482700274228369, "grad_norm": 0.9654583692499626, "learning_rate": 5.916957764185033e-07, "loss": 0.1393, "step": 29077 }, { "epoch": 0.8482992006534804, "grad_norm": 0.7106699956578029, "learning_rate": 5.914728621549826e-07, "loss": 0.0997, "step": 29078 }, { "epoch": 0.848328373884124, "grad_norm": 0.8398080467262697, "learning_rate": 5.91249987249739e-07, "loss": 0.1002, "step": 29079 }, { "epoch": 0.8483575471147675, "grad_norm": 1.082671993972656, "learning_rate": 5.910271517047639e-07, "loss": 0.115, "step": 29080 }, { "epoch": 0.8483867203454111, "grad_norm": 0.732134523449264, "learning_rate": 5.90804355522046e-07, "loss": 0.1049, "step": 29081 }, { "epoch": 0.8484158935760546, "grad_norm": 0.8603765456311879, "learning_rate": 5.905815987035735e-07, "loss": 0.1001, "step": 29082 }, { "epoch": 0.8484450668066982, "grad_norm": 0.8973437925039011, "learning_rate": 5.903588812513356e-07, "loss": 0.1394, "step": 29083 }, { "epoch": 0.8484742400373417, "grad_norm": 0.8189177716404316, "learning_rate": 5.901362031673219e-07, "loss": 0.1304, "step": 29084 }, { "epoch": 0.8485034132679853, "grad_norm": 0.8655269686689555, "learning_rate": 5.899135644535193e-07, "loss": 0.1104, "step": 29085 }, { "epoch": 0.8485325864986288, "grad_norm": 0.8718371207005509, "learning_rate": 5.896909651119149e-07, "loss": 0.1005, "step": 29086 }, { "epoch": 0.8485617597292724, "grad_norm": 0.7441187259645008, "learning_rate": 5.894684051444977e-07, "loss": 0.1259, "step": 29087 }, { "epoch": 0.848590932959916, "grad_norm": 0.8453179416188262, "learning_rate": 5.892458845532528e-07, "loss": 0.1064, "step": 29088 }, { "epoch": 0.8486201061905595, "grad_norm": 0.885642919851678, "learning_rate": 5.890234033401676e-07, "loss": 0.1155, "step": 29089 }, { "epoch": 0.848649279421203, "grad_norm": 1.123644647537821, "learning_rate": 5.888009615072293e-07, "loss": 0.1318, "step": 29090 }, { "epoch": 0.8486784526518467, "grad_norm": 0.9043374872091943, "learning_rate": 5.88578559056423e-07, "loss": 0.129, "step": 29091 }, { "epoch": 0.8487076258824903, "grad_norm": 1.048869038299374, "learning_rate": 5.883561959897338e-07, "loss": 0.1224, "step": 29092 }, { "epoch": 0.8487367991131338, "grad_norm": 0.7790492693780142, "learning_rate": 5.881338723091478e-07, "loss": 0.0853, "step": 29093 }, { "epoch": 0.8487659723437774, "grad_norm": 0.8720151642317396, "learning_rate": 5.879115880166486e-07, "loss": 0.1216, "step": 29094 }, { "epoch": 0.8487951455744209, "grad_norm": 0.7905411336337679, "learning_rate": 5.876893431142222e-07, "loss": 0.1265, "step": 29095 }, { "epoch": 0.8488243188050645, "grad_norm": 0.9281130594392916, "learning_rate": 5.874671376038516e-07, "loss": 0.1101, "step": 29096 }, { "epoch": 0.848853492035708, "grad_norm": 0.8018721161052225, "learning_rate": 5.872449714875217e-07, "loss": 0.1089, "step": 29097 }, { "epoch": 0.8488826652663516, "grad_norm": 0.8999977959568195, "learning_rate": 5.870228447672149e-07, "loss": 0.0836, "step": 29098 }, { "epoch": 0.8489118384969951, "grad_norm": 1.014786378940118, "learning_rate": 5.868007574449141e-07, "loss": 0.1261, "step": 29099 }, { "epoch": 0.8489410117276387, "grad_norm": 0.9986420432333819, "learning_rate": 5.865787095226028e-07, "loss": 0.0987, "step": 29100 }, { "epoch": 0.8489701849582822, "grad_norm": 0.7767268360772743, "learning_rate": 5.863567010022637e-07, "loss": 0.1271, "step": 29101 }, { "epoch": 0.8489993581889258, "grad_norm": 0.9771046731481287, "learning_rate": 5.861347318858779e-07, "loss": 0.1224, "step": 29102 }, { "epoch": 0.8490285314195694, "grad_norm": 0.7944777870991566, "learning_rate": 5.859128021754279e-07, "loss": 0.1122, "step": 29103 }, { "epoch": 0.849057704650213, "grad_norm": 0.7144871424506061, "learning_rate": 5.856909118728954e-07, "loss": 0.1138, "step": 29104 }, { "epoch": 0.8490868778808566, "grad_norm": 0.7735633126790501, "learning_rate": 5.854690609802593e-07, "loss": 0.1159, "step": 29105 }, { "epoch": 0.8491160511115001, "grad_norm": 0.9817189047356594, "learning_rate": 5.852472494995015e-07, "loss": 0.1034, "step": 29106 }, { "epoch": 0.8491452243421437, "grad_norm": 0.8178974331985509, "learning_rate": 5.850254774326037e-07, "loss": 0.109, "step": 29107 }, { "epoch": 0.8491743975727872, "grad_norm": 1.000510735201703, "learning_rate": 5.848037447815441e-07, "loss": 0.1211, "step": 29108 }, { "epoch": 0.8492035708034308, "grad_norm": 0.9691590387254037, "learning_rate": 5.84582051548302e-07, "loss": 0.1213, "step": 29109 }, { "epoch": 0.8492327440340743, "grad_norm": 1.0165256421212807, "learning_rate": 5.843603977348577e-07, "loss": 0.1086, "step": 29110 }, { "epoch": 0.8492619172647179, "grad_norm": 0.752107434507686, "learning_rate": 5.841387833431906e-07, "loss": 0.125, "step": 29111 }, { "epoch": 0.8492910904953614, "grad_norm": 0.722377620849228, "learning_rate": 5.839172083752765e-07, "loss": 0.1099, "step": 29112 }, { "epoch": 0.849320263726005, "grad_norm": 1.157240859714747, "learning_rate": 5.836956728330955e-07, "loss": 0.1158, "step": 29113 }, { "epoch": 0.8493494369566486, "grad_norm": 0.9839289493941649, "learning_rate": 5.834741767186264e-07, "loss": 0.1229, "step": 29114 }, { "epoch": 0.8493786101872921, "grad_norm": 0.9185839013973843, "learning_rate": 5.832527200338455e-07, "loss": 0.1161, "step": 29115 }, { "epoch": 0.8494077834179357, "grad_norm": 0.7015963374246615, "learning_rate": 5.830313027807294e-07, "loss": 0.1145, "step": 29116 }, { "epoch": 0.8494369566485792, "grad_norm": 0.7002043277821434, "learning_rate": 5.828099249612556e-07, "loss": 0.1176, "step": 29117 }, { "epoch": 0.8494661298792229, "grad_norm": 0.8103848443445484, "learning_rate": 5.825885865774001e-07, "loss": 0.0924, "step": 29118 }, { "epoch": 0.8494953031098664, "grad_norm": 0.6318928773755528, "learning_rate": 5.823672876311387e-07, "loss": 0.1167, "step": 29119 }, { "epoch": 0.84952447634051, "grad_norm": 0.8146860468507399, "learning_rate": 5.821460281244489e-07, "loss": 0.1168, "step": 29120 }, { "epoch": 0.8495536495711535, "grad_norm": 0.9467762026272764, "learning_rate": 5.819248080593043e-07, "loss": 0.133, "step": 29121 }, { "epoch": 0.8495828228017971, "grad_norm": 0.6215715522082205, "learning_rate": 5.817036274376797e-07, "loss": 0.0927, "step": 29122 }, { "epoch": 0.8496119960324406, "grad_norm": 1.9880019343072017, "learning_rate": 5.814824862615514e-07, "loss": 0.1337, "step": 29123 }, { "epoch": 0.8496411692630842, "grad_norm": 0.7421360015741368, "learning_rate": 5.812613845328912e-07, "loss": 0.1097, "step": 29124 }, { "epoch": 0.8496703424937277, "grad_norm": 0.7819558106184666, "learning_rate": 5.810403222536759e-07, "loss": 0.1272, "step": 29125 }, { "epoch": 0.8496995157243713, "grad_norm": 2.3119527103291024, "learning_rate": 5.808192994258771e-07, "loss": 0.1193, "step": 29126 }, { "epoch": 0.8497286889550149, "grad_norm": 0.8243265048202371, "learning_rate": 5.805983160514689e-07, "loss": 0.0942, "step": 29127 }, { "epoch": 0.8497578621856584, "grad_norm": 0.7023324635356789, "learning_rate": 5.803773721324247e-07, "loss": 0.0879, "step": 29128 }, { "epoch": 0.849787035416302, "grad_norm": 0.5917597332407699, "learning_rate": 5.801564676707144e-07, "loss": 0.1061, "step": 29129 }, { "epoch": 0.8498162086469455, "grad_norm": 0.7443615849355073, "learning_rate": 5.799356026683128e-07, "loss": 0.1144, "step": 29130 }, { "epoch": 0.8498453818775892, "grad_norm": 0.8423593937147441, "learning_rate": 5.797147771271916e-07, "loss": 0.1046, "step": 29131 }, { "epoch": 0.8498745551082327, "grad_norm": 0.9524970229024283, "learning_rate": 5.794939910493208e-07, "loss": 0.1036, "step": 29132 }, { "epoch": 0.8499037283388763, "grad_norm": 0.8264582787598, "learning_rate": 5.792732444366734e-07, "loss": 0.1114, "step": 29133 }, { "epoch": 0.8499329015695198, "grad_norm": 0.7869697019267445, "learning_rate": 5.790525372912192e-07, "loss": 0.1102, "step": 29134 }, { "epoch": 0.8499620748001634, "grad_norm": 0.9496161098997309, "learning_rate": 5.78831869614927e-07, "loss": 0.1052, "step": 29135 }, { "epoch": 0.849991248030807, "grad_norm": 0.7375757422015793, "learning_rate": 5.786112414097689e-07, "loss": 0.1191, "step": 29136 }, { "epoch": 0.8500204212614505, "grad_norm": 0.7762538525516017, "learning_rate": 5.783906526777155e-07, "loss": 0.1233, "step": 29137 }, { "epoch": 0.850049594492094, "grad_norm": 0.8092677835828772, "learning_rate": 5.781701034207343e-07, "loss": 0.1214, "step": 29138 }, { "epoch": 0.8500787677227376, "grad_norm": 0.933973821805636, "learning_rate": 5.779495936407942e-07, "loss": 0.1021, "step": 29139 }, { "epoch": 0.8501079409533812, "grad_norm": 0.9931670136317993, "learning_rate": 5.777291233398652e-07, "loss": 0.1106, "step": 29140 }, { "epoch": 0.8501371141840247, "grad_norm": 0.9863575881144226, "learning_rate": 5.775086925199152e-07, "loss": 0.1032, "step": 29141 }, { "epoch": 0.8501662874146683, "grad_norm": 1.396160627450317, "learning_rate": 5.772883011829106e-07, "loss": 0.1191, "step": 29142 }, { "epoch": 0.8501954606453118, "grad_norm": 0.9039831491364916, "learning_rate": 5.770679493308206e-07, "loss": 0.1148, "step": 29143 }, { "epoch": 0.8502246338759554, "grad_norm": 0.7585223572741104, "learning_rate": 5.768476369656128e-07, "loss": 0.1146, "step": 29144 }, { "epoch": 0.850253807106599, "grad_norm": 0.8947061008193511, "learning_rate": 5.766273640892539e-07, "loss": 0.0994, "step": 29145 }, { "epoch": 0.8502829803372426, "grad_norm": 0.9824931411171514, "learning_rate": 5.764071307037083e-07, "loss": 0.1101, "step": 29146 }, { "epoch": 0.8503121535678861, "grad_norm": 0.8736909814841786, "learning_rate": 5.761869368109451e-07, "loss": 0.1086, "step": 29147 }, { "epoch": 0.8503413267985297, "grad_norm": 0.9175574839728452, "learning_rate": 5.759667824129278e-07, "loss": 0.1266, "step": 29148 }, { "epoch": 0.8503705000291732, "grad_norm": 0.712944830383276, "learning_rate": 5.75746667511623e-07, "loss": 0.1255, "step": 29149 }, { "epoch": 0.8503996732598168, "grad_norm": 0.8898608437365731, "learning_rate": 5.75526592108997e-07, "loss": 0.1163, "step": 29150 }, { "epoch": 0.8504288464904604, "grad_norm": 0.9883291396051264, "learning_rate": 5.753065562070131e-07, "loss": 0.1288, "step": 29151 }, { "epoch": 0.8504580197211039, "grad_norm": 0.8090705015277261, "learning_rate": 5.75086559807635e-07, "loss": 0.1093, "step": 29152 }, { "epoch": 0.8504871929517475, "grad_norm": 0.7690992370566142, "learning_rate": 5.748666029128292e-07, "loss": 0.1064, "step": 29153 }, { "epoch": 0.850516366182391, "grad_norm": 0.729171642889137, "learning_rate": 5.746466855245564e-07, "loss": 0.1278, "step": 29154 }, { "epoch": 0.8505455394130346, "grad_norm": 0.7557397395067963, "learning_rate": 5.744268076447829e-07, "loss": 0.1128, "step": 29155 }, { "epoch": 0.8505747126436781, "grad_norm": 0.7387186636381406, "learning_rate": 5.742069692754692e-07, "loss": 0.1202, "step": 29156 }, { "epoch": 0.8506038858743217, "grad_norm": 0.8967285687017305, "learning_rate": 5.739871704185807e-07, "loss": 0.1042, "step": 29157 }, { "epoch": 0.8506330591049653, "grad_norm": 0.6574592588010976, "learning_rate": 5.737674110760777e-07, "loss": 0.0932, "step": 29158 }, { "epoch": 0.8506622323356089, "grad_norm": 0.7309849420674775, "learning_rate": 5.735476912499216e-07, "loss": 0.0916, "step": 29159 }, { "epoch": 0.8506914055662524, "grad_norm": 0.7636606999105496, "learning_rate": 5.733280109420753e-07, "loss": 0.1137, "step": 29160 }, { "epoch": 0.850720578796896, "grad_norm": 0.895668734767981, "learning_rate": 5.731083701545003e-07, "loss": 0.1118, "step": 29161 }, { "epoch": 0.8507497520275396, "grad_norm": 0.9338595039586092, "learning_rate": 5.728887688891566e-07, "loss": 0.1279, "step": 29162 }, { "epoch": 0.8507789252581831, "grad_norm": 0.9975607117987849, "learning_rate": 5.726692071480061e-07, "loss": 0.1198, "step": 29163 }, { "epoch": 0.8508080984888267, "grad_norm": 1.03145749253362, "learning_rate": 5.724496849330075e-07, "loss": 0.1095, "step": 29164 }, { "epoch": 0.8508372717194702, "grad_norm": 0.9783715480649365, "learning_rate": 5.722302022461206e-07, "loss": 0.1096, "step": 29165 }, { "epoch": 0.8508664449501138, "grad_norm": 0.8587857273281144, "learning_rate": 5.720107590893054e-07, "loss": 0.1263, "step": 29166 }, { "epoch": 0.8508956181807573, "grad_norm": 0.9195945072151114, "learning_rate": 5.717913554645221e-07, "loss": 0.102, "step": 29167 }, { "epoch": 0.8509247914114009, "grad_norm": 0.8380710786381557, "learning_rate": 5.715719913737283e-07, "loss": 0.1133, "step": 29168 }, { "epoch": 0.8509539646420444, "grad_norm": 0.7497282590627513, "learning_rate": 5.713526668188818e-07, "loss": 0.1194, "step": 29169 }, { "epoch": 0.850983137872688, "grad_norm": 0.9293399746740582, "learning_rate": 5.711333818019421e-07, "loss": 0.1134, "step": 29170 }, { "epoch": 0.8510123111033315, "grad_norm": 0.8645695735950689, "learning_rate": 5.709141363248666e-07, "loss": 0.0919, "step": 29171 }, { "epoch": 0.8510414843339752, "grad_norm": 0.7521536442160687, "learning_rate": 5.706949303896115e-07, "loss": 0.1092, "step": 29172 }, { "epoch": 0.8510706575646187, "grad_norm": 0.7737213973962924, "learning_rate": 5.704757639981346e-07, "loss": 0.1224, "step": 29173 }, { "epoch": 0.8510998307952623, "grad_norm": 1.0631300616216923, "learning_rate": 5.702566371523937e-07, "loss": 0.1334, "step": 29174 }, { "epoch": 0.8511290040259059, "grad_norm": 0.8543005343058528, "learning_rate": 5.700375498543442e-07, "loss": 0.1037, "step": 29175 }, { "epoch": 0.8511581772565494, "grad_norm": 0.8937262599219347, "learning_rate": 5.698185021059404e-07, "loss": 0.1138, "step": 29176 }, { "epoch": 0.851187350487193, "grad_norm": 0.7674303809737473, "learning_rate": 5.69599493909141e-07, "loss": 0.0949, "step": 29177 }, { "epoch": 0.8512165237178365, "grad_norm": 0.7625463589295192, "learning_rate": 5.693805252658984e-07, "loss": 0.1123, "step": 29178 }, { "epoch": 0.8512456969484801, "grad_norm": 0.8400415243150124, "learning_rate": 5.691615961781694e-07, "loss": 0.1069, "step": 29179 }, { "epoch": 0.8512748701791236, "grad_norm": 0.7553999497936758, "learning_rate": 5.689427066479081e-07, "loss": 0.1042, "step": 29180 }, { "epoch": 0.8513040434097672, "grad_norm": 1.9784624781982205, "learning_rate": 5.687238566770692e-07, "loss": 0.1156, "step": 29181 }, { "epoch": 0.8513332166404107, "grad_norm": 0.990666517949037, "learning_rate": 5.685050462676045e-07, "loss": 0.1046, "step": 29182 }, { "epoch": 0.8513623898710543, "grad_norm": 0.7638324941083079, "learning_rate": 5.682862754214696e-07, "loss": 0.1085, "step": 29183 }, { "epoch": 0.8513915631016978, "grad_norm": 0.8732438449305735, "learning_rate": 5.680675441406164e-07, "loss": 0.1168, "step": 29184 }, { "epoch": 0.8514207363323415, "grad_norm": 0.8059618409293302, "learning_rate": 5.678488524269993e-07, "loss": 0.1185, "step": 29185 }, { "epoch": 0.851449909562985, "grad_norm": 0.9910972610017613, "learning_rate": 5.676302002825679e-07, "loss": 0.1026, "step": 29186 }, { "epoch": 0.8514790827936286, "grad_norm": 0.950750253391563, "learning_rate": 5.674115877092773e-07, "loss": 0.1188, "step": 29187 }, { "epoch": 0.8515082560242722, "grad_norm": 0.8802836415266408, "learning_rate": 5.671930147090782e-07, "loss": 0.1192, "step": 29188 }, { "epoch": 0.8515374292549157, "grad_norm": 0.984339877038203, "learning_rate": 5.669744812839207e-07, "loss": 0.1, "step": 29189 }, { "epoch": 0.8515666024855593, "grad_norm": 0.8359904446673222, "learning_rate": 5.667559874357564e-07, "loss": 0.1205, "step": 29190 }, { "epoch": 0.8515957757162028, "grad_norm": 0.8618322585781738, "learning_rate": 5.665375331665374e-07, "loss": 0.1327, "step": 29191 }, { "epoch": 0.8516249489468464, "grad_norm": 0.9559095642731167, "learning_rate": 5.663191184782118e-07, "loss": 0.1171, "step": 29192 }, { "epoch": 0.8516541221774899, "grad_norm": 0.853620565237487, "learning_rate": 5.661007433727322e-07, "loss": 0.1026, "step": 29193 }, { "epoch": 0.8516832954081335, "grad_norm": 1.006325333801254, "learning_rate": 5.658824078520464e-07, "loss": 0.1141, "step": 29194 }, { "epoch": 0.851712468638777, "grad_norm": 1.0933564406113303, "learning_rate": 5.656641119181033e-07, "loss": 0.1073, "step": 29195 }, { "epoch": 0.8517416418694206, "grad_norm": 0.8362935999240677, "learning_rate": 5.65445855572852e-07, "loss": 0.1064, "step": 29196 }, { "epoch": 0.8517708151000641, "grad_norm": 0.7974708708563673, "learning_rate": 5.652276388182426e-07, "loss": 0.1009, "step": 29197 }, { "epoch": 0.8517999883307077, "grad_norm": 0.8415166654775664, "learning_rate": 5.650094616562224e-07, "loss": 0.1204, "step": 29198 }, { "epoch": 0.8518291615613514, "grad_norm": 0.815356017037793, "learning_rate": 5.647913240887376e-07, "loss": 0.1104, "step": 29199 }, { "epoch": 0.8518583347919949, "grad_norm": 0.7089991203220957, "learning_rate": 5.645732261177384e-07, "loss": 0.0861, "step": 29200 }, { "epoch": 0.8518875080226385, "grad_norm": 0.762593618453199, "learning_rate": 5.643551677451703e-07, "loss": 0.1023, "step": 29201 }, { "epoch": 0.851916681253282, "grad_norm": 0.7838990712235988, "learning_rate": 5.641371489729797e-07, "loss": 0.1186, "step": 29202 }, { "epoch": 0.8519458544839256, "grad_norm": 0.9898869616111717, "learning_rate": 5.639191698031137e-07, "loss": 0.1036, "step": 29203 }, { "epoch": 0.8519750277145691, "grad_norm": 0.8933480439330093, "learning_rate": 5.637012302375195e-07, "loss": 0.0961, "step": 29204 }, { "epoch": 0.8520042009452127, "grad_norm": 0.9400445190726446, "learning_rate": 5.634833302781411e-07, "loss": 0.1377, "step": 29205 }, { "epoch": 0.8520333741758562, "grad_norm": 0.8242725405017413, "learning_rate": 5.632654699269241e-07, "loss": 0.1228, "step": 29206 }, { "epoch": 0.8520625474064998, "grad_norm": 0.8976782201804555, "learning_rate": 5.630476491858145e-07, "loss": 0.1244, "step": 29207 }, { "epoch": 0.8520917206371433, "grad_norm": 0.82601834719666, "learning_rate": 5.628298680567556e-07, "loss": 0.1274, "step": 29208 }, { "epoch": 0.8521208938677869, "grad_norm": 0.9804919939828782, "learning_rate": 5.626121265416917e-07, "loss": 0.0974, "step": 29209 }, { "epoch": 0.8521500670984304, "grad_norm": 1.145379006702492, "learning_rate": 5.623944246425695e-07, "loss": 0.1192, "step": 29210 }, { "epoch": 0.852179240329074, "grad_norm": 0.7038569782151766, "learning_rate": 5.621767623613294e-07, "loss": 0.0943, "step": 29211 }, { "epoch": 0.8522084135597175, "grad_norm": 0.9213798575557912, "learning_rate": 5.619591396999158e-07, "loss": 0.1392, "step": 29212 }, { "epoch": 0.8522375867903612, "grad_norm": 0.9035633900811643, "learning_rate": 5.617415566602718e-07, "loss": 0.1199, "step": 29213 }, { "epoch": 0.8522667600210048, "grad_norm": 0.8680700646262012, "learning_rate": 5.61524013244339e-07, "loss": 0.1093, "step": 29214 }, { "epoch": 0.8522959332516483, "grad_norm": 0.8398015135678619, "learning_rate": 5.613065094540615e-07, "loss": 0.1124, "step": 29215 }, { "epoch": 0.8523251064822919, "grad_norm": 0.9560727325836809, "learning_rate": 5.610890452913787e-07, "loss": 0.1163, "step": 29216 }, { "epoch": 0.8523542797129354, "grad_norm": 0.8763948693257041, "learning_rate": 5.608716207582338e-07, "loss": 0.0945, "step": 29217 }, { "epoch": 0.852383452943579, "grad_norm": 1.0084153223099461, "learning_rate": 5.606542358565681e-07, "loss": 0.1143, "step": 29218 }, { "epoch": 0.8524126261742225, "grad_norm": 0.7695030500440252, "learning_rate": 5.6043689058832e-07, "loss": 0.1286, "step": 29219 }, { "epoch": 0.8524417994048661, "grad_norm": 0.8144531799096898, "learning_rate": 5.60219584955432e-07, "loss": 0.1279, "step": 29220 }, { "epoch": 0.8524709726355096, "grad_norm": 1.0118076535744631, "learning_rate": 5.600023189598442e-07, "loss": 0.1185, "step": 29221 }, { "epoch": 0.8525001458661532, "grad_norm": 0.895815978752632, "learning_rate": 5.597850926034954e-07, "loss": 0.1265, "step": 29222 }, { "epoch": 0.8525293190967967, "grad_norm": 0.9172450629512476, "learning_rate": 5.595679058883257e-07, "loss": 0.1147, "step": 29223 }, { "epoch": 0.8525584923274403, "grad_norm": 0.7653174122887514, "learning_rate": 5.593507588162739e-07, "loss": 0.1096, "step": 29224 }, { "epoch": 0.8525876655580839, "grad_norm": 0.7425071369559749, "learning_rate": 5.591336513892776e-07, "loss": 0.102, "step": 29225 }, { "epoch": 0.8526168387887275, "grad_norm": 0.6504027414778139, "learning_rate": 5.589165836092759e-07, "loss": 0.1053, "step": 29226 }, { "epoch": 0.8526460120193711, "grad_norm": 1.1333888888884196, "learning_rate": 5.586995554782076e-07, "loss": 0.132, "step": 29227 }, { "epoch": 0.8526751852500146, "grad_norm": 0.9105737631709079, "learning_rate": 5.584825669980098e-07, "loss": 0.12, "step": 29228 }, { "epoch": 0.8527043584806582, "grad_norm": 0.9115820786569032, "learning_rate": 5.582656181706181e-07, "loss": 0.089, "step": 29229 }, { "epoch": 0.8527335317113017, "grad_norm": 0.968959144874335, "learning_rate": 5.58048708997972e-07, "loss": 0.1097, "step": 29230 }, { "epoch": 0.8527627049419453, "grad_norm": 1.0005124495851303, "learning_rate": 5.578318394820053e-07, "loss": 0.1077, "step": 29231 }, { "epoch": 0.8527918781725888, "grad_norm": 0.9823464381116843, "learning_rate": 5.576150096246563e-07, "loss": 0.1147, "step": 29232 }, { "epoch": 0.8528210514032324, "grad_norm": 0.7421311343553169, "learning_rate": 5.573982194278594e-07, "loss": 0.1203, "step": 29233 }, { "epoch": 0.8528502246338759, "grad_norm": 0.9075214868246244, "learning_rate": 5.571814688935517e-07, "loss": 0.1131, "step": 29234 }, { "epoch": 0.8528793978645195, "grad_norm": 0.9861239596102009, "learning_rate": 5.56964758023667e-07, "loss": 0.1174, "step": 29235 }, { "epoch": 0.852908571095163, "grad_norm": 1.2044479152443401, "learning_rate": 5.567480868201397e-07, "loss": 0.1481, "step": 29236 }, { "epoch": 0.8529377443258066, "grad_norm": 0.72138023383901, "learning_rate": 5.565314552849044e-07, "loss": 0.1024, "step": 29237 }, { "epoch": 0.8529669175564502, "grad_norm": 0.9495747437271533, "learning_rate": 5.563148634198967e-07, "loss": 0.1459, "step": 29238 }, { "epoch": 0.8529960907870937, "grad_norm": 1.001649971644289, "learning_rate": 5.560983112270479e-07, "loss": 0.1078, "step": 29239 }, { "epoch": 0.8530252640177374, "grad_norm": 0.9833936050822271, "learning_rate": 5.558817987082937e-07, "loss": 0.1111, "step": 29240 }, { "epoch": 0.8530544372483809, "grad_norm": 0.7548229031177789, "learning_rate": 5.556653258655659e-07, "loss": 0.11, "step": 29241 }, { "epoch": 0.8530836104790245, "grad_norm": 0.9911272248553084, "learning_rate": 5.554488927007961e-07, "loss": 0.0984, "step": 29242 }, { "epoch": 0.853112783709668, "grad_norm": 0.7981739377756809, "learning_rate": 5.552324992159175e-07, "loss": 0.0982, "step": 29243 }, { "epoch": 0.8531419569403116, "grad_norm": 0.7470635185418462, "learning_rate": 5.550161454128633e-07, "loss": 0.1243, "step": 29244 }, { "epoch": 0.8531711301709551, "grad_norm": 0.884584534357942, "learning_rate": 5.547998312935637e-07, "loss": 0.1326, "step": 29245 }, { "epoch": 0.8532003034015987, "grad_norm": 1.1726197074415614, "learning_rate": 5.545835568599489e-07, "loss": 0.1056, "step": 29246 }, { "epoch": 0.8532294766322422, "grad_norm": 0.7488514684875713, "learning_rate": 5.543673221139517e-07, "loss": 0.1449, "step": 29247 }, { "epoch": 0.8532586498628858, "grad_norm": 0.8045346970530131, "learning_rate": 5.541511270575023e-07, "loss": 0.1172, "step": 29248 }, { "epoch": 0.8532878230935294, "grad_norm": 1.257946166363082, "learning_rate": 5.539349716925285e-07, "loss": 0.0989, "step": 29249 }, { "epoch": 0.8533169963241729, "grad_norm": 0.8544562436703373, "learning_rate": 5.537188560209633e-07, "loss": 0.1055, "step": 29250 }, { "epoch": 0.8533461695548165, "grad_norm": 0.7590419976067295, "learning_rate": 5.535027800447351e-07, "loss": 0.1372, "step": 29251 }, { "epoch": 0.85337534278546, "grad_norm": 0.9760112363389173, "learning_rate": 5.532867437657718e-07, "loss": 0.092, "step": 29252 }, { "epoch": 0.8534045160161037, "grad_norm": 1.0380472022167764, "learning_rate": 5.530707471860036e-07, "loss": 0.1023, "step": 29253 }, { "epoch": 0.8534336892467472, "grad_norm": 1.066616929378313, "learning_rate": 5.528547903073583e-07, "loss": 0.1117, "step": 29254 }, { "epoch": 0.8534628624773908, "grad_norm": 0.6603745490932088, "learning_rate": 5.526388731317627e-07, "loss": 0.0984, "step": 29255 }, { "epoch": 0.8534920357080343, "grad_norm": 0.7552520615868892, "learning_rate": 5.524229956611454e-07, "loss": 0.1125, "step": 29256 }, { "epoch": 0.8535212089386779, "grad_norm": 0.8156985411177039, "learning_rate": 5.522071578974353e-07, "loss": 0.1077, "step": 29257 }, { "epoch": 0.8535503821693214, "grad_norm": 0.6509190477659933, "learning_rate": 5.51991359842558e-07, "loss": 0.11, "step": 29258 }, { "epoch": 0.853579555399965, "grad_norm": 0.81201383586702, "learning_rate": 5.517756014984388e-07, "loss": 0.1196, "step": 29259 }, { "epoch": 0.8536087286306085, "grad_norm": 0.7430861923950558, "learning_rate": 5.51559882867006e-07, "loss": 0.135, "step": 29260 }, { "epoch": 0.8536379018612521, "grad_norm": 1.1002617405827813, "learning_rate": 5.513442039501837e-07, "loss": 0.1079, "step": 29261 }, { "epoch": 0.8536670750918957, "grad_norm": 1.135539187412405, "learning_rate": 5.511285647498993e-07, "loss": 0.1157, "step": 29262 }, { "epoch": 0.8536962483225392, "grad_norm": 1.1742054621140416, "learning_rate": 5.509129652680761e-07, "loss": 0.1466, "step": 29263 }, { "epoch": 0.8537254215531828, "grad_norm": 0.8570858075514588, "learning_rate": 5.506974055066411e-07, "loss": 0.0934, "step": 29264 }, { "epoch": 0.8537545947838263, "grad_norm": 0.6999114656213908, "learning_rate": 5.504818854675176e-07, "loss": 0.118, "step": 29265 }, { "epoch": 0.8537837680144699, "grad_norm": 0.7353575055464785, "learning_rate": 5.502664051526285e-07, "loss": 0.0964, "step": 29266 }, { "epoch": 0.8538129412451135, "grad_norm": 0.8843302451918854, "learning_rate": 5.500509645638985e-07, "loss": 0.0923, "step": 29267 }, { "epoch": 0.8538421144757571, "grad_norm": 0.8845497856311907, "learning_rate": 5.498355637032521e-07, "loss": 0.0977, "step": 29268 }, { "epoch": 0.8538712877064006, "grad_norm": 0.8000443880087532, "learning_rate": 5.49620202572611e-07, "loss": 0.1068, "step": 29269 }, { "epoch": 0.8539004609370442, "grad_norm": 0.7903037974342727, "learning_rate": 5.494048811738989e-07, "loss": 0.1145, "step": 29270 }, { "epoch": 0.8539296341676877, "grad_norm": 0.9523686122631562, "learning_rate": 5.491895995090374e-07, "loss": 0.0961, "step": 29271 }, { "epoch": 0.8539588073983313, "grad_norm": 1.2468134396418666, "learning_rate": 5.489743575799483e-07, "loss": 0.1321, "step": 29272 }, { "epoch": 0.8539879806289749, "grad_norm": 0.9434175407452533, "learning_rate": 5.48759155388553e-07, "loss": 0.1158, "step": 29273 }, { "epoch": 0.8540171538596184, "grad_norm": 1.1145933180924381, "learning_rate": 5.485439929367748e-07, "loss": 0.104, "step": 29274 }, { "epoch": 0.854046327090262, "grad_norm": 0.7761601853374593, "learning_rate": 5.483288702265327e-07, "loss": 0.1093, "step": 29275 }, { "epoch": 0.8540755003209055, "grad_norm": 0.6857080858149806, "learning_rate": 5.481137872597469e-07, "loss": 0.1158, "step": 29276 }, { "epoch": 0.8541046735515491, "grad_norm": 0.9622116621490467, "learning_rate": 5.478987440383399e-07, "loss": 0.1361, "step": 29277 }, { "epoch": 0.8541338467821926, "grad_norm": 0.8826381949857007, "learning_rate": 5.476837405642299e-07, "loss": 0.103, "step": 29278 }, { "epoch": 0.8541630200128362, "grad_norm": 0.8971376210726807, "learning_rate": 5.474687768393344e-07, "loss": 0.1357, "step": 29279 }, { "epoch": 0.8541921932434798, "grad_norm": 1.140063921540892, "learning_rate": 5.472538528655769e-07, "loss": 0.1181, "step": 29280 }, { "epoch": 0.8542213664741234, "grad_norm": 0.7401249554485791, "learning_rate": 5.47038968644874e-07, "loss": 0.0968, "step": 29281 }, { "epoch": 0.8542505397047669, "grad_norm": 0.69552968201201, "learning_rate": 5.468241241791428e-07, "loss": 0.0871, "step": 29282 }, { "epoch": 0.8542797129354105, "grad_norm": 0.9130804690861541, "learning_rate": 5.466093194703043e-07, "loss": 0.1062, "step": 29283 }, { "epoch": 0.854308886166054, "grad_norm": 1.1165299293809114, "learning_rate": 5.463945545202748e-07, "loss": 0.1248, "step": 29284 }, { "epoch": 0.8543380593966976, "grad_norm": 1.119356456817356, "learning_rate": 5.4617982933097e-07, "loss": 0.1211, "step": 29285 }, { "epoch": 0.8543672326273412, "grad_norm": 0.7619026129903873, "learning_rate": 5.45965143904309e-07, "loss": 0.1175, "step": 29286 }, { "epoch": 0.8543964058579847, "grad_norm": 1.115115578102341, "learning_rate": 5.457504982422085e-07, "loss": 0.1192, "step": 29287 }, { "epoch": 0.8544255790886283, "grad_norm": 0.967813089467639, "learning_rate": 5.455358923465843e-07, "loss": 0.1106, "step": 29288 }, { "epoch": 0.8544547523192718, "grad_norm": 2.7302747049786613, "learning_rate": 5.453213262193513e-07, "loss": 0.1132, "step": 29289 }, { "epoch": 0.8544839255499154, "grad_norm": 0.9810793593185674, "learning_rate": 5.451067998624276e-07, "loss": 0.1049, "step": 29290 }, { "epoch": 0.8545130987805589, "grad_norm": 0.8378819714164876, "learning_rate": 5.448923132777256e-07, "loss": 0.1206, "step": 29291 }, { "epoch": 0.8545422720112025, "grad_norm": 1.0451784392384882, "learning_rate": 5.44677866467162e-07, "loss": 0.1264, "step": 29292 }, { "epoch": 0.854571445241846, "grad_norm": 0.8355508850984268, "learning_rate": 5.444634594326503e-07, "loss": 0.0946, "step": 29293 }, { "epoch": 0.8546006184724897, "grad_norm": 0.8607237234297561, "learning_rate": 5.442490921761062e-07, "loss": 0.1121, "step": 29294 }, { "epoch": 0.8546297917031332, "grad_norm": 0.7746667125089692, "learning_rate": 5.440347646994426e-07, "loss": 0.1262, "step": 29295 }, { "epoch": 0.8546589649337768, "grad_norm": 0.9918799982651284, "learning_rate": 5.438204770045719e-07, "loss": 0.1231, "step": 29296 }, { "epoch": 0.8546881381644204, "grad_norm": 0.8050739299835314, "learning_rate": 5.43606229093408e-07, "loss": 0.1094, "step": 29297 }, { "epoch": 0.8547173113950639, "grad_norm": 0.6753780555823767, "learning_rate": 5.433920209678651e-07, "loss": 0.1074, "step": 29298 }, { "epoch": 0.8547464846257075, "grad_norm": 0.7884651155454309, "learning_rate": 5.431778526298531e-07, "loss": 0.1059, "step": 29299 }, { "epoch": 0.854775657856351, "grad_norm": 0.8300237753886387, "learning_rate": 5.429637240812863e-07, "loss": 0.1112, "step": 29300 }, { "epoch": 0.8548048310869946, "grad_norm": 0.9567400843568022, "learning_rate": 5.427496353240757e-07, "loss": 0.1101, "step": 29301 }, { "epoch": 0.8548340043176381, "grad_norm": 0.9788802429972455, "learning_rate": 5.425355863601311e-07, "loss": 0.1172, "step": 29302 }, { "epoch": 0.8548631775482817, "grad_norm": 0.7857087629895083, "learning_rate": 5.423215771913648e-07, "loss": 0.0997, "step": 29303 }, { "epoch": 0.8548923507789252, "grad_norm": 1.191233660332849, "learning_rate": 5.42107607819688e-07, "loss": 0.1066, "step": 29304 }, { "epoch": 0.8549215240095688, "grad_norm": 0.9282773544238075, "learning_rate": 5.418936782470108e-07, "loss": 0.1357, "step": 29305 }, { "epoch": 0.8549506972402123, "grad_norm": 0.8048667103505373, "learning_rate": 5.416797884752412e-07, "loss": 0.0964, "step": 29306 }, { "epoch": 0.854979870470856, "grad_norm": 0.6759841812231248, "learning_rate": 5.414659385062915e-07, "loss": 0.0757, "step": 29307 }, { "epoch": 0.8550090437014995, "grad_norm": 0.8143535130999393, "learning_rate": 5.412521283420691e-07, "loss": 0.1249, "step": 29308 }, { "epoch": 0.8550382169321431, "grad_norm": 1.0261914213636394, "learning_rate": 5.410383579844819e-07, "loss": 0.1434, "step": 29309 }, { "epoch": 0.8550673901627867, "grad_norm": 0.9932610365277973, "learning_rate": 5.408246274354412e-07, "loss": 0.1141, "step": 29310 }, { "epoch": 0.8550965633934302, "grad_norm": 0.8289037734445469, "learning_rate": 5.406109366968542e-07, "loss": 0.0952, "step": 29311 }, { "epoch": 0.8551257366240738, "grad_norm": 0.707035527951989, "learning_rate": 5.403972857706269e-07, "loss": 0.0987, "step": 29312 }, { "epoch": 0.8551549098547173, "grad_norm": 1.27919610258187, "learning_rate": 5.401836746586691e-07, "loss": 0.121, "step": 29313 }, { "epoch": 0.8551840830853609, "grad_norm": 0.8770398345237028, "learning_rate": 5.399701033628873e-07, "loss": 0.0863, "step": 29314 }, { "epoch": 0.8552132563160044, "grad_norm": 1.0098450048278378, "learning_rate": 5.397565718851861e-07, "loss": 0.1365, "step": 29315 }, { "epoch": 0.855242429546648, "grad_norm": 0.7429027275631229, "learning_rate": 5.395430802274737e-07, "loss": 0.0928, "step": 29316 }, { "epoch": 0.8552716027772915, "grad_norm": 1.0558971372613972, "learning_rate": 5.393296283916571e-07, "loss": 0.1118, "step": 29317 }, { "epoch": 0.8553007760079351, "grad_norm": 0.872179606238544, "learning_rate": 5.391162163796404e-07, "loss": 0.1204, "step": 29318 }, { "epoch": 0.8553299492385786, "grad_norm": 0.7364807336666261, "learning_rate": 5.38902844193328e-07, "loss": 0.1309, "step": 29319 }, { "epoch": 0.8553591224692222, "grad_norm": 1.0647888786797322, "learning_rate": 5.386895118346275e-07, "loss": 0.1439, "step": 29320 }, { "epoch": 0.8553882956998659, "grad_norm": 0.8659584860814815, "learning_rate": 5.384762193054411e-07, "loss": 0.0887, "step": 29321 }, { "epoch": 0.8554174689305094, "grad_norm": 0.716367057438113, "learning_rate": 5.38262966607675e-07, "loss": 0.0905, "step": 29322 }, { "epoch": 0.855446642161153, "grad_norm": 0.8314557367445821, "learning_rate": 5.380497537432306e-07, "loss": 0.1015, "step": 29323 }, { "epoch": 0.8554758153917965, "grad_norm": 0.935713220005702, "learning_rate": 5.37836580714014e-07, "loss": 0.1101, "step": 29324 }, { "epoch": 0.8555049886224401, "grad_norm": 0.8077184679024814, "learning_rate": 5.376234475219272e-07, "loss": 0.1062, "step": 29325 }, { "epoch": 0.8555341618530836, "grad_norm": 0.7880069280216613, "learning_rate": 5.374103541688724e-07, "loss": 0.1079, "step": 29326 }, { "epoch": 0.8555633350837272, "grad_norm": 0.7149049891944173, "learning_rate": 5.371973006567521e-07, "loss": 0.0921, "step": 29327 }, { "epoch": 0.8555925083143707, "grad_norm": 0.7817950072121391, "learning_rate": 5.369842869874703e-07, "loss": 0.0931, "step": 29328 }, { "epoch": 0.8556216815450143, "grad_norm": 0.8093934164300488, "learning_rate": 5.367713131629259e-07, "loss": 0.1161, "step": 29329 }, { "epoch": 0.8556508547756578, "grad_norm": 0.8492960270568929, "learning_rate": 5.365583791850232e-07, "loss": 0.1023, "step": 29330 }, { "epoch": 0.8556800280063014, "grad_norm": 0.8401120955569392, "learning_rate": 5.363454850556621e-07, "loss": 0.1195, "step": 29331 }, { "epoch": 0.8557092012369449, "grad_norm": 0.6950237996322621, "learning_rate": 5.361326307767411e-07, "loss": 0.1113, "step": 29332 }, { "epoch": 0.8557383744675885, "grad_norm": 0.7877889633338129, "learning_rate": 5.359198163501628e-07, "loss": 0.105, "step": 29333 }, { "epoch": 0.8557675476982322, "grad_norm": 0.7789335755700192, "learning_rate": 5.357070417778282e-07, "loss": 0.1108, "step": 29334 }, { "epoch": 0.8557967209288757, "grad_norm": 0.8458783956068391, "learning_rate": 5.354943070616348e-07, "loss": 0.1081, "step": 29335 }, { "epoch": 0.8558258941595193, "grad_norm": 0.8052134834191995, "learning_rate": 5.352816122034815e-07, "loss": 0.1087, "step": 29336 }, { "epoch": 0.8558550673901628, "grad_norm": 0.766542615685328, "learning_rate": 5.350689572052692e-07, "loss": 0.1099, "step": 29337 }, { "epoch": 0.8558842406208064, "grad_norm": 0.864832166392014, "learning_rate": 5.348563420688951e-07, "loss": 0.0988, "step": 29338 }, { "epoch": 0.8559134138514499, "grad_norm": 0.7109001278436873, "learning_rate": 5.346437667962562e-07, "loss": 0.0895, "step": 29339 }, { "epoch": 0.8559425870820935, "grad_norm": 0.7544914283330012, "learning_rate": 5.344312313892536e-07, "loss": 0.1045, "step": 29340 }, { "epoch": 0.855971760312737, "grad_norm": 0.7666375340072559, "learning_rate": 5.34218735849783e-07, "loss": 0.1037, "step": 29341 }, { "epoch": 0.8560009335433806, "grad_norm": 0.7521466008665737, "learning_rate": 5.340062801797402e-07, "loss": 0.1083, "step": 29342 }, { "epoch": 0.8560301067740241, "grad_norm": 0.8834062688206936, "learning_rate": 5.337938643810248e-07, "loss": 0.1275, "step": 29343 }, { "epoch": 0.8560592800046677, "grad_norm": 0.8681128326531021, "learning_rate": 5.335814884555313e-07, "loss": 0.0979, "step": 29344 }, { "epoch": 0.8560884532353112, "grad_norm": 0.77129467969646, "learning_rate": 5.333691524051549e-07, "loss": 0.1254, "step": 29345 }, { "epoch": 0.8561176264659548, "grad_norm": 0.7712736447209378, "learning_rate": 5.331568562317924e-07, "loss": 0.1112, "step": 29346 }, { "epoch": 0.8561467996965983, "grad_norm": 0.9815960137582008, "learning_rate": 5.3294459993734e-07, "loss": 0.1491, "step": 29347 }, { "epoch": 0.856175972927242, "grad_norm": 0.7662818827976515, "learning_rate": 5.327323835236919e-07, "loss": 0.1138, "step": 29348 }, { "epoch": 0.8562051461578856, "grad_norm": 1.0146229817022845, "learning_rate": 5.325202069927421e-07, "loss": 0.1066, "step": 29349 }, { "epoch": 0.8562343193885291, "grad_norm": 1.1253188556326577, "learning_rate": 5.323080703463862e-07, "loss": 0.13, "step": 29350 }, { "epoch": 0.8562634926191727, "grad_norm": 0.9550917774014132, "learning_rate": 5.320959735865161e-07, "loss": 0.1288, "step": 29351 }, { "epoch": 0.8562926658498162, "grad_norm": 1.6951356957353556, "learning_rate": 5.31883916715028e-07, "loss": 0.1206, "step": 29352 }, { "epoch": 0.8563218390804598, "grad_norm": 0.7042063336446921, "learning_rate": 5.316718997338128e-07, "loss": 0.0904, "step": 29353 }, { "epoch": 0.8563510123111033, "grad_norm": 0.703958415806529, "learning_rate": 5.314599226447648e-07, "loss": 0.0926, "step": 29354 }, { "epoch": 0.8563801855417469, "grad_norm": 0.7579356122279145, "learning_rate": 5.312479854497754e-07, "loss": 0.0997, "step": 29355 }, { "epoch": 0.8564093587723904, "grad_norm": 0.986485784109621, "learning_rate": 5.31036088150737e-07, "loss": 0.1041, "step": 29356 }, { "epoch": 0.856438532003034, "grad_norm": 1.199734024618942, "learning_rate": 5.308242307495414e-07, "loss": 0.0954, "step": 29357 }, { "epoch": 0.8564677052336775, "grad_norm": 0.7066274475070856, "learning_rate": 5.306124132480811e-07, "loss": 0.1054, "step": 29358 }, { "epoch": 0.8564968784643211, "grad_norm": 0.8040161117063696, "learning_rate": 5.304006356482449e-07, "loss": 0.1134, "step": 29359 }, { "epoch": 0.8565260516949647, "grad_norm": 0.8210433065501983, "learning_rate": 5.301888979519265e-07, "loss": 0.1115, "step": 29360 }, { "epoch": 0.8565552249256083, "grad_norm": 0.7970420691389978, "learning_rate": 5.299772001610143e-07, "loss": 0.135, "step": 29361 }, { "epoch": 0.8565843981562519, "grad_norm": 0.6669580327023281, "learning_rate": 5.297655422773973e-07, "loss": 0.1014, "step": 29362 }, { "epoch": 0.8566135713868954, "grad_norm": 0.964123335847811, "learning_rate": 5.295539243029668e-07, "loss": 0.1128, "step": 29363 }, { "epoch": 0.856642744617539, "grad_norm": 0.7834984730795249, "learning_rate": 5.293423462396124e-07, "loss": 0.0842, "step": 29364 }, { "epoch": 0.8566719178481825, "grad_norm": 0.8848024773925179, "learning_rate": 5.291308080892226e-07, "loss": 0.0944, "step": 29365 }, { "epoch": 0.8567010910788261, "grad_norm": 0.6919986244584313, "learning_rate": 5.289193098536844e-07, "loss": 0.1162, "step": 29366 }, { "epoch": 0.8567302643094696, "grad_norm": 0.861378097059738, "learning_rate": 5.287078515348887e-07, "loss": 0.1364, "step": 29367 }, { "epoch": 0.8567594375401132, "grad_norm": 0.8173164390501377, "learning_rate": 5.284964331347214e-07, "loss": 0.0966, "step": 29368 }, { "epoch": 0.8567886107707567, "grad_norm": 0.885406075990331, "learning_rate": 5.282850546550689e-07, "loss": 0.1085, "step": 29369 }, { "epoch": 0.8568177840014003, "grad_norm": 0.9307121622400922, "learning_rate": 5.280737160978216e-07, "loss": 0.133, "step": 29370 }, { "epoch": 0.8568469572320438, "grad_norm": 0.749292932707458, "learning_rate": 5.27862417464865e-07, "loss": 0.1077, "step": 29371 }, { "epoch": 0.8568761304626874, "grad_norm": 0.7245720534743995, "learning_rate": 5.276511587580835e-07, "loss": 0.1093, "step": 29372 }, { "epoch": 0.856905303693331, "grad_norm": 0.7584360200431304, "learning_rate": 5.274399399793667e-07, "loss": 0.1209, "step": 29373 }, { "epoch": 0.8569344769239745, "grad_norm": 1.0455436498297157, "learning_rate": 5.272287611305976e-07, "loss": 0.1208, "step": 29374 }, { "epoch": 0.8569636501546182, "grad_norm": 0.8819058019966554, "learning_rate": 5.270176222136619e-07, "loss": 0.1199, "step": 29375 }, { "epoch": 0.8569928233852617, "grad_norm": 0.7700924778430094, "learning_rate": 5.268065232304448e-07, "loss": 0.1113, "step": 29376 }, { "epoch": 0.8570219966159053, "grad_norm": 1.129022640684438, "learning_rate": 5.265954641828325e-07, "loss": 0.1218, "step": 29377 }, { "epoch": 0.8570511698465488, "grad_norm": 0.8689289119023054, "learning_rate": 5.263844450727079e-07, "loss": 0.1053, "step": 29378 }, { "epoch": 0.8570803430771924, "grad_norm": 0.8711379810428355, "learning_rate": 5.261734659019541e-07, "loss": 0.1232, "step": 29379 }, { "epoch": 0.8571095163078359, "grad_norm": 0.8130489438871341, "learning_rate": 5.259625266724566e-07, "loss": 0.1019, "step": 29380 }, { "epoch": 0.8571386895384795, "grad_norm": 0.9020637647946841, "learning_rate": 5.257516273860963e-07, "loss": 0.1222, "step": 29381 }, { "epoch": 0.857167862769123, "grad_norm": 1.0732804808961944, "learning_rate": 5.255407680447589e-07, "loss": 0.1331, "step": 29382 }, { "epoch": 0.8571970359997666, "grad_norm": 0.8667888667591757, "learning_rate": 5.253299486503238e-07, "loss": 0.1082, "step": 29383 }, { "epoch": 0.8572262092304102, "grad_norm": 0.8101618514340407, "learning_rate": 5.251191692046764e-07, "loss": 0.1161, "step": 29384 }, { "epoch": 0.8572553824610537, "grad_norm": 0.70625914522278, "learning_rate": 5.249084297096962e-07, "loss": 0.108, "step": 29385 }, { "epoch": 0.8572845556916973, "grad_norm": 0.9503020444073192, "learning_rate": 5.246977301672645e-07, "loss": 0.1103, "step": 29386 }, { "epoch": 0.8573137289223408, "grad_norm": 0.933888693705328, "learning_rate": 5.244870705792632e-07, "loss": 0.1096, "step": 29387 }, { "epoch": 0.8573429021529845, "grad_norm": 0.8162679424213231, "learning_rate": 5.24276450947574e-07, "loss": 0.0949, "step": 29388 }, { "epoch": 0.857372075383628, "grad_norm": 0.8624342590855032, "learning_rate": 5.240658712740748e-07, "loss": 0.0991, "step": 29389 }, { "epoch": 0.8574012486142716, "grad_norm": 0.7814906469522114, "learning_rate": 5.238553315606482e-07, "loss": 0.1017, "step": 29390 }, { "epoch": 0.8574304218449151, "grad_norm": 0.7770873719225593, "learning_rate": 5.236448318091731e-07, "loss": 0.0958, "step": 29391 }, { "epoch": 0.8574595950755587, "grad_norm": 0.9589446488203188, "learning_rate": 5.234343720215268e-07, "loss": 0.1103, "step": 29392 }, { "epoch": 0.8574887683062022, "grad_norm": 0.8069289270927945, "learning_rate": 5.232239521995902e-07, "loss": 0.1151, "step": 29393 }, { "epoch": 0.8575179415368458, "grad_norm": 0.7421129880346099, "learning_rate": 5.230135723452423e-07, "loss": 0.1071, "step": 29394 }, { "epoch": 0.8575471147674893, "grad_norm": 1.084033587602726, "learning_rate": 5.228032324603605e-07, "loss": 0.1159, "step": 29395 }, { "epoch": 0.8575762879981329, "grad_norm": 0.8517420453028375, "learning_rate": 5.225929325468216e-07, "loss": 0.0996, "step": 29396 }, { "epoch": 0.8576054612287765, "grad_norm": 0.7296119333962274, "learning_rate": 5.223826726065045e-07, "loss": 0.1074, "step": 29397 }, { "epoch": 0.85763463445942, "grad_norm": 0.9196641582945607, "learning_rate": 5.221724526412869e-07, "loss": 0.1288, "step": 29398 }, { "epoch": 0.8576638076900636, "grad_norm": 1.0408563371365043, "learning_rate": 5.219622726530427e-07, "loss": 0.1, "step": 29399 }, { "epoch": 0.8576929809207071, "grad_norm": 0.8169846588627468, "learning_rate": 5.21752132643652e-07, "loss": 0.1215, "step": 29400 }, { "epoch": 0.8577221541513507, "grad_norm": 0.8105030774802362, "learning_rate": 5.215420326149889e-07, "loss": 0.133, "step": 29401 }, { "epoch": 0.8577513273819943, "grad_norm": 0.7658042004307059, "learning_rate": 5.213319725689292e-07, "loss": 0.1055, "step": 29402 }, { "epoch": 0.8577805006126379, "grad_norm": 0.8088666126180049, "learning_rate": 5.211219525073491e-07, "loss": 0.105, "step": 29403 }, { "epoch": 0.8578096738432814, "grad_norm": 0.7757079944570077, "learning_rate": 5.209119724321226e-07, "loss": 0.1079, "step": 29404 }, { "epoch": 0.857838847073925, "grad_norm": 0.7172074945031245, "learning_rate": 5.207020323451245e-07, "loss": 0.1189, "step": 29405 }, { "epoch": 0.8578680203045685, "grad_norm": 0.9422097405586951, "learning_rate": 5.204921322482292e-07, "loss": 0.1313, "step": 29406 }, { "epoch": 0.8578971935352121, "grad_norm": 0.8578235439343572, "learning_rate": 5.202822721433115e-07, "loss": 0.1045, "step": 29407 }, { "epoch": 0.8579263667658557, "grad_norm": 0.8219734969697102, "learning_rate": 5.200724520322448e-07, "loss": 0.0942, "step": 29408 }, { "epoch": 0.8579555399964992, "grad_norm": 0.8307940094535435, "learning_rate": 5.198626719169004e-07, "loss": 0.1394, "step": 29409 }, { "epoch": 0.8579847132271428, "grad_norm": 0.9403933662185611, "learning_rate": 5.196529317991534e-07, "loss": 0.0993, "step": 29410 }, { "epoch": 0.8580138864577863, "grad_norm": 0.743426296762458, "learning_rate": 5.194432316808745e-07, "loss": 0.1053, "step": 29411 }, { "epoch": 0.8580430596884299, "grad_norm": 0.7736891008032006, "learning_rate": 5.19233571563938e-07, "loss": 0.1329, "step": 29412 }, { "epoch": 0.8580722329190734, "grad_norm": 0.7645620492073064, "learning_rate": 5.190239514502138e-07, "loss": 0.1067, "step": 29413 }, { "epoch": 0.858101406149717, "grad_norm": 1.0641400675618282, "learning_rate": 5.18814371341575e-07, "loss": 0.1352, "step": 29414 }, { "epoch": 0.8581305793803606, "grad_norm": 0.897047553575903, "learning_rate": 5.186048312398911e-07, "loss": 0.1247, "step": 29415 }, { "epoch": 0.8581597526110042, "grad_norm": 0.7767238626142258, "learning_rate": 5.18395331147033e-07, "loss": 0.1234, "step": 29416 }, { "epoch": 0.8581889258416477, "grad_norm": 0.9954592295334918, "learning_rate": 5.181858710648719e-07, "loss": 0.1156, "step": 29417 }, { "epoch": 0.8582180990722913, "grad_norm": 0.8472966942564119, "learning_rate": 5.179764509952779e-07, "loss": 0.1121, "step": 29418 }, { "epoch": 0.8582472723029348, "grad_norm": 0.6922976041201744, "learning_rate": 5.177670709401195e-07, "loss": 0.1179, "step": 29419 }, { "epoch": 0.8582764455335784, "grad_norm": 0.8547285959502339, "learning_rate": 5.175577309012675e-07, "loss": 0.1235, "step": 29420 }, { "epoch": 0.858305618764222, "grad_norm": 0.8427890899975256, "learning_rate": 5.173484308805899e-07, "loss": 0.0913, "step": 29421 }, { "epoch": 0.8583347919948655, "grad_norm": 0.8593176410266, "learning_rate": 5.171391708799545e-07, "loss": 0.1306, "step": 29422 }, { "epoch": 0.8583639652255091, "grad_norm": 0.6973922847440359, "learning_rate": 5.169299509012304e-07, "loss": 0.0893, "step": 29423 }, { "epoch": 0.8583931384561526, "grad_norm": 0.8347714708783674, "learning_rate": 5.167207709462868e-07, "loss": 0.1275, "step": 29424 }, { "epoch": 0.8584223116867962, "grad_norm": 0.7919908106576974, "learning_rate": 5.165116310169899e-07, "loss": 0.1037, "step": 29425 }, { "epoch": 0.8584514849174397, "grad_norm": 0.8015650500711422, "learning_rate": 5.163025311152054e-07, "loss": 0.1129, "step": 29426 }, { "epoch": 0.8584806581480833, "grad_norm": 0.7722056462631454, "learning_rate": 5.160934712428029e-07, "loss": 0.1266, "step": 29427 }, { "epoch": 0.8585098313787268, "grad_norm": 0.9812732596639206, "learning_rate": 5.158844514016464e-07, "loss": 0.1156, "step": 29428 }, { "epoch": 0.8585390046093705, "grad_norm": 0.751644355534073, "learning_rate": 5.156754715936041e-07, "loss": 0.1065, "step": 29429 }, { "epoch": 0.858568177840014, "grad_norm": 0.7564942418699732, "learning_rate": 5.154665318205399e-07, "loss": 0.1209, "step": 29430 }, { "epoch": 0.8585973510706576, "grad_norm": 0.7916462788596871, "learning_rate": 5.152576320843206e-07, "loss": 0.1113, "step": 29431 }, { "epoch": 0.8586265243013012, "grad_norm": 0.8626458932377982, "learning_rate": 5.150487723868097e-07, "loss": 0.0907, "step": 29432 }, { "epoch": 0.8586556975319447, "grad_norm": 0.784768920987382, "learning_rate": 5.148399527298737e-07, "loss": 0.1249, "step": 29433 }, { "epoch": 0.8586848707625883, "grad_norm": 0.8921925899855412, "learning_rate": 5.146311731153752e-07, "loss": 0.1036, "step": 29434 }, { "epoch": 0.8587140439932318, "grad_norm": 0.9137812493085843, "learning_rate": 5.144224335451792e-07, "loss": 0.1109, "step": 29435 }, { "epoch": 0.8587432172238754, "grad_norm": 1.0167966710404392, "learning_rate": 5.142137340211483e-07, "loss": 0.1152, "step": 29436 }, { "epoch": 0.8587723904545189, "grad_norm": 0.7613544011294333, "learning_rate": 5.140050745451475e-07, "loss": 0.1214, "step": 29437 }, { "epoch": 0.8588015636851625, "grad_norm": 0.8576052611696953, "learning_rate": 5.137964551190383e-07, "loss": 0.119, "step": 29438 }, { "epoch": 0.858830736915806, "grad_norm": 1.0403343300488277, "learning_rate": 5.135878757446827e-07, "loss": 0.0913, "step": 29439 }, { "epoch": 0.8588599101464496, "grad_norm": 0.7258185632604269, "learning_rate": 5.133793364239431e-07, "loss": 0.0994, "step": 29440 }, { "epoch": 0.8588890833770931, "grad_norm": 1.0186480626788514, "learning_rate": 5.131708371586829e-07, "loss": 0.1223, "step": 29441 }, { "epoch": 0.8589182566077367, "grad_norm": 0.8462446482007799, "learning_rate": 5.129623779507625e-07, "loss": 0.1289, "step": 29442 }, { "epoch": 0.8589474298383803, "grad_norm": 0.9467615585489157, "learning_rate": 5.127539588020419e-07, "loss": 0.0982, "step": 29443 }, { "epoch": 0.8589766030690239, "grad_norm": 0.8042426959877547, "learning_rate": 5.125455797143836e-07, "loss": 0.11, "step": 29444 }, { "epoch": 0.8590057762996675, "grad_norm": 0.7695714351117898, "learning_rate": 5.123372406896471e-07, "loss": 0.1172, "step": 29445 }, { "epoch": 0.859034949530311, "grad_norm": 1.2927717821152398, "learning_rate": 5.121289417296904e-07, "loss": 0.1035, "step": 29446 }, { "epoch": 0.8590641227609546, "grad_norm": 0.781705172671196, "learning_rate": 5.119206828363777e-07, "loss": 0.12, "step": 29447 }, { "epoch": 0.8590932959915981, "grad_norm": 0.9423321120858826, "learning_rate": 5.117124640115651e-07, "loss": 0.1116, "step": 29448 }, { "epoch": 0.8591224692222417, "grad_norm": 0.878780722775897, "learning_rate": 5.115042852571111e-07, "loss": 0.1077, "step": 29449 }, { "epoch": 0.8591516424528852, "grad_norm": 0.9486237045265276, "learning_rate": 5.112961465748767e-07, "loss": 0.1201, "step": 29450 }, { "epoch": 0.8591808156835288, "grad_norm": 0.8706977732052311, "learning_rate": 5.11088047966719e-07, "loss": 0.1344, "step": 29451 }, { "epoch": 0.8592099889141723, "grad_norm": 0.7262858207686353, "learning_rate": 5.10879989434494e-07, "loss": 0.0984, "step": 29452 }, { "epoch": 0.8592391621448159, "grad_norm": 0.9363907226557692, "learning_rate": 5.106719709800612e-07, "loss": 0.1225, "step": 29453 }, { "epoch": 0.8592683353754594, "grad_norm": 0.7580962212275943, "learning_rate": 5.104639926052785e-07, "loss": 0.0964, "step": 29454 }, { "epoch": 0.859297508606103, "grad_norm": 0.8527587779765261, "learning_rate": 5.102560543120011e-07, "loss": 0.1116, "step": 29455 }, { "epoch": 0.8593266818367467, "grad_norm": 0.8396394476457001, "learning_rate": 5.100481561020853e-07, "loss": 0.1236, "step": 29456 }, { "epoch": 0.8593558550673902, "grad_norm": 0.8179078502682865, "learning_rate": 5.098402979773886e-07, "loss": 0.1001, "step": 29457 }, { "epoch": 0.8593850282980338, "grad_norm": 0.9643860638380869, "learning_rate": 5.096324799397645e-07, "loss": 0.1241, "step": 29458 }, { "epoch": 0.8594142015286773, "grad_norm": 0.8908851752833253, "learning_rate": 5.094247019910709e-07, "loss": 0.1392, "step": 29459 }, { "epoch": 0.8594433747593209, "grad_norm": 0.8365052768357152, "learning_rate": 5.092169641331607e-07, "loss": 0.1018, "step": 29460 }, { "epoch": 0.8594725479899644, "grad_norm": 1.0732215328561312, "learning_rate": 5.090092663678903e-07, "loss": 0.095, "step": 29461 }, { "epoch": 0.859501721220608, "grad_norm": 0.8282863012039224, "learning_rate": 5.08801608697112e-07, "loss": 0.1072, "step": 29462 }, { "epoch": 0.8595308944512515, "grad_norm": 1.095662547070976, "learning_rate": 5.085939911226822e-07, "loss": 0.1489, "step": 29463 }, { "epoch": 0.8595600676818951, "grad_norm": 1.1262591698820914, "learning_rate": 5.083864136464517e-07, "loss": 0.1046, "step": 29464 }, { "epoch": 0.8595892409125386, "grad_norm": 0.7123270198738395, "learning_rate": 5.08178876270276e-07, "loss": 0.1164, "step": 29465 }, { "epoch": 0.8596184141431822, "grad_norm": 1.1177619069218627, "learning_rate": 5.079713789960061e-07, "loss": 0.116, "step": 29466 }, { "epoch": 0.8596475873738257, "grad_norm": 1.0782613105866432, "learning_rate": 5.077639218254965e-07, "loss": 0.1242, "step": 29467 }, { "epoch": 0.8596767606044693, "grad_norm": 1.2186038789684839, "learning_rate": 5.075565047605979e-07, "loss": 0.1009, "step": 29468 }, { "epoch": 0.8597059338351128, "grad_norm": 0.7794700839204736, "learning_rate": 5.073491278031617e-07, "loss": 0.1076, "step": 29469 }, { "epoch": 0.8597351070657565, "grad_norm": 0.870849339425482, "learning_rate": 5.071417909550402e-07, "loss": 0.1396, "step": 29470 }, { "epoch": 0.8597642802964001, "grad_norm": 0.7510726306428365, "learning_rate": 5.069344942180848e-07, "loss": 0.1076, "step": 29471 }, { "epoch": 0.8597934535270436, "grad_norm": 0.6740657251828723, "learning_rate": 5.067272375941463e-07, "loss": 0.0957, "step": 29472 }, { "epoch": 0.8598226267576872, "grad_norm": 0.891263377532334, "learning_rate": 5.065200210850723e-07, "loss": 0.1336, "step": 29473 }, { "epoch": 0.8598517999883307, "grad_norm": 0.8615194734568703, "learning_rate": 5.063128446927168e-07, "loss": 0.1181, "step": 29474 }, { "epoch": 0.8598809732189743, "grad_norm": 0.855735698394919, "learning_rate": 5.061057084189274e-07, "loss": 0.098, "step": 29475 }, { "epoch": 0.8599101464496178, "grad_norm": 0.9202547256040262, "learning_rate": 5.058986122655512e-07, "loss": 0.1092, "step": 29476 }, { "epoch": 0.8599393196802614, "grad_norm": 0.8691794033323256, "learning_rate": 5.056915562344411e-07, "loss": 0.1379, "step": 29477 }, { "epoch": 0.8599684929109049, "grad_norm": 0.7190411998033227, "learning_rate": 5.054845403274444e-07, "loss": 0.1002, "step": 29478 }, { "epoch": 0.8599976661415485, "grad_norm": 0.9201431554442078, "learning_rate": 5.052775645464075e-07, "loss": 0.1047, "step": 29479 }, { "epoch": 0.860026839372192, "grad_norm": 0.8565354245381157, "learning_rate": 5.050706288931806e-07, "loss": 0.1208, "step": 29480 }, { "epoch": 0.8600560126028356, "grad_norm": 1.1982432707796746, "learning_rate": 5.048637333696105e-07, "loss": 0.1142, "step": 29481 }, { "epoch": 0.8600851858334791, "grad_norm": 0.7490489965000526, "learning_rate": 5.046568779775424e-07, "loss": 0.0932, "step": 29482 }, { "epoch": 0.8601143590641228, "grad_norm": 0.8452700790680486, "learning_rate": 5.044500627188248e-07, "loss": 0.1063, "step": 29483 }, { "epoch": 0.8601435322947664, "grad_norm": 0.8475220682439937, "learning_rate": 5.042432875953046e-07, "loss": 0.1211, "step": 29484 }, { "epoch": 0.8601727055254099, "grad_norm": 0.8374258136534395, "learning_rate": 5.040365526088276e-07, "loss": 0.1239, "step": 29485 }, { "epoch": 0.8602018787560535, "grad_norm": 0.6981846126698772, "learning_rate": 5.038298577612378e-07, "loss": 0.0896, "step": 29486 }, { "epoch": 0.860231051986697, "grad_norm": 0.920102923102032, "learning_rate": 5.036232030543825e-07, "loss": 0.1133, "step": 29487 }, { "epoch": 0.8602602252173406, "grad_norm": 0.8147794332701817, "learning_rate": 5.034165884901049e-07, "loss": 0.1276, "step": 29488 }, { "epoch": 0.8602893984479841, "grad_norm": 0.8643020393761554, "learning_rate": 5.032100140702518e-07, "loss": 0.1059, "step": 29489 }, { "epoch": 0.8603185716786277, "grad_norm": 1.0273624491597337, "learning_rate": 5.030034797966649e-07, "loss": 0.1211, "step": 29490 }, { "epoch": 0.8603477449092712, "grad_norm": 1.0993653441757587, "learning_rate": 5.027969856711907e-07, "loss": 0.1197, "step": 29491 }, { "epoch": 0.8603769181399148, "grad_norm": 0.8674785008953878, "learning_rate": 5.025905316956703e-07, "loss": 0.117, "step": 29492 }, { "epoch": 0.8604060913705583, "grad_norm": 0.8134897664006652, "learning_rate": 5.023841178719491e-07, "loss": 0.1089, "step": 29493 }, { "epoch": 0.8604352646012019, "grad_norm": 0.740252608636668, "learning_rate": 5.021777442018677e-07, "loss": 0.106, "step": 29494 }, { "epoch": 0.8604644378318455, "grad_norm": 0.6740421975228598, "learning_rate": 5.019714106872709e-07, "loss": 0.0992, "step": 29495 }, { "epoch": 0.860493611062489, "grad_norm": 0.7712131246928646, "learning_rate": 5.017651173299981e-07, "loss": 0.0991, "step": 29496 }, { "epoch": 0.8605227842931327, "grad_norm": 0.6535127532044942, "learning_rate": 5.015588641318941e-07, "loss": 0.0951, "step": 29497 }, { "epoch": 0.8605519575237762, "grad_norm": 0.8457754040744747, "learning_rate": 5.013526510947986e-07, "loss": 0.1152, "step": 29498 }, { "epoch": 0.8605811307544198, "grad_norm": 0.9662646942978871, "learning_rate": 5.011464782205511e-07, "loss": 0.102, "step": 29499 }, { "epoch": 0.8606103039850633, "grad_norm": 1.363511336845176, "learning_rate": 5.009403455109946e-07, "loss": 0.106, "step": 29500 }, { "epoch": 0.8606394772157069, "grad_norm": 0.7929904353469331, "learning_rate": 5.007342529679693e-07, "loss": 0.1108, "step": 29501 }, { "epoch": 0.8606686504463504, "grad_norm": 0.7810159069157689, "learning_rate": 5.005282005933148e-07, "loss": 0.1041, "step": 29502 }, { "epoch": 0.860697823676994, "grad_norm": 0.9411311208768363, "learning_rate": 5.003221883888692e-07, "loss": 0.1139, "step": 29503 }, { "epoch": 0.8607269969076375, "grad_norm": 0.901259545584759, "learning_rate": 5.001162163564738e-07, "loss": 0.1218, "step": 29504 }, { "epoch": 0.8607561701382811, "grad_norm": 0.7228034390535022, "learning_rate": 4.99910284497967e-07, "loss": 0.1082, "step": 29505 }, { "epoch": 0.8607853433689246, "grad_norm": 0.9359231392219072, "learning_rate": 4.997043928151851e-07, "loss": 0.1067, "step": 29506 }, { "epoch": 0.8608145165995682, "grad_norm": 0.7750835167395633, "learning_rate": 4.994985413099695e-07, "loss": 0.0999, "step": 29507 }, { "epoch": 0.8608436898302118, "grad_norm": 0.9282258491681282, "learning_rate": 4.992927299841566e-07, "loss": 0.1321, "step": 29508 }, { "epoch": 0.8608728630608553, "grad_norm": 0.8180139497002444, "learning_rate": 4.99086958839583e-07, "loss": 0.1138, "step": 29509 }, { "epoch": 0.860902036291499, "grad_norm": 1.0599843840977938, "learning_rate": 4.988812278780875e-07, "loss": 0.1194, "step": 29510 }, { "epoch": 0.8609312095221425, "grad_norm": 0.7357831173012828, "learning_rate": 4.986755371015062e-07, "loss": 0.1146, "step": 29511 }, { "epoch": 0.8609603827527861, "grad_norm": 0.7365994357681691, "learning_rate": 4.984698865116739e-07, "loss": 0.1137, "step": 29512 }, { "epoch": 0.8609895559834296, "grad_norm": 0.8982152257623877, "learning_rate": 4.982642761104279e-07, "loss": 0.1135, "step": 29513 }, { "epoch": 0.8610187292140732, "grad_norm": 0.7699343204703459, "learning_rate": 4.980587058996044e-07, "loss": 0.1015, "step": 29514 }, { "epoch": 0.8610479024447167, "grad_norm": 0.7885245394304854, "learning_rate": 4.978531758810385e-07, "loss": 0.1134, "step": 29515 }, { "epoch": 0.8610770756753603, "grad_norm": 0.7656299112574463, "learning_rate": 4.976476860565638e-07, "loss": 0.0949, "step": 29516 }, { "epoch": 0.8611062489060038, "grad_norm": 0.9263050205599165, "learning_rate": 4.974422364280169e-07, "loss": 0.1144, "step": 29517 }, { "epoch": 0.8611354221366474, "grad_norm": 0.8468985385132534, "learning_rate": 4.972368269972294e-07, "loss": 0.1032, "step": 29518 }, { "epoch": 0.861164595367291, "grad_norm": 0.823224175773147, "learning_rate": 4.970314577660379e-07, "loss": 0.1014, "step": 29519 }, { "epoch": 0.8611937685979345, "grad_norm": 0.8047470667383144, "learning_rate": 4.96826128736273e-07, "loss": 0.1455, "step": 29520 }, { "epoch": 0.8612229418285781, "grad_norm": 0.9622720918984243, "learning_rate": 4.96620839909771e-07, "loss": 0.0958, "step": 29521 }, { "epoch": 0.8612521150592216, "grad_norm": 0.6545674953297864, "learning_rate": 4.964155912883628e-07, "loss": 0.1074, "step": 29522 }, { "epoch": 0.8612812882898652, "grad_norm": 0.781662532258794, "learning_rate": 4.962103828738807e-07, "loss": 0.0923, "step": 29523 }, { "epoch": 0.8613104615205088, "grad_norm": 0.8537351550173269, "learning_rate": 4.960052146681566e-07, "loss": 0.1015, "step": 29524 }, { "epoch": 0.8613396347511524, "grad_norm": 0.9227109810282786, "learning_rate": 4.95800086673024e-07, "loss": 0.0995, "step": 29525 }, { "epoch": 0.8613688079817959, "grad_norm": 0.6084352556263708, "learning_rate": 4.955949988903119e-07, "loss": 0.0862, "step": 29526 }, { "epoch": 0.8613979812124395, "grad_norm": 0.9855506075943639, "learning_rate": 4.953899513218535e-07, "loss": 0.1151, "step": 29527 }, { "epoch": 0.861427154443083, "grad_norm": 0.8146833029360177, "learning_rate": 4.951849439694778e-07, "loss": 0.1124, "step": 29528 }, { "epoch": 0.8614563276737266, "grad_norm": 1.0698802220578514, "learning_rate": 4.94979976835015e-07, "loss": 0.1176, "step": 29529 }, { "epoch": 0.8614855009043701, "grad_norm": 0.8454586330671141, "learning_rate": 4.947750499202952e-07, "loss": 0.1153, "step": 29530 }, { "epoch": 0.8615146741350137, "grad_norm": 0.8488563654978626, "learning_rate": 4.945701632271499e-07, "loss": 0.0999, "step": 29531 }, { "epoch": 0.8615438473656573, "grad_norm": 0.8152915120222931, "learning_rate": 4.943653167574058e-07, "loss": 0.0987, "step": 29532 }, { "epoch": 0.8615730205963008, "grad_norm": 0.8512444655778405, "learning_rate": 4.941605105128922e-07, "loss": 0.1129, "step": 29533 }, { "epoch": 0.8616021938269444, "grad_norm": 0.7923319732939569, "learning_rate": 4.93955744495439e-07, "loss": 0.1134, "step": 29534 }, { "epoch": 0.8616313670575879, "grad_norm": 0.990599679297401, "learning_rate": 4.937510187068728e-07, "loss": 0.0937, "step": 29535 }, { "epoch": 0.8616605402882315, "grad_norm": 0.9879391746123323, "learning_rate": 4.935463331490198e-07, "loss": 0.1152, "step": 29536 }, { "epoch": 0.8616897135188751, "grad_norm": 0.8191392081185264, "learning_rate": 4.933416878237118e-07, "loss": 0.1311, "step": 29537 }, { "epoch": 0.8617188867495187, "grad_norm": 0.9034065550039516, "learning_rate": 4.93137082732773e-07, "loss": 0.1108, "step": 29538 }, { "epoch": 0.8617480599801622, "grad_norm": 0.879997117093169, "learning_rate": 4.929325178780293e-07, "loss": 0.1106, "step": 29539 }, { "epoch": 0.8617772332108058, "grad_norm": 0.8021533510115464, "learning_rate": 4.927279932613094e-07, "loss": 0.102, "step": 29540 }, { "epoch": 0.8618064064414493, "grad_norm": 1.1201269405766916, "learning_rate": 4.925235088844382e-07, "loss": 0.1085, "step": 29541 }, { "epoch": 0.8618355796720929, "grad_norm": 1.093923757410142, "learning_rate": 4.923190647492399e-07, "loss": 0.1399, "step": 29542 }, { "epoch": 0.8618647529027365, "grad_norm": 0.8121647255800227, "learning_rate": 4.921146608575405e-07, "loss": 0.1077, "step": 29543 }, { "epoch": 0.86189392613338, "grad_norm": 0.959957420606875, "learning_rate": 4.919102972111667e-07, "loss": 0.1349, "step": 29544 }, { "epoch": 0.8619230993640236, "grad_norm": 0.9910972412210832, "learning_rate": 4.917059738119417e-07, "loss": 0.1277, "step": 29545 }, { "epoch": 0.8619522725946671, "grad_norm": 0.9460236678096623, "learning_rate": 4.915016906616888e-07, "loss": 0.0984, "step": 29546 }, { "epoch": 0.8619814458253107, "grad_norm": 0.8204929894094617, "learning_rate": 4.912974477622329e-07, "loss": 0.101, "step": 29547 }, { "epoch": 0.8620106190559542, "grad_norm": 1.071794228341519, "learning_rate": 4.910932451153966e-07, "loss": 0.1287, "step": 29548 }, { "epoch": 0.8620397922865978, "grad_norm": 1.0222057992768019, "learning_rate": 4.90889082723004e-07, "loss": 0.1319, "step": 29549 }, { "epoch": 0.8620689655172413, "grad_norm": 0.7890094777152389, "learning_rate": 4.906849605868763e-07, "loss": 0.1249, "step": 29550 }, { "epoch": 0.862098138747885, "grad_norm": 0.8086902933476409, "learning_rate": 4.904808787088383e-07, "loss": 0.1096, "step": 29551 }, { "epoch": 0.8621273119785285, "grad_norm": 0.7286563718766472, "learning_rate": 4.902768370907102e-07, "loss": 0.1083, "step": 29552 }, { "epoch": 0.8621564852091721, "grad_norm": 1.0063736550386313, "learning_rate": 4.900728357343127e-07, "loss": 0.1085, "step": 29553 }, { "epoch": 0.8621856584398156, "grad_norm": 0.898909238721518, "learning_rate": 4.898688746414687e-07, "loss": 0.1047, "step": 29554 }, { "epoch": 0.8622148316704592, "grad_norm": 0.9043671477518233, "learning_rate": 4.896649538139992e-07, "loss": 0.1001, "step": 29555 }, { "epoch": 0.8622440049011028, "grad_norm": 0.9191083316418673, "learning_rate": 4.894610732537241e-07, "loss": 0.1051, "step": 29556 }, { "epoch": 0.8622731781317463, "grad_norm": 0.8197405853127391, "learning_rate": 4.892572329624639e-07, "loss": 0.0833, "step": 29557 }, { "epoch": 0.8623023513623899, "grad_norm": 0.711686348154552, "learning_rate": 4.890534329420388e-07, "loss": 0.1055, "step": 29558 }, { "epoch": 0.8623315245930334, "grad_norm": 0.7694761334203983, "learning_rate": 4.888496731942671e-07, "loss": 0.1095, "step": 29559 }, { "epoch": 0.862360697823677, "grad_norm": 1.086513979497295, "learning_rate": 4.886459537209681e-07, "loss": 0.146, "step": 29560 }, { "epoch": 0.8623898710543205, "grad_norm": 0.8444544898854911, "learning_rate": 4.884422745239625e-07, "loss": 0.1104, "step": 29561 }, { "epoch": 0.8624190442849641, "grad_norm": 0.9671191569305575, "learning_rate": 4.882386356050667e-07, "loss": 0.1252, "step": 29562 }, { "epoch": 0.8624482175156076, "grad_norm": 0.8935100670661668, "learning_rate": 4.880350369660985e-07, "loss": 0.1355, "step": 29563 }, { "epoch": 0.8624773907462513, "grad_norm": 0.772155745151359, "learning_rate": 4.878314786088778e-07, "loss": 0.1052, "step": 29564 }, { "epoch": 0.8625065639768948, "grad_norm": 0.8498414873168953, "learning_rate": 4.876279605352202e-07, "loss": 0.1172, "step": 29565 }, { "epoch": 0.8625357372075384, "grad_norm": 0.7325778536929377, "learning_rate": 4.87424482746941e-07, "loss": 0.1088, "step": 29566 }, { "epoch": 0.862564910438182, "grad_norm": 0.982890890838626, "learning_rate": 4.872210452458609e-07, "loss": 0.0994, "step": 29567 }, { "epoch": 0.8625940836688255, "grad_norm": 0.7980004055946873, "learning_rate": 4.87017648033794e-07, "loss": 0.0958, "step": 29568 }, { "epoch": 0.8626232568994691, "grad_norm": 0.9001500264776446, "learning_rate": 4.868142911125551e-07, "loss": 0.1077, "step": 29569 }, { "epoch": 0.8626524301301126, "grad_norm": 0.8083634707055541, "learning_rate": 4.86610974483962e-07, "loss": 0.1135, "step": 29570 }, { "epoch": 0.8626816033607562, "grad_norm": 1.0745313282423126, "learning_rate": 4.864076981498284e-07, "loss": 0.1129, "step": 29571 }, { "epoch": 0.8627107765913997, "grad_norm": 0.9907363144617128, "learning_rate": 4.862044621119688e-07, "loss": 0.1273, "step": 29572 }, { "epoch": 0.8627399498220433, "grad_norm": 1.0432915768941344, "learning_rate": 4.860012663721981e-07, "loss": 0.131, "step": 29573 }, { "epoch": 0.8627691230526868, "grad_norm": 0.8546023685608694, "learning_rate": 4.857981109323312e-07, "loss": 0.1071, "step": 29574 }, { "epoch": 0.8627982962833304, "grad_norm": 0.9118591567924467, "learning_rate": 4.855949957941814e-07, "loss": 0.0988, "step": 29575 }, { "epoch": 0.8628274695139739, "grad_norm": 0.7762545627015762, "learning_rate": 4.853919209595604e-07, "loss": 0.1196, "step": 29576 }, { "epoch": 0.8628566427446175, "grad_norm": 1.0126079495030087, "learning_rate": 4.851888864302839e-07, "loss": 0.122, "step": 29577 }, { "epoch": 0.8628858159752612, "grad_norm": 1.0261197469856702, "learning_rate": 4.849858922081623e-07, "loss": 0.1097, "step": 29578 }, { "epoch": 0.8629149892059047, "grad_norm": 0.8383641536878149, "learning_rate": 4.847829382950098e-07, "loss": 0.1282, "step": 29579 }, { "epoch": 0.8629441624365483, "grad_norm": 0.867578677704776, "learning_rate": 4.845800246926369e-07, "loss": 0.1197, "step": 29580 }, { "epoch": 0.8629733356671918, "grad_norm": 0.9841259215689497, "learning_rate": 4.843771514028555e-07, "loss": 0.1044, "step": 29581 }, { "epoch": 0.8630025088978354, "grad_norm": 0.868658120493819, "learning_rate": 4.841743184274778e-07, "loss": 0.1045, "step": 29582 }, { "epoch": 0.8630316821284789, "grad_norm": 0.7679781412591147, "learning_rate": 4.839715257683125e-07, "loss": 0.1215, "step": 29583 }, { "epoch": 0.8630608553591225, "grad_norm": 0.7238086362854077, "learning_rate": 4.837687734271713e-07, "loss": 0.1178, "step": 29584 }, { "epoch": 0.863090028589766, "grad_norm": 0.8831568359416163, "learning_rate": 4.835660614058657e-07, "loss": 0.1146, "step": 29585 }, { "epoch": 0.8631192018204096, "grad_norm": 1.0227267616933795, "learning_rate": 4.833633897062029e-07, "loss": 0.1314, "step": 29586 }, { "epoch": 0.8631483750510531, "grad_norm": 1.3691232071072594, "learning_rate": 4.831607583299941e-07, "loss": 0.1041, "step": 29587 }, { "epoch": 0.8631775482816967, "grad_norm": 0.9266690092284912, "learning_rate": 4.829581672790484e-07, "loss": 0.1326, "step": 29588 }, { "epoch": 0.8632067215123402, "grad_norm": 0.6945943866312693, "learning_rate": 4.827556165551728e-07, "loss": 0.0837, "step": 29589 }, { "epoch": 0.8632358947429838, "grad_norm": 0.7280800558592375, "learning_rate": 4.825531061601768e-07, "loss": 0.1126, "step": 29590 }, { "epoch": 0.8632650679736275, "grad_norm": 1.0866269712261198, "learning_rate": 4.823506360958691e-07, "loss": 0.108, "step": 29591 }, { "epoch": 0.863294241204271, "grad_norm": 0.9334877491028227, "learning_rate": 4.821482063640559e-07, "loss": 0.1049, "step": 29592 }, { "epoch": 0.8633234144349146, "grad_norm": 0.8746056086522032, "learning_rate": 4.819458169665447e-07, "loss": 0.115, "step": 29593 }, { "epoch": 0.8633525876655581, "grad_norm": 0.8324723328818899, "learning_rate": 4.817434679051436e-07, "loss": 0.1243, "step": 29594 }, { "epoch": 0.8633817608962017, "grad_norm": 0.7242512479396693, "learning_rate": 4.815411591816583e-07, "loss": 0.0997, "step": 29595 }, { "epoch": 0.8634109341268452, "grad_norm": 0.7987187858506292, "learning_rate": 4.813388907978927e-07, "loss": 0.117, "step": 29596 }, { "epoch": 0.8634401073574888, "grad_norm": 0.8758749388026807, "learning_rate": 4.811366627556569e-07, "loss": 0.12, "step": 29597 }, { "epoch": 0.8634692805881323, "grad_norm": 0.8615853750316675, "learning_rate": 4.809344750567541e-07, "loss": 0.1227, "step": 29598 }, { "epoch": 0.8634984538187759, "grad_norm": 0.9269933404487796, "learning_rate": 4.807323277029885e-07, "loss": 0.1303, "step": 29599 }, { "epoch": 0.8635276270494194, "grad_norm": 0.8515560207390576, "learning_rate": 4.805302206961671e-07, "loss": 0.1002, "step": 29600 }, { "epoch": 0.863556800280063, "grad_norm": 0.8271251257469049, "learning_rate": 4.803281540380927e-07, "loss": 0.1053, "step": 29601 }, { "epoch": 0.8635859735107065, "grad_norm": 0.8991172528992981, "learning_rate": 4.801261277305691e-07, "loss": 0.1464, "step": 29602 }, { "epoch": 0.8636151467413501, "grad_norm": 0.8219771993122964, "learning_rate": 4.799241417754003e-07, "loss": 0.1001, "step": 29603 }, { "epoch": 0.8636443199719936, "grad_norm": 0.7661951768747031, "learning_rate": 4.797221961743903e-07, "loss": 0.1231, "step": 29604 }, { "epoch": 0.8636734932026373, "grad_norm": 0.800394710623368, "learning_rate": 4.795202909293417e-07, "loss": 0.0881, "step": 29605 }, { "epoch": 0.8637026664332809, "grad_norm": 0.957192883285935, "learning_rate": 4.793184260420558e-07, "loss": 0.122, "step": 29606 }, { "epoch": 0.8637318396639244, "grad_norm": 0.8945823312298447, "learning_rate": 4.791166015143367e-07, "loss": 0.1059, "step": 29607 }, { "epoch": 0.863761012894568, "grad_norm": 0.8919861071128578, "learning_rate": 4.789148173479846e-07, "loss": 0.1174, "step": 29608 }, { "epoch": 0.8637901861252115, "grad_norm": 0.8249856781036057, "learning_rate": 4.787130735448025e-07, "loss": 0.1166, "step": 29609 }, { "epoch": 0.8638193593558551, "grad_norm": 0.8328722605491482, "learning_rate": 4.785113701065902e-07, "loss": 0.1027, "step": 29610 }, { "epoch": 0.8638485325864986, "grad_norm": 0.826765544122123, "learning_rate": 4.783097070351494e-07, "loss": 0.0985, "step": 29611 }, { "epoch": 0.8638777058171422, "grad_norm": 0.8292172219427822, "learning_rate": 4.781080843322805e-07, "loss": 0.1188, "step": 29612 }, { "epoch": 0.8639068790477857, "grad_norm": 0.8156842541071601, "learning_rate": 4.779065019997813e-07, "loss": 0.1044, "step": 29613 }, { "epoch": 0.8639360522784293, "grad_norm": 0.6863972599258233, "learning_rate": 4.777049600394551e-07, "loss": 0.0988, "step": 29614 }, { "epoch": 0.8639652255090728, "grad_norm": 0.8723736753961623, "learning_rate": 4.775034584530997e-07, "loss": 0.1236, "step": 29615 }, { "epoch": 0.8639943987397164, "grad_norm": 0.7932184896501067, "learning_rate": 4.773019972425124e-07, "loss": 0.1085, "step": 29616 }, { "epoch": 0.86402357197036, "grad_norm": 0.8837860027059837, "learning_rate": 4.771005764094944e-07, "loss": 0.1008, "step": 29617 }, { "epoch": 0.8640527452010036, "grad_norm": 0.9156822763571928, "learning_rate": 4.768991959558428e-07, "loss": 0.0891, "step": 29618 }, { "epoch": 0.8640819184316472, "grad_norm": 0.7655104960309999, "learning_rate": 4.766978558833546e-07, "loss": 0.0967, "step": 29619 }, { "epoch": 0.8641110916622907, "grad_norm": 0.9262543272787156, "learning_rate": 4.7649655619382783e-07, "loss": 0.1254, "step": 29620 }, { "epoch": 0.8641402648929343, "grad_norm": 0.7676951827197807, "learning_rate": 4.7629529688906106e-07, "loss": 0.1097, "step": 29621 }, { "epoch": 0.8641694381235778, "grad_norm": 0.8639825347892992, "learning_rate": 4.7609407797085004e-07, "loss": 0.1322, "step": 29622 }, { "epoch": 0.8641986113542214, "grad_norm": 0.6632079328706527, "learning_rate": 4.7589289944099006e-07, "loss": 0.1022, "step": 29623 }, { "epoch": 0.8642277845848649, "grad_norm": 0.8810722908815922, "learning_rate": 4.756917613012796e-07, "loss": 0.1171, "step": 29624 }, { "epoch": 0.8642569578155085, "grad_norm": 0.8158237493702719, "learning_rate": 4.754906635535117e-07, "loss": 0.0895, "step": 29625 }, { "epoch": 0.864286131046152, "grad_norm": 0.7865349197095143, "learning_rate": 4.7528960619948326e-07, "loss": 0.1252, "step": 29626 }, { "epoch": 0.8643153042767956, "grad_norm": 0.8034154361529194, "learning_rate": 4.7508858924098957e-07, "loss": 0.1103, "step": 29627 }, { "epoch": 0.8643444775074391, "grad_norm": 0.9944795265877507, "learning_rate": 4.748876126798252e-07, "loss": 0.0968, "step": 29628 }, { "epoch": 0.8643736507380827, "grad_norm": 0.803210162221882, "learning_rate": 4.7468667651778323e-07, "loss": 0.1093, "step": 29629 }, { "epoch": 0.8644028239687263, "grad_norm": 0.7230637311969597, "learning_rate": 4.7448578075665887e-07, "loss": 0.1339, "step": 29630 }, { "epoch": 0.8644319971993698, "grad_norm": 0.763966666019329, "learning_rate": 4.7428492539824456e-07, "loss": 0.1086, "step": 29631 }, { "epoch": 0.8644611704300135, "grad_norm": 0.826078356683304, "learning_rate": 4.74084110444335e-07, "loss": 0.114, "step": 29632 }, { "epoch": 0.864490343660657, "grad_norm": 0.700532019841836, "learning_rate": 4.738833358967204e-07, "loss": 0.0972, "step": 29633 }, { "epoch": 0.8645195168913006, "grad_norm": 0.8568305123509733, "learning_rate": 4.736826017571966e-07, "loss": 0.1099, "step": 29634 }, { "epoch": 0.8645486901219441, "grad_norm": 0.8350213993387027, "learning_rate": 4.734819080275538e-07, "loss": 0.0964, "step": 29635 }, { "epoch": 0.8645778633525877, "grad_norm": 0.7811801257511575, "learning_rate": 4.732812547095833e-07, "loss": 0.1063, "step": 29636 }, { "epoch": 0.8646070365832312, "grad_norm": 0.7120592483117036, "learning_rate": 4.730806418050765e-07, "loss": 0.1155, "step": 29637 }, { "epoch": 0.8646362098138748, "grad_norm": 0.798759867045699, "learning_rate": 4.728800693158264e-07, "loss": 0.12, "step": 29638 }, { "epoch": 0.8646653830445183, "grad_norm": 1.1690973989697988, "learning_rate": 4.726795372436227e-07, "loss": 0.1087, "step": 29639 }, { "epoch": 0.8646945562751619, "grad_norm": 0.7437712717524874, "learning_rate": 4.7247904559025394e-07, "loss": 0.1404, "step": 29640 }, { "epoch": 0.8647237295058054, "grad_norm": 0.7925820616934592, "learning_rate": 4.7227859435751257e-07, "loss": 0.1112, "step": 29641 }, { "epoch": 0.864752902736449, "grad_norm": 0.7629146577327641, "learning_rate": 4.720781835471866e-07, "loss": 0.1117, "step": 29642 }, { "epoch": 0.8647820759670926, "grad_norm": 0.7356041591264301, "learning_rate": 4.718778131610641e-07, "loss": 0.109, "step": 29643 }, { "epoch": 0.8648112491977361, "grad_norm": 0.7391439129785692, "learning_rate": 4.716774832009374e-07, "loss": 0.1199, "step": 29644 }, { "epoch": 0.8648404224283798, "grad_norm": 0.9699460332926753, "learning_rate": 4.7147719366859356e-07, "loss": 0.1194, "step": 29645 }, { "epoch": 0.8648695956590233, "grad_norm": 0.9004880669962468, "learning_rate": 4.7127694456581886e-07, "loss": 0.1124, "step": 29646 }, { "epoch": 0.8648987688896669, "grad_norm": 0.8018383910546009, "learning_rate": 4.710767358944035e-07, "loss": 0.1194, "step": 29647 }, { "epoch": 0.8649279421203104, "grad_norm": 0.8808434508993854, "learning_rate": 4.708765676561339e-07, "loss": 0.1025, "step": 29648 }, { "epoch": 0.864957115350954, "grad_norm": 0.9759817118866354, "learning_rate": 4.706764398527963e-07, "loss": 0.1395, "step": 29649 }, { "epoch": 0.8649862885815975, "grad_norm": 1.155416398124548, "learning_rate": 4.704763524861783e-07, "loss": 0.0977, "step": 29650 }, { "epoch": 0.8650154618122411, "grad_norm": 0.895229972793927, "learning_rate": 4.702763055580672e-07, "loss": 0.1054, "step": 29651 }, { "epoch": 0.8650446350428846, "grad_norm": 0.8855127487464511, "learning_rate": 4.700762990702473e-07, "loss": 0.1126, "step": 29652 }, { "epoch": 0.8650738082735282, "grad_norm": 0.8177305592994144, "learning_rate": 4.698763330245043e-07, "loss": 0.102, "step": 29653 }, { "epoch": 0.8651029815041718, "grad_norm": 0.812140178505874, "learning_rate": 4.6967640742262513e-07, "loss": 0.0984, "step": 29654 }, { "epoch": 0.8651321547348153, "grad_norm": 0.8331334389322026, "learning_rate": 4.6947652226639216e-07, "loss": 0.1312, "step": 29655 }, { "epoch": 0.8651613279654589, "grad_norm": 1.0606635692666215, "learning_rate": 4.692766775575913e-07, "loss": 0.1145, "step": 29656 }, { "epoch": 0.8651905011961024, "grad_norm": 0.912480668116238, "learning_rate": 4.690768732980078e-07, "loss": 0.1161, "step": 29657 }, { "epoch": 0.865219674426746, "grad_norm": 0.8693959965482749, "learning_rate": 4.688771094894246e-07, "loss": 0.1201, "step": 29658 }, { "epoch": 0.8652488476573896, "grad_norm": 1.1410749152867425, "learning_rate": 4.6867738613362356e-07, "loss": 0.125, "step": 29659 }, { "epoch": 0.8652780208880332, "grad_norm": 0.9557606060725321, "learning_rate": 4.6847770323239006e-07, "loss": 0.1197, "step": 29660 }, { "epoch": 0.8653071941186767, "grad_norm": 0.9258297153682862, "learning_rate": 4.682780607875048e-07, "loss": 0.1102, "step": 29661 }, { "epoch": 0.8653363673493203, "grad_norm": 0.9021782178302932, "learning_rate": 4.680784588007525e-07, "loss": 0.1157, "step": 29662 }, { "epoch": 0.8653655405799638, "grad_norm": 0.912579263470195, "learning_rate": 4.678788972739129e-07, "loss": 0.0917, "step": 29663 }, { "epoch": 0.8653947138106074, "grad_norm": 0.7514401759654336, "learning_rate": 4.6767937620876946e-07, "loss": 0.1161, "step": 29664 }, { "epoch": 0.865423887041251, "grad_norm": 0.7817265586761414, "learning_rate": 4.674798956071025e-07, "loss": 0.1068, "step": 29665 }, { "epoch": 0.8654530602718945, "grad_norm": 0.7489309934528932, "learning_rate": 4.6728045547069223e-07, "loss": 0.1015, "step": 29666 }, { "epoch": 0.8654822335025381, "grad_norm": 0.7725362704533929, "learning_rate": 4.6708105580132e-07, "loss": 0.1026, "step": 29667 }, { "epoch": 0.8655114067331816, "grad_norm": 0.8417707708156441, "learning_rate": 4.6688169660076666e-07, "loss": 0.1124, "step": 29668 }, { "epoch": 0.8655405799638252, "grad_norm": 0.9401182149442621, "learning_rate": 4.6668237787081185e-07, "loss": 0.1365, "step": 29669 }, { "epoch": 0.8655697531944687, "grad_norm": 0.912303103831374, "learning_rate": 4.66483099613233e-07, "loss": 0.1218, "step": 29670 }, { "epoch": 0.8655989264251123, "grad_norm": 0.8884869685940538, "learning_rate": 4.662838618298121e-07, "loss": 0.126, "step": 29671 }, { "epoch": 0.8656280996557559, "grad_norm": 1.057071481354854, "learning_rate": 4.6608466452232713e-07, "loss": 0.1138, "step": 29672 }, { "epoch": 0.8656572728863995, "grad_norm": 0.8261209164621662, "learning_rate": 4.6588550769255336e-07, "loss": 0.1201, "step": 29673 }, { "epoch": 0.865686446117043, "grad_norm": 1.0436250607599036, "learning_rate": 4.656863913422732e-07, "loss": 0.1232, "step": 29674 }, { "epoch": 0.8657156193476866, "grad_norm": 0.7731383788619942, "learning_rate": 4.654873154732631e-07, "loss": 0.1106, "step": 29675 }, { "epoch": 0.8657447925783301, "grad_norm": 0.9086950245842619, "learning_rate": 4.652882800872982e-07, "loss": 0.1059, "step": 29676 }, { "epoch": 0.8657739658089737, "grad_norm": 0.6931387976822992, "learning_rate": 4.6508928518615883e-07, "loss": 0.1187, "step": 29677 }, { "epoch": 0.8658031390396173, "grad_norm": 0.8971935488584154, "learning_rate": 4.6489033077161907e-07, "loss": 0.1332, "step": 29678 }, { "epoch": 0.8658323122702608, "grad_norm": 0.8124624341357914, "learning_rate": 4.6469141684545473e-07, "loss": 0.1153, "step": 29679 }, { "epoch": 0.8658614855009044, "grad_norm": 0.7495792132361767, "learning_rate": 4.644925434094433e-07, "loss": 0.1164, "step": 29680 }, { "epoch": 0.8658906587315479, "grad_norm": 0.8907488826080208, "learning_rate": 4.6429371046536e-07, "loss": 0.1062, "step": 29681 }, { "epoch": 0.8659198319621915, "grad_norm": 0.7172330951828068, "learning_rate": 4.6409491801498006e-07, "loss": 0.1015, "step": 29682 }, { "epoch": 0.865949005192835, "grad_norm": 0.766924963875774, "learning_rate": 4.6389616606007717e-07, "loss": 0.0988, "step": 29683 }, { "epoch": 0.8659781784234786, "grad_norm": 0.6698874112215939, "learning_rate": 4.6369745460242755e-07, "loss": 0.1194, "step": 29684 }, { "epoch": 0.8660073516541221, "grad_norm": 0.878505291382565, "learning_rate": 4.634987836438026e-07, "loss": 0.1064, "step": 29685 }, { "epoch": 0.8660365248847658, "grad_norm": 0.8542466741397426, "learning_rate": 4.633001531859777e-07, "loss": 0.1042, "step": 29686 }, { "epoch": 0.8660656981154093, "grad_norm": 0.7377300469235439, "learning_rate": 4.631015632307273e-07, "loss": 0.1288, "step": 29687 }, { "epoch": 0.8660948713460529, "grad_norm": 0.832876429750545, "learning_rate": 4.629030137798229e-07, "loss": 0.1123, "step": 29688 }, { "epoch": 0.8661240445766965, "grad_norm": 0.8588785022383585, "learning_rate": 4.627045048350365e-07, "loss": 0.093, "step": 29689 }, { "epoch": 0.86615321780734, "grad_norm": 0.7639942154531778, "learning_rate": 4.6250603639814153e-07, "loss": 0.1027, "step": 29690 }, { "epoch": 0.8661823910379836, "grad_norm": 0.746171628499873, "learning_rate": 4.6230760847090936e-07, "loss": 0.1068, "step": 29691 }, { "epoch": 0.8662115642686271, "grad_norm": 0.812632010411005, "learning_rate": 4.62109221055112e-07, "loss": 0.0966, "step": 29692 }, { "epoch": 0.8662407374992707, "grad_norm": 1.05417175822687, "learning_rate": 4.619108741525197e-07, "loss": 0.1112, "step": 29693 }, { "epoch": 0.8662699107299142, "grad_norm": 0.829800720148143, "learning_rate": 4.6171256776490423e-07, "loss": 0.1126, "step": 29694 }, { "epoch": 0.8662990839605578, "grad_norm": 0.9135399325225867, "learning_rate": 4.61514301894036e-07, "loss": 0.1155, "step": 29695 }, { "epoch": 0.8663282571912013, "grad_norm": 0.8423022840432328, "learning_rate": 4.613160765416835e-07, "loss": 0.1176, "step": 29696 }, { "epoch": 0.8663574304218449, "grad_norm": 0.9440167894563377, "learning_rate": 4.6111789170961764e-07, "loss": 0.1227, "step": 29697 }, { "epoch": 0.8663866036524884, "grad_norm": 0.842576393518547, "learning_rate": 4.6091974739960855e-07, "loss": 0.1258, "step": 29698 }, { "epoch": 0.866415776883132, "grad_norm": 0.8789314064777867, "learning_rate": 4.607216436134243e-07, "loss": 0.1087, "step": 29699 }, { "epoch": 0.8664449501137756, "grad_norm": 0.7346332916299733, "learning_rate": 4.6052358035283296e-07, "loss": 0.0907, "step": 29700 }, { "epoch": 0.8664741233444192, "grad_norm": 0.6216925139506582, "learning_rate": 4.603255576196042e-07, "loss": 0.0919, "step": 29701 }, { "epoch": 0.8665032965750628, "grad_norm": 0.9711471230620363, "learning_rate": 4.6012757541550547e-07, "loss": 0.1112, "step": 29702 }, { "epoch": 0.8665324698057063, "grad_norm": 0.9344305312678193, "learning_rate": 4.5992963374230204e-07, "loss": 0.1119, "step": 29703 }, { "epoch": 0.8665616430363499, "grad_norm": 0.7686073239935602, "learning_rate": 4.5973173260176475e-07, "loss": 0.0924, "step": 29704 }, { "epoch": 0.8665908162669934, "grad_norm": 0.798213112409071, "learning_rate": 4.595338719956582e-07, "loss": 0.1017, "step": 29705 }, { "epoch": 0.866619989497637, "grad_norm": 0.8807198372508848, "learning_rate": 4.5933605192574894e-07, "loss": 0.1067, "step": 29706 }, { "epoch": 0.8666491627282805, "grad_norm": 0.8785217562478561, "learning_rate": 4.5913827239380483e-07, "loss": 0.0941, "step": 29707 }, { "epoch": 0.8666783359589241, "grad_norm": 1.0508830325605063, "learning_rate": 4.589405334015895e-07, "loss": 0.1189, "step": 29708 }, { "epoch": 0.8667075091895676, "grad_norm": 0.8974488364052352, "learning_rate": 4.5874283495086823e-07, "loss": 0.1269, "step": 29709 }, { "epoch": 0.8667366824202112, "grad_norm": 0.9093572309706573, "learning_rate": 4.585451770434074e-07, "loss": 0.1193, "step": 29710 }, { "epoch": 0.8667658556508547, "grad_norm": 1.1047782347058013, "learning_rate": 4.5834755968097167e-07, "loss": 0.1024, "step": 29711 }, { "epoch": 0.8667950288814983, "grad_norm": 0.7875122541939666, "learning_rate": 4.581499828653246e-07, "loss": 0.13, "step": 29712 }, { "epoch": 0.866824202112142, "grad_norm": 1.0699875763352769, "learning_rate": 4.5795244659822933e-07, "loss": 0.1018, "step": 29713 }, { "epoch": 0.8668533753427855, "grad_norm": 1.0058920404274683, "learning_rate": 4.577549508814516e-07, "loss": 0.1121, "step": 29714 }, { "epoch": 0.8668825485734291, "grad_norm": 1.5685121632036332, "learning_rate": 4.5755749571675223e-07, "loss": 0.1162, "step": 29715 }, { "epoch": 0.8669117218040726, "grad_norm": 0.7863468657298148, "learning_rate": 4.573600811058948e-07, "loss": 0.1151, "step": 29716 }, { "epoch": 0.8669408950347162, "grad_norm": 0.9508696183350926, "learning_rate": 4.571627070506435e-07, "loss": 0.1, "step": 29717 }, { "epoch": 0.8669700682653597, "grad_norm": 0.8904285033234068, "learning_rate": 4.5696537355275903e-07, "loss": 0.0987, "step": 29718 }, { "epoch": 0.8669992414960033, "grad_norm": 0.7338508216069743, "learning_rate": 4.5676808061400233e-07, "loss": 0.1011, "step": 29719 }, { "epoch": 0.8670284147266468, "grad_norm": 0.7718343882417587, "learning_rate": 4.5657082823613643e-07, "loss": 0.1052, "step": 29720 }, { "epoch": 0.8670575879572904, "grad_norm": 0.8998635641660996, "learning_rate": 4.5637361642092036e-07, "loss": 0.11, "step": 29721 }, { "epoch": 0.8670867611879339, "grad_norm": 0.9120540171063068, "learning_rate": 4.5617644517011727e-07, "loss": 0.1068, "step": 29722 }, { "epoch": 0.8671159344185775, "grad_norm": 0.904704683530814, "learning_rate": 4.559793144854857e-07, "loss": 0.1251, "step": 29723 }, { "epoch": 0.867145107649221, "grad_norm": 0.7281993287272448, "learning_rate": 4.557822243687865e-07, "loss": 0.1058, "step": 29724 }, { "epoch": 0.8671742808798646, "grad_norm": 0.7850270269802017, "learning_rate": 4.555851748217788e-07, "loss": 0.1136, "step": 29725 }, { "epoch": 0.8672034541105081, "grad_norm": 1.4475167809231915, "learning_rate": 4.553881658462206e-07, "loss": 0.1242, "step": 29726 }, { "epoch": 0.8672326273411518, "grad_norm": 0.9211778454821116, "learning_rate": 4.5519119744387273e-07, "loss": 0.1168, "step": 29727 }, { "epoch": 0.8672618005717954, "grad_norm": 0.9632290231290926, "learning_rate": 4.549942696164933e-07, "loss": 0.1036, "step": 29728 }, { "epoch": 0.8672909738024389, "grad_norm": 0.729019807957965, "learning_rate": 4.5479738236584026e-07, "loss": 0.1083, "step": 29729 }, { "epoch": 0.8673201470330825, "grad_norm": 0.7198276258285211, "learning_rate": 4.5460053569367e-07, "loss": 0.1025, "step": 29730 }, { "epoch": 0.867349320263726, "grad_norm": 0.9094212307826949, "learning_rate": 4.544037296017423e-07, "loss": 0.1269, "step": 29731 }, { "epoch": 0.8673784934943696, "grad_norm": 1.0392531432521597, "learning_rate": 4.5420696409181285e-07, "loss": 0.1173, "step": 29732 }, { "epoch": 0.8674076667250131, "grad_norm": 0.8547771219552441, "learning_rate": 4.540102391656365e-07, "loss": 0.1142, "step": 29733 }, { "epoch": 0.8674368399556567, "grad_norm": 0.9342302833273862, "learning_rate": 4.5381355482497334e-07, "loss": 0.102, "step": 29734 }, { "epoch": 0.8674660131863002, "grad_norm": 0.8647156138343178, "learning_rate": 4.536169110715777e-07, "loss": 0.1192, "step": 29735 }, { "epoch": 0.8674951864169438, "grad_norm": 0.8518650128110973, "learning_rate": 4.5342030790720415e-07, "loss": 0.0861, "step": 29736 }, { "epoch": 0.8675243596475873, "grad_norm": 0.7755918713379286, "learning_rate": 4.532237453336091e-07, "loss": 0.132, "step": 29737 }, { "epoch": 0.8675535328782309, "grad_norm": 1.0685931566112636, "learning_rate": 4.5302722335254735e-07, "loss": 0.1186, "step": 29738 }, { "epoch": 0.8675827061088744, "grad_norm": 1.0566728184063763, "learning_rate": 4.5283074196577236e-07, "loss": 0.1157, "step": 29739 }, { "epoch": 0.8676118793395181, "grad_norm": 0.7225468710013514, "learning_rate": 4.526343011750389e-07, "loss": 0.0999, "step": 29740 }, { "epoch": 0.8676410525701617, "grad_norm": 0.8328686994694089, "learning_rate": 4.524379009821017e-07, "loss": 0.1254, "step": 29741 }, { "epoch": 0.8676702258008052, "grad_norm": 0.7084810337350889, "learning_rate": 4.522415413887138e-07, "loss": 0.1049, "step": 29742 }, { "epoch": 0.8676993990314488, "grad_norm": 0.8528611464567548, "learning_rate": 4.520452223966265e-07, "loss": 0.1101, "step": 29743 }, { "epoch": 0.8677285722620923, "grad_norm": 0.9303856966190855, "learning_rate": 4.518489440075946e-07, "loss": 0.1222, "step": 29744 }, { "epoch": 0.8677577454927359, "grad_norm": 1.0222688221812284, "learning_rate": 4.516527062233683e-07, "loss": 0.1451, "step": 29745 }, { "epoch": 0.8677869187233794, "grad_norm": 0.9208701209918345, "learning_rate": 4.514565090457018e-07, "loss": 0.0919, "step": 29746 }, { "epoch": 0.867816091954023, "grad_norm": 0.8087070543058863, "learning_rate": 4.512603524763459e-07, "loss": 0.1012, "step": 29747 }, { "epoch": 0.8678452651846665, "grad_norm": 0.7322560302675284, "learning_rate": 4.51064236517052e-07, "loss": 0.119, "step": 29748 }, { "epoch": 0.8678744384153101, "grad_norm": 0.8583460991070108, "learning_rate": 4.5086816116956976e-07, "loss": 0.1025, "step": 29749 }, { "epoch": 0.8679036116459536, "grad_norm": 0.8543875345341135, "learning_rate": 4.5067212643565174e-07, "loss": 0.1139, "step": 29750 }, { "epoch": 0.8679327848765972, "grad_norm": 0.9019525138139194, "learning_rate": 4.504761323170453e-07, "loss": 0.1149, "step": 29751 }, { "epoch": 0.8679619581072407, "grad_norm": 0.8608702128939949, "learning_rate": 4.5028017881550367e-07, "loss": 0.1356, "step": 29752 }, { "epoch": 0.8679911313378843, "grad_norm": 0.895518179646985, "learning_rate": 4.500842659327731e-07, "loss": 0.1214, "step": 29753 }, { "epoch": 0.868020304568528, "grad_norm": 1.1530854543177367, "learning_rate": 4.49888393670605e-07, "loss": 0.1097, "step": 29754 }, { "epoch": 0.8680494777991715, "grad_norm": 0.9497906050404548, "learning_rate": 4.4969256203074743e-07, "loss": 0.1306, "step": 29755 }, { "epoch": 0.8680786510298151, "grad_norm": 1.0179029781359794, "learning_rate": 4.4949677101494725e-07, "loss": 0.1065, "step": 29756 }, { "epoch": 0.8681078242604586, "grad_norm": 0.7478767179124656, "learning_rate": 4.4930102062495375e-07, "loss": 0.0932, "step": 29757 }, { "epoch": 0.8681369974911022, "grad_norm": 0.8353406196041437, "learning_rate": 4.4910531086251487e-07, "loss": 0.1331, "step": 29758 }, { "epoch": 0.8681661707217457, "grad_norm": 0.7320057912699145, "learning_rate": 4.489096417293781e-07, "loss": 0.1118, "step": 29759 }, { "epoch": 0.8681953439523893, "grad_norm": 0.7836198279260504, "learning_rate": 4.4871401322728827e-07, "loss": 0.0998, "step": 29760 }, { "epoch": 0.8682245171830328, "grad_norm": 0.6365631225709393, "learning_rate": 4.485184253579944e-07, "loss": 0.0916, "step": 29761 }, { "epoch": 0.8682536904136764, "grad_norm": 0.9872720899301752, "learning_rate": 4.4832287812324127e-07, "loss": 0.1078, "step": 29762 }, { "epoch": 0.86828286364432, "grad_norm": 1.0003743596686172, "learning_rate": 4.4812737152477304e-07, "loss": 0.1116, "step": 29763 }, { "epoch": 0.8683120368749635, "grad_norm": 1.1022894300171184, "learning_rate": 4.4793190556433887e-07, "loss": 0.1167, "step": 29764 }, { "epoch": 0.868341210105607, "grad_norm": 0.9228480213490751, "learning_rate": 4.4773648024368174e-07, "loss": 0.1157, "step": 29765 }, { "epoch": 0.8683703833362506, "grad_norm": 0.8021311905958423, "learning_rate": 4.475410955645465e-07, "loss": 0.1034, "step": 29766 }, { "epoch": 0.8683995565668943, "grad_norm": 0.879069332619487, "learning_rate": 4.4734575152867777e-07, "loss": 0.1108, "step": 29767 }, { "epoch": 0.8684287297975378, "grad_norm": 0.8309891909386835, "learning_rate": 4.4715044813781974e-07, "loss": 0.1014, "step": 29768 }, { "epoch": 0.8684579030281814, "grad_norm": 0.9584210287436049, "learning_rate": 4.469551853937143e-07, "loss": 0.1141, "step": 29769 }, { "epoch": 0.8684870762588249, "grad_norm": 1.0390781683334436, "learning_rate": 4.4675996329810677e-07, "loss": 0.1122, "step": 29770 }, { "epoch": 0.8685162494894685, "grad_norm": 0.8384784217556217, "learning_rate": 4.4656478185273965e-07, "loss": 0.0945, "step": 29771 }, { "epoch": 0.868545422720112, "grad_norm": 0.934736468470086, "learning_rate": 4.463696410593554e-07, "loss": 0.1183, "step": 29772 }, { "epoch": 0.8685745959507556, "grad_norm": 1.025228067281019, "learning_rate": 4.461745409196949e-07, "loss": 0.0994, "step": 29773 }, { "epoch": 0.8686037691813991, "grad_norm": 0.8652336674970745, "learning_rate": 4.459794814355023e-07, "loss": 0.1118, "step": 29774 }, { "epoch": 0.8686329424120427, "grad_norm": 1.0542096956907, "learning_rate": 4.457844626085167e-07, "loss": 0.0994, "step": 29775 }, { "epoch": 0.8686621156426863, "grad_norm": 0.9462186891090342, "learning_rate": 4.455894844404801e-07, "loss": 0.1015, "step": 29776 }, { "epoch": 0.8686912888733298, "grad_norm": 1.1768899062578044, "learning_rate": 4.4539454693313445e-07, "loss": 0.1054, "step": 29777 }, { "epoch": 0.8687204621039734, "grad_norm": 0.9259754020643249, "learning_rate": 4.4519965008821884e-07, "loss": 0.1238, "step": 29778 }, { "epoch": 0.8687496353346169, "grad_norm": 0.9200452501299549, "learning_rate": 4.4500479390747256e-07, "loss": 0.0988, "step": 29779 }, { "epoch": 0.8687788085652605, "grad_norm": 0.6860223353769073, "learning_rate": 4.448099783926368e-07, "loss": 0.1065, "step": 29780 }, { "epoch": 0.8688079817959041, "grad_norm": 1.1867613252608016, "learning_rate": 4.446152035454493e-07, "loss": 0.1165, "step": 29781 }, { "epoch": 0.8688371550265477, "grad_norm": 0.9965666379750093, "learning_rate": 4.444204693676507e-07, "loss": 0.1074, "step": 29782 }, { "epoch": 0.8688663282571912, "grad_norm": 1.0111669054258339, "learning_rate": 4.4422577586097805e-07, "loss": 0.1385, "step": 29783 }, { "epoch": 0.8688955014878348, "grad_norm": 0.8950318607751709, "learning_rate": 4.44031123027171e-07, "loss": 0.1264, "step": 29784 }, { "epoch": 0.8689246747184783, "grad_norm": 1.0518872645517119, "learning_rate": 4.4383651086796655e-07, "loss": 0.1238, "step": 29785 }, { "epoch": 0.8689538479491219, "grad_norm": 0.9826575438077829, "learning_rate": 4.43641939385101e-07, "loss": 0.109, "step": 29786 }, { "epoch": 0.8689830211797654, "grad_norm": 0.8794138827483294, "learning_rate": 4.4344740858031253e-07, "loss": 0.1318, "step": 29787 }, { "epoch": 0.869012194410409, "grad_norm": 1.1362465386233664, "learning_rate": 4.432529184553386e-07, "loss": 0.1075, "step": 29788 }, { "epoch": 0.8690413676410526, "grad_norm": 0.8820666260009485, "learning_rate": 4.4305846901191495e-07, "loss": 0.0917, "step": 29789 }, { "epoch": 0.8690705408716961, "grad_norm": 0.712915621453368, "learning_rate": 4.428640602517764e-07, "loss": 0.1024, "step": 29790 }, { "epoch": 0.8690997141023397, "grad_norm": 0.8040965846348229, "learning_rate": 4.42669692176661e-07, "loss": 0.1011, "step": 29791 }, { "epoch": 0.8691288873329832, "grad_norm": 1.0730527558974972, "learning_rate": 4.424753647883023e-07, "loss": 0.1116, "step": 29792 }, { "epoch": 0.8691580605636268, "grad_norm": 1.048869244710362, "learning_rate": 4.42281078088434e-07, "loss": 0.1115, "step": 29793 }, { "epoch": 0.8691872337942704, "grad_norm": 0.7481494214929864, "learning_rate": 4.4208683207879355e-07, "loss": 0.1051, "step": 29794 }, { "epoch": 0.869216407024914, "grad_norm": 1.0399294612372059, "learning_rate": 4.418926267611146e-07, "loss": 0.1264, "step": 29795 }, { "epoch": 0.8692455802555575, "grad_norm": 0.7680603792499419, "learning_rate": 4.416984621371284e-07, "loss": 0.1065, "step": 29796 }, { "epoch": 0.8692747534862011, "grad_norm": 0.6179645275901688, "learning_rate": 4.4150433820857153e-07, "loss": 0.0989, "step": 29797 }, { "epoch": 0.8693039267168446, "grad_norm": 1.2297313226543285, "learning_rate": 4.4131025497717585e-07, "loss": 0.1101, "step": 29798 }, { "epoch": 0.8693330999474882, "grad_norm": 1.0925233920483266, "learning_rate": 4.4111621244467275e-07, "loss": 0.124, "step": 29799 }, { "epoch": 0.8693622731781318, "grad_norm": 0.906662609980025, "learning_rate": 4.409222106127958e-07, "loss": 0.1139, "step": 29800 }, { "epoch": 0.8693914464087753, "grad_norm": 0.887670852975099, "learning_rate": 4.407282494832782e-07, "loss": 0.127, "step": 29801 }, { "epoch": 0.8694206196394189, "grad_norm": 0.8546909175050807, "learning_rate": 4.405343290578507e-07, "loss": 0.1057, "step": 29802 }, { "epoch": 0.8694497928700624, "grad_norm": 0.8254342158400528, "learning_rate": 4.4034044933824294e-07, "loss": 0.1253, "step": 29803 }, { "epoch": 0.869478966100706, "grad_norm": 0.8674148604627151, "learning_rate": 4.401466103261881e-07, "loss": 0.14, "step": 29804 }, { "epoch": 0.8695081393313495, "grad_norm": 0.8069543818570193, "learning_rate": 4.399528120234148e-07, "loss": 0.1118, "step": 29805 }, { "epoch": 0.8695373125619931, "grad_norm": 0.8461359594532442, "learning_rate": 4.3975905443165437e-07, "loss": 0.0957, "step": 29806 }, { "epoch": 0.8695664857926366, "grad_norm": 0.9271947325694814, "learning_rate": 4.395653375526371e-07, "loss": 0.1032, "step": 29807 }, { "epoch": 0.8695956590232803, "grad_norm": 1.0509761241612552, "learning_rate": 4.3937166138809217e-07, "loss": 0.1136, "step": 29808 }, { "epoch": 0.8696248322539238, "grad_norm": 0.9624390305621473, "learning_rate": 4.3917802593974714e-07, "loss": 0.1093, "step": 29809 }, { "epoch": 0.8696540054845674, "grad_norm": 0.9669244218025086, "learning_rate": 4.389844312093322e-07, "loss": 0.1071, "step": 29810 }, { "epoch": 0.869683178715211, "grad_norm": 0.8908038698081963, "learning_rate": 4.387908771985766e-07, "loss": 0.1253, "step": 29811 }, { "epoch": 0.8697123519458545, "grad_norm": 0.9371307776667135, "learning_rate": 4.385973639092067e-07, "loss": 0.1183, "step": 29812 }, { "epoch": 0.869741525176498, "grad_norm": 0.7877828857168898, "learning_rate": 4.3840389134295e-07, "loss": 0.1128, "step": 29813 }, { "epoch": 0.8697706984071416, "grad_norm": 0.8044260548349564, "learning_rate": 4.3821045950153517e-07, "loss": 0.1077, "step": 29814 }, { "epoch": 0.8697998716377852, "grad_norm": 0.8445328060565375, "learning_rate": 4.3801706838668855e-07, "loss": 0.1203, "step": 29815 }, { "epoch": 0.8698290448684287, "grad_norm": 0.7289914820312914, "learning_rate": 4.3782371800013545e-07, "loss": 0.1149, "step": 29816 }, { "epoch": 0.8698582180990723, "grad_norm": 0.6795465123690801, "learning_rate": 4.376304083436028e-07, "loss": 0.1018, "step": 29817 }, { "epoch": 0.8698873913297158, "grad_norm": 0.7402984432363503, "learning_rate": 4.3743713941881817e-07, "loss": 0.11, "step": 29818 }, { "epoch": 0.8699165645603594, "grad_norm": 0.8466375251813191, "learning_rate": 4.3724391122750565e-07, "loss": 0.119, "step": 29819 }, { "epoch": 0.8699457377910029, "grad_norm": 0.7558040130905236, "learning_rate": 4.370507237713889e-07, "loss": 0.1048, "step": 29820 }, { "epoch": 0.8699749110216466, "grad_norm": 0.9283048767907476, "learning_rate": 4.3685757705219545e-07, "loss": 0.1111, "step": 29821 }, { "epoch": 0.8700040842522901, "grad_norm": 0.8306414883025738, "learning_rate": 4.3666447107164667e-07, "loss": 0.1019, "step": 29822 }, { "epoch": 0.8700332574829337, "grad_norm": 1.0089986757984788, "learning_rate": 4.36471405831469e-07, "loss": 0.1124, "step": 29823 }, { "epoch": 0.8700624307135773, "grad_norm": 0.960068224297036, "learning_rate": 4.362783813333854e-07, "loss": 0.1316, "step": 29824 }, { "epoch": 0.8700916039442208, "grad_norm": 0.94203315437371, "learning_rate": 4.3608539757911903e-07, "loss": 0.0981, "step": 29825 }, { "epoch": 0.8701207771748644, "grad_norm": 0.9243236379327263, "learning_rate": 4.3589245457039244e-07, "loss": 0.114, "step": 29826 }, { "epoch": 0.8701499504055079, "grad_norm": 1.14120755316505, "learning_rate": 4.3569955230892857e-07, "loss": 0.1236, "step": 29827 }, { "epoch": 0.8701791236361515, "grad_norm": 0.8580259854946702, "learning_rate": 4.355066907964489e-07, "loss": 0.1228, "step": 29828 }, { "epoch": 0.870208296866795, "grad_norm": 0.7866802087229846, "learning_rate": 4.3531387003467706e-07, "loss": 0.1305, "step": 29829 }, { "epoch": 0.8702374700974386, "grad_norm": 0.800562842323476, "learning_rate": 4.351210900253322e-07, "loss": 0.1166, "step": 29830 }, { "epoch": 0.8702666433280821, "grad_norm": 0.7850213567763958, "learning_rate": 4.349283507701374e-07, "loss": 0.133, "step": 29831 }, { "epoch": 0.8702958165587257, "grad_norm": 0.8698308370262087, "learning_rate": 4.3473565227081236e-07, "loss": 0.126, "step": 29832 }, { "epoch": 0.8703249897893692, "grad_norm": 0.6526075225422932, "learning_rate": 4.345429945290769e-07, "loss": 0.1, "step": 29833 }, { "epoch": 0.8703541630200128, "grad_norm": 0.841088174324627, "learning_rate": 4.343503775466518e-07, "loss": 0.1111, "step": 29834 }, { "epoch": 0.8703833362506564, "grad_norm": 0.8447937630109417, "learning_rate": 4.341578013252573e-07, "loss": 0.1072, "step": 29835 }, { "epoch": 0.8704125094813, "grad_norm": 0.924956378450774, "learning_rate": 4.339652658666116e-07, "loss": 0.1005, "step": 29836 }, { "epoch": 0.8704416827119436, "grad_norm": 0.8227379584896524, "learning_rate": 4.337727711724343e-07, "loss": 0.1193, "step": 29837 }, { "epoch": 0.8704708559425871, "grad_norm": 0.7631561645780006, "learning_rate": 4.3358031724444416e-07, "loss": 0.0987, "step": 29838 }, { "epoch": 0.8705000291732307, "grad_norm": 0.8128963542843648, "learning_rate": 4.333879040843575e-07, "loss": 0.1154, "step": 29839 }, { "epoch": 0.8705292024038742, "grad_norm": 0.7893618652077217, "learning_rate": 4.331955316938935e-07, "loss": 0.1227, "step": 29840 }, { "epoch": 0.8705583756345178, "grad_norm": 0.9053852248961141, "learning_rate": 4.330032000747708e-07, "loss": 0.1216, "step": 29841 }, { "epoch": 0.8705875488651613, "grad_norm": 0.8752026556647005, "learning_rate": 4.328109092287053e-07, "loss": 0.1205, "step": 29842 }, { "epoch": 0.8706167220958049, "grad_norm": 1.1452348967503392, "learning_rate": 4.3261865915741273e-07, "loss": 0.105, "step": 29843 }, { "epoch": 0.8706458953264484, "grad_norm": 0.7792971177379204, "learning_rate": 4.324264498626113e-07, "loss": 0.1105, "step": 29844 }, { "epoch": 0.870675068557092, "grad_norm": 0.7918411056047596, "learning_rate": 4.322342813460162e-07, "loss": 0.1183, "step": 29845 }, { "epoch": 0.8707042417877355, "grad_norm": 0.7757046761783517, "learning_rate": 4.320421536093422e-07, "loss": 0.0924, "step": 29846 }, { "epoch": 0.8707334150183791, "grad_norm": 0.6833492941206484, "learning_rate": 4.318500666543052e-07, "loss": 0.1033, "step": 29847 }, { "epoch": 0.8707625882490228, "grad_norm": 0.9957608773109329, "learning_rate": 4.3165802048262096e-07, "loss": 0.1247, "step": 29848 }, { "epoch": 0.8707917614796663, "grad_norm": 0.8078436126699239, "learning_rate": 4.314660150960037e-07, "loss": 0.0876, "step": 29849 }, { "epoch": 0.8708209347103099, "grad_norm": 0.8166737009025434, "learning_rate": 4.3127405049616654e-07, "loss": 0.0973, "step": 29850 }, { "epoch": 0.8708501079409534, "grad_norm": 0.9200094109189592, "learning_rate": 4.3108212668482476e-07, "loss": 0.1144, "step": 29851 }, { "epoch": 0.870879281171597, "grad_norm": 1.1565132894069288, "learning_rate": 4.308902436636903e-07, "loss": 0.1081, "step": 29852 }, { "epoch": 0.8709084544022405, "grad_norm": 1.0310305442230399, "learning_rate": 4.3069840143447674e-07, "loss": 0.1061, "step": 29853 }, { "epoch": 0.8709376276328841, "grad_norm": 0.8234617934363656, "learning_rate": 4.3050659999889776e-07, "loss": 0.1078, "step": 29854 }, { "epoch": 0.8709668008635276, "grad_norm": 0.9566713458942278, "learning_rate": 4.303148393586654e-07, "loss": 0.1094, "step": 29855 }, { "epoch": 0.8709959740941712, "grad_norm": 1.1751645532482526, "learning_rate": 4.3012311951549036e-07, "loss": 0.1275, "step": 29856 }, { "epoch": 0.8710251473248147, "grad_norm": 0.9431671484471107, "learning_rate": 4.299314404710864e-07, "loss": 0.0854, "step": 29857 }, { "epoch": 0.8710543205554583, "grad_norm": 1.068907517348355, "learning_rate": 4.2973980222716207e-07, "loss": 0.1144, "step": 29858 }, { "epoch": 0.8710834937861018, "grad_norm": 0.7769048005080315, "learning_rate": 4.29548204785431e-07, "loss": 0.1149, "step": 29859 }, { "epoch": 0.8711126670167454, "grad_norm": 0.9716605913536874, "learning_rate": 4.2935664814760136e-07, "loss": 0.1113, "step": 29860 }, { "epoch": 0.8711418402473889, "grad_norm": 0.8453873145446957, "learning_rate": 4.2916513231538557e-07, "loss": 0.1241, "step": 29861 }, { "epoch": 0.8711710134780326, "grad_norm": 0.8464955427424956, "learning_rate": 4.289736572904923e-07, "loss": 0.1215, "step": 29862 }, { "epoch": 0.8712001867086762, "grad_norm": 0.9637879572247396, "learning_rate": 4.2878222307463024e-07, "loss": 0.1416, "step": 29863 }, { "epoch": 0.8712293599393197, "grad_norm": 1.1063074579540713, "learning_rate": 4.28590829669509e-07, "loss": 0.1155, "step": 29864 }, { "epoch": 0.8712585331699633, "grad_norm": 0.9476633249530068, "learning_rate": 4.28399477076839e-07, "loss": 0.1174, "step": 29865 }, { "epoch": 0.8712877064006068, "grad_norm": 1.0494547089309854, "learning_rate": 4.2820816529832554e-07, "loss": 0.1017, "step": 29866 }, { "epoch": 0.8713168796312504, "grad_norm": 0.740568707230243, "learning_rate": 4.2801689433567937e-07, "loss": 0.0867, "step": 29867 }, { "epoch": 0.8713460528618939, "grad_norm": 0.7175939290898041, "learning_rate": 4.27825664190607e-07, "loss": 0.095, "step": 29868 }, { "epoch": 0.8713752260925375, "grad_norm": 0.9289451850827819, "learning_rate": 4.276344748648148e-07, "loss": 0.1285, "step": 29869 }, { "epoch": 0.871404399323181, "grad_norm": 0.6876481776515677, "learning_rate": 4.274433263600103e-07, "loss": 0.1117, "step": 29870 }, { "epoch": 0.8714335725538246, "grad_norm": 0.90676459476418, "learning_rate": 4.2725221867790155e-07, "loss": 0.1072, "step": 29871 }, { "epoch": 0.8714627457844681, "grad_norm": 0.7552836163927147, "learning_rate": 4.270611518201928e-07, "loss": 0.1008, "step": 29872 }, { "epoch": 0.8714919190151117, "grad_norm": 0.7758009585920969, "learning_rate": 4.268701257885899e-07, "loss": 0.1108, "step": 29873 }, { "epoch": 0.8715210922457552, "grad_norm": 0.8339668780951237, "learning_rate": 4.2667914058479976e-07, "loss": 0.1072, "step": 29874 }, { "epoch": 0.8715502654763989, "grad_norm": 0.7467170059467244, "learning_rate": 4.264881962105266e-07, "loss": 0.128, "step": 29875 }, { "epoch": 0.8715794387070425, "grad_norm": 1.0153642190803998, "learning_rate": 4.262972926674735e-07, "loss": 0.1076, "step": 29876 }, { "epoch": 0.871608611937686, "grad_norm": 0.7803989734333402, "learning_rate": 4.26106429957347e-07, "loss": 0.1203, "step": 29877 }, { "epoch": 0.8716377851683296, "grad_norm": 0.9265790676639721, "learning_rate": 4.2591560808185106e-07, "loss": 0.108, "step": 29878 }, { "epoch": 0.8716669583989731, "grad_norm": 0.8310663452206771, "learning_rate": 4.257248270426889e-07, "loss": 0.1056, "step": 29879 }, { "epoch": 0.8716961316296167, "grad_norm": 0.8347754515050189, "learning_rate": 4.255340868415625e-07, "loss": 0.1439, "step": 29880 }, { "epoch": 0.8717253048602602, "grad_norm": 0.7486925850183045, "learning_rate": 4.2534338748017655e-07, "loss": 0.1305, "step": 29881 }, { "epoch": 0.8717544780909038, "grad_norm": 0.9735315168196614, "learning_rate": 4.251527289602314e-07, "loss": 0.1217, "step": 29882 }, { "epoch": 0.8717836513215473, "grad_norm": 0.7607551056091284, "learning_rate": 4.2496211128343125e-07, "loss": 0.1111, "step": 29883 }, { "epoch": 0.8718128245521909, "grad_norm": 0.9457045281394639, "learning_rate": 4.24771534451478e-07, "loss": 0.1121, "step": 29884 }, { "epoch": 0.8718419977828344, "grad_norm": 0.8157892787241623, "learning_rate": 4.24580998466072e-07, "loss": 0.1103, "step": 29885 }, { "epoch": 0.871871171013478, "grad_norm": 0.8344157962970016, "learning_rate": 4.243905033289142e-07, "loss": 0.1019, "step": 29886 }, { "epoch": 0.8719003442441216, "grad_norm": 0.6242416127974619, "learning_rate": 4.2420004904170644e-07, "loss": 0.1195, "step": 29887 }, { "epoch": 0.8719295174747651, "grad_norm": 0.72553159326258, "learning_rate": 4.2400963560614736e-07, "loss": 0.11, "step": 29888 }, { "epoch": 0.8719586907054088, "grad_norm": 0.9495918137817224, "learning_rate": 4.238192630239385e-07, "loss": 0.1041, "step": 29889 }, { "epoch": 0.8719878639360523, "grad_norm": 0.8443970710203967, "learning_rate": 4.236289312967784e-07, "loss": 0.1021, "step": 29890 }, { "epoch": 0.8720170371666959, "grad_norm": 0.8687350574882492, "learning_rate": 4.234386404263674e-07, "loss": 0.1141, "step": 29891 }, { "epoch": 0.8720462103973394, "grad_norm": 1.1711484215960941, "learning_rate": 4.232483904144036e-07, "loss": 0.1398, "step": 29892 }, { "epoch": 0.872075383627983, "grad_norm": 0.8544625514073956, "learning_rate": 4.2305818126258445e-07, "loss": 0.1305, "step": 29893 }, { "epoch": 0.8721045568586265, "grad_norm": 0.8036563494629677, "learning_rate": 4.2286801297260983e-07, "loss": 0.1024, "step": 29894 }, { "epoch": 0.8721337300892701, "grad_norm": 0.7717621317173138, "learning_rate": 4.226778855461777e-07, "loss": 0.105, "step": 29895 }, { "epoch": 0.8721629033199136, "grad_norm": 0.8559787744048357, "learning_rate": 4.224877989849835e-07, "loss": 0.1056, "step": 29896 }, { "epoch": 0.8721920765505572, "grad_norm": 0.7974922375493215, "learning_rate": 4.2229775329072687e-07, "loss": 0.1164, "step": 29897 }, { "epoch": 0.8722212497812007, "grad_norm": 0.6807687891692692, "learning_rate": 4.221077484651026e-07, "loss": 0.1071, "step": 29898 }, { "epoch": 0.8722504230118443, "grad_norm": 0.9698070852506537, "learning_rate": 4.219177845098071e-07, "loss": 0.1102, "step": 29899 }, { "epoch": 0.8722795962424879, "grad_norm": 1.0138886604346813, "learning_rate": 4.2172786142653633e-07, "loss": 0.0935, "step": 29900 }, { "epoch": 0.8723087694731314, "grad_norm": 0.9475220467153204, "learning_rate": 4.215379792169877e-07, "loss": 0.1128, "step": 29901 }, { "epoch": 0.8723379427037751, "grad_norm": 0.8521653135230126, "learning_rate": 4.2134813788285436e-07, "loss": 0.129, "step": 29902 }, { "epoch": 0.8723671159344186, "grad_norm": 0.8184166906469769, "learning_rate": 4.2115833742583157e-07, "loss": 0.1151, "step": 29903 }, { "epoch": 0.8723962891650622, "grad_norm": 0.8239441281175344, "learning_rate": 4.2096857784761466e-07, "loss": 0.1221, "step": 29904 }, { "epoch": 0.8724254623957057, "grad_norm": 0.778795356122018, "learning_rate": 4.2077885914989733e-07, "loss": 0.1057, "step": 29905 }, { "epoch": 0.8724546356263493, "grad_norm": 0.8442644199060447, "learning_rate": 4.205891813343721e-07, "loss": 0.1132, "step": 29906 }, { "epoch": 0.8724838088569928, "grad_norm": 0.7711401672933712, "learning_rate": 4.203995444027337e-07, "loss": 0.1139, "step": 29907 }, { "epoch": 0.8725129820876364, "grad_norm": 0.7549938529351892, "learning_rate": 4.202099483566757e-07, "loss": 0.1347, "step": 29908 }, { "epoch": 0.8725421553182799, "grad_norm": 0.8560959875783646, "learning_rate": 4.200203931978897e-07, "loss": 0.1139, "step": 29909 }, { "epoch": 0.8725713285489235, "grad_norm": 0.8226736831077333, "learning_rate": 4.198308789280681e-07, "loss": 0.1178, "step": 29910 }, { "epoch": 0.872600501779567, "grad_norm": 0.8858024223853243, "learning_rate": 4.1964140554890343e-07, "loss": 0.1126, "step": 29911 }, { "epoch": 0.8726296750102106, "grad_norm": 0.7147194341951294, "learning_rate": 4.1945197306208606e-07, "loss": 0.1069, "step": 29912 }, { "epoch": 0.8726588482408542, "grad_norm": 4.827301150089571, "learning_rate": 4.19262581469308e-07, "loss": 0.1068, "step": 29913 }, { "epoch": 0.8726880214714977, "grad_norm": 0.9969858528344643, "learning_rate": 4.1907323077226114e-07, "loss": 0.1077, "step": 29914 }, { "epoch": 0.8727171947021413, "grad_norm": 0.9808522789212067, "learning_rate": 4.1888392097263473e-07, "loss": 0.1035, "step": 29915 }, { "epoch": 0.8727463679327849, "grad_norm": 0.8994254612147985, "learning_rate": 4.18694652072118e-07, "loss": 0.1273, "step": 29916 }, { "epoch": 0.8727755411634285, "grad_norm": 0.8959273599287071, "learning_rate": 4.185054240724029e-07, "loss": 0.1051, "step": 29917 }, { "epoch": 0.872804714394072, "grad_norm": 0.832198318156632, "learning_rate": 4.1831623697517697e-07, "loss": 0.1083, "step": 29918 }, { "epoch": 0.8728338876247156, "grad_norm": 0.7491026800389943, "learning_rate": 4.1812709078213056e-07, "loss": 0.1021, "step": 29919 }, { "epoch": 0.8728630608553591, "grad_norm": 0.7795319850787747, "learning_rate": 4.1793798549495115e-07, "loss": 0.1048, "step": 29920 }, { "epoch": 0.8728922340860027, "grad_norm": 0.8050425427365901, "learning_rate": 4.177489211153279e-07, "loss": 0.0944, "step": 29921 }, { "epoch": 0.8729214073166462, "grad_norm": 1.023494835948346, "learning_rate": 4.175598976449491e-07, "loss": 0.1113, "step": 29922 }, { "epoch": 0.8729505805472898, "grad_norm": 0.8657322492817636, "learning_rate": 4.1737091508550043e-07, "loss": 0.1197, "step": 29923 }, { "epoch": 0.8729797537779334, "grad_norm": 0.9398422308099172, "learning_rate": 4.1718197343867004e-07, "loss": 0.119, "step": 29924 }, { "epoch": 0.8730089270085769, "grad_norm": 0.9654851883590972, "learning_rate": 4.1699307270614607e-07, "loss": 0.099, "step": 29925 }, { "epoch": 0.8730381002392205, "grad_norm": 0.9190948702960542, "learning_rate": 4.168042128896127e-07, "loss": 0.1241, "step": 29926 }, { "epoch": 0.873067273469864, "grad_norm": 0.9845219121354531, "learning_rate": 4.1661539399075855e-07, "loss": 0.1198, "step": 29927 }, { "epoch": 0.8730964467005076, "grad_norm": 0.8927423186777527, "learning_rate": 4.164266160112679e-07, "loss": 0.1357, "step": 29928 }, { "epoch": 0.8731256199311512, "grad_norm": 0.6886350312245313, "learning_rate": 4.162378789528254e-07, "loss": 0.1073, "step": 29929 }, { "epoch": 0.8731547931617948, "grad_norm": 1.350166590262735, "learning_rate": 4.160491828171165e-07, "loss": 0.1191, "step": 29930 }, { "epoch": 0.8731839663924383, "grad_norm": 0.8723272090238917, "learning_rate": 4.1586052760582753e-07, "loss": 0.1056, "step": 29931 }, { "epoch": 0.8732131396230819, "grad_norm": 0.8579215096389476, "learning_rate": 4.156719133206416e-07, "loss": 0.1123, "step": 29932 }, { "epoch": 0.8732423128537254, "grad_norm": 0.9218092007763244, "learning_rate": 4.154833399632413e-07, "loss": 0.1153, "step": 29933 }, { "epoch": 0.873271486084369, "grad_norm": 0.7899232977371622, "learning_rate": 4.1529480753531193e-07, "loss": 0.1077, "step": 29934 }, { "epoch": 0.8733006593150126, "grad_norm": 0.8275254846762283, "learning_rate": 4.1510631603853655e-07, "loss": 0.1211, "step": 29935 }, { "epoch": 0.8733298325456561, "grad_norm": 1.005011908344243, "learning_rate": 4.149178654745961e-07, "loss": 0.1223, "step": 29936 }, { "epoch": 0.8733590057762997, "grad_norm": 0.7056828889262484, "learning_rate": 4.1472945584517476e-07, "loss": 0.1276, "step": 29937 }, { "epoch": 0.8733881790069432, "grad_norm": 1.3001967449194474, "learning_rate": 4.145410871519551e-07, "loss": 0.1158, "step": 29938 }, { "epoch": 0.8734173522375868, "grad_norm": 1.0241636410606516, "learning_rate": 4.143527593966179e-07, "loss": 0.0849, "step": 29939 }, { "epoch": 0.8734465254682303, "grad_norm": 0.9323677915350644, "learning_rate": 4.141644725808436e-07, "loss": 0.0996, "step": 29940 }, { "epoch": 0.8734756986988739, "grad_norm": 0.7319661576150167, "learning_rate": 4.1397622670631523e-07, "loss": 0.101, "step": 29941 }, { "epoch": 0.8735048719295174, "grad_norm": 0.7916314828259348, "learning_rate": 4.1378802177471144e-07, "loss": 0.107, "step": 29942 }, { "epoch": 0.8735340451601611, "grad_norm": 0.9512679043925271, "learning_rate": 4.135998577877132e-07, "loss": 0.1079, "step": 29943 }, { "epoch": 0.8735632183908046, "grad_norm": 1.0788826656880606, "learning_rate": 4.134117347470018e-07, "loss": 0.1201, "step": 29944 }, { "epoch": 0.8735923916214482, "grad_norm": 1.027673197157146, "learning_rate": 4.1322365265425545e-07, "loss": 0.1201, "step": 29945 }, { "epoch": 0.8736215648520917, "grad_norm": 1.0343293405049407, "learning_rate": 4.130356115111522e-07, "loss": 0.0916, "step": 29946 }, { "epoch": 0.8736507380827353, "grad_norm": 0.7508662886368539, "learning_rate": 4.12847611319373e-07, "loss": 0.1096, "step": 29947 }, { "epoch": 0.8736799113133789, "grad_norm": 0.8008703546242361, "learning_rate": 4.1265965208059423e-07, "loss": 0.1011, "step": 29948 }, { "epoch": 0.8737090845440224, "grad_norm": 0.749532749373626, "learning_rate": 4.124717337964962e-07, "loss": 0.1193, "step": 29949 }, { "epoch": 0.873738257774666, "grad_norm": 0.7525171418922129, "learning_rate": 4.122838564687542e-07, "loss": 0.11, "step": 29950 }, { "epoch": 0.8737674310053095, "grad_norm": 0.8707908207771639, "learning_rate": 4.120960200990481e-07, "loss": 0.1171, "step": 29951 }, { "epoch": 0.8737966042359531, "grad_norm": 0.7708651216906697, "learning_rate": 4.119082246890532e-07, "loss": 0.1204, "step": 29952 }, { "epoch": 0.8738257774665966, "grad_norm": 0.9071471813604898, "learning_rate": 4.117204702404459e-07, "loss": 0.0917, "step": 29953 }, { "epoch": 0.8738549506972402, "grad_norm": 1.0675546952025274, "learning_rate": 4.115327567549021e-07, "loss": 0.1175, "step": 29954 }, { "epoch": 0.8738841239278837, "grad_norm": 0.887214975252043, "learning_rate": 4.113450842340999e-07, "loss": 0.1088, "step": 29955 }, { "epoch": 0.8739132971585273, "grad_norm": 0.9111866400366525, "learning_rate": 4.11157452679713e-07, "loss": 0.096, "step": 29956 }, { "epoch": 0.873942470389171, "grad_norm": 0.697758272359337, "learning_rate": 4.1096986209341716e-07, "loss": 0.1141, "step": 29957 }, { "epoch": 0.8739716436198145, "grad_norm": 0.8424568829645216, "learning_rate": 4.107823124768867e-07, "loss": 0.105, "step": 29958 }, { "epoch": 0.874000816850458, "grad_norm": 0.7569289409986389, "learning_rate": 4.1059480383179586e-07, "loss": 0.1084, "step": 29959 }, { "epoch": 0.8740299900811016, "grad_norm": 1.0302477770546048, "learning_rate": 4.1040733615981876e-07, "loss": 0.1246, "step": 29960 }, { "epoch": 0.8740591633117452, "grad_norm": 1.1180763991332148, "learning_rate": 4.1021990946263025e-07, "loss": 0.1065, "step": 29961 }, { "epoch": 0.8740883365423887, "grad_norm": 0.7034423738035324, "learning_rate": 4.1003252374190284e-07, "loss": 0.0891, "step": 29962 }, { "epoch": 0.8741175097730323, "grad_norm": 0.8282797111144902, "learning_rate": 4.098451789993085e-07, "loss": 0.1065, "step": 29963 }, { "epoch": 0.8741466830036758, "grad_norm": 0.8533735373287727, "learning_rate": 4.0965787523652156e-07, "loss": 0.1067, "step": 29964 }, { "epoch": 0.8741758562343194, "grad_norm": 0.9289842007959089, "learning_rate": 4.094706124552128e-07, "loss": 0.1331, "step": 29965 }, { "epoch": 0.8742050294649629, "grad_norm": 0.7402285195808873, "learning_rate": 4.0928339065705424e-07, "loss": 0.1074, "step": 29966 }, { "epoch": 0.8742342026956065, "grad_norm": 0.898508439569312, "learning_rate": 4.0909620984371733e-07, "loss": 0.0995, "step": 29967 }, { "epoch": 0.87426337592625, "grad_norm": 0.7110177183257381, "learning_rate": 4.0890907001687463e-07, "loss": 0.0994, "step": 29968 }, { "epoch": 0.8742925491568936, "grad_norm": 0.9737372746845955, "learning_rate": 4.087219711781959e-07, "loss": 0.1059, "step": 29969 }, { "epoch": 0.8743217223875372, "grad_norm": 0.793370043176925, "learning_rate": 4.0853491332935034e-07, "loss": 0.1002, "step": 29970 }, { "epoch": 0.8743508956181808, "grad_norm": 0.9129891616227633, "learning_rate": 4.0834789647201003e-07, "loss": 0.1241, "step": 29971 }, { "epoch": 0.8743800688488244, "grad_norm": 0.9607523923505505, "learning_rate": 4.081609206078424e-07, "loss": 0.1325, "step": 29972 }, { "epoch": 0.8744092420794679, "grad_norm": 0.8913248019903911, "learning_rate": 4.079739857385179e-07, "loss": 0.1118, "step": 29973 }, { "epoch": 0.8744384153101115, "grad_norm": 0.8102677818546921, "learning_rate": 4.077870918657062e-07, "loss": 0.1165, "step": 29974 }, { "epoch": 0.874467588540755, "grad_norm": 0.9559230906975134, "learning_rate": 4.076002389910755e-07, "loss": 0.1191, "step": 29975 }, { "epoch": 0.8744967617713986, "grad_norm": 1.04818129475544, "learning_rate": 4.074134271162927e-07, "loss": 0.111, "step": 29976 }, { "epoch": 0.8745259350020421, "grad_norm": 0.8321931156481468, "learning_rate": 4.0722665624302717e-07, "loss": 0.1135, "step": 29977 }, { "epoch": 0.8745551082326857, "grad_norm": 0.9789598585791341, "learning_rate": 4.0703992637294466e-07, "loss": 0.1107, "step": 29978 }, { "epoch": 0.8745842814633292, "grad_norm": 0.8875249128288172, "learning_rate": 4.068532375077144e-07, "loss": 0.1119, "step": 29979 }, { "epoch": 0.8746134546939728, "grad_norm": 3.800407419199114, "learning_rate": 4.066665896490013e-07, "loss": 0.1162, "step": 29980 }, { "epoch": 0.8746426279246163, "grad_norm": 0.7398637198877579, "learning_rate": 4.0647998279847277e-07, "loss": 0.111, "step": 29981 }, { "epoch": 0.8746718011552599, "grad_norm": 0.9066124867651206, "learning_rate": 4.0629341695779423e-07, "loss": 0.092, "step": 29982 }, { "epoch": 0.8747009743859034, "grad_norm": 0.7808211060457666, "learning_rate": 4.06106892128631e-07, "loss": 0.1071, "step": 29983 }, { "epoch": 0.8747301476165471, "grad_norm": 0.8295891986742827, "learning_rate": 4.059204083126489e-07, "loss": 0.1066, "step": 29984 }, { "epoch": 0.8747593208471907, "grad_norm": 0.980293509020823, "learning_rate": 4.0573396551151335e-07, "loss": 0.094, "step": 29985 }, { "epoch": 0.8747884940778342, "grad_norm": 0.7797549997944827, "learning_rate": 4.0554756372688744e-07, "loss": 0.1293, "step": 29986 }, { "epoch": 0.8748176673084778, "grad_norm": 0.6828497120825575, "learning_rate": 4.05361202960437e-07, "loss": 0.119, "step": 29987 }, { "epoch": 0.8748468405391213, "grad_norm": 0.7780385789923829, "learning_rate": 4.051748832138247e-07, "loss": 0.1278, "step": 29988 }, { "epoch": 0.8748760137697649, "grad_norm": 0.8115155380145198, "learning_rate": 4.049886044887136e-07, "loss": 0.1093, "step": 29989 }, { "epoch": 0.8749051870004084, "grad_norm": 0.7910249846198542, "learning_rate": 4.0480236678676674e-07, "loss": 0.1009, "step": 29990 }, { "epoch": 0.874934360231052, "grad_norm": 0.9517769185784213, "learning_rate": 4.0461617010964906e-07, "loss": 0.1178, "step": 29991 }, { "epoch": 0.8749635334616955, "grad_norm": 1.1443227130278366, "learning_rate": 4.0443001445902073e-07, "loss": 0.1106, "step": 29992 }, { "epoch": 0.8749927066923391, "grad_norm": 0.9945797268553265, "learning_rate": 4.042438998365433e-07, "loss": 0.1464, "step": 29993 }, { "epoch": 0.8750218799229826, "grad_norm": 0.7847706764904184, "learning_rate": 4.040578262438799e-07, "loss": 0.1146, "step": 29994 }, { "epoch": 0.8750510531536262, "grad_norm": 0.8295375998411549, "learning_rate": 4.0387179368269137e-07, "loss": 0.1081, "step": 29995 }, { "epoch": 0.8750802263842697, "grad_norm": 0.8514728598688256, "learning_rate": 4.0368580215463746e-07, "loss": 0.1125, "step": 29996 }, { "epoch": 0.8751093996149134, "grad_norm": 0.857317926644112, "learning_rate": 4.034998516613797e-07, "loss": 0.1265, "step": 29997 }, { "epoch": 0.875138572845557, "grad_norm": 1.005063040112087, "learning_rate": 4.033139422045784e-07, "loss": 0.1122, "step": 29998 }, { "epoch": 0.8751677460762005, "grad_norm": 0.783048773544439, "learning_rate": 4.0312807378589335e-07, "loss": 0.0949, "step": 29999 }, { "epoch": 0.8751969193068441, "grad_norm": 0.7896354250843888, "learning_rate": 4.029422464069821e-07, "loss": 0.0973, "step": 30000 }, { "epoch": 0.8752260925374876, "grad_norm": 0.6961728329544881, "learning_rate": 4.027564600695055e-07, "loss": 0.115, "step": 30001 }, { "epoch": 0.8752552657681312, "grad_norm": 0.8803479317906855, "learning_rate": 4.025707147751223e-07, "loss": 0.1123, "step": 30002 }, { "epoch": 0.8752844389987747, "grad_norm": 0.8960196147789281, "learning_rate": 4.023850105254895e-07, "loss": 0.1079, "step": 30003 }, { "epoch": 0.8753136122294183, "grad_norm": 0.7061949648126875, "learning_rate": 4.021993473222668e-07, "loss": 0.0946, "step": 30004 }, { "epoch": 0.8753427854600618, "grad_norm": 0.7050193902636842, "learning_rate": 4.020137251671108e-07, "loss": 0.0999, "step": 30005 }, { "epoch": 0.8753719586907054, "grad_norm": 1.057616937879093, "learning_rate": 4.018281440616778e-07, "loss": 0.1046, "step": 30006 }, { "epoch": 0.8754011319213489, "grad_norm": 1.6158759986205025, "learning_rate": 4.016426040076249e-07, "loss": 0.0961, "step": 30007 }, { "epoch": 0.8754303051519925, "grad_norm": 0.6381332574210345, "learning_rate": 4.0145710500661075e-07, "loss": 0.1005, "step": 30008 }, { "epoch": 0.875459478382636, "grad_norm": 0.7287050047502998, "learning_rate": 4.012716470602895e-07, "loss": 0.1222, "step": 30009 }, { "epoch": 0.8754886516132796, "grad_norm": 0.7602515936667066, "learning_rate": 4.0108623017031613e-07, "loss": 0.1154, "step": 30010 }, { "epoch": 0.8755178248439233, "grad_norm": 0.7219171099542323, "learning_rate": 4.00900854338348e-07, "loss": 0.0995, "step": 30011 }, { "epoch": 0.8755469980745668, "grad_norm": 0.7480555337142266, "learning_rate": 4.0071551956603893e-07, "loss": 0.103, "step": 30012 }, { "epoch": 0.8755761713052104, "grad_norm": 0.7282980125470874, "learning_rate": 4.005302258550425e-07, "loss": 0.1128, "step": 30013 }, { "epoch": 0.8756053445358539, "grad_norm": 0.8296426808167071, "learning_rate": 4.0034497320701584e-07, "loss": 0.1136, "step": 30014 }, { "epoch": 0.8756345177664975, "grad_norm": 0.7558778919722866, "learning_rate": 4.001597616236108e-07, "loss": 0.1005, "step": 30015 }, { "epoch": 0.875663690997141, "grad_norm": 1.0726349994338809, "learning_rate": 3.999745911064812e-07, "loss": 0.1275, "step": 30016 }, { "epoch": 0.8756928642277846, "grad_norm": 0.8753592511955679, "learning_rate": 3.997894616572806e-07, "loss": 0.1007, "step": 30017 }, { "epoch": 0.8757220374584281, "grad_norm": 0.8331388970453892, "learning_rate": 3.996043732776617e-07, "loss": 0.0973, "step": 30018 }, { "epoch": 0.8757512106890717, "grad_norm": 0.7109433945251317, "learning_rate": 3.994193259692758e-07, "loss": 0.1228, "step": 30019 }, { "epoch": 0.8757803839197152, "grad_norm": 0.8393881516507595, "learning_rate": 3.992343197337761e-07, "loss": 0.1025, "step": 30020 }, { "epoch": 0.8758095571503588, "grad_norm": 0.830369322465344, "learning_rate": 3.9904935457281524e-07, "loss": 0.1054, "step": 30021 }, { "epoch": 0.8758387303810024, "grad_norm": 0.7929558092667852, "learning_rate": 3.988644304880429e-07, "loss": 0.1091, "step": 30022 }, { "epoch": 0.8758679036116459, "grad_norm": 0.8207451692628104, "learning_rate": 3.9867954748111e-07, "loss": 0.0978, "step": 30023 }, { "epoch": 0.8758970768422896, "grad_norm": 0.8455388434418435, "learning_rate": 3.984947055536681e-07, "loss": 0.1114, "step": 30024 }, { "epoch": 0.8759262500729331, "grad_norm": 1.1177513257661293, "learning_rate": 3.9830990470736684e-07, "loss": 0.0862, "step": 30025 }, { "epoch": 0.8759554233035767, "grad_norm": 0.9104098906899314, "learning_rate": 3.981251449438567e-07, "loss": 0.1016, "step": 30026 }, { "epoch": 0.8759845965342202, "grad_norm": 0.766933255601191, "learning_rate": 3.9794042626478566e-07, "loss": 0.0957, "step": 30027 }, { "epoch": 0.8760137697648638, "grad_norm": 0.8170312088935947, "learning_rate": 3.9775574867180477e-07, "loss": 0.1149, "step": 30028 }, { "epoch": 0.8760429429955073, "grad_norm": 0.73466149004003, "learning_rate": 3.975711121665621e-07, "loss": 0.1059, "step": 30029 }, { "epoch": 0.8760721162261509, "grad_norm": 0.8350009194852385, "learning_rate": 3.973865167507052e-07, "loss": 0.1139, "step": 30030 }, { "epoch": 0.8761012894567944, "grad_norm": 0.8192370000669458, "learning_rate": 3.9720196242588214e-07, "loss": 0.104, "step": 30031 }, { "epoch": 0.876130462687438, "grad_norm": 0.9749452790959781, "learning_rate": 3.9701744919374285e-07, "loss": 0.108, "step": 30032 }, { "epoch": 0.8761596359180815, "grad_norm": 0.8738741975347576, "learning_rate": 3.968329770559315e-07, "loss": 0.1195, "step": 30033 }, { "epoch": 0.8761888091487251, "grad_norm": 0.8300108519181769, "learning_rate": 3.966485460140973e-07, "loss": 0.102, "step": 30034 }, { "epoch": 0.8762179823793687, "grad_norm": 1.1384501295776965, "learning_rate": 3.964641560698862e-07, "loss": 0.1216, "step": 30035 }, { "epoch": 0.8762471556100122, "grad_norm": 0.8709744781499231, "learning_rate": 3.962798072249435e-07, "loss": 0.134, "step": 30036 }, { "epoch": 0.8762763288406558, "grad_norm": 0.9695599752029358, "learning_rate": 3.9609549948091517e-07, "loss": 0.0907, "step": 30037 }, { "epoch": 0.8763055020712994, "grad_norm": 0.7941932202922776, "learning_rate": 3.9591123283944875e-07, "loss": 0.1069, "step": 30038 }, { "epoch": 0.876334675301943, "grad_norm": 0.8073176101841258, "learning_rate": 3.9572700730218685e-07, "loss": 0.105, "step": 30039 }, { "epoch": 0.8763638485325865, "grad_norm": 0.7648654751563687, "learning_rate": 3.955428228707747e-07, "loss": 0.0891, "step": 30040 }, { "epoch": 0.8763930217632301, "grad_norm": 0.9619900489099411, "learning_rate": 3.953586795468584e-07, "loss": 0.1166, "step": 30041 }, { "epoch": 0.8764221949938736, "grad_norm": 0.9208818118250522, "learning_rate": 3.9517457733207973e-07, "loss": 0.1073, "step": 30042 }, { "epoch": 0.8764513682245172, "grad_norm": 1.0076492847406802, "learning_rate": 3.9499051622808203e-07, "loss": 0.106, "step": 30043 }, { "epoch": 0.8764805414551607, "grad_norm": 0.6455984377638654, "learning_rate": 3.948064962365111e-07, "loss": 0.1056, "step": 30044 }, { "epoch": 0.8765097146858043, "grad_norm": 0.7674607736520921, "learning_rate": 3.9462251735900845e-07, "loss": 0.1208, "step": 30045 }, { "epoch": 0.8765388879164479, "grad_norm": 0.9589067616641717, "learning_rate": 3.944385795972161e-07, "loss": 0.1063, "step": 30046 }, { "epoch": 0.8765680611470914, "grad_norm": 1.0815651788480012, "learning_rate": 3.9425468295277714e-07, "loss": 0.1416, "step": 30047 }, { "epoch": 0.876597234377735, "grad_norm": 0.9132409396108546, "learning_rate": 3.9407082742733306e-07, "loss": 0.1517, "step": 30048 }, { "epoch": 0.8766264076083785, "grad_norm": 0.9288953014955593, "learning_rate": 3.938870130225242e-07, "loss": 0.1227, "step": 30049 }, { "epoch": 0.8766555808390221, "grad_norm": 0.8664721978661181, "learning_rate": 3.937032397399926e-07, "loss": 0.0873, "step": 30050 }, { "epoch": 0.8766847540696657, "grad_norm": 0.849766620273122, "learning_rate": 3.935195075813797e-07, "loss": 0.1311, "step": 30051 }, { "epoch": 0.8767139273003093, "grad_norm": 1.6430253113803783, "learning_rate": 3.9333581654832473e-07, "loss": 0.1059, "step": 30052 }, { "epoch": 0.8767431005309528, "grad_norm": 0.927702926764876, "learning_rate": 3.931521666424676e-07, "loss": 0.1006, "step": 30053 }, { "epoch": 0.8767722737615964, "grad_norm": 0.8112553638058957, "learning_rate": 3.929685578654485e-07, "loss": 0.1072, "step": 30054 }, { "epoch": 0.8768014469922399, "grad_norm": 0.8092630212378464, "learning_rate": 3.927849902189057e-07, "loss": 0.1063, "step": 30055 }, { "epoch": 0.8768306202228835, "grad_norm": 1.0379929437241544, "learning_rate": 3.926014637044795e-07, "loss": 0.125, "step": 30056 }, { "epoch": 0.876859793453527, "grad_norm": 0.8573863127018945, "learning_rate": 3.9241797832380634e-07, "loss": 0.103, "step": 30057 }, { "epoch": 0.8768889666841706, "grad_norm": 0.7165389145789426, "learning_rate": 3.922345340785266e-07, "loss": 0.1025, "step": 30058 }, { "epoch": 0.8769181399148142, "grad_norm": 0.9053114762016254, "learning_rate": 3.9205113097027734e-07, "loss": 0.1301, "step": 30059 }, { "epoch": 0.8769473131454577, "grad_norm": 1.5393050782075324, "learning_rate": 3.9186776900069444e-07, "loss": 0.0985, "step": 30060 }, { "epoch": 0.8769764863761013, "grad_norm": 0.7210706822800717, "learning_rate": 3.9168444817141603e-07, "loss": 0.1217, "step": 30061 }, { "epoch": 0.8770056596067448, "grad_norm": 0.8042049388030086, "learning_rate": 3.9150116848407973e-07, "loss": 0.1152, "step": 30062 }, { "epoch": 0.8770348328373884, "grad_norm": 0.9580766186000356, "learning_rate": 3.913179299403203e-07, "loss": 0.1204, "step": 30063 }, { "epoch": 0.8770640060680319, "grad_norm": 0.8548960706534561, "learning_rate": 3.911347325417747e-07, "loss": 0.1178, "step": 30064 }, { "epoch": 0.8770931792986756, "grad_norm": 0.7249366520179904, "learning_rate": 3.9095157629007786e-07, "loss": 0.1217, "step": 30065 }, { "epoch": 0.8771223525293191, "grad_norm": 0.7628181865832407, "learning_rate": 3.907684611868645e-07, "loss": 0.1001, "step": 30066 }, { "epoch": 0.8771515257599627, "grad_norm": 0.9237902685551095, "learning_rate": 3.905853872337695e-07, "loss": 0.1089, "step": 30067 }, { "epoch": 0.8771806989906062, "grad_norm": 0.7328612675413081, "learning_rate": 3.9040235443242924e-07, "loss": 0.1103, "step": 30068 }, { "epoch": 0.8772098722212498, "grad_norm": 0.7110917749059704, "learning_rate": 3.9021936278447636e-07, "loss": 0.1169, "step": 30069 }, { "epoch": 0.8772390454518934, "grad_norm": 0.9421898718187118, "learning_rate": 3.900364122915434e-07, "loss": 0.0999, "step": 30070 }, { "epoch": 0.8772682186825369, "grad_norm": 0.7690975519919347, "learning_rate": 3.898535029552658e-07, "loss": 0.135, "step": 30071 }, { "epoch": 0.8772973919131805, "grad_norm": 0.999268463003431, "learning_rate": 3.896706347772755e-07, "loss": 0.1127, "step": 30072 }, { "epoch": 0.877326565143824, "grad_norm": 1.0454217622245405, "learning_rate": 3.89487807759204e-07, "loss": 0.124, "step": 30073 }, { "epoch": 0.8773557383744676, "grad_norm": 1.1237790440347422, "learning_rate": 3.8930502190268616e-07, "loss": 0.1349, "step": 30074 }, { "epoch": 0.8773849116051111, "grad_norm": 0.8953995022199389, "learning_rate": 3.891222772093523e-07, "loss": 0.1143, "step": 30075 }, { "epoch": 0.8774140848357547, "grad_norm": 0.638862969028371, "learning_rate": 3.8893957368083325e-07, "loss": 0.1054, "step": 30076 }, { "epoch": 0.8774432580663982, "grad_norm": 0.6438116980388293, "learning_rate": 3.887569113187617e-07, "loss": 0.1073, "step": 30077 }, { "epoch": 0.8774724312970419, "grad_norm": 0.6530132009425201, "learning_rate": 3.885742901247674e-07, "loss": 0.102, "step": 30078 }, { "epoch": 0.8775016045276854, "grad_norm": 0.7356138233567865, "learning_rate": 3.8839171010048083e-07, "loss": 0.1374, "step": 30079 }, { "epoch": 0.877530777758329, "grad_norm": 0.8455874163711665, "learning_rate": 3.8820917124753163e-07, "loss": 0.1328, "step": 30080 }, { "epoch": 0.8775599509889725, "grad_norm": 0.9487013068431305, "learning_rate": 3.8802667356755084e-07, "loss": 0.0962, "step": 30081 }, { "epoch": 0.8775891242196161, "grad_norm": 0.7003432501212324, "learning_rate": 3.8784421706216714e-07, "loss": 0.0828, "step": 30082 }, { "epoch": 0.8776182974502597, "grad_norm": 0.6712415974749824, "learning_rate": 3.876618017330086e-07, "loss": 0.1132, "step": 30083 }, { "epoch": 0.8776474706809032, "grad_norm": 0.8493576882662304, "learning_rate": 3.874794275817051e-07, "loss": 0.0873, "step": 30084 }, { "epoch": 0.8776766439115468, "grad_norm": 0.8328528734137715, "learning_rate": 3.8729709460988365e-07, "loss": 0.1332, "step": 30085 }, { "epoch": 0.8777058171421903, "grad_norm": 0.8530582869124106, "learning_rate": 3.871148028191729e-07, "loss": 0.118, "step": 30086 }, { "epoch": 0.8777349903728339, "grad_norm": 0.9669593474332003, "learning_rate": 3.869325522111994e-07, "loss": 0.119, "step": 30087 }, { "epoch": 0.8777641636034774, "grad_norm": 0.9538297900146231, "learning_rate": 3.8675034278759184e-07, "loss": 0.1051, "step": 30088 }, { "epoch": 0.877793336834121, "grad_norm": 0.7721806789673515, "learning_rate": 3.865681745499761e-07, "loss": 0.1056, "step": 30089 }, { "epoch": 0.8778225100647645, "grad_norm": 0.5860090225534232, "learning_rate": 3.86386047499977e-07, "loss": 0.1165, "step": 30090 }, { "epoch": 0.8778516832954081, "grad_norm": 0.768090832424776, "learning_rate": 3.862039616392221e-07, "loss": 0.1027, "step": 30091 }, { "epoch": 0.8778808565260517, "grad_norm": 0.9645450329038163, "learning_rate": 3.86021916969338e-07, "loss": 0.1128, "step": 30092 }, { "epoch": 0.8779100297566953, "grad_norm": 1.041949592417441, "learning_rate": 3.858399134919472e-07, "loss": 0.1298, "step": 30093 }, { "epoch": 0.8779392029873389, "grad_norm": 0.8590835881084311, "learning_rate": 3.856579512086778e-07, "loss": 0.1187, "step": 30094 }, { "epoch": 0.8779683762179824, "grad_norm": 0.8951618817724409, "learning_rate": 3.854760301211524e-07, "loss": 0.1185, "step": 30095 }, { "epoch": 0.877997549448626, "grad_norm": 0.7344761342477992, "learning_rate": 3.8529415023099425e-07, "loss": 0.1026, "step": 30096 }, { "epoch": 0.8780267226792695, "grad_norm": 0.7742693369014273, "learning_rate": 3.8511231153982866e-07, "loss": 0.1022, "step": 30097 }, { "epoch": 0.8780558959099131, "grad_norm": 1.4251768437248684, "learning_rate": 3.8493051404927985e-07, "loss": 0.1222, "step": 30098 }, { "epoch": 0.8780850691405566, "grad_norm": 0.85558468117075, "learning_rate": 3.847487577609693e-07, "loss": 0.1207, "step": 30099 }, { "epoch": 0.8781142423712002, "grad_norm": 0.7702977387294887, "learning_rate": 3.845670426765191e-07, "loss": 0.1001, "step": 30100 }, { "epoch": 0.8781434156018437, "grad_norm": 0.6038297114219298, "learning_rate": 3.843853687975535e-07, "loss": 0.1134, "step": 30101 }, { "epoch": 0.8781725888324873, "grad_norm": 0.7674689518157997, "learning_rate": 3.842037361256934e-07, "loss": 0.1063, "step": 30102 }, { "epoch": 0.8782017620631308, "grad_norm": 0.8539562901700051, "learning_rate": 3.8402214466255914e-07, "loss": 0.1057, "step": 30103 }, { "epoch": 0.8782309352937744, "grad_norm": 0.7256561522628749, "learning_rate": 3.838405944097745e-07, "loss": 0.1229, "step": 30104 }, { "epoch": 0.878260108524418, "grad_norm": 0.7276274960672796, "learning_rate": 3.8365908536895924e-07, "loss": 0.1316, "step": 30105 }, { "epoch": 0.8782892817550616, "grad_norm": 0.9949774348404163, "learning_rate": 3.834776175417332e-07, "loss": 0.1199, "step": 30106 }, { "epoch": 0.8783184549857052, "grad_norm": 0.8770199532814935, "learning_rate": 3.832961909297173e-07, "loss": 0.1152, "step": 30107 }, { "epoch": 0.8783476282163487, "grad_norm": 0.8766923169155485, "learning_rate": 3.8311480553453127e-07, "loss": 0.0978, "step": 30108 }, { "epoch": 0.8783768014469923, "grad_norm": 0.8452586878530937, "learning_rate": 3.8293346135779287e-07, "loss": 0.1069, "step": 30109 }, { "epoch": 0.8784059746776358, "grad_norm": 0.7680703348608079, "learning_rate": 3.827521584011229e-07, "loss": 0.1005, "step": 30110 }, { "epoch": 0.8784351479082794, "grad_norm": 0.9003956041630796, "learning_rate": 3.8257089666613957e-07, "loss": 0.0992, "step": 30111 }, { "epoch": 0.8784643211389229, "grad_norm": 0.9955260840028338, "learning_rate": 3.8238967615446155e-07, "loss": 0.1114, "step": 30112 }, { "epoch": 0.8784934943695665, "grad_norm": 0.8329995820013536, "learning_rate": 3.8220849686770535e-07, "loss": 0.1021, "step": 30113 }, { "epoch": 0.87852266760021, "grad_norm": 0.887962467161193, "learning_rate": 3.820273588074896e-07, "loss": 0.1167, "step": 30114 }, { "epoch": 0.8785518408308536, "grad_norm": 0.8019271561048367, "learning_rate": 3.8184626197543095e-07, "loss": 0.114, "step": 30115 }, { "epoch": 0.8785810140614971, "grad_norm": 0.8154637267247528, "learning_rate": 3.8166520637314684e-07, "loss": 0.1241, "step": 30116 }, { "epoch": 0.8786101872921407, "grad_norm": 1.0624333597161308, "learning_rate": 3.8148419200225275e-07, "loss": 0.1192, "step": 30117 }, { "epoch": 0.8786393605227842, "grad_norm": 0.7806937411066809, "learning_rate": 3.813032188643662e-07, "loss": 0.1105, "step": 30118 }, { "epoch": 0.8786685337534279, "grad_norm": 0.7367626320148644, "learning_rate": 3.8112228696110144e-07, "loss": 0.1245, "step": 30119 }, { "epoch": 0.8786977069840715, "grad_norm": 0.8753164490793615, "learning_rate": 3.809413962940739e-07, "loss": 0.1381, "step": 30120 }, { "epoch": 0.878726880214715, "grad_norm": 0.7303670589121473, "learning_rate": 3.8076054686489893e-07, "loss": 0.0997, "step": 30121 }, { "epoch": 0.8787560534453586, "grad_norm": 0.7192568392563339, "learning_rate": 3.805797386751914e-07, "loss": 0.1191, "step": 30122 }, { "epoch": 0.8787852266760021, "grad_norm": 0.8562402704481382, "learning_rate": 3.803989717265649e-07, "loss": 0.1306, "step": 30123 }, { "epoch": 0.8788143999066457, "grad_norm": 0.9609287710374402, "learning_rate": 3.802182460206344e-07, "loss": 0.1022, "step": 30124 }, { "epoch": 0.8788435731372892, "grad_norm": 0.9550523474148769, "learning_rate": 3.800375615590124e-07, "loss": 0.1078, "step": 30125 }, { "epoch": 0.8788727463679328, "grad_norm": 0.8383321912229541, "learning_rate": 3.798569183433115e-07, "loss": 0.1209, "step": 30126 }, { "epoch": 0.8789019195985763, "grad_norm": 0.7164831033059212, "learning_rate": 3.796763163751449e-07, "loss": 0.0934, "step": 30127 }, { "epoch": 0.8789310928292199, "grad_norm": 0.9388970028053739, "learning_rate": 3.7949575565612626e-07, "loss": 0.0829, "step": 30128 }, { "epoch": 0.8789602660598634, "grad_norm": 0.8746108903663825, "learning_rate": 3.7931523618786605e-07, "loss": 0.1208, "step": 30129 }, { "epoch": 0.878989439290507, "grad_norm": 0.9518464912522664, "learning_rate": 3.7913475797197616e-07, "loss": 0.1028, "step": 30130 }, { "epoch": 0.8790186125211505, "grad_norm": 0.8934136366208427, "learning_rate": 3.789543210100688e-07, "loss": 0.1041, "step": 30131 }, { "epoch": 0.8790477857517942, "grad_norm": 1.497982141631577, "learning_rate": 3.787739253037537e-07, "loss": 0.098, "step": 30132 }, { "epoch": 0.8790769589824378, "grad_norm": 0.8273318173618878, "learning_rate": 3.785935708546401e-07, "loss": 0.1017, "step": 30133 }, { "epoch": 0.8791061322130813, "grad_norm": 0.7497054741907678, "learning_rate": 3.7841325766434236e-07, "loss": 0.095, "step": 30134 }, { "epoch": 0.8791353054437249, "grad_norm": 1.0387847957457483, "learning_rate": 3.7823298573446687e-07, "loss": 0.0966, "step": 30135 }, { "epoch": 0.8791644786743684, "grad_norm": 0.8097839007625008, "learning_rate": 3.7805275506662355e-07, "loss": 0.1179, "step": 30136 }, { "epoch": 0.879193651905012, "grad_norm": 0.8732204206518616, "learning_rate": 3.778725656624227e-07, "loss": 0.1078, "step": 30137 }, { "epoch": 0.8792228251356555, "grad_norm": 0.8567947949250957, "learning_rate": 3.776924175234725e-07, "loss": 0.121, "step": 30138 }, { "epoch": 0.8792519983662991, "grad_norm": 1.1688406674700176, "learning_rate": 3.775123106513795e-07, "loss": 0.0971, "step": 30139 }, { "epoch": 0.8792811715969426, "grad_norm": 0.8019344356493667, "learning_rate": 3.7733224504775344e-07, "loss": 0.1092, "step": 30140 }, { "epoch": 0.8793103448275862, "grad_norm": 0.9010931894003198, "learning_rate": 3.7715222071420253e-07, "loss": 0.1175, "step": 30141 }, { "epoch": 0.8793395180582297, "grad_norm": 0.9679309741482341, "learning_rate": 3.769722376523327e-07, "loss": 0.1171, "step": 30142 }, { "epoch": 0.8793686912888733, "grad_norm": 0.9531648527760062, "learning_rate": 3.76792295863751e-07, "loss": 0.1175, "step": 30143 }, { "epoch": 0.8793978645195168, "grad_norm": 0.9525118817732205, "learning_rate": 3.766123953500639e-07, "loss": 0.1195, "step": 30144 }, { "epoch": 0.8794270377501604, "grad_norm": 0.8026345896916173, "learning_rate": 3.7643253611287734e-07, "loss": 0.0972, "step": 30145 }, { "epoch": 0.8794562109808041, "grad_norm": 1.0289423310726562, "learning_rate": 3.762527181537984e-07, "loss": 0.0985, "step": 30146 }, { "epoch": 0.8794853842114476, "grad_norm": 0.8479740417898695, "learning_rate": 3.760729414744302e-07, "loss": 0.1142, "step": 30147 }, { "epoch": 0.8795145574420912, "grad_norm": 0.8876567653180892, "learning_rate": 3.758932060763798e-07, "loss": 0.1036, "step": 30148 }, { "epoch": 0.8795437306727347, "grad_norm": 0.9156554228765477, "learning_rate": 3.757135119612515e-07, "loss": 0.1044, "step": 30149 }, { "epoch": 0.8795729039033783, "grad_norm": 0.786024507820376, "learning_rate": 3.755338591306473e-07, "loss": 0.1046, "step": 30150 }, { "epoch": 0.8796020771340218, "grad_norm": 0.7131721520840634, "learning_rate": 3.753542475861738e-07, "loss": 0.136, "step": 30151 }, { "epoch": 0.8796312503646654, "grad_norm": 0.6966960885413169, "learning_rate": 3.75174677329434e-07, "loss": 0.0937, "step": 30152 }, { "epoch": 0.8796604235953089, "grad_norm": 1.554708338985056, "learning_rate": 3.7499514836202954e-07, "loss": 0.1018, "step": 30153 }, { "epoch": 0.8796895968259525, "grad_norm": 0.884565350692909, "learning_rate": 3.7481566068556575e-07, "loss": 0.0866, "step": 30154 }, { "epoch": 0.879718770056596, "grad_norm": 0.8087524084328968, "learning_rate": 3.74636214301643e-07, "loss": 0.1291, "step": 30155 }, { "epoch": 0.8797479432872396, "grad_norm": 0.8313123893698217, "learning_rate": 3.744568092118633e-07, "loss": 0.094, "step": 30156 }, { "epoch": 0.8797771165178832, "grad_norm": 0.8654617412353212, "learning_rate": 3.742774454178294e-07, "loss": 0.1, "step": 30157 }, { "epoch": 0.8798062897485267, "grad_norm": 0.7752968563793792, "learning_rate": 3.740981229211427e-07, "loss": 0.1252, "step": 30158 }, { "epoch": 0.8798354629791704, "grad_norm": 0.8847875580572274, "learning_rate": 3.739188417234041e-07, "loss": 0.1138, "step": 30159 }, { "epoch": 0.8798646362098139, "grad_norm": 0.9552669953758516, "learning_rate": 3.737396018262124e-07, "loss": 0.1187, "step": 30160 }, { "epoch": 0.8798938094404575, "grad_norm": 0.8074904802698792, "learning_rate": 3.7356040323117016e-07, "loss": 0.1026, "step": 30161 }, { "epoch": 0.879922982671101, "grad_norm": 0.8214451204027293, "learning_rate": 3.733812459398761e-07, "loss": 0.1191, "step": 30162 }, { "epoch": 0.8799521559017446, "grad_norm": 0.9769523954735072, "learning_rate": 3.73202129953929e-07, "loss": 0.1034, "step": 30163 }, { "epoch": 0.8799813291323881, "grad_norm": 0.7063403750544465, "learning_rate": 3.730230552749292e-07, "loss": 0.0912, "step": 30164 }, { "epoch": 0.8800105023630317, "grad_norm": 0.6989168611891687, "learning_rate": 3.7284402190447546e-07, "loss": 0.135, "step": 30165 }, { "epoch": 0.8800396755936752, "grad_norm": 0.8157231699521588, "learning_rate": 3.7266502984416477e-07, "loss": 0.1035, "step": 30166 }, { "epoch": 0.8800688488243188, "grad_norm": 1.1671000651664127, "learning_rate": 3.7248607909559697e-07, "loss": 0.1036, "step": 30167 }, { "epoch": 0.8800980220549623, "grad_norm": 0.7715153986115448, "learning_rate": 3.7230716966036915e-07, "loss": 0.0905, "step": 30168 }, { "epoch": 0.8801271952856059, "grad_norm": 0.876595675384357, "learning_rate": 3.7212830154007675e-07, "loss": 0.1093, "step": 30169 }, { "epoch": 0.8801563685162495, "grad_norm": 0.953601495349331, "learning_rate": 3.7194947473631837e-07, "loss": 0.1222, "step": 30170 }, { "epoch": 0.880185541746893, "grad_norm": 0.862498674999839, "learning_rate": 3.7177068925069116e-07, "loss": 0.1162, "step": 30171 }, { "epoch": 0.8802147149775366, "grad_norm": 0.8836441089597472, "learning_rate": 3.7159194508479046e-07, "loss": 0.1326, "step": 30172 }, { "epoch": 0.8802438882081802, "grad_norm": 0.8572154163014452, "learning_rate": 3.7141324224021116e-07, "loss": 0.1065, "step": 30173 }, { "epoch": 0.8802730614388238, "grad_norm": 1.014929219004118, "learning_rate": 3.7123458071855024e-07, "loss": 0.1051, "step": 30174 }, { "epoch": 0.8803022346694673, "grad_norm": 0.8122855596044031, "learning_rate": 3.7105596052140145e-07, "loss": 0.1376, "step": 30175 }, { "epoch": 0.8803314079001109, "grad_norm": 0.7589586008961208, "learning_rate": 3.708773816503608e-07, "loss": 0.1035, "step": 30176 }, { "epoch": 0.8803605811307544, "grad_norm": 0.7209743075329469, "learning_rate": 3.706988441070203e-07, "loss": 0.1359, "step": 30177 }, { "epoch": 0.880389754361398, "grad_norm": 0.7143563623297943, "learning_rate": 3.7052034789297697e-07, "loss": 0.0876, "step": 30178 }, { "epoch": 0.8804189275920415, "grad_norm": 1.0480815545668183, "learning_rate": 3.7034189300982294e-07, "loss": 0.0997, "step": 30179 }, { "epoch": 0.8804481008226851, "grad_norm": 0.7673726984716717, "learning_rate": 3.7016347945914966e-07, "loss": 0.117, "step": 30180 }, { "epoch": 0.8804772740533287, "grad_norm": 0.6952595807552829, "learning_rate": 3.699851072425525e-07, "loss": 0.1018, "step": 30181 }, { "epoch": 0.8805064472839722, "grad_norm": 0.8821413448512636, "learning_rate": 3.698067763616231e-07, "loss": 0.1159, "step": 30182 }, { "epoch": 0.8805356205146158, "grad_norm": 0.9795421892790883, "learning_rate": 3.696284868179534e-07, "loss": 0.1087, "step": 30183 }, { "epoch": 0.8805647937452593, "grad_norm": 0.6566982558593002, "learning_rate": 3.6945023861313547e-07, "loss": 0.1128, "step": 30184 }, { "epoch": 0.8805939669759029, "grad_norm": 0.8421261691287741, "learning_rate": 3.6927203174876027e-07, "loss": 0.1046, "step": 30185 }, { "epoch": 0.8806231402065464, "grad_norm": 0.8631026093023745, "learning_rate": 3.6909386622641876e-07, "loss": 0.1012, "step": 30186 }, { "epoch": 0.8806523134371901, "grad_norm": 0.8232195206923908, "learning_rate": 3.689157420477013e-07, "loss": 0.0996, "step": 30187 }, { "epoch": 0.8806814866678336, "grad_norm": 0.9174507808088106, "learning_rate": 3.687376592141995e-07, "loss": 0.1314, "step": 30188 }, { "epoch": 0.8807106598984772, "grad_norm": 0.8254176508882628, "learning_rate": 3.6855961772750193e-07, "loss": 0.0857, "step": 30189 }, { "epoch": 0.8807398331291207, "grad_norm": 0.8672602160728197, "learning_rate": 3.68381617589198e-07, "loss": 0.0941, "step": 30190 }, { "epoch": 0.8807690063597643, "grad_norm": 0.8352647499556992, "learning_rate": 3.682036588008786e-07, "loss": 0.1155, "step": 30191 }, { "epoch": 0.8807981795904078, "grad_norm": 0.9017842170348959, "learning_rate": 3.6802574136413084e-07, "loss": 0.1294, "step": 30192 }, { "epoch": 0.8808273528210514, "grad_norm": 0.7935272301615023, "learning_rate": 3.678478652805423e-07, "loss": 0.1041, "step": 30193 }, { "epoch": 0.880856526051695, "grad_norm": 0.8954750138041571, "learning_rate": 3.676700305517028e-07, "loss": 0.1378, "step": 30194 }, { "epoch": 0.8808856992823385, "grad_norm": 0.9790343919425698, "learning_rate": 3.674922371792e-07, "loss": 0.1232, "step": 30195 }, { "epoch": 0.8809148725129821, "grad_norm": 1.0833191201543046, "learning_rate": 3.673144851646199e-07, "loss": 0.1253, "step": 30196 }, { "epoch": 0.8809440457436256, "grad_norm": 0.7122042425090758, "learning_rate": 3.671367745095511e-07, "loss": 0.1127, "step": 30197 }, { "epoch": 0.8809732189742692, "grad_norm": 0.7927459358635081, "learning_rate": 3.6695910521557797e-07, "loss": 0.1189, "step": 30198 }, { "epoch": 0.8810023922049127, "grad_norm": 0.7376181023768205, "learning_rate": 3.6678147728428926e-07, "loss": 0.1042, "step": 30199 }, { "epoch": 0.8810315654355564, "grad_norm": 0.8077804932474733, "learning_rate": 3.6660389071726807e-07, "loss": 0.0969, "step": 30200 }, { "epoch": 0.8810607386661999, "grad_norm": 0.7568147108828605, "learning_rate": 3.664263455161027e-07, "loss": 0.1205, "step": 30201 }, { "epoch": 0.8810899118968435, "grad_norm": 0.9443235460663865, "learning_rate": 3.6624884168237675e-07, "loss": 0.1218, "step": 30202 }, { "epoch": 0.881119085127487, "grad_norm": 0.9146561949814475, "learning_rate": 3.66071379217674e-07, "loss": 0.1306, "step": 30203 }, { "epoch": 0.8811482583581306, "grad_norm": 0.7435052455702427, "learning_rate": 3.658939581235793e-07, "loss": 0.126, "step": 30204 }, { "epoch": 0.8811774315887742, "grad_norm": 0.8530381042113752, "learning_rate": 3.6571657840167864e-07, "loss": 0.1057, "step": 30205 }, { "epoch": 0.8812066048194177, "grad_norm": 0.8355369961185656, "learning_rate": 3.6553924005355347e-07, "loss": 0.0845, "step": 30206 }, { "epoch": 0.8812357780500613, "grad_norm": 0.8559363073796667, "learning_rate": 3.6536194308078756e-07, "loss": 0.1119, "step": 30207 }, { "epoch": 0.8812649512807048, "grad_norm": 1.2055206965322574, "learning_rate": 3.6518468748496406e-07, "loss": 0.1162, "step": 30208 }, { "epoch": 0.8812941245113484, "grad_norm": 0.8828364406691396, "learning_rate": 3.650074732676656e-07, "loss": 0.1211, "step": 30209 }, { "epoch": 0.8813232977419919, "grad_norm": 0.8566573650686539, "learning_rate": 3.648303004304721e-07, "loss": 0.0958, "step": 30210 }, { "epoch": 0.8813524709726355, "grad_norm": 0.7686853905378299, "learning_rate": 3.6465316897496883e-07, "loss": 0.0915, "step": 30211 }, { "epoch": 0.881381644203279, "grad_norm": 0.7660954448389473, "learning_rate": 3.6447607890273516e-07, "loss": 0.118, "step": 30212 }, { "epoch": 0.8814108174339226, "grad_norm": 1.2803163696680397, "learning_rate": 3.6429903021535207e-07, "loss": 0.1309, "step": 30213 }, { "epoch": 0.8814399906645662, "grad_norm": 0.8122822849721811, "learning_rate": 3.641220229144016e-07, "loss": 0.0962, "step": 30214 }, { "epoch": 0.8814691638952098, "grad_norm": 0.9291086541392866, "learning_rate": 3.63945057001463e-07, "loss": 0.1174, "step": 30215 }, { "epoch": 0.8814983371258533, "grad_norm": 0.7421057537233868, "learning_rate": 3.6376813247811503e-07, "loss": 0.1095, "step": 30216 }, { "epoch": 0.8815275103564969, "grad_norm": 0.8081893349893342, "learning_rate": 3.635912493459387e-07, "loss": 0.1213, "step": 30217 }, { "epoch": 0.8815566835871405, "grad_norm": 0.7958339955453644, "learning_rate": 3.634144076065133e-07, "loss": 0.1276, "step": 30218 }, { "epoch": 0.881585856817784, "grad_norm": 0.773084285292648, "learning_rate": 3.63237607261418e-07, "loss": 0.1135, "step": 30219 }, { "epoch": 0.8816150300484276, "grad_norm": 0.8580062597910707, "learning_rate": 3.6306084831222887e-07, "loss": 0.1146, "step": 30220 }, { "epoch": 0.8816442032790711, "grad_norm": 1.190768283142839, "learning_rate": 3.628841307605269e-07, "loss": 0.1097, "step": 30221 }, { "epoch": 0.8816733765097147, "grad_norm": 0.7718992014176562, "learning_rate": 3.6270745460788736e-07, "loss": 0.1004, "step": 30222 }, { "epoch": 0.8817025497403582, "grad_norm": 0.757772427516553, "learning_rate": 3.625308198558897e-07, "loss": 0.1068, "step": 30223 }, { "epoch": 0.8817317229710018, "grad_norm": 0.8011985273552965, "learning_rate": 3.6235422650610863e-07, "loss": 0.109, "step": 30224 }, { "epoch": 0.8817608962016453, "grad_norm": 0.8914904698648717, "learning_rate": 3.62177674560123e-07, "loss": 0.1159, "step": 30225 }, { "epoch": 0.8817900694322889, "grad_norm": 0.6703580860724604, "learning_rate": 3.6200116401950704e-07, "loss": 0.1239, "step": 30226 }, { "epoch": 0.8818192426629325, "grad_norm": 0.9313783388406465, "learning_rate": 3.6182469488583836e-07, "loss": 0.0931, "step": 30227 }, { "epoch": 0.8818484158935761, "grad_norm": 1.0304906920137527, "learning_rate": 3.616482671606908e-07, "loss": 0.1236, "step": 30228 }, { "epoch": 0.8818775891242197, "grad_norm": 0.7672447611229704, "learning_rate": 3.6147188084564075e-07, "loss": 0.1131, "step": 30229 }, { "epoch": 0.8819067623548632, "grad_norm": 0.8632346594672483, "learning_rate": 3.612955359422621e-07, "loss": 0.1032, "step": 30230 }, { "epoch": 0.8819359355855068, "grad_norm": 0.8417856631660557, "learning_rate": 3.611192324521301e-07, "loss": 0.1315, "step": 30231 }, { "epoch": 0.8819651088161503, "grad_norm": 0.7716576250477684, "learning_rate": 3.6094297037681857e-07, "loss": 0.0889, "step": 30232 }, { "epoch": 0.8819942820467939, "grad_norm": 0.5673189806323955, "learning_rate": 3.607667497178996e-07, "loss": 0.1137, "step": 30233 }, { "epoch": 0.8820234552774374, "grad_norm": 1.2397629243526644, "learning_rate": 3.6059057047694745e-07, "loss": 0.1183, "step": 30234 }, { "epoch": 0.882052628508081, "grad_norm": 0.7958689435050259, "learning_rate": 3.6041443265553645e-07, "loss": 0.0991, "step": 30235 }, { "epoch": 0.8820818017387245, "grad_norm": 0.7340789986518017, "learning_rate": 3.602383362552375e-07, "loss": 0.0997, "step": 30236 }, { "epoch": 0.8821109749693681, "grad_norm": 1.2508656996866652, "learning_rate": 3.6006228127762275e-07, "loss": 0.1301, "step": 30237 }, { "epoch": 0.8821401482000116, "grad_norm": 0.9875033771728545, "learning_rate": 3.598862677242643e-07, "loss": 0.1124, "step": 30238 }, { "epoch": 0.8821693214306552, "grad_norm": 0.8114853958236029, "learning_rate": 3.5971029559673407e-07, "loss": 0.0895, "step": 30239 }, { "epoch": 0.8821984946612987, "grad_norm": 1.0385342276752447, "learning_rate": 3.59534364896601e-07, "loss": 0.1121, "step": 30240 }, { "epoch": 0.8822276678919424, "grad_norm": 0.7225282116525515, "learning_rate": 3.5935847562543927e-07, "loss": 0.1008, "step": 30241 }, { "epoch": 0.882256841122586, "grad_norm": 0.8569040528906543, "learning_rate": 3.591826277848165e-07, "loss": 0.1013, "step": 30242 }, { "epoch": 0.8822860143532295, "grad_norm": 0.7433117161667993, "learning_rate": 3.5900682137630317e-07, "loss": 0.097, "step": 30243 }, { "epoch": 0.8823151875838731, "grad_norm": 0.8608631443274006, "learning_rate": 3.5883105640146965e-07, "loss": 0.1158, "step": 30244 }, { "epoch": 0.8823443608145166, "grad_norm": 1.0101357825938861, "learning_rate": 3.5865533286188415e-07, "loss": 0.1232, "step": 30245 }, { "epoch": 0.8823735340451602, "grad_norm": 0.8340525484015279, "learning_rate": 3.58479650759116e-07, "loss": 0.1139, "step": 30246 }, { "epoch": 0.8824027072758037, "grad_norm": 0.7593287722072896, "learning_rate": 3.583040100947327e-07, "loss": 0.1226, "step": 30247 }, { "epoch": 0.8824318805064473, "grad_norm": 0.9341213761325549, "learning_rate": 3.5812841087030427e-07, "loss": 0.1289, "step": 30248 }, { "epoch": 0.8824610537370908, "grad_norm": 0.7890900124503014, "learning_rate": 3.5795285308739715e-07, "loss": 0.0969, "step": 30249 }, { "epoch": 0.8824902269677344, "grad_norm": 0.7419355140165514, "learning_rate": 3.577773367475779e-07, "loss": 0.1042, "step": 30250 }, { "epoch": 0.8825194001983779, "grad_norm": 0.7754554477757386, "learning_rate": 3.576018618524152e-07, "loss": 0.1147, "step": 30251 }, { "epoch": 0.8825485734290215, "grad_norm": 0.8652486385826137, "learning_rate": 3.574264284034745e-07, "loss": 0.1177, "step": 30252 }, { "epoch": 0.882577746659665, "grad_norm": 0.8863230988863542, "learning_rate": 3.572510364023224e-07, "loss": 0.1053, "step": 30253 }, { "epoch": 0.8826069198903087, "grad_norm": 0.7918883451187265, "learning_rate": 3.5707568585052477e-07, "loss": 0.098, "step": 30254 }, { "epoch": 0.8826360931209523, "grad_norm": 0.7949978564938222, "learning_rate": 3.569003767496476e-07, "loss": 0.1235, "step": 30255 }, { "epoch": 0.8826652663515958, "grad_norm": 1.299755252294959, "learning_rate": 3.5672510910125526e-07, "loss": 0.1428, "step": 30256 }, { "epoch": 0.8826944395822394, "grad_norm": 0.7907743204613458, "learning_rate": 3.565498829069119e-07, "loss": 0.103, "step": 30257 }, { "epoch": 0.8827236128128829, "grad_norm": 0.8460585996047849, "learning_rate": 3.563746981681826e-07, "loss": 0.1137, "step": 30258 }, { "epoch": 0.8827527860435265, "grad_norm": 1.0933770669696037, "learning_rate": 3.561995548866326e-07, "loss": 0.1239, "step": 30259 }, { "epoch": 0.88278195927417, "grad_norm": 0.7781846495940566, "learning_rate": 3.560244530638235e-07, "loss": 0.0979, "step": 30260 }, { "epoch": 0.8828111325048136, "grad_norm": 0.8418009683284758, "learning_rate": 3.558493927013201e-07, "loss": 0.1074, "step": 30261 }, { "epoch": 0.8828403057354571, "grad_norm": 0.7863138275600805, "learning_rate": 3.5567437380068515e-07, "loss": 0.1113, "step": 30262 }, { "epoch": 0.8828694789661007, "grad_norm": 1.299581592141793, "learning_rate": 3.554993963634795e-07, "loss": 0.1033, "step": 30263 }, { "epoch": 0.8828986521967442, "grad_norm": 0.9671923105206143, "learning_rate": 3.5532446039126645e-07, "loss": 0.1229, "step": 30264 }, { "epoch": 0.8829278254273878, "grad_norm": 0.7420217463153288, "learning_rate": 3.551495658856091e-07, "loss": 0.112, "step": 30265 }, { "epoch": 0.8829569986580313, "grad_norm": 0.8974301122852568, "learning_rate": 3.5497471284806686e-07, "loss": 0.1269, "step": 30266 }, { "epoch": 0.8829861718886749, "grad_norm": 1.0018350939994582, "learning_rate": 3.5479990128020113e-07, "loss": 0.0921, "step": 30267 }, { "epoch": 0.8830153451193186, "grad_norm": 0.9819348515423417, "learning_rate": 3.5462513118357413e-07, "loss": 0.1246, "step": 30268 }, { "epoch": 0.8830445183499621, "grad_norm": 0.8769700484705546, "learning_rate": 3.544504025597445e-07, "loss": 0.1112, "step": 30269 }, { "epoch": 0.8830736915806057, "grad_norm": 0.8802326679739839, "learning_rate": 3.542757154102716e-07, "loss": 0.1104, "step": 30270 }, { "epoch": 0.8831028648112492, "grad_norm": 0.8391767456555339, "learning_rate": 3.54101069736717e-07, "loss": 0.1093, "step": 30271 }, { "epoch": 0.8831320380418928, "grad_norm": 0.6753819973574647, "learning_rate": 3.5392646554063935e-07, "loss": 0.1079, "step": 30272 }, { "epoch": 0.8831612112725363, "grad_norm": 0.7957394720387498, "learning_rate": 3.537519028235964e-07, "loss": 0.1181, "step": 30273 }, { "epoch": 0.8831903845031799, "grad_norm": 0.9294920446919498, "learning_rate": 3.535773815871485e-07, "loss": 0.1081, "step": 30274 }, { "epoch": 0.8832195577338234, "grad_norm": 1.0407690985588065, "learning_rate": 3.534029018328516e-07, "loss": 0.1038, "step": 30275 }, { "epoch": 0.883248730964467, "grad_norm": 0.9803304389381334, "learning_rate": 3.5322846356226405e-07, "loss": 0.1218, "step": 30276 }, { "epoch": 0.8832779041951105, "grad_norm": 1.0250165615672384, "learning_rate": 3.5305406677694386e-07, "loss": 0.1137, "step": 30277 }, { "epoch": 0.8833070774257541, "grad_norm": 1.0597643994693118, "learning_rate": 3.5287971147844823e-07, "loss": 0.1113, "step": 30278 }, { "epoch": 0.8833362506563976, "grad_norm": 0.6969120680101855, "learning_rate": 3.5270539766833257e-07, "loss": 0.107, "step": 30279 }, { "epoch": 0.8833654238870412, "grad_norm": 1.0829992251926237, "learning_rate": 3.5253112534815336e-07, "loss": 0.1206, "step": 30280 }, { "epoch": 0.8833945971176849, "grad_norm": 1.1661278745786465, "learning_rate": 3.5235689451946775e-07, "loss": 0.1067, "step": 30281 }, { "epoch": 0.8834237703483284, "grad_norm": 1.0443561061873734, "learning_rate": 3.521827051838295e-07, "loss": 0.0962, "step": 30282 }, { "epoch": 0.883452943578972, "grad_norm": 0.9537307633965, "learning_rate": 3.52008557342795e-07, "loss": 0.1177, "step": 30283 }, { "epoch": 0.8834821168096155, "grad_norm": 0.7337475191683551, "learning_rate": 3.5183445099791825e-07, "loss": 0.0946, "step": 30284 }, { "epoch": 0.8835112900402591, "grad_norm": 1.1754368881679809, "learning_rate": 3.516603861507545e-07, "loss": 0.1096, "step": 30285 }, { "epoch": 0.8835404632709026, "grad_norm": 0.7435421860743248, "learning_rate": 3.5148636280285697e-07, "loss": 0.1121, "step": 30286 }, { "epoch": 0.8835696365015462, "grad_norm": 1.0549735837093794, "learning_rate": 3.513123809557789e-07, "loss": 0.1186, "step": 30287 }, { "epoch": 0.8835988097321897, "grad_norm": 0.8718229764373705, "learning_rate": 3.5113844061107404e-07, "loss": 0.1243, "step": 30288 }, { "epoch": 0.8836279829628333, "grad_norm": 2.081912003183867, "learning_rate": 3.509645417702967e-07, "loss": 0.1217, "step": 30289 }, { "epoch": 0.8836571561934768, "grad_norm": 1.1329910540016377, "learning_rate": 3.5079068443499676e-07, "loss": 0.1147, "step": 30290 }, { "epoch": 0.8836863294241204, "grad_norm": 0.8690301360376276, "learning_rate": 3.506168686067285e-07, "loss": 0.0998, "step": 30291 }, { "epoch": 0.883715502654764, "grad_norm": 1.1275935446741046, "learning_rate": 3.504430942870429e-07, "loss": 0.09, "step": 30292 }, { "epoch": 0.8837446758854075, "grad_norm": 0.7895830958470886, "learning_rate": 3.5026936147749104e-07, "loss": 0.1115, "step": 30293 }, { "epoch": 0.8837738491160511, "grad_norm": 0.8889682049730542, "learning_rate": 3.500956701796243e-07, "loss": 0.1199, "step": 30294 }, { "epoch": 0.8838030223466947, "grad_norm": 0.7569732883636934, "learning_rate": 3.4992202039499377e-07, "loss": 0.1126, "step": 30295 }, { "epoch": 0.8838321955773383, "grad_norm": 0.7844529605231352, "learning_rate": 3.497484121251499e-07, "loss": 0.0896, "step": 30296 }, { "epoch": 0.8838613688079818, "grad_norm": 0.6706005533343086, "learning_rate": 3.4957484537164076e-07, "loss": 0.1146, "step": 30297 }, { "epoch": 0.8838905420386254, "grad_norm": 0.7684084409947594, "learning_rate": 3.494013201360186e-07, "loss": 0.1139, "step": 30298 }, { "epoch": 0.8839197152692689, "grad_norm": 1.0253168927714207, "learning_rate": 3.492278364198309e-07, "loss": 0.1272, "step": 30299 }, { "epoch": 0.8839488884999125, "grad_norm": 0.9081985058797085, "learning_rate": 3.490543942246255e-07, "loss": 0.1214, "step": 30300 }, { "epoch": 0.883978061730556, "grad_norm": 0.8650806966578726, "learning_rate": 3.488809935519533e-07, "loss": 0.1174, "step": 30301 }, { "epoch": 0.8840072349611996, "grad_norm": 0.952124751383058, "learning_rate": 3.4870763440336185e-07, "loss": 0.1157, "step": 30302 }, { "epoch": 0.8840364081918431, "grad_norm": 0.8657833867463129, "learning_rate": 3.485343167803973e-07, "loss": 0.1027, "step": 30303 }, { "epoch": 0.8840655814224867, "grad_norm": 0.8805401006232512, "learning_rate": 3.4836104068460887e-07, "loss": 0.1072, "step": 30304 }, { "epoch": 0.8840947546531303, "grad_norm": 0.994418975411668, "learning_rate": 3.48187806117542e-07, "loss": 0.11, "step": 30305 }, { "epoch": 0.8841239278837738, "grad_norm": 0.7017094904926031, "learning_rate": 3.480146130807438e-07, "loss": 0.1046, "step": 30306 }, { "epoch": 0.8841531011144174, "grad_norm": 0.7848220442586089, "learning_rate": 3.4784146157576025e-07, "loss": 0.1075, "step": 30307 }, { "epoch": 0.884182274345061, "grad_norm": 0.9510315721539675, "learning_rate": 3.4766835160413846e-07, "loss": 0.1239, "step": 30308 }, { "epoch": 0.8842114475757046, "grad_norm": 1.1385160742814275, "learning_rate": 3.474952831674233e-07, "loss": 0.133, "step": 30309 }, { "epoch": 0.8842406208063481, "grad_norm": 0.8747383799876995, "learning_rate": 3.473222562671585e-07, "loss": 0.1212, "step": 30310 }, { "epoch": 0.8842697940369917, "grad_norm": 0.8685490919580368, "learning_rate": 3.4714927090489126e-07, "loss": 0.1087, "step": 30311 }, { "epoch": 0.8842989672676352, "grad_norm": 0.6771623979097176, "learning_rate": 3.46976327082163e-07, "loss": 0.1199, "step": 30312 }, { "epoch": 0.8843281404982788, "grad_norm": 0.794962219029144, "learning_rate": 3.468034248005209e-07, "loss": 0.1196, "step": 30313 }, { "epoch": 0.8843573137289223, "grad_norm": 0.8725883111671113, "learning_rate": 3.466305640615059e-07, "loss": 0.1123, "step": 30314 }, { "epoch": 0.8843864869595659, "grad_norm": 0.7223817005236681, "learning_rate": 3.4645774486666285e-07, "loss": 0.118, "step": 30315 }, { "epoch": 0.8844156601902095, "grad_norm": 0.749383241880221, "learning_rate": 3.4628496721753444e-07, "loss": 0.1165, "step": 30316 }, { "epoch": 0.884444833420853, "grad_norm": 0.9503710213224816, "learning_rate": 3.4611223111566226e-07, "loss": 0.0974, "step": 30317 }, { "epoch": 0.8844740066514966, "grad_norm": 0.8096765693535113, "learning_rate": 3.4593953656258896e-07, "loss": 0.105, "step": 30318 }, { "epoch": 0.8845031798821401, "grad_norm": 0.7867137257183453, "learning_rate": 3.457668835598571e-07, "loss": 0.1145, "step": 30319 }, { "epoch": 0.8845323531127837, "grad_norm": 1.102137486792454, "learning_rate": 3.4559427210900663e-07, "loss": 0.1027, "step": 30320 }, { "epoch": 0.8845615263434272, "grad_norm": 0.8603445635648463, "learning_rate": 3.454217022115802e-07, "loss": 0.122, "step": 30321 }, { "epoch": 0.8845906995740709, "grad_norm": 0.815858513164158, "learning_rate": 3.452491738691183e-07, "loss": 0.1111, "step": 30322 }, { "epoch": 0.8846198728047144, "grad_norm": 0.6665759354368596, "learning_rate": 3.45076687083159e-07, "loss": 0.102, "step": 30323 }, { "epoch": 0.884649046035358, "grad_norm": 1.4909042422648486, "learning_rate": 3.44904241855244e-07, "loss": 0.0927, "step": 30324 }, { "epoch": 0.8846782192660015, "grad_norm": 0.8096901343232552, "learning_rate": 3.447318381869136e-07, "loss": 0.1112, "step": 30325 }, { "epoch": 0.8847073924966451, "grad_norm": 0.9453686884324168, "learning_rate": 3.445594760797061e-07, "loss": 0.1079, "step": 30326 }, { "epoch": 0.8847365657272886, "grad_norm": 0.7234627055790871, "learning_rate": 3.443871555351597e-07, "loss": 0.0901, "step": 30327 }, { "epoch": 0.8847657389579322, "grad_norm": 0.8254188611841817, "learning_rate": 3.4421487655481366e-07, "loss": 0.0975, "step": 30328 }, { "epoch": 0.8847949121885758, "grad_norm": 0.7246687590352812, "learning_rate": 3.440426391402063e-07, "loss": 0.0927, "step": 30329 }, { "epoch": 0.8848240854192193, "grad_norm": 0.8764899631729903, "learning_rate": 3.43870443292873e-07, "loss": 0.1043, "step": 30330 }, { "epoch": 0.8848532586498629, "grad_norm": 0.8373716420881642, "learning_rate": 3.436982890143542e-07, "loss": 0.1229, "step": 30331 }, { "epoch": 0.8848824318805064, "grad_norm": 0.7997293485721308, "learning_rate": 3.435261763061859e-07, "loss": 0.1116, "step": 30332 }, { "epoch": 0.88491160511115, "grad_norm": 0.9667778708676278, "learning_rate": 3.43354105169903e-07, "loss": 0.135, "step": 30333 }, { "epoch": 0.8849407783417935, "grad_norm": 0.7458684991803146, "learning_rate": 3.431820756070442e-07, "loss": 0.1301, "step": 30334 }, { "epoch": 0.8849699515724372, "grad_norm": 0.8914313326095475, "learning_rate": 3.430100876191439e-07, "loss": 0.1234, "step": 30335 }, { "epoch": 0.8849991248030807, "grad_norm": 0.8700656007969616, "learning_rate": 3.4283814120773753e-07, "loss": 0.1036, "step": 30336 }, { "epoch": 0.8850282980337243, "grad_norm": 1.0175748851464634, "learning_rate": 3.426662363743599e-07, "loss": 0.1164, "step": 30337 }, { "epoch": 0.8850574712643678, "grad_norm": 0.9406545152020644, "learning_rate": 3.424943731205477e-07, "loss": 0.1189, "step": 30338 }, { "epoch": 0.8850866444950114, "grad_norm": 0.8426086877704665, "learning_rate": 3.4232255144783347e-07, "loss": 0.0977, "step": 30339 }, { "epoch": 0.885115817725655, "grad_norm": 0.9989210816802246, "learning_rate": 3.42150771357751e-07, "loss": 0.1201, "step": 30340 }, { "epoch": 0.8851449909562985, "grad_norm": 0.7275818106060049, "learning_rate": 3.419790328518352e-07, "loss": 0.1052, "step": 30341 }, { "epoch": 0.8851741641869421, "grad_norm": 0.7937811983282704, "learning_rate": 3.4180733593161764e-07, "loss": 0.0996, "step": 30342 }, { "epoch": 0.8852033374175856, "grad_norm": 0.6934409654506852, "learning_rate": 3.4163568059863374e-07, "loss": 0.0883, "step": 30343 }, { "epoch": 0.8852325106482292, "grad_norm": 1.0038186410180796, "learning_rate": 3.414640668544128e-07, "loss": 0.1137, "step": 30344 }, { "epoch": 0.8852616838788727, "grad_norm": 0.7151777752932028, "learning_rate": 3.4129249470048974e-07, "loss": 0.1123, "step": 30345 }, { "epoch": 0.8852908571095163, "grad_norm": 0.8763191749524867, "learning_rate": 3.41120964138395e-07, "loss": 0.1089, "step": 30346 }, { "epoch": 0.8853200303401598, "grad_norm": 0.8118309774776161, "learning_rate": 3.409494751696596e-07, "loss": 0.1074, "step": 30347 }, { "epoch": 0.8853492035708034, "grad_norm": 0.9261107718805629, "learning_rate": 3.4077802779581504e-07, "loss": 0.0919, "step": 30348 }, { "epoch": 0.885378376801447, "grad_norm": 0.864050094739019, "learning_rate": 3.406066220183929e-07, "loss": 0.1105, "step": 30349 }, { "epoch": 0.8854075500320906, "grad_norm": 0.8010293586694941, "learning_rate": 3.404352578389214e-07, "loss": 0.1065, "step": 30350 }, { "epoch": 0.8854367232627341, "grad_norm": 0.715757383092985, "learning_rate": 3.4026393525893266e-07, "loss": 0.1173, "step": 30351 }, { "epoch": 0.8854658964933777, "grad_norm": 0.7882231160982873, "learning_rate": 3.4009265427995483e-07, "loss": 0.1016, "step": 30352 }, { "epoch": 0.8854950697240213, "grad_norm": 0.7943256913202561, "learning_rate": 3.3992141490351685e-07, "loss": 0.0949, "step": 30353 }, { "epoch": 0.8855242429546648, "grad_norm": 0.792414663988053, "learning_rate": 3.3975021713114844e-07, "loss": 0.1012, "step": 30354 }, { "epoch": 0.8855534161853084, "grad_norm": 0.9474072103656929, "learning_rate": 3.395790609643779e-07, "loss": 0.1585, "step": 30355 }, { "epoch": 0.8855825894159519, "grad_norm": 1.0532927138993091, "learning_rate": 3.3940794640473284e-07, "loss": 0.1174, "step": 30356 }, { "epoch": 0.8856117626465955, "grad_norm": 0.6979188327652479, "learning_rate": 3.3923687345374046e-07, "loss": 0.1171, "step": 30357 }, { "epoch": 0.885640935877239, "grad_norm": 1.392831635861828, "learning_rate": 3.390658421129295e-07, "loss": 0.1041, "step": 30358 }, { "epoch": 0.8856701091078826, "grad_norm": 0.756555154112626, "learning_rate": 3.388948523838259e-07, "loss": 0.1172, "step": 30359 }, { "epoch": 0.8856992823385261, "grad_norm": 1.1926946760902775, "learning_rate": 3.3872390426795467e-07, "loss": 0.1008, "step": 30360 }, { "epoch": 0.8857284555691697, "grad_norm": 0.7766733283219366, "learning_rate": 3.385529977668456e-07, "loss": 0.1133, "step": 30361 }, { "epoch": 0.8857576287998133, "grad_norm": 0.7369969660292518, "learning_rate": 3.383821328820225e-07, "loss": 0.108, "step": 30362 }, { "epoch": 0.8857868020304569, "grad_norm": 1.0297691496982326, "learning_rate": 3.382113096150097e-07, "loss": 0.1336, "step": 30363 }, { "epoch": 0.8858159752611005, "grad_norm": 0.813095504044418, "learning_rate": 3.380405279673349e-07, "loss": 0.1031, "step": 30364 }, { "epoch": 0.885845148491744, "grad_norm": 0.9703168646153482, "learning_rate": 3.3786978794052126e-07, "loss": 0.1379, "step": 30365 }, { "epoch": 0.8858743217223876, "grad_norm": 0.9349777106145506, "learning_rate": 3.376990895360921e-07, "loss": 0.1289, "step": 30366 }, { "epoch": 0.8859034949530311, "grad_norm": 0.8325957039835654, "learning_rate": 3.3752843275557224e-07, "loss": 0.0895, "step": 30367 }, { "epoch": 0.8859326681836747, "grad_norm": 0.8702869652325833, "learning_rate": 3.3735781760048714e-07, "loss": 0.1073, "step": 30368 }, { "epoch": 0.8859618414143182, "grad_norm": 0.7044139624804796, "learning_rate": 3.371872440723578e-07, "loss": 0.1108, "step": 30369 }, { "epoch": 0.8859910146449618, "grad_norm": 0.8736508089684976, "learning_rate": 3.370167121727069e-07, "loss": 0.1027, "step": 30370 }, { "epoch": 0.8860201878756053, "grad_norm": 0.7745660315239808, "learning_rate": 3.3684622190305825e-07, "loss": 0.0869, "step": 30371 }, { "epoch": 0.8860493611062489, "grad_norm": 0.6383409050373748, "learning_rate": 3.3667577326493283e-07, "loss": 0.1149, "step": 30372 }, { "epoch": 0.8860785343368924, "grad_norm": 0.7618712373043488, "learning_rate": 3.3650536625985384e-07, "loss": 0.1091, "step": 30373 }, { "epoch": 0.886107707567536, "grad_norm": 1.0625097758772686, "learning_rate": 3.363350008893407e-07, "loss": 0.1144, "step": 30374 }, { "epoch": 0.8861368807981795, "grad_norm": 0.7828996684426, "learning_rate": 3.3616467715491654e-07, "loss": 0.1043, "step": 30375 }, { "epoch": 0.8861660540288232, "grad_norm": 0.70035622400141, "learning_rate": 3.3599439505810015e-07, "loss": 0.1211, "step": 30376 }, { "epoch": 0.8861952272594668, "grad_norm": 0.8927629012197409, "learning_rate": 3.358241546004121e-07, "loss": 0.1268, "step": 30377 }, { "epoch": 0.8862244004901103, "grad_norm": 0.9462367269295212, "learning_rate": 3.3565395578337214e-07, "loss": 0.1323, "step": 30378 }, { "epoch": 0.8862535737207539, "grad_norm": 0.974531682763787, "learning_rate": 3.354837986085013e-07, "loss": 0.1314, "step": 30379 }, { "epoch": 0.8862827469513974, "grad_norm": 0.8371538846374478, "learning_rate": 3.353136830773168e-07, "loss": 0.1071, "step": 30380 }, { "epoch": 0.886311920182041, "grad_norm": 0.8964652179080821, "learning_rate": 3.351436091913385e-07, "loss": 0.122, "step": 30381 }, { "epoch": 0.8863410934126845, "grad_norm": 0.772242333638482, "learning_rate": 3.349735769520851e-07, "loss": 0.0863, "step": 30382 }, { "epoch": 0.8863702666433281, "grad_norm": 0.7731169190457556, "learning_rate": 3.3480358636107267e-07, "loss": 0.1095, "step": 30383 }, { "epoch": 0.8863994398739716, "grad_norm": 0.7525963182497357, "learning_rate": 3.346336374198206e-07, "loss": 0.112, "step": 30384 }, { "epoch": 0.8864286131046152, "grad_norm": 0.7043585510672596, "learning_rate": 3.3446373012984647e-07, "loss": 0.1016, "step": 30385 }, { "epoch": 0.8864577863352587, "grad_norm": 0.6955703191132063, "learning_rate": 3.3429386449266634e-07, "loss": 0.1052, "step": 30386 }, { "epoch": 0.8864869595659023, "grad_norm": 0.9402183031429808, "learning_rate": 3.3412404050979564e-07, "loss": 0.1084, "step": 30387 }, { "epoch": 0.8865161327965458, "grad_norm": 0.9400248364293934, "learning_rate": 3.3395425818275264e-07, "loss": 0.0924, "step": 30388 }, { "epoch": 0.8865453060271895, "grad_norm": 0.893022260749343, "learning_rate": 3.337845175130522e-07, "loss": 0.1098, "step": 30389 }, { "epoch": 0.8865744792578331, "grad_norm": 0.9950712262562712, "learning_rate": 3.336148185022081e-07, "loss": 0.1274, "step": 30390 }, { "epoch": 0.8866036524884766, "grad_norm": 0.8041470734874085, "learning_rate": 3.3344516115173863e-07, "loss": 0.0984, "step": 30391 }, { "epoch": 0.8866328257191202, "grad_norm": 0.832538593492917, "learning_rate": 3.33275545463157e-07, "loss": 0.1172, "step": 30392 }, { "epoch": 0.8866619989497637, "grad_norm": 0.9968711353932206, "learning_rate": 3.3310597143797585e-07, "loss": 0.1215, "step": 30393 }, { "epoch": 0.8866911721804073, "grad_norm": 0.7186352544941272, "learning_rate": 3.329364390777118e-07, "loss": 0.1069, "step": 30394 }, { "epoch": 0.8867203454110508, "grad_norm": 1.319439937713482, "learning_rate": 3.327669483838758e-07, "loss": 0.1224, "step": 30395 }, { "epoch": 0.8867495186416944, "grad_norm": 0.8814657572629593, "learning_rate": 3.325974993579839e-07, "loss": 0.0908, "step": 30396 }, { "epoch": 0.8867786918723379, "grad_norm": 0.8183405807279585, "learning_rate": 3.3242809200154603e-07, "loss": 0.107, "step": 30397 }, { "epoch": 0.8868078651029815, "grad_norm": 0.9222259577856428, "learning_rate": 3.3225872631607646e-07, "loss": 0.1071, "step": 30398 }, { "epoch": 0.886837038333625, "grad_norm": 0.8522762644758137, "learning_rate": 3.320894023030868e-07, "loss": 0.1116, "step": 30399 }, { "epoch": 0.8868662115642686, "grad_norm": 0.7650674869070023, "learning_rate": 3.319201199640881e-07, "loss": 0.0998, "step": 30400 }, { "epoch": 0.8868953847949121, "grad_norm": 0.7338584267253627, "learning_rate": 3.3175087930059246e-07, "loss": 0.103, "step": 30401 }, { "epoch": 0.8869245580255557, "grad_norm": 1.0480343563621168, "learning_rate": 3.3158168031411085e-07, "loss": 0.1184, "step": 30402 }, { "epoch": 0.8869537312561994, "grad_norm": 0.9729019843719371, "learning_rate": 3.3141252300615377e-07, "loss": 0.1189, "step": 30403 }, { "epoch": 0.8869829044868429, "grad_norm": 0.7023401695125001, "learning_rate": 3.3124340737823056e-07, "loss": 0.1196, "step": 30404 }, { "epoch": 0.8870120777174865, "grad_norm": 0.7756368928637107, "learning_rate": 3.3107433343185224e-07, "loss": 0.1112, "step": 30405 }, { "epoch": 0.88704125094813, "grad_norm": 0.8369702667736236, "learning_rate": 3.3090530116852757e-07, "loss": 0.1031, "step": 30406 }, { "epoch": 0.8870704241787736, "grad_norm": 1.2126969738676157, "learning_rate": 3.3073631058976486e-07, "loss": 0.0888, "step": 30407 }, { "epoch": 0.8870995974094171, "grad_norm": 0.8124887938795178, "learning_rate": 3.305673616970745e-07, "loss": 0.1267, "step": 30408 }, { "epoch": 0.8871287706400607, "grad_norm": 0.8935019345644799, "learning_rate": 3.3039845449196473e-07, "loss": 0.1546, "step": 30409 }, { "epoch": 0.8871579438707042, "grad_norm": 0.820766387428527, "learning_rate": 3.3022958897594157e-07, "loss": 0.1252, "step": 30410 }, { "epoch": 0.8871871171013478, "grad_norm": 1.0688200141873976, "learning_rate": 3.30060765150515e-07, "loss": 0.1052, "step": 30411 }, { "epoch": 0.8872162903319913, "grad_norm": 0.9664418098268349, "learning_rate": 3.2989198301719095e-07, "loss": 0.1005, "step": 30412 }, { "epoch": 0.8872454635626349, "grad_norm": 1.0969730287842412, "learning_rate": 3.2972324257747543e-07, "loss": 0.1285, "step": 30413 }, { "epoch": 0.8872746367932784, "grad_norm": 0.9190987287618725, "learning_rate": 3.295545438328762e-07, "loss": 0.1015, "step": 30414 }, { "epoch": 0.887303810023922, "grad_norm": 1.037815457992433, "learning_rate": 3.293858867848998e-07, "loss": 0.1077, "step": 30415 }, { "epoch": 0.8873329832545657, "grad_norm": 0.749096335415323, "learning_rate": 3.2921727143505114e-07, "loss": 0.1158, "step": 30416 }, { "epoch": 0.8873621564852092, "grad_norm": 0.805998932613953, "learning_rate": 3.290486977848345e-07, "loss": 0.1025, "step": 30417 }, { "epoch": 0.8873913297158528, "grad_norm": 0.6761166927583815, "learning_rate": 3.2888016583575765e-07, "loss": 0.1073, "step": 30418 }, { "epoch": 0.8874205029464963, "grad_norm": 0.8600428273131147, "learning_rate": 3.2871167558932214e-07, "loss": 0.0964, "step": 30419 }, { "epoch": 0.8874496761771399, "grad_norm": 0.8516478008239923, "learning_rate": 3.28543227047034e-07, "loss": 0.1233, "step": 30420 }, { "epoch": 0.8874788494077834, "grad_norm": 0.9747511822210017, "learning_rate": 3.2837482021039757e-07, "loss": 0.108, "step": 30421 }, { "epoch": 0.887508022638427, "grad_norm": 0.673979853826961, "learning_rate": 3.282064550809155e-07, "loss": 0.1074, "step": 30422 }, { "epoch": 0.8875371958690705, "grad_norm": 1.093949927830266, "learning_rate": 3.2803813166009004e-07, "loss": 0.137, "step": 30423 }, { "epoch": 0.8875663690997141, "grad_norm": 0.9164141893291394, "learning_rate": 3.2786984994942596e-07, "loss": 0.1227, "step": 30424 }, { "epoch": 0.8875955423303576, "grad_norm": 0.760827304849373, "learning_rate": 3.2770160995042323e-07, "loss": 0.0866, "step": 30425 }, { "epoch": 0.8876247155610012, "grad_norm": 1.0346784719106643, "learning_rate": 3.275334116645867e-07, "loss": 0.117, "step": 30426 }, { "epoch": 0.8876538887916448, "grad_norm": 0.7707234390139983, "learning_rate": 3.2736525509341476e-07, "loss": 0.1127, "step": 30427 }, { "epoch": 0.8876830620222883, "grad_norm": 0.8335679426506698, "learning_rate": 3.2719714023841163e-07, "loss": 0.1262, "step": 30428 }, { "epoch": 0.8877122352529319, "grad_norm": 0.7506067864274482, "learning_rate": 3.270290671010773e-07, "loss": 0.0833, "step": 30429 }, { "epoch": 0.8877414084835755, "grad_norm": 0.8514175594375909, "learning_rate": 3.268610356829105e-07, "loss": 0.1138, "step": 30430 }, { "epoch": 0.8877705817142191, "grad_norm": 0.8487384069260846, "learning_rate": 3.266930459854134e-07, "loss": 0.1412, "step": 30431 }, { "epoch": 0.8877997549448626, "grad_norm": 0.7939705584414907, "learning_rate": 3.2652509801008536e-07, "loss": 0.1074, "step": 30432 }, { "epoch": 0.8878289281755062, "grad_norm": 0.8212437316467182, "learning_rate": 3.263571917584257e-07, "loss": 0.1118, "step": 30433 }, { "epoch": 0.8878581014061497, "grad_norm": 0.844297758483977, "learning_rate": 3.2618932723193274e-07, "loss": 0.103, "step": 30434 }, { "epoch": 0.8878872746367933, "grad_norm": 0.8544561466150221, "learning_rate": 3.260215044321069e-07, "loss": 0.1095, "step": 30435 }, { "epoch": 0.8879164478674368, "grad_norm": 0.7951195004484574, "learning_rate": 3.2585372336044473e-07, "loss": 0.1035, "step": 30436 }, { "epoch": 0.8879456210980804, "grad_norm": 0.865101965359929, "learning_rate": 3.2568598401844344e-07, "loss": 0.1073, "step": 30437 }, { "epoch": 0.887974794328724, "grad_norm": 0.9363742201354716, "learning_rate": 3.255182864076034e-07, "loss": 0.119, "step": 30438 }, { "epoch": 0.8880039675593675, "grad_norm": 0.8501357082705552, "learning_rate": 3.2535063052942015e-07, "loss": 0.1263, "step": 30439 }, { "epoch": 0.888033140790011, "grad_norm": 0.8730648155104872, "learning_rate": 3.2518301638538976e-07, "loss": 0.1014, "step": 30440 }, { "epoch": 0.8880623140206546, "grad_norm": 0.908414300042689, "learning_rate": 3.250154439770098e-07, "loss": 0.1016, "step": 30441 }, { "epoch": 0.8880914872512982, "grad_norm": 0.8683485720759831, "learning_rate": 3.2484791330577635e-07, "loss": 0.1504, "step": 30442 }, { "epoch": 0.8881206604819417, "grad_norm": 1.0706309103476457, "learning_rate": 3.246804243731838e-07, "loss": 0.1075, "step": 30443 }, { "epoch": 0.8881498337125854, "grad_norm": 0.6936667445533639, "learning_rate": 3.245129771807287e-07, "loss": 0.106, "step": 30444 }, { "epoch": 0.8881790069432289, "grad_norm": 0.8525681655626866, "learning_rate": 3.24345571729906e-07, "loss": 0.1237, "step": 30445 }, { "epoch": 0.8882081801738725, "grad_norm": 0.9698863640439978, "learning_rate": 3.2417820802221e-07, "loss": 0.1253, "step": 30446 }, { "epoch": 0.888237353404516, "grad_norm": 1.1217498817935327, "learning_rate": 3.24010886059134e-07, "loss": 0.0998, "step": 30447 }, { "epoch": 0.8882665266351596, "grad_norm": 0.8988503477875328, "learning_rate": 3.238436058421729e-07, "loss": 0.1288, "step": 30448 }, { "epoch": 0.8882956998658031, "grad_norm": 0.7698741762173608, "learning_rate": 3.236763673728194e-07, "loss": 0.093, "step": 30449 }, { "epoch": 0.8883248730964467, "grad_norm": 1.034610237159529, "learning_rate": 3.235091706525673e-07, "loss": 0.0992, "step": 30450 }, { "epoch": 0.8883540463270903, "grad_norm": 0.7852459626938117, "learning_rate": 3.2334201568290924e-07, "loss": 0.1391, "step": 30451 }, { "epoch": 0.8883832195577338, "grad_norm": 0.8902457560368345, "learning_rate": 3.2317490246533745e-07, "loss": 0.1137, "step": 30452 }, { "epoch": 0.8884123927883774, "grad_norm": 1.07622350155105, "learning_rate": 3.230078310013429e-07, "loss": 0.1048, "step": 30453 }, { "epoch": 0.8884415660190209, "grad_norm": 0.8138661621072937, "learning_rate": 3.2284080129241837e-07, "loss": 0.1096, "step": 30454 }, { "epoch": 0.8884707392496645, "grad_norm": 0.7863486309532878, "learning_rate": 3.226738133400542e-07, "loss": 0.1056, "step": 30455 }, { "epoch": 0.888499912480308, "grad_norm": 0.8666539740372097, "learning_rate": 3.225068671457426e-07, "loss": 0.1237, "step": 30456 }, { "epoch": 0.8885290857109517, "grad_norm": 0.8918594009942206, "learning_rate": 3.223399627109719e-07, "loss": 0.1275, "step": 30457 }, { "epoch": 0.8885582589415952, "grad_norm": 0.6043041679174953, "learning_rate": 3.2217310003723467e-07, "loss": 0.1047, "step": 30458 }, { "epoch": 0.8885874321722388, "grad_norm": 0.8028948186370006, "learning_rate": 3.2200627912601866e-07, "loss": 0.1278, "step": 30459 }, { "epoch": 0.8886166054028823, "grad_norm": 1.0191922252066992, "learning_rate": 3.218394999788138e-07, "loss": 0.124, "step": 30460 }, { "epoch": 0.8886457786335259, "grad_norm": 1.0013908355360317, "learning_rate": 3.216727625971083e-07, "loss": 0.1314, "step": 30461 }, { "epoch": 0.8886749518641694, "grad_norm": 0.6720942638188737, "learning_rate": 3.215060669823933e-07, "loss": 0.1286, "step": 30462 }, { "epoch": 0.888704125094813, "grad_norm": 0.785874262867982, "learning_rate": 3.213394131361547e-07, "loss": 0.1251, "step": 30463 }, { "epoch": 0.8887332983254566, "grad_norm": 0.7468232802908306, "learning_rate": 3.2117280105988026e-07, "loss": 0.0906, "step": 30464 }, { "epoch": 0.8887624715561001, "grad_norm": 0.7520984517669977, "learning_rate": 3.2100623075505874e-07, "loss": 0.0913, "step": 30465 }, { "epoch": 0.8887916447867437, "grad_norm": 1.0473963135644402, "learning_rate": 3.2083970222317686e-07, "loss": 0.1219, "step": 30466 }, { "epoch": 0.8888208180173872, "grad_norm": 0.8427324687372726, "learning_rate": 3.206732154657194e-07, "loss": 0.0994, "step": 30467 }, { "epoch": 0.8888499912480308, "grad_norm": 0.8416467101814705, "learning_rate": 3.2050677048417577e-07, "loss": 0.112, "step": 30468 }, { "epoch": 0.8888791644786743, "grad_norm": 0.7900828921500531, "learning_rate": 3.203403672800309e-07, "loss": 0.1002, "step": 30469 }, { "epoch": 0.8889083377093179, "grad_norm": 0.8763892121160695, "learning_rate": 3.2017400585476923e-07, "loss": 0.1045, "step": 30470 }, { "epoch": 0.8889375109399615, "grad_norm": 0.8571175985732294, "learning_rate": 3.2000768620987776e-07, "loss": 0.0971, "step": 30471 }, { "epoch": 0.8889666841706051, "grad_norm": 0.8307611178054767, "learning_rate": 3.198414083468404e-07, "loss": 0.114, "step": 30472 }, { "epoch": 0.8889958574012486, "grad_norm": 1.315998495462903, "learning_rate": 3.19675172267141e-07, "loss": 0.1126, "step": 30473 }, { "epoch": 0.8890250306318922, "grad_norm": 1.105312974477422, "learning_rate": 3.195089779722643e-07, "loss": 0.0932, "step": 30474 }, { "epoch": 0.8890542038625358, "grad_norm": 0.9735997191167188, "learning_rate": 3.193428254636949e-07, "loss": 0.1035, "step": 30475 }, { "epoch": 0.8890833770931793, "grad_norm": 0.7936149384272677, "learning_rate": 3.191767147429159e-07, "loss": 0.1118, "step": 30476 }, { "epoch": 0.8891125503238229, "grad_norm": 1.138799021036221, "learning_rate": 3.190106458114084e-07, "loss": 0.1215, "step": 30477 }, { "epoch": 0.8891417235544664, "grad_norm": 1.2291060948656234, "learning_rate": 3.188446186706573e-07, "loss": 0.1187, "step": 30478 }, { "epoch": 0.88917089678511, "grad_norm": 1.1005788815254303, "learning_rate": 3.186786333221431e-07, "loss": 0.1232, "step": 30479 }, { "epoch": 0.8892000700157535, "grad_norm": 0.6889063645022628, "learning_rate": 3.185126897673485e-07, "loss": 0.114, "step": 30480 }, { "epoch": 0.8892292432463971, "grad_norm": 0.983870192076746, "learning_rate": 3.183467880077562e-07, "loss": 0.1312, "step": 30481 }, { "epoch": 0.8892584164770406, "grad_norm": 0.6741017738651957, "learning_rate": 3.1818092804484556e-07, "loss": 0.0919, "step": 30482 }, { "epoch": 0.8892875897076842, "grad_norm": 0.7882326232819421, "learning_rate": 3.1801510988009765e-07, "loss": 0.1177, "step": 30483 }, { "epoch": 0.8893167629383278, "grad_norm": 0.8376701441824327, "learning_rate": 3.1784933351499404e-07, "loss": 0.1158, "step": 30484 }, { "epoch": 0.8893459361689714, "grad_norm": 1.136097784752444, "learning_rate": 3.1768359895101296e-07, "loss": 0.1146, "step": 30485 }, { "epoch": 0.889375109399615, "grad_norm": 0.932393242138555, "learning_rate": 3.175179061896355e-07, "loss": 0.1224, "step": 30486 }, { "epoch": 0.8894042826302585, "grad_norm": 0.748704135637435, "learning_rate": 3.173522552323399e-07, "loss": 0.1164, "step": 30487 }, { "epoch": 0.8894334558609021, "grad_norm": 0.8891019213388898, "learning_rate": 3.171866460806061e-07, "loss": 0.0986, "step": 30488 }, { "epoch": 0.8894626290915456, "grad_norm": 0.7344867986012511, "learning_rate": 3.170210787359118e-07, "loss": 0.1016, "step": 30489 }, { "epoch": 0.8894918023221892, "grad_norm": 1.1247175630906892, "learning_rate": 3.1685555319973525e-07, "loss": 0.1264, "step": 30490 }, { "epoch": 0.8895209755528327, "grad_norm": 0.94693035646125, "learning_rate": 3.166900694735542e-07, "loss": 0.1247, "step": 30491 }, { "epoch": 0.8895501487834763, "grad_norm": 0.882752799471745, "learning_rate": 3.1652462755884686e-07, "loss": 0.118, "step": 30492 }, { "epoch": 0.8895793220141198, "grad_norm": 0.8356541380491931, "learning_rate": 3.1635922745708927e-07, "loss": 0.1211, "step": 30493 }, { "epoch": 0.8896084952447634, "grad_norm": 0.9078331283278978, "learning_rate": 3.1619386916975804e-07, "loss": 0.0996, "step": 30494 }, { "epoch": 0.8896376684754069, "grad_norm": 0.988811410334127, "learning_rate": 3.160285526983303e-07, "loss": 0.1261, "step": 30495 }, { "epoch": 0.8896668417060505, "grad_norm": 1.0003746410143617, "learning_rate": 3.158632780442816e-07, "loss": 0.1246, "step": 30496 }, { "epoch": 0.889696014936694, "grad_norm": 0.8954152287289935, "learning_rate": 3.1569804520908633e-07, "loss": 0.0992, "step": 30497 }, { "epoch": 0.8897251881673377, "grad_norm": 0.8224363469838618, "learning_rate": 3.1553285419422153e-07, "loss": 0.1197, "step": 30498 }, { "epoch": 0.8897543613979813, "grad_norm": 1.0507848059549514, "learning_rate": 3.1536770500116164e-07, "loss": 0.1346, "step": 30499 }, { "epoch": 0.8897835346286248, "grad_norm": 0.8836399621266284, "learning_rate": 3.152025976313794e-07, "loss": 0.1221, "step": 30500 }, { "epoch": 0.8898127078592684, "grad_norm": 0.6925661010777691, "learning_rate": 3.150375320863508e-07, "loss": 0.0937, "step": 30501 }, { "epoch": 0.8898418810899119, "grad_norm": 0.8388241395615271, "learning_rate": 3.1487250836754915e-07, "loss": 0.1088, "step": 30502 }, { "epoch": 0.8898710543205555, "grad_norm": 0.9641978352144771, "learning_rate": 3.147075264764465e-07, "loss": 0.1265, "step": 30503 }, { "epoch": 0.889900227551199, "grad_norm": 0.8059912009626359, "learning_rate": 3.145425864145163e-07, "loss": 0.1036, "step": 30504 }, { "epoch": 0.8899294007818426, "grad_norm": 0.8709323443371206, "learning_rate": 3.143776881832322e-07, "loss": 0.1259, "step": 30505 }, { "epoch": 0.8899585740124861, "grad_norm": 0.7596751859457278, "learning_rate": 3.1421283178406537e-07, "loss": 0.1075, "step": 30506 }, { "epoch": 0.8899877472431297, "grad_norm": 0.8746650117572208, "learning_rate": 3.140480172184873e-07, "loss": 0.1099, "step": 30507 }, { "epoch": 0.8900169204737732, "grad_norm": 0.8399294210591024, "learning_rate": 3.1388324448797083e-07, "loss": 0.1325, "step": 30508 }, { "epoch": 0.8900460937044168, "grad_norm": 1.0775169203826622, "learning_rate": 3.1371851359398465e-07, "loss": 0.1154, "step": 30509 }, { "epoch": 0.8900752669350603, "grad_norm": 1.135219210523027, "learning_rate": 3.1355382453800155e-07, "loss": 0.1014, "step": 30510 }, { "epoch": 0.890104440165704, "grad_norm": 0.9120801805597152, "learning_rate": 3.133891773214914e-07, "loss": 0.1263, "step": 30511 }, { "epoch": 0.8901336133963476, "grad_norm": 0.8937106915785827, "learning_rate": 3.1322457194592426e-07, "loss": 0.1296, "step": 30512 }, { "epoch": 0.8901627866269911, "grad_norm": 0.8927612096903995, "learning_rate": 3.130600084127683e-07, "loss": 0.1092, "step": 30513 }, { "epoch": 0.8901919598576347, "grad_norm": 0.9258014059962174, "learning_rate": 3.1289548672349514e-07, "loss": 0.0989, "step": 30514 }, { "epoch": 0.8902211330882782, "grad_norm": 0.8992620417502106, "learning_rate": 3.127310068795708e-07, "loss": 0.1403, "step": 30515 }, { "epoch": 0.8902503063189218, "grad_norm": 0.8881986317519851, "learning_rate": 3.1256656888246586e-07, "loss": 0.1094, "step": 30516 }, { "epoch": 0.8902794795495653, "grad_norm": 0.8222217825382221, "learning_rate": 3.124021727336468e-07, "loss": 0.1184, "step": 30517 }, { "epoch": 0.8903086527802089, "grad_norm": 0.7635861878730664, "learning_rate": 3.1223781843458314e-07, "loss": 0.0984, "step": 30518 }, { "epoch": 0.8903378260108524, "grad_norm": 0.9682910058529923, "learning_rate": 3.1207350598674137e-07, "loss": 0.13, "step": 30519 }, { "epoch": 0.890366999241496, "grad_norm": 0.8115903491035434, "learning_rate": 3.11909235391587e-07, "loss": 0.1362, "step": 30520 }, { "epoch": 0.8903961724721395, "grad_norm": 0.7768085823349589, "learning_rate": 3.117450066505878e-07, "loss": 0.099, "step": 30521 }, { "epoch": 0.8904253457027831, "grad_norm": 0.8773722598141154, "learning_rate": 3.1158081976521094e-07, "loss": 0.1294, "step": 30522 }, { "epoch": 0.8904545189334266, "grad_norm": 0.8846059299708734, "learning_rate": 3.114166747369218e-07, "loss": 0.1125, "step": 30523 }, { "epoch": 0.8904836921640702, "grad_norm": 0.8924039041252876, "learning_rate": 3.112525715671838e-07, "loss": 0.1339, "step": 30524 }, { "epoch": 0.8905128653947139, "grad_norm": 0.9959765749310289, "learning_rate": 3.1108851025746457e-07, "loss": 0.1294, "step": 30525 }, { "epoch": 0.8905420386253574, "grad_norm": 0.8864124216035949, "learning_rate": 3.109244908092279e-07, "loss": 0.1142, "step": 30526 }, { "epoch": 0.890571211856001, "grad_norm": 1.0028369673101463, "learning_rate": 3.1076051322393663e-07, "loss": 0.1179, "step": 30527 }, { "epoch": 0.8906003850866445, "grad_norm": 0.9746235679010936, "learning_rate": 3.105965775030573e-07, "loss": 0.1197, "step": 30528 }, { "epoch": 0.8906295583172881, "grad_norm": 0.7617722022236654, "learning_rate": 3.1043268364805257e-07, "loss": 0.1113, "step": 30529 }, { "epoch": 0.8906587315479316, "grad_norm": 0.649378711927433, "learning_rate": 3.1026883166038413e-07, "loss": 0.1209, "step": 30530 }, { "epoch": 0.8906879047785752, "grad_norm": 0.8936101132561894, "learning_rate": 3.1010502154151743e-07, "loss": 0.1055, "step": 30531 }, { "epoch": 0.8907170780092187, "grad_norm": 0.997456688702685, "learning_rate": 3.09941253292913e-07, "loss": 0.112, "step": 30532 }, { "epoch": 0.8907462512398623, "grad_norm": 0.8087439596778104, "learning_rate": 3.0977752691603303e-07, "loss": 0.1236, "step": 30533 }, { "epoch": 0.8907754244705058, "grad_norm": 0.7940552257410859, "learning_rate": 3.0961384241233907e-07, "loss": 0.1104, "step": 30534 }, { "epoch": 0.8908045977011494, "grad_norm": 1.3486098578327579, "learning_rate": 3.094501997832944e-07, "loss": 0.1031, "step": 30535 }, { "epoch": 0.890833770931793, "grad_norm": 1.070137905667782, "learning_rate": 3.092865990303584e-07, "loss": 0.1328, "step": 30536 }, { "epoch": 0.8908629441624365, "grad_norm": 0.7014482598401466, "learning_rate": 3.0912304015499106e-07, "loss": 0.1088, "step": 30537 }, { "epoch": 0.8908921173930802, "grad_norm": 0.8931822380794611, "learning_rate": 3.089595231586545e-07, "loss": 0.1118, "step": 30538 }, { "epoch": 0.8909212906237237, "grad_norm": 0.7487271169363175, "learning_rate": 3.087960480428065e-07, "loss": 0.0883, "step": 30539 }, { "epoch": 0.8909504638543673, "grad_norm": 0.7756367771297727, "learning_rate": 3.086326148089075e-07, "loss": 0.1131, "step": 30540 }, { "epoch": 0.8909796370850108, "grad_norm": 0.8286140357712191, "learning_rate": 3.0846922345841746e-07, "loss": 0.1071, "step": 30541 }, { "epoch": 0.8910088103156544, "grad_norm": 0.8281252707009609, "learning_rate": 3.083058739927941e-07, "loss": 0.1344, "step": 30542 }, { "epoch": 0.8910379835462979, "grad_norm": 0.8690984771551428, "learning_rate": 3.0814256641349517e-07, "loss": 0.0929, "step": 30543 }, { "epoch": 0.8910671567769415, "grad_norm": 1.0453484297338385, "learning_rate": 3.0797930072198e-07, "loss": 0.1216, "step": 30544 }, { "epoch": 0.891096330007585, "grad_norm": 1.211958748447629, "learning_rate": 3.0781607691970474e-07, "loss": 0.1067, "step": 30545 }, { "epoch": 0.8911255032382286, "grad_norm": 0.960417382087224, "learning_rate": 3.0765289500812866e-07, "loss": 0.1116, "step": 30546 }, { "epoch": 0.8911546764688721, "grad_norm": 0.8513883921626092, "learning_rate": 3.0748975498870627e-07, "loss": 0.1019, "step": 30547 }, { "epoch": 0.8911838496995157, "grad_norm": 0.8598663139293854, "learning_rate": 3.0732665686289574e-07, "loss": 0.1111, "step": 30548 }, { "epoch": 0.8912130229301592, "grad_norm": 1.0060394657794325, "learning_rate": 3.071636006321527e-07, "loss": 0.0974, "step": 30549 }, { "epoch": 0.8912421961608028, "grad_norm": 0.9333911478303079, "learning_rate": 3.070005862979325e-07, "loss": 0.1098, "step": 30550 }, { "epoch": 0.8912713693914464, "grad_norm": 0.8556123932859099, "learning_rate": 3.068376138616902e-07, "loss": 0.1133, "step": 30551 }, { "epoch": 0.89130054262209, "grad_norm": 0.8275808135699773, "learning_rate": 3.0667468332488237e-07, "loss": 0.1081, "step": 30552 }, { "epoch": 0.8913297158527336, "grad_norm": 1.247262579244288, "learning_rate": 3.065117946889623e-07, "loss": 0.0892, "step": 30553 }, { "epoch": 0.8913588890833771, "grad_norm": 0.8238164952677675, "learning_rate": 3.0634894795538385e-07, "loss": 0.1198, "step": 30554 }, { "epoch": 0.8913880623140207, "grad_norm": 0.7707484983912157, "learning_rate": 3.0618614312560244e-07, "loss": 0.1196, "step": 30555 }, { "epoch": 0.8914172355446642, "grad_norm": 0.8191705907142699, "learning_rate": 3.060233802010709e-07, "loss": 0.1, "step": 30556 }, { "epoch": 0.8914464087753078, "grad_norm": 0.9204114270653742, "learning_rate": 3.0586065918324025e-07, "loss": 0.1119, "step": 30557 }, { "epoch": 0.8914755820059513, "grad_norm": 0.6168227730592508, "learning_rate": 3.0569798007356653e-07, "loss": 0.0896, "step": 30558 }, { "epoch": 0.8915047552365949, "grad_norm": 0.7855472813687441, "learning_rate": 3.055353428735003e-07, "loss": 0.1006, "step": 30559 }, { "epoch": 0.8915339284672384, "grad_norm": 0.7277658231052057, "learning_rate": 3.0537274758449366e-07, "loss": 0.0903, "step": 30560 }, { "epoch": 0.891563101697882, "grad_norm": 0.7969145948110496, "learning_rate": 3.052101942079988e-07, "loss": 0.1249, "step": 30561 }, { "epoch": 0.8915922749285256, "grad_norm": 0.877067568969228, "learning_rate": 3.050476827454668e-07, "loss": 0.1064, "step": 30562 }, { "epoch": 0.8916214481591691, "grad_norm": 0.8154884683646071, "learning_rate": 3.048852131983476e-07, "loss": 0.1162, "step": 30563 }, { "epoch": 0.8916506213898127, "grad_norm": 0.7883368860871747, "learning_rate": 3.0472278556809233e-07, "loss": 0.1112, "step": 30564 }, { "epoch": 0.8916797946204563, "grad_norm": 0.6825588540517084, "learning_rate": 3.0456039985615193e-07, "loss": 0.1182, "step": 30565 }, { "epoch": 0.8917089678510999, "grad_norm": 0.9066670376722872, "learning_rate": 3.0439805606397533e-07, "loss": 0.1112, "step": 30566 }, { "epoch": 0.8917381410817434, "grad_norm": 0.9240938686339571, "learning_rate": 3.042357541930113e-07, "loss": 0.0867, "step": 30567 }, { "epoch": 0.891767314312387, "grad_norm": 0.9674166330384046, "learning_rate": 3.0407349424471043e-07, "loss": 0.0945, "step": 30568 }, { "epoch": 0.8917964875430305, "grad_norm": 0.8731843201559136, "learning_rate": 3.039112762205193e-07, "loss": 0.0938, "step": 30569 }, { "epoch": 0.8918256607736741, "grad_norm": 0.7800262294296054, "learning_rate": 3.037491001218873e-07, "loss": 0.119, "step": 30570 }, { "epoch": 0.8918548340043176, "grad_norm": 0.9072854256014472, "learning_rate": 3.0358696595026327e-07, "loss": 0.1109, "step": 30571 }, { "epoch": 0.8918840072349612, "grad_norm": 0.7739425465977036, "learning_rate": 3.034248737070933e-07, "loss": 0.1044, "step": 30572 }, { "epoch": 0.8919131804656047, "grad_norm": 0.8250757646433379, "learning_rate": 3.0326282339382453e-07, "loss": 0.1087, "step": 30573 }, { "epoch": 0.8919423536962483, "grad_norm": 0.7924070911825599, "learning_rate": 3.0310081501190415e-07, "loss": 0.0985, "step": 30574 }, { "epoch": 0.8919715269268919, "grad_norm": 1.0470126728379414, "learning_rate": 3.029388485627782e-07, "loss": 0.1127, "step": 30575 }, { "epoch": 0.8920007001575354, "grad_norm": 0.7333108222090367, "learning_rate": 3.027769240478939e-07, "loss": 0.1336, "step": 30576 }, { "epoch": 0.892029873388179, "grad_norm": 0.8361327535722722, "learning_rate": 3.0261504146869457e-07, "loss": 0.134, "step": 30577 }, { "epoch": 0.8920590466188225, "grad_norm": 0.811324091203435, "learning_rate": 3.024532008266279e-07, "loss": 0.1014, "step": 30578 }, { "epoch": 0.8920882198494662, "grad_norm": 0.8761814206887861, "learning_rate": 3.0229140212313767e-07, "loss": 0.1054, "step": 30579 }, { "epoch": 0.8921173930801097, "grad_norm": 0.8650337702531615, "learning_rate": 3.021296453596678e-07, "loss": 0.1161, "step": 30580 }, { "epoch": 0.8921465663107533, "grad_norm": 0.8423918525707391, "learning_rate": 3.019679305376627e-07, "loss": 0.1031, "step": 30581 }, { "epoch": 0.8921757395413968, "grad_norm": 1.008493869777864, "learning_rate": 3.018062576585673e-07, "loss": 0.114, "step": 30582 }, { "epoch": 0.8922049127720404, "grad_norm": 0.8460407679440864, "learning_rate": 3.016446267238238e-07, "loss": 0.1039, "step": 30583 }, { "epoch": 0.892234086002684, "grad_norm": 0.8650983055723295, "learning_rate": 3.0148303773487486e-07, "loss": 0.0898, "step": 30584 }, { "epoch": 0.8922632592333275, "grad_norm": 0.7098856616095556, "learning_rate": 3.0132149069316497e-07, "loss": 0.1034, "step": 30585 }, { "epoch": 0.892292432463971, "grad_norm": 0.7638130427984393, "learning_rate": 3.0115998560013404e-07, "loss": 0.1122, "step": 30586 }, { "epoch": 0.8923216056946146, "grad_norm": 0.9767240295345735, "learning_rate": 3.0099852245722483e-07, "loss": 0.1272, "step": 30587 }, { "epoch": 0.8923507789252582, "grad_norm": 1.1747269220045504, "learning_rate": 3.0083710126588005e-07, "loss": 0.1167, "step": 30588 }, { "epoch": 0.8923799521559017, "grad_norm": 0.9038430151497058, "learning_rate": 3.006757220275397e-07, "loss": 0.0973, "step": 30589 }, { "epoch": 0.8924091253865453, "grad_norm": 0.7636481866103193, "learning_rate": 3.005143847436437e-07, "loss": 0.1106, "step": 30590 }, { "epoch": 0.8924382986171888, "grad_norm": 1.0234036792038594, "learning_rate": 3.003530894156348e-07, "loss": 0.1155, "step": 30591 }, { "epoch": 0.8924674718478325, "grad_norm": 0.8167228866756323, "learning_rate": 3.0019183604495075e-07, "loss": 0.1213, "step": 30592 }, { "epoch": 0.892496645078476, "grad_norm": 0.6343908880006888, "learning_rate": 3.0003062463303257e-07, "loss": 0.0835, "step": 30593 }, { "epoch": 0.8925258183091196, "grad_norm": 0.67414439241472, "learning_rate": 2.99869455181318e-07, "loss": 0.0921, "step": 30594 }, { "epoch": 0.8925549915397631, "grad_norm": 0.7684257627189851, "learning_rate": 2.9970832769124823e-07, "loss": 0.1224, "step": 30595 }, { "epoch": 0.8925841647704067, "grad_norm": 0.7164831969761788, "learning_rate": 2.995472421642598e-07, "loss": 0.0952, "step": 30596 }, { "epoch": 0.8926133380010502, "grad_norm": 0.774801751984761, "learning_rate": 2.993861986017915e-07, "loss": 0.0964, "step": 30597 }, { "epoch": 0.8926425112316938, "grad_norm": 0.8492196878393536, "learning_rate": 2.992251970052806e-07, "loss": 0.1069, "step": 30598 }, { "epoch": 0.8926716844623374, "grad_norm": 0.816706175559454, "learning_rate": 2.9906423737616595e-07, "loss": 0.1139, "step": 30599 }, { "epoch": 0.8927008576929809, "grad_norm": 1.1452740993489874, "learning_rate": 2.989033197158825e-07, "loss": 0.11, "step": 30600 }, { "epoch": 0.8927300309236245, "grad_norm": 0.7472179425536273, "learning_rate": 2.9874244402586903e-07, "loss": 0.1386, "step": 30601 }, { "epoch": 0.892759204154268, "grad_norm": 0.8886819051906715, "learning_rate": 2.985816103075606e-07, "loss": 0.1148, "step": 30602 }, { "epoch": 0.8927883773849116, "grad_norm": 1.1445926068268717, "learning_rate": 2.984208185623927e-07, "loss": 0.1115, "step": 30603 }, { "epoch": 0.8928175506155551, "grad_norm": 0.7815328900810175, "learning_rate": 2.982600687918014e-07, "loss": 0.1243, "step": 30604 }, { "epoch": 0.8928467238461987, "grad_norm": 0.7975707815032408, "learning_rate": 2.980993609972221e-07, "loss": 0.0996, "step": 30605 }, { "epoch": 0.8928758970768423, "grad_norm": 0.8032606289605577, "learning_rate": 2.9793869518009e-07, "loss": 0.0782, "step": 30606 }, { "epoch": 0.8929050703074859, "grad_norm": 0.8663623225654811, "learning_rate": 2.9777807134183714e-07, "loss": 0.1096, "step": 30607 }, { "epoch": 0.8929342435381294, "grad_norm": 0.7444972697751595, "learning_rate": 2.976174894839007e-07, "loss": 0.1195, "step": 30608 }, { "epoch": 0.892963416768773, "grad_norm": 0.8804128856649871, "learning_rate": 2.974569496077123e-07, "loss": 0.1303, "step": 30609 }, { "epoch": 0.8929925899994166, "grad_norm": 0.8628877034987134, "learning_rate": 2.972964517147048e-07, "loss": 0.1054, "step": 30610 }, { "epoch": 0.8930217632300601, "grad_norm": 0.8268818707012316, "learning_rate": 2.971359958063125e-07, "loss": 0.131, "step": 30611 }, { "epoch": 0.8930509364607037, "grad_norm": 0.9250978819681452, "learning_rate": 2.9697558188396757e-07, "loss": 0.12, "step": 30612 }, { "epoch": 0.8930801096913472, "grad_norm": 0.8044794877353744, "learning_rate": 2.968152099491023e-07, "loss": 0.113, "step": 30613 }, { "epoch": 0.8931092829219908, "grad_norm": 0.8176311630704783, "learning_rate": 2.966548800031471e-07, "loss": 0.0922, "step": 30614 }, { "epoch": 0.8931384561526343, "grad_norm": 0.7626515244155622, "learning_rate": 2.964945920475354e-07, "loss": 0.116, "step": 30615 }, { "epoch": 0.8931676293832779, "grad_norm": 0.7250689703709754, "learning_rate": 2.9633434608369596e-07, "loss": 0.1064, "step": 30616 }, { "epoch": 0.8931968026139214, "grad_norm": 2.6020126383282434, "learning_rate": 2.9617414211306093e-07, "loss": 0.1268, "step": 30617 }, { "epoch": 0.893225975844565, "grad_norm": 0.9140808306146535, "learning_rate": 2.9601398013706094e-07, "loss": 0.1186, "step": 30618 }, { "epoch": 0.8932551490752086, "grad_norm": 0.7610128111653633, "learning_rate": 2.9585386015712537e-07, "loss": 0.0968, "step": 30619 }, { "epoch": 0.8932843223058522, "grad_norm": 0.9511054648297602, "learning_rate": 2.9569378217468247e-07, "loss": 0.1079, "step": 30620 }, { "epoch": 0.8933134955364957, "grad_norm": 0.908847078825639, "learning_rate": 2.9553374619116335e-07, "loss": 0.1329, "step": 30621 }, { "epoch": 0.8933426687671393, "grad_norm": 0.8692358887319792, "learning_rate": 2.953737522079952e-07, "loss": 0.1308, "step": 30622 }, { "epoch": 0.8933718419977829, "grad_norm": 0.7723239781444972, "learning_rate": 2.952138002266081e-07, "loss": 0.1421, "step": 30623 }, { "epoch": 0.8934010152284264, "grad_norm": 0.8969370724316609, "learning_rate": 2.950538902484279e-07, "loss": 0.1169, "step": 30624 }, { "epoch": 0.89343018845907, "grad_norm": 0.7501624440476841, "learning_rate": 2.9489402227488474e-07, "loss": 0.0857, "step": 30625 }, { "epoch": 0.8934593616897135, "grad_norm": 0.8291425232771156, "learning_rate": 2.9473419630740405e-07, "loss": 0.1151, "step": 30626 }, { "epoch": 0.8934885349203571, "grad_norm": 0.8029671988119853, "learning_rate": 2.9457441234741256e-07, "loss": 0.0864, "step": 30627 }, { "epoch": 0.8935177081510006, "grad_norm": 0.7704961735744835, "learning_rate": 2.944146703963374e-07, "loss": 0.0901, "step": 30628 }, { "epoch": 0.8935468813816442, "grad_norm": 0.8741711341205287, "learning_rate": 2.942549704556058e-07, "loss": 0.1128, "step": 30629 }, { "epoch": 0.8935760546122877, "grad_norm": 0.9295472757177593, "learning_rate": 2.9409531252664105e-07, "loss": 0.1079, "step": 30630 }, { "epoch": 0.8936052278429313, "grad_norm": 0.9614111491802272, "learning_rate": 2.9393569661087143e-07, "loss": 0.1105, "step": 30631 }, { "epoch": 0.8936344010735748, "grad_norm": 1.0608409579751146, "learning_rate": 2.937761227097202e-07, "loss": 0.0896, "step": 30632 }, { "epoch": 0.8936635743042185, "grad_norm": 0.9306061138796965, "learning_rate": 2.9361659082461137e-07, "loss": 0.1199, "step": 30633 }, { "epoch": 0.893692747534862, "grad_norm": 2.088853597013049, "learning_rate": 2.9345710095697036e-07, "loss": 0.1011, "step": 30634 }, { "epoch": 0.8937219207655056, "grad_norm": 0.9394738653616855, "learning_rate": 2.9329765310822156e-07, "loss": 0.1088, "step": 30635 }, { "epoch": 0.8937510939961492, "grad_norm": 0.7388414370064872, "learning_rate": 2.931382472797878e-07, "loss": 0.1107, "step": 30636 }, { "epoch": 0.8937802672267927, "grad_norm": 0.6401330911219283, "learning_rate": 2.9297888347309124e-07, "loss": 0.1186, "step": 30637 }, { "epoch": 0.8938094404574363, "grad_norm": 0.7369857127213442, "learning_rate": 2.928195616895563e-07, "loss": 0.103, "step": 30638 }, { "epoch": 0.8938386136880798, "grad_norm": 1.0939213670571821, "learning_rate": 2.926602819306046e-07, "loss": 0.1145, "step": 30639 }, { "epoch": 0.8938677869187234, "grad_norm": 0.7832643718469073, "learning_rate": 2.9250104419765724e-07, "loss": 0.1139, "step": 30640 }, { "epoch": 0.8938969601493669, "grad_norm": 0.8266742583807348, "learning_rate": 2.9234184849213696e-07, "loss": 0.1126, "step": 30641 }, { "epoch": 0.8939261333800105, "grad_norm": 0.9793590388440535, "learning_rate": 2.9218269481546545e-07, "loss": 0.0995, "step": 30642 }, { "epoch": 0.893955306610654, "grad_norm": 0.7229233293580005, "learning_rate": 2.920235831690632e-07, "loss": 0.1271, "step": 30643 }, { "epoch": 0.8939844798412976, "grad_norm": 0.9154397096787203, "learning_rate": 2.9186451355435017e-07, "loss": 0.1333, "step": 30644 }, { "epoch": 0.8940136530719411, "grad_norm": 0.9308696874881223, "learning_rate": 2.9170548597274697e-07, "loss": 0.1305, "step": 30645 }, { "epoch": 0.8940428263025848, "grad_norm": 0.8685920500446513, "learning_rate": 2.915465004256729e-07, "loss": 0.0946, "step": 30646 }, { "epoch": 0.8940719995332284, "grad_norm": 0.9372730465917704, "learning_rate": 2.9138755691454745e-07, "loss": 0.1112, "step": 30647 }, { "epoch": 0.8941011727638719, "grad_norm": 0.8874817740803603, "learning_rate": 2.912286554407906e-07, "loss": 0.1358, "step": 30648 }, { "epoch": 0.8941303459945155, "grad_norm": 0.9400008207788705, "learning_rate": 2.910697960058201e-07, "loss": 0.1025, "step": 30649 }, { "epoch": 0.894159519225159, "grad_norm": 0.9180184949259226, "learning_rate": 2.9091097861105365e-07, "loss": 0.1202, "step": 30650 }, { "epoch": 0.8941886924558026, "grad_norm": 0.7490221131587833, "learning_rate": 2.9075220325791076e-07, "loss": 0.0969, "step": 30651 }, { "epoch": 0.8942178656864461, "grad_norm": 0.869059582480006, "learning_rate": 2.905934699478069e-07, "loss": 0.0943, "step": 30652 }, { "epoch": 0.8942470389170897, "grad_norm": 0.9224576121602542, "learning_rate": 2.9043477868216154e-07, "loss": 0.0965, "step": 30653 }, { "epoch": 0.8942762121477332, "grad_norm": 3.962025518557501, "learning_rate": 2.9027612946238906e-07, "loss": 0.125, "step": 30654 }, { "epoch": 0.8943053853783768, "grad_norm": 0.8240901935222994, "learning_rate": 2.901175222899083e-07, "loss": 0.1219, "step": 30655 }, { "epoch": 0.8943345586090203, "grad_norm": 1.1371779334655232, "learning_rate": 2.899589571661332e-07, "loss": 0.0914, "step": 30656 }, { "epoch": 0.8943637318396639, "grad_norm": 0.8406002026070913, "learning_rate": 2.898004340924798e-07, "loss": 0.1017, "step": 30657 }, { "epoch": 0.8943929050703074, "grad_norm": 0.77795371350629, "learning_rate": 2.896419530703637e-07, "loss": 0.1167, "step": 30658 }, { "epoch": 0.894422078300951, "grad_norm": 1.038122229345276, "learning_rate": 2.894835141012009e-07, "loss": 0.1116, "step": 30659 }, { "epoch": 0.8944512515315947, "grad_norm": 0.7155179875183134, "learning_rate": 2.8932511718640366e-07, "loss": 0.0913, "step": 30660 }, { "epoch": 0.8944804247622382, "grad_norm": 0.9850546589207486, "learning_rate": 2.891667623273881e-07, "loss": 0.1044, "step": 30661 }, { "epoch": 0.8945095979928818, "grad_norm": 0.9246766592180333, "learning_rate": 2.8900844952556685e-07, "loss": 0.1404, "step": 30662 }, { "epoch": 0.8945387712235253, "grad_norm": 0.8832666813312848, "learning_rate": 2.888501787823533e-07, "loss": 0.1197, "step": 30663 }, { "epoch": 0.8945679444541689, "grad_norm": 0.7829364176625517, "learning_rate": 2.886919500991603e-07, "loss": 0.1011, "step": 30664 }, { "epoch": 0.8945971176848124, "grad_norm": 0.8653227660922241, "learning_rate": 2.885337634774016e-07, "loss": 0.1069, "step": 30665 }, { "epoch": 0.894626290915456, "grad_norm": 0.7824465549723113, "learning_rate": 2.883756189184889e-07, "loss": 0.1057, "step": 30666 }, { "epoch": 0.8946554641460995, "grad_norm": 0.9997744609921497, "learning_rate": 2.882175164238332e-07, "loss": 0.0947, "step": 30667 }, { "epoch": 0.8946846373767431, "grad_norm": 0.7816483007528533, "learning_rate": 2.8805945599484743e-07, "loss": 0.1004, "step": 30668 }, { "epoch": 0.8947138106073866, "grad_norm": 0.7103235354537379, "learning_rate": 2.87901437632942e-07, "loss": 0.1237, "step": 30669 }, { "epoch": 0.8947429838380302, "grad_norm": 0.7618700438722354, "learning_rate": 2.877434613395269e-07, "loss": 0.1019, "step": 30670 }, { "epoch": 0.8947721570686737, "grad_norm": 0.8066672420541794, "learning_rate": 2.875855271160133e-07, "loss": 0.0943, "step": 30671 }, { "epoch": 0.8948013302993173, "grad_norm": 0.9799411310826015, "learning_rate": 2.874276349638122e-07, "loss": 0.1095, "step": 30672 }, { "epoch": 0.894830503529961, "grad_norm": 0.8487921395001287, "learning_rate": 2.87269784884332e-07, "loss": 0.1159, "step": 30673 }, { "epoch": 0.8948596767606045, "grad_norm": 1.1195891799567557, "learning_rate": 2.8711197687898097e-07, "loss": 0.1378, "step": 30674 }, { "epoch": 0.8948888499912481, "grad_norm": 0.7989028904936224, "learning_rate": 2.869542109491702e-07, "loss": 0.1056, "step": 30675 }, { "epoch": 0.8949180232218916, "grad_norm": 0.7346770494945787, "learning_rate": 2.867964870963069e-07, "loss": 0.0971, "step": 30676 }, { "epoch": 0.8949471964525352, "grad_norm": 1.0206758932178501, "learning_rate": 2.8663880532179887e-07, "loss": 0.1043, "step": 30677 }, { "epoch": 0.8949763696831787, "grad_norm": 0.8518157081409864, "learning_rate": 2.8648116562705494e-07, "loss": 0.1208, "step": 30678 }, { "epoch": 0.8950055429138223, "grad_norm": 0.9966931330463132, "learning_rate": 2.863235680134824e-07, "loss": 0.1239, "step": 30679 }, { "epoch": 0.8950347161444658, "grad_norm": 0.7942440447797972, "learning_rate": 2.861660124824872e-07, "loss": 0.1207, "step": 30680 }, { "epoch": 0.8950638893751094, "grad_norm": 0.780495041468004, "learning_rate": 2.8600849903547666e-07, "loss": 0.1128, "step": 30681 }, { "epoch": 0.8950930626057529, "grad_norm": 0.9115147619165473, "learning_rate": 2.8585102767385685e-07, "loss": 0.1214, "step": 30682 }, { "epoch": 0.8951222358363965, "grad_norm": 0.7222577368355971, "learning_rate": 2.856935983990339e-07, "loss": 0.1054, "step": 30683 }, { "epoch": 0.89515140906704, "grad_norm": 0.7301611366280484, "learning_rate": 2.855362112124127e-07, "loss": 0.1142, "step": 30684 }, { "epoch": 0.8951805822976836, "grad_norm": 0.7552815843178827, "learning_rate": 2.8537886611539945e-07, "loss": 0.1079, "step": 30685 }, { "epoch": 0.8952097555283272, "grad_norm": 0.8885221919914154, "learning_rate": 2.8522156310939797e-07, "loss": 0.1102, "step": 30686 }, { "epoch": 0.8952389287589708, "grad_norm": 0.6438831330036408, "learning_rate": 2.850643021958127e-07, "loss": 0.1161, "step": 30687 }, { "epoch": 0.8952681019896144, "grad_norm": 1.0867915301485433, "learning_rate": 2.8490708337604756e-07, "loss": 0.1039, "step": 30688 }, { "epoch": 0.8952972752202579, "grad_norm": 0.7095812392346565, "learning_rate": 2.847499066515069e-07, "loss": 0.1149, "step": 30689 }, { "epoch": 0.8953264484509015, "grad_norm": 0.8682813865885494, "learning_rate": 2.84592772023593e-07, "loss": 0.118, "step": 30690 }, { "epoch": 0.895355621681545, "grad_norm": 0.8883751743992266, "learning_rate": 2.8443567949370974e-07, "loss": 0.1098, "step": 30691 }, { "epoch": 0.8953847949121886, "grad_norm": 1.3194172476721755, "learning_rate": 2.8427862906325875e-07, "loss": 0.1219, "step": 30692 }, { "epoch": 0.8954139681428321, "grad_norm": 0.8805669145462485, "learning_rate": 2.8412162073364227e-07, "loss": 0.1039, "step": 30693 }, { "epoch": 0.8954431413734757, "grad_norm": 0.7842008713991359, "learning_rate": 2.8396465450626186e-07, "loss": 0.1162, "step": 30694 }, { "epoch": 0.8954723146041192, "grad_norm": 0.927920248075283, "learning_rate": 2.8380773038251984e-07, "loss": 0.1288, "step": 30695 }, { "epoch": 0.8955014878347628, "grad_norm": 0.8649481687855042, "learning_rate": 2.836508483638167e-07, "loss": 0.1096, "step": 30696 }, { "epoch": 0.8955306610654064, "grad_norm": 0.7721159957993788, "learning_rate": 2.8349400845155193e-07, "loss": 0.0965, "step": 30697 }, { "epoch": 0.8955598342960499, "grad_norm": 0.8514232986269841, "learning_rate": 2.833372106471277e-07, "loss": 0.1059, "step": 30698 }, { "epoch": 0.8955890075266935, "grad_norm": 0.8185380777442179, "learning_rate": 2.8318045495194293e-07, "loss": 0.1026, "step": 30699 }, { "epoch": 0.895618180757337, "grad_norm": 0.680964602932964, "learning_rate": 2.8302374136739643e-07, "loss": 0.0835, "step": 30700 }, { "epoch": 0.8956473539879807, "grad_norm": 0.7553616726414424, "learning_rate": 2.8286706989488766e-07, "loss": 0.1148, "step": 30701 }, { "epoch": 0.8956765272186242, "grad_norm": 0.760820917165124, "learning_rate": 2.8271044053581666e-07, "loss": 0.1132, "step": 30702 }, { "epoch": 0.8957057004492678, "grad_norm": 0.7823646909128376, "learning_rate": 2.8255385329158056e-07, "loss": 0.1031, "step": 30703 }, { "epoch": 0.8957348736799113, "grad_norm": 0.8388846348531964, "learning_rate": 2.823973081635767e-07, "loss": 0.1039, "step": 30704 }, { "epoch": 0.8957640469105549, "grad_norm": 0.6541002407280917, "learning_rate": 2.822408051532044e-07, "loss": 0.114, "step": 30705 }, { "epoch": 0.8957932201411984, "grad_norm": 0.7585429260662029, "learning_rate": 2.8208434426185926e-07, "loss": 0.1107, "step": 30706 }, { "epoch": 0.895822393371842, "grad_norm": 0.8984268695878658, "learning_rate": 2.819279254909385e-07, "loss": 0.0971, "step": 30707 }, { "epoch": 0.8958515666024855, "grad_norm": 0.9978469007664811, "learning_rate": 2.8177154884183986e-07, "loss": 0.1216, "step": 30708 }, { "epoch": 0.8958807398331291, "grad_norm": 0.8137297061545247, "learning_rate": 2.8161521431595897e-07, "loss": 0.1117, "step": 30709 }, { "epoch": 0.8959099130637727, "grad_norm": 0.7365263239636884, "learning_rate": 2.814589219146896e-07, "loss": 0.1022, "step": 30710 }, { "epoch": 0.8959390862944162, "grad_norm": 1.0540794711851977, "learning_rate": 2.813026716394296e-07, "loss": 0.0966, "step": 30711 }, { "epoch": 0.8959682595250598, "grad_norm": 0.7269967391048545, "learning_rate": 2.8114646349157227e-07, "loss": 0.0908, "step": 30712 }, { "epoch": 0.8959974327557033, "grad_norm": 0.793578367137509, "learning_rate": 2.8099029747251314e-07, "loss": 0.1286, "step": 30713 }, { "epoch": 0.896026605986347, "grad_norm": 0.8365626249923098, "learning_rate": 2.8083417358364615e-07, "loss": 0.0899, "step": 30714 }, { "epoch": 0.8960557792169905, "grad_norm": 0.8866360400757096, "learning_rate": 2.806780918263652e-07, "loss": 0.1257, "step": 30715 }, { "epoch": 0.8960849524476341, "grad_norm": 0.8700735130147824, "learning_rate": 2.8052205220206406e-07, "loss": 0.1288, "step": 30716 }, { "epoch": 0.8961141256782776, "grad_norm": 0.8824283698640039, "learning_rate": 2.8036605471213453e-07, "loss": 0.1095, "step": 30717 }, { "epoch": 0.8961432989089212, "grad_norm": 0.9074946088251287, "learning_rate": 2.802100993579698e-07, "loss": 0.1142, "step": 30718 }, { "epoch": 0.8961724721395647, "grad_norm": 0.9216507187468439, "learning_rate": 2.800541861409639e-07, "loss": 0.1228, "step": 30719 }, { "epoch": 0.8962016453702083, "grad_norm": 0.8283107882034496, "learning_rate": 2.798983150625062e-07, "loss": 0.1107, "step": 30720 }, { "epoch": 0.8962308186008519, "grad_norm": 0.9998284171102396, "learning_rate": 2.797424861239906e-07, "loss": 0.1166, "step": 30721 }, { "epoch": 0.8962599918314954, "grad_norm": 0.681729079454811, "learning_rate": 2.795866993268076e-07, "loss": 0.0987, "step": 30722 }, { "epoch": 0.896289165062139, "grad_norm": 0.7205330169882552, "learning_rate": 2.794309546723467e-07, "loss": 0.0987, "step": 30723 }, { "epoch": 0.8963183382927825, "grad_norm": 0.943005385774912, "learning_rate": 2.79275252161999e-07, "loss": 0.1013, "step": 30724 }, { "epoch": 0.8963475115234261, "grad_norm": 0.7708921117820327, "learning_rate": 2.791195917971562e-07, "loss": 0.0857, "step": 30725 }, { "epoch": 0.8963766847540696, "grad_norm": 0.710933459659192, "learning_rate": 2.7896397357920655e-07, "loss": 0.1099, "step": 30726 }, { "epoch": 0.8964058579847132, "grad_norm": 0.9451970832846561, "learning_rate": 2.788083975095385e-07, "loss": 0.1309, "step": 30727 }, { "epoch": 0.8964350312153568, "grad_norm": 0.8845957460741791, "learning_rate": 2.78652863589543e-07, "loss": 0.0908, "step": 30728 }, { "epoch": 0.8964642044460004, "grad_norm": 0.7861611751527217, "learning_rate": 2.7849737182060743e-07, "loss": 0.0954, "step": 30729 }, { "epoch": 0.8964933776766439, "grad_norm": 0.9881368903368798, "learning_rate": 2.7834192220412004e-07, "loss": 0.1213, "step": 30730 }, { "epoch": 0.8965225509072875, "grad_norm": 1.0046237997108478, "learning_rate": 2.7818651474146865e-07, "loss": 0.1232, "step": 30731 }, { "epoch": 0.896551724137931, "grad_norm": 0.8559461826711967, "learning_rate": 2.7803114943404096e-07, "loss": 0.0804, "step": 30732 }, { "epoch": 0.8965808973685746, "grad_norm": 0.8640787998027005, "learning_rate": 2.7787582628322484e-07, "loss": 0.1227, "step": 30733 }, { "epoch": 0.8966100705992182, "grad_norm": 0.765354985120999, "learning_rate": 2.777205452904047e-07, "loss": 0.1108, "step": 30734 }, { "epoch": 0.8966392438298617, "grad_norm": 0.8463059268957756, "learning_rate": 2.775653064569689e-07, "loss": 0.0985, "step": 30735 }, { "epoch": 0.8966684170605053, "grad_norm": 0.7819341705243084, "learning_rate": 2.774101097843024e-07, "loss": 0.104, "step": 30736 }, { "epoch": 0.8966975902911488, "grad_norm": 0.7381435723679869, "learning_rate": 2.7725495527379075e-07, "loss": 0.107, "step": 30737 }, { "epoch": 0.8967267635217924, "grad_norm": 0.9283154934314369, "learning_rate": 2.7709984292682067e-07, "loss": 0.1262, "step": 30738 }, { "epoch": 0.8967559367524359, "grad_norm": 0.8106571715015493, "learning_rate": 2.7694477274477547e-07, "loss": 0.0994, "step": 30739 }, { "epoch": 0.8967851099830795, "grad_norm": 0.7198788558290798, "learning_rate": 2.767897447290391e-07, "loss": 0.0992, "step": 30740 }, { "epoch": 0.8968142832137231, "grad_norm": 0.9750558874247081, "learning_rate": 2.76634758880997e-07, "loss": 0.1103, "step": 30741 }, { "epoch": 0.8968434564443667, "grad_norm": 0.9876056999943881, "learning_rate": 2.764798152020315e-07, "loss": 0.1046, "step": 30742 }, { "epoch": 0.8968726296750102, "grad_norm": 0.833314343991956, "learning_rate": 2.7632491369352756e-07, "loss": 0.0982, "step": 30743 }, { "epoch": 0.8969018029056538, "grad_norm": 0.8603811947496085, "learning_rate": 2.7617005435686626e-07, "loss": 0.1087, "step": 30744 }, { "epoch": 0.8969309761362974, "grad_norm": 0.8700389349659293, "learning_rate": 2.760152371934316e-07, "loss": 0.1125, "step": 30745 }, { "epoch": 0.8969601493669409, "grad_norm": 0.8254819574187079, "learning_rate": 2.758604622046057e-07, "loss": 0.0942, "step": 30746 }, { "epoch": 0.8969893225975845, "grad_norm": 0.6604077936248297, "learning_rate": 2.7570572939176866e-07, "loss": 0.1055, "step": 30747 }, { "epoch": 0.897018495828228, "grad_norm": 0.8877528725600868, "learning_rate": 2.755510387563032e-07, "loss": 0.1215, "step": 30748 }, { "epoch": 0.8970476690588716, "grad_norm": 0.924881130670953, "learning_rate": 2.7539639029959097e-07, "loss": 0.0986, "step": 30749 }, { "epoch": 0.8970768422895151, "grad_norm": 1.092146565671295, "learning_rate": 2.752417840230115e-07, "loss": 0.1184, "step": 30750 }, { "epoch": 0.8971060155201587, "grad_norm": 0.8404472563530829, "learning_rate": 2.7508721992794586e-07, "loss": 0.1135, "step": 30751 }, { "epoch": 0.8971351887508022, "grad_norm": 0.6649637523160014, "learning_rate": 2.749326980157735e-07, "loss": 0.1087, "step": 30752 }, { "epoch": 0.8971643619814458, "grad_norm": 0.7348002229460175, "learning_rate": 2.7477821828787333e-07, "loss": 0.1138, "step": 30753 }, { "epoch": 0.8971935352120893, "grad_norm": 0.8516718049135685, "learning_rate": 2.746237807456259e-07, "loss": 0.1132, "step": 30754 }, { "epoch": 0.897222708442733, "grad_norm": 0.7731775570235779, "learning_rate": 2.744693853904096e-07, "loss": 0.1218, "step": 30755 }, { "epoch": 0.8972518816733766, "grad_norm": 0.6942492578301084, "learning_rate": 2.743150322236021e-07, "loss": 0.1262, "step": 30756 }, { "epoch": 0.8972810549040201, "grad_norm": 0.9903813547306958, "learning_rate": 2.7416072124658186e-07, "loss": 0.089, "step": 30757 }, { "epoch": 0.8973102281346637, "grad_norm": 0.8046959363603583, "learning_rate": 2.740064524607267e-07, "loss": 0.1009, "step": 30758 }, { "epoch": 0.8973394013653072, "grad_norm": 0.8655088682922392, "learning_rate": 2.738522258674142e-07, "loss": 0.1156, "step": 30759 }, { "epoch": 0.8973685745959508, "grad_norm": 0.9014478945337064, "learning_rate": 2.736980414680196e-07, "loss": 0.1041, "step": 30760 }, { "epoch": 0.8973977478265943, "grad_norm": 0.8983628128044445, "learning_rate": 2.7354389926392113e-07, "loss": 0.1093, "step": 30761 }, { "epoch": 0.8974269210572379, "grad_norm": 0.8696555401269471, "learning_rate": 2.733897992564949e-07, "loss": 0.1236, "step": 30762 }, { "epoch": 0.8974560942878814, "grad_norm": 0.9487568686646244, "learning_rate": 2.732357414471165e-07, "loss": 0.1057, "step": 30763 }, { "epoch": 0.897485267518525, "grad_norm": 0.7836511441328771, "learning_rate": 2.7308172583715984e-07, "loss": 0.1099, "step": 30764 }, { "epoch": 0.8975144407491685, "grad_norm": 0.8953019495718707, "learning_rate": 2.729277524280022e-07, "loss": 0.1129, "step": 30765 }, { "epoch": 0.8975436139798121, "grad_norm": 0.7815891728054658, "learning_rate": 2.7277382122101627e-07, "loss": 0.1155, "step": 30766 }, { "epoch": 0.8975727872104556, "grad_norm": 0.9841914813041843, "learning_rate": 2.726199322175771e-07, "loss": 0.119, "step": 30767 }, { "epoch": 0.8976019604410993, "grad_norm": 0.721315614571614, "learning_rate": 2.7246608541905975e-07, "loss": 0.0907, "step": 30768 }, { "epoch": 0.8976311336717429, "grad_norm": 0.9775726056696618, "learning_rate": 2.7231228082683634e-07, "loss": 0.1115, "step": 30769 }, { "epoch": 0.8976603069023864, "grad_norm": 0.8163172049338536, "learning_rate": 2.7215851844227925e-07, "loss": 0.1174, "step": 30770 }, { "epoch": 0.89768948013303, "grad_norm": 1.0424985541986709, "learning_rate": 2.720047982667634e-07, "loss": 0.1033, "step": 30771 }, { "epoch": 0.8977186533636735, "grad_norm": 0.9386592665034721, "learning_rate": 2.718511203016594e-07, "loss": 0.1101, "step": 30772 }, { "epoch": 0.8977478265943171, "grad_norm": 0.6801122392266861, "learning_rate": 2.7169748454834055e-07, "loss": 0.1025, "step": 30773 }, { "epoch": 0.8977769998249606, "grad_norm": 0.782668735345411, "learning_rate": 2.715438910081769e-07, "loss": 0.0888, "step": 30774 }, { "epoch": 0.8978061730556042, "grad_norm": 0.8612139763576521, "learning_rate": 2.713903396825418e-07, "loss": 0.1076, "step": 30775 }, { "epoch": 0.8978353462862477, "grad_norm": 0.6433592810114508, "learning_rate": 2.712368305728047e-07, "loss": 0.0999, "step": 30776 }, { "epoch": 0.8978645195168913, "grad_norm": 0.8775201550517686, "learning_rate": 2.7108336368033505e-07, "loss": 0.1068, "step": 30777 }, { "epoch": 0.8978936927475348, "grad_norm": 0.9999598708750298, "learning_rate": 2.709299390065051e-07, "loss": 0.1337, "step": 30778 }, { "epoch": 0.8979228659781784, "grad_norm": 0.7312506172836786, "learning_rate": 2.7077655655268375e-07, "loss": 0.088, "step": 30779 }, { "epoch": 0.8979520392088219, "grad_norm": 1.1768203471175818, "learning_rate": 2.706232163202405e-07, "loss": 0.1097, "step": 30780 }, { "epoch": 0.8979812124394655, "grad_norm": 0.7983772650753197, "learning_rate": 2.704699183105441e-07, "loss": 0.1066, "step": 30781 }, { "epoch": 0.8980103856701092, "grad_norm": 0.877758334830019, "learning_rate": 2.7031666252496367e-07, "loss": 0.0904, "step": 30782 }, { "epoch": 0.8980395589007527, "grad_norm": 1.0949126353961296, "learning_rate": 2.701634489648658e-07, "loss": 0.1134, "step": 30783 }, { "epoch": 0.8980687321313963, "grad_norm": 0.8147996950949038, "learning_rate": 2.700102776316199e-07, "loss": 0.1035, "step": 30784 }, { "epoch": 0.8980979053620398, "grad_norm": 0.9267408923909373, "learning_rate": 2.6985714852659386e-07, "loss": 0.1177, "step": 30785 }, { "epoch": 0.8981270785926834, "grad_norm": 1.0054067499868213, "learning_rate": 2.6970406165115425e-07, "loss": 0.118, "step": 30786 }, { "epoch": 0.8981562518233269, "grad_norm": 0.852842728820599, "learning_rate": 2.695510170066662e-07, "loss": 0.1134, "step": 30787 }, { "epoch": 0.8981854250539705, "grad_norm": 0.9854147793022071, "learning_rate": 2.6939801459449856e-07, "loss": 0.1362, "step": 30788 }, { "epoch": 0.898214598284614, "grad_norm": 0.891330330666457, "learning_rate": 2.692450544160152e-07, "loss": 0.0972, "step": 30789 }, { "epoch": 0.8982437715152576, "grad_norm": 0.7731519930964349, "learning_rate": 2.6909213647258404e-07, "loss": 0.1103, "step": 30790 }, { "epoch": 0.8982729447459011, "grad_norm": 1.002568118988472, "learning_rate": 2.6893926076556774e-07, "loss": 0.1372, "step": 30791 }, { "epoch": 0.8983021179765447, "grad_norm": 0.8203451141084636, "learning_rate": 2.6878642729633307e-07, "loss": 0.1301, "step": 30792 }, { "epoch": 0.8983312912071882, "grad_norm": 0.9201695529116912, "learning_rate": 2.686336360662434e-07, "loss": 0.1026, "step": 30793 }, { "epoch": 0.8983604644378318, "grad_norm": 0.6820850081623775, "learning_rate": 2.6848088707666307e-07, "loss": 0.117, "step": 30794 }, { "epoch": 0.8983896376684755, "grad_norm": 0.8914170321809327, "learning_rate": 2.683281803289556e-07, "loss": 0.1249, "step": 30795 }, { "epoch": 0.898418810899119, "grad_norm": 0.9629246405962423, "learning_rate": 2.681755158244853e-07, "loss": 0.0944, "step": 30796 }, { "epoch": 0.8984479841297626, "grad_norm": 0.7348982561750177, "learning_rate": 2.680228935646134e-07, "loss": 0.115, "step": 30797 }, { "epoch": 0.8984771573604061, "grad_norm": 0.864887716885459, "learning_rate": 2.6787031355070435e-07, "loss": 0.1004, "step": 30798 }, { "epoch": 0.8985063305910497, "grad_norm": 0.7809241301837349, "learning_rate": 2.6771777578411983e-07, "loss": 0.1068, "step": 30799 }, { "epoch": 0.8985355038216932, "grad_norm": 0.8871663708259863, "learning_rate": 2.6756528026622043e-07, "loss": 0.1067, "step": 30800 }, { "epoch": 0.8985646770523368, "grad_norm": 0.8317968895716673, "learning_rate": 2.6741282699836837e-07, "loss": 0.1183, "step": 30801 }, { "epoch": 0.8985938502829803, "grad_norm": 0.8902794853332741, "learning_rate": 2.6726041598192585e-07, "loss": 0.1132, "step": 30802 }, { "epoch": 0.8986230235136239, "grad_norm": 0.8742007970379547, "learning_rate": 2.6710804721825246e-07, "loss": 0.087, "step": 30803 }, { "epoch": 0.8986521967442674, "grad_norm": 0.9510818055178514, "learning_rate": 2.669557207087076e-07, "loss": 0.1238, "step": 30804 }, { "epoch": 0.898681369974911, "grad_norm": 0.7604341231543608, "learning_rate": 2.668034364546529e-07, "loss": 0.1206, "step": 30805 }, { "epoch": 0.8987105432055545, "grad_norm": 0.8376054704151209, "learning_rate": 2.6665119445744736e-07, "loss": 0.1054, "step": 30806 }, { "epoch": 0.8987397164361981, "grad_norm": 1.085951157882634, "learning_rate": 2.6649899471844875e-07, "loss": 0.1024, "step": 30807 }, { "epoch": 0.8987688896668417, "grad_norm": 0.7417634980802448, "learning_rate": 2.663468372390182e-07, "loss": 0.0976, "step": 30808 }, { "epoch": 0.8987980628974853, "grad_norm": 0.8267745571301569, "learning_rate": 2.6619472202051356e-07, "loss": 0.1098, "step": 30809 }, { "epoch": 0.8988272361281289, "grad_norm": 0.9105857106462436, "learning_rate": 2.6604264906429143e-07, "loss": 0.0986, "step": 30810 }, { "epoch": 0.8988564093587724, "grad_norm": 1.2090116911069029, "learning_rate": 2.658906183717108e-07, "loss": 0.1303, "step": 30811 }, { "epoch": 0.898885582589416, "grad_norm": 0.7385412598763644, "learning_rate": 2.657386299441289e-07, "loss": 0.1109, "step": 30812 }, { "epoch": 0.8989147558200595, "grad_norm": 0.813675632854998, "learning_rate": 2.655866837829019e-07, "loss": 0.1125, "step": 30813 }, { "epoch": 0.8989439290507031, "grad_norm": 0.9246775517224919, "learning_rate": 2.654347798893864e-07, "loss": 0.1004, "step": 30814 }, { "epoch": 0.8989731022813466, "grad_norm": 0.9039317095109196, "learning_rate": 2.652829182649397e-07, "loss": 0.1098, "step": 30815 }, { "epoch": 0.8990022755119902, "grad_norm": 0.8844314416017702, "learning_rate": 2.651310989109174e-07, "loss": 0.1233, "step": 30816 }, { "epoch": 0.8990314487426337, "grad_norm": 0.8500331010090957, "learning_rate": 2.649793218286728e-07, "loss": 0.1022, "step": 30817 }, { "epoch": 0.8990606219732773, "grad_norm": 0.8048821621514879, "learning_rate": 2.6482758701956377e-07, "loss": 0.0893, "step": 30818 }, { "epoch": 0.8990897952039208, "grad_norm": 0.7685700013816277, "learning_rate": 2.6467589448494255e-07, "loss": 0.1259, "step": 30819 }, { "epoch": 0.8991189684345644, "grad_norm": 0.9148224900349895, "learning_rate": 2.645242442261659e-07, "loss": 0.1175, "step": 30820 }, { "epoch": 0.899148141665208, "grad_norm": 0.7447199833000645, "learning_rate": 2.6437263624458474e-07, "loss": 0.1107, "step": 30821 }, { "epoch": 0.8991773148958516, "grad_norm": 0.6002362659498117, "learning_rate": 2.642210705415554e-07, "loss": 0.1035, "step": 30822 }, { "epoch": 0.8992064881264952, "grad_norm": 0.9959436406921786, "learning_rate": 2.6406954711843014e-07, "loss": 0.1231, "step": 30823 }, { "epoch": 0.8992356613571387, "grad_norm": 0.9342524108593679, "learning_rate": 2.6391806597656003e-07, "loss": 0.1154, "step": 30824 }, { "epoch": 0.8992648345877823, "grad_norm": 0.9400089022570661, "learning_rate": 2.637666271172995e-07, "loss": 0.109, "step": 30825 }, { "epoch": 0.8992940078184258, "grad_norm": 0.8065494860891471, "learning_rate": 2.636152305419998e-07, "loss": 0.1314, "step": 30826 }, { "epoch": 0.8993231810490694, "grad_norm": 0.7908419503663388, "learning_rate": 2.634638762520125e-07, "loss": 0.117, "step": 30827 }, { "epoch": 0.8993523542797129, "grad_norm": 1.0302652218095083, "learning_rate": 2.6331256424869e-07, "loss": 0.0926, "step": 30828 }, { "epoch": 0.8993815275103565, "grad_norm": 0.7005029814400058, "learning_rate": 2.631612945333817e-07, "loss": 0.1076, "step": 30829 }, { "epoch": 0.899410700741, "grad_norm": 0.8424244710245236, "learning_rate": 2.630100671074376e-07, "loss": 0.1141, "step": 30830 }, { "epoch": 0.8994398739716436, "grad_norm": 1.0077946107225784, "learning_rate": 2.628588819722094e-07, "loss": 0.1099, "step": 30831 }, { "epoch": 0.8994690472022872, "grad_norm": 0.8653977940915419, "learning_rate": 2.627077391290467e-07, "loss": 0.0934, "step": 30832 }, { "epoch": 0.8994982204329307, "grad_norm": 0.6753000315672723, "learning_rate": 2.625566385792988e-07, "loss": 0.1042, "step": 30833 }, { "epoch": 0.8995273936635743, "grad_norm": 0.8426563397021365, "learning_rate": 2.6240558032431307e-07, "loss": 0.1162, "step": 30834 }, { "epoch": 0.8995565668942178, "grad_norm": 0.8772241797615372, "learning_rate": 2.622545643654401e-07, "loss": 0.0923, "step": 30835 }, { "epoch": 0.8995857401248615, "grad_norm": 0.9294651252284601, "learning_rate": 2.621035907040276e-07, "loss": 0.1218, "step": 30836 }, { "epoch": 0.899614913355505, "grad_norm": 0.7503295861919965, "learning_rate": 2.6195265934142177e-07, "loss": 0.1339, "step": 30837 }, { "epoch": 0.8996440865861486, "grad_norm": 0.9234185871140549, "learning_rate": 2.6180177027897326e-07, "loss": 0.1176, "step": 30838 }, { "epoch": 0.8996732598167921, "grad_norm": 0.8301220374617003, "learning_rate": 2.61650923518027e-07, "loss": 0.0955, "step": 30839 }, { "epoch": 0.8997024330474357, "grad_norm": 1.0647598043586948, "learning_rate": 2.6150011905992977e-07, "loss": 0.1012, "step": 30840 }, { "epoch": 0.8997316062780792, "grad_norm": 0.8196386934894001, "learning_rate": 2.613493569060288e-07, "loss": 0.1471, "step": 30841 }, { "epoch": 0.8997607795087228, "grad_norm": 0.7867605259723952, "learning_rate": 2.6119863705766967e-07, "loss": 0.0898, "step": 30842 }, { "epoch": 0.8997899527393663, "grad_norm": 0.8799963901929425, "learning_rate": 2.610479595161969e-07, "loss": 0.1258, "step": 30843 }, { "epoch": 0.8998191259700099, "grad_norm": 0.6158166063517552, "learning_rate": 2.6089732428295654e-07, "loss": 0.1008, "step": 30844 }, { "epoch": 0.8998482992006535, "grad_norm": 1.0886071944431155, "learning_rate": 2.6074673135929486e-07, "loss": 0.1368, "step": 30845 }, { "epoch": 0.899877472431297, "grad_norm": 0.8235006316894611, "learning_rate": 2.6059618074655457e-07, "loss": 0.1114, "step": 30846 }, { "epoch": 0.8999066456619406, "grad_norm": 0.7278380548351747, "learning_rate": 2.6044567244607963e-07, "loss": 0.106, "step": 30847 }, { "epoch": 0.8999358188925841, "grad_norm": 0.8271609162705068, "learning_rate": 2.6029520645921515e-07, "loss": 0.1188, "step": 30848 }, { "epoch": 0.8999649921232278, "grad_norm": 0.8698388363891452, "learning_rate": 2.601447827873027e-07, "loss": 0.1279, "step": 30849 }, { "epoch": 0.8999941653538713, "grad_norm": 0.7373099240975741, "learning_rate": 2.5999440143168686e-07, "loss": 0.0866, "step": 30850 }, { "epoch": 0.9000233385845149, "grad_norm": 0.7588799540457818, "learning_rate": 2.5984406239370874e-07, "loss": 0.1118, "step": 30851 }, { "epoch": 0.9000525118151584, "grad_norm": 0.920814935558577, "learning_rate": 2.5969376567471226e-07, "loss": 0.1166, "step": 30852 }, { "epoch": 0.900081685045802, "grad_norm": 0.8258080195124249, "learning_rate": 2.5954351127603807e-07, "loss": 0.1226, "step": 30853 }, { "epoch": 0.9001108582764455, "grad_norm": 1.0723653998016915, "learning_rate": 2.593932991990272e-07, "loss": 0.1029, "step": 30854 }, { "epoch": 0.9001400315070891, "grad_norm": 0.8500662747471033, "learning_rate": 2.5924312944502095e-07, "loss": 0.0984, "step": 30855 }, { "epoch": 0.9001692047377327, "grad_norm": 0.8728339231385889, "learning_rate": 2.590930020153609e-07, "loss": 0.1193, "step": 30856 }, { "epoch": 0.9001983779683762, "grad_norm": 0.8822052485185008, "learning_rate": 2.589429169113866e-07, "loss": 0.0893, "step": 30857 }, { "epoch": 0.9002275511990198, "grad_norm": 0.7788704914360438, "learning_rate": 2.5879287413443863e-07, "loss": 0.1235, "step": 30858 }, { "epoch": 0.9002567244296633, "grad_norm": 0.8237787340704235, "learning_rate": 2.5864287368585596e-07, "loss": 0.108, "step": 30859 }, { "epoch": 0.9002858976603069, "grad_norm": 0.955336411671347, "learning_rate": 2.584929155669774e-07, "loss": 0.1245, "step": 30860 }, { "epoch": 0.9003150708909504, "grad_norm": 0.8322752354288175, "learning_rate": 2.5834299977914203e-07, "loss": 0.0947, "step": 30861 }, { "epoch": 0.900344244121594, "grad_norm": 0.8808347603018455, "learning_rate": 2.581931263236892e-07, "loss": 0.139, "step": 30862 }, { "epoch": 0.9003734173522376, "grad_norm": 0.7896792013506789, "learning_rate": 2.5804329520195625e-07, "loss": 0.1091, "step": 30863 }, { "epoch": 0.9004025905828812, "grad_norm": 0.7428391325210728, "learning_rate": 2.5789350641527987e-07, "loss": 0.1038, "step": 30864 }, { "epoch": 0.9004317638135247, "grad_norm": 0.8425847751115024, "learning_rate": 2.57743759964999e-07, "loss": 0.1187, "step": 30865 }, { "epoch": 0.9004609370441683, "grad_norm": 0.7751035477123707, "learning_rate": 2.575940558524498e-07, "loss": 0.1127, "step": 30866 }, { "epoch": 0.9004901102748119, "grad_norm": 1.1087439040171116, "learning_rate": 2.5744439407896725e-07, "loss": 0.119, "step": 30867 }, { "epoch": 0.9005192835054554, "grad_norm": 0.844292477344265, "learning_rate": 2.5729477464589037e-07, "loss": 0.099, "step": 30868 }, { "epoch": 0.900548456736099, "grad_norm": 0.7187965661116755, "learning_rate": 2.5714519755455416e-07, "loss": 0.1004, "step": 30869 }, { "epoch": 0.9005776299667425, "grad_norm": 0.8663774904698612, "learning_rate": 2.5699566280629196e-07, "loss": 0.1334, "step": 30870 }, { "epoch": 0.9006068031973861, "grad_norm": 0.8035676158163162, "learning_rate": 2.568461704024411e-07, "loss": 0.1072, "step": 30871 }, { "epoch": 0.9006359764280296, "grad_norm": 1.0698248448789052, "learning_rate": 2.5669672034433544e-07, "loss": 0.1221, "step": 30872 }, { "epoch": 0.9006651496586732, "grad_norm": 0.8177173393092028, "learning_rate": 2.56547312633309e-07, "loss": 0.1058, "step": 30873 }, { "epoch": 0.9006943228893167, "grad_norm": 0.8131376079609332, "learning_rate": 2.563979472706951e-07, "loss": 0.1116, "step": 30874 }, { "epoch": 0.9007234961199603, "grad_norm": 0.9209480791815651, "learning_rate": 2.562486242578288e-07, "loss": 0.111, "step": 30875 }, { "epoch": 0.9007526693506039, "grad_norm": 0.6814739903364504, "learning_rate": 2.560993435960424e-07, "loss": 0.1054, "step": 30876 }, { "epoch": 0.9007818425812475, "grad_norm": 1.072332176891597, "learning_rate": 2.559501052866681e-07, "loss": 0.1447, "step": 30877 }, { "epoch": 0.900811015811891, "grad_norm": 0.7242888196953966, "learning_rate": 2.558009093310393e-07, "loss": 0.1091, "step": 30878 }, { "epoch": 0.9008401890425346, "grad_norm": 0.826829439552671, "learning_rate": 2.556517557304866e-07, "loss": 0.0992, "step": 30879 }, { "epoch": 0.9008693622731782, "grad_norm": 0.6926146048625293, "learning_rate": 2.5550264448634285e-07, "loss": 0.1045, "step": 30880 }, { "epoch": 0.9008985355038217, "grad_norm": 0.8374251096933836, "learning_rate": 2.553535755999387e-07, "loss": 0.1059, "step": 30881 }, { "epoch": 0.9009277087344653, "grad_norm": 0.8812956683022092, "learning_rate": 2.552045490726057e-07, "loss": 0.0972, "step": 30882 }, { "epoch": 0.9009568819651088, "grad_norm": 0.8180317243090963, "learning_rate": 2.5505556490567405e-07, "loss": 0.1059, "step": 30883 }, { "epoch": 0.9009860551957524, "grad_norm": 0.9969662856760693, "learning_rate": 2.5490662310047264e-07, "loss": 0.1173, "step": 30884 }, { "epoch": 0.9010152284263959, "grad_norm": 1.2642738594841663, "learning_rate": 2.547577236583326e-07, "loss": 0.0814, "step": 30885 }, { "epoch": 0.9010444016570395, "grad_norm": 0.8798486010831635, "learning_rate": 2.5460886658058295e-07, "loss": 0.1076, "step": 30886 }, { "epoch": 0.901073574887683, "grad_norm": 0.6230444000273161, "learning_rate": 2.544600518685519e-07, "loss": 0.0998, "step": 30887 }, { "epoch": 0.9011027481183266, "grad_norm": 0.7321902723437084, "learning_rate": 2.543112795235697e-07, "loss": 0.108, "step": 30888 }, { "epoch": 0.9011319213489701, "grad_norm": 0.7923578488218115, "learning_rate": 2.541625495469635e-07, "loss": 0.1009, "step": 30889 }, { "epoch": 0.9011610945796138, "grad_norm": 0.7111146089392812, "learning_rate": 2.5401386194006005e-07, "loss": 0.0948, "step": 30890 }, { "epoch": 0.9011902678102574, "grad_norm": 0.7253050259418145, "learning_rate": 2.538652167041883e-07, "loss": 0.1153, "step": 30891 }, { "epoch": 0.9012194410409009, "grad_norm": 0.7411366341909867, "learning_rate": 2.5371661384067546e-07, "loss": 0.1037, "step": 30892 }, { "epoch": 0.9012486142715445, "grad_norm": 0.8317204427851583, "learning_rate": 2.5356805335084776e-07, "loss": 0.0949, "step": 30893 }, { "epoch": 0.901277787502188, "grad_norm": 0.6538123182560942, "learning_rate": 2.5341953523603024e-07, "loss": 0.1088, "step": 30894 }, { "epoch": 0.9013069607328316, "grad_norm": 0.8232839203533794, "learning_rate": 2.5327105949755125e-07, "loss": 0.103, "step": 30895 }, { "epoch": 0.9013361339634751, "grad_norm": 0.9527635326361865, "learning_rate": 2.5312262613673476e-07, "loss": 0.119, "step": 30896 }, { "epoch": 0.9013653071941187, "grad_norm": 0.7507188860357058, "learning_rate": 2.5297423515490584e-07, "loss": 0.1225, "step": 30897 }, { "epoch": 0.9013944804247622, "grad_norm": 0.6477173770002925, "learning_rate": 2.5282588655338947e-07, "loss": 0.1153, "step": 30898 }, { "epoch": 0.9014236536554058, "grad_norm": 0.7759081563079759, "learning_rate": 2.526775803335113e-07, "loss": 0.1176, "step": 30899 }, { "epoch": 0.9014528268860493, "grad_norm": 0.7528650119604344, "learning_rate": 2.525293164965936e-07, "loss": 0.1, "step": 30900 }, { "epoch": 0.9014820001166929, "grad_norm": 0.8382682887800795, "learning_rate": 2.523810950439615e-07, "loss": 0.1159, "step": 30901 }, { "epoch": 0.9015111733473364, "grad_norm": 0.9790493988981085, "learning_rate": 2.5223291597693764e-07, "loss": 0.1206, "step": 30902 }, { "epoch": 0.9015403465779801, "grad_norm": 0.9686947625084419, "learning_rate": 2.520847792968445e-07, "loss": 0.0947, "step": 30903 }, { "epoch": 0.9015695198086237, "grad_norm": 1.0043684611504828, "learning_rate": 2.519366850050048e-07, "loss": 0.1195, "step": 30904 }, { "epoch": 0.9015986930392672, "grad_norm": 1.1410283074329841, "learning_rate": 2.5178863310274136e-07, "loss": 0.108, "step": 30905 }, { "epoch": 0.9016278662699108, "grad_norm": 0.822753373595886, "learning_rate": 2.51640623591376e-07, "loss": 0.0961, "step": 30906 }, { "epoch": 0.9016570395005543, "grad_norm": 0.7734796717613033, "learning_rate": 2.5149265647222863e-07, "loss": 0.0935, "step": 30907 }, { "epoch": 0.9016862127311979, "grad_norm": 0.8922035087707321, "learning_rate": 2.513447317466222e-07, "loss": 0.1136, "step": 30908 }, { "epoch": 0.9017153859618414, "grad_norm": 0.9762875819094442, "learning_rate": 2.511968494158751e-07, "loss": 0.1387, "step": 30909 }, { "epoch": 0.901744559192485, "grad_norm": 0.7317975276067623, "learning_rate": 2.510490094813101e-07, "loss": 0.0967, "step": 30910 }, { "epoch": 0.9017737324231285, "grad_norm": 0.7514206912013227, "learning_rate": 2.5090121194424554e-07, "loss": 0.1044, "step": 30911 }, { "epoch": 0.9018029056537721, "grad_norm": 1.0323943227713905, "learning_rate": 2.5075345680600107e-07, "loss": 0.102, "step": 30912 }, { "epoch": 0.9018320788844156, "grad_norm": 0.8356574035684364, "learning_rate": 2.5060574406789664e-07, "loss": 0.1103, "step": 30913 }, { "epoch": 0.9018612521150592, "grad_norm": 0.661884839685752, "learning_rate": 2.504580737312495e-07, "loss": 0.0862, "step": 30914 }, { "epoch": 0.9018904253457027, "grad_norm": 0.7698925091478908, "learning_rate": 2.503104457973787e-07, "loss": 0.1014, "step": 30915 }, { "epoch": 0.9019195985763463, "grad_norm": 1.1653480649798673, "learning_rate": 2.501628602676037e-07, "loss": 0.1072, "step": 30916 }, { "epoch": 0.90194877180699, "grad_norm": 0.8357527387352833, "learning_rate": 2.500153171432396e-07, "loss": 0.1254, "step": 30917 }, { "epoch": 0.9019779450376335, "grad_norm": 0.7601997967500363, "learning_rate": 2.498678164256052e-07, "loss": 0.1405, "step": 30918 }, { "epoch": 0.9020071182682771, "grad_norm": 0.9844474328297425, "learning_rate": 2.497203581160174e-07, "loss": 0.1058, "step": 30919 }, { "epoch": 0.9020362914989206, "grad_norm": 0.9306600426511832, "learning_rate": 2.4957294221579166e-07, "loss": 0.1323, "step": 30920 }, { "epoch": 0.9020654647295642, "grad_norm": 0.811693818410758, "learning_rate": 2.4942556872624477e-07, "loss": 0.1107, "step": 30921 }, { "epoch": 0.9020946379602077, "grad_norm": 0.813405748983144, "learning_rate": 2.4927823764869296e-07, "loss": 0.1039, "step": 30922 }, { "epoch": 0.9021238111908513, "grad_norm": 0.7775052672368832, "learning_rate": 2.4913094898445066e-07, "loss": 0.121, "step": 30923 }, { "epoch": 0.9021529844214948, "grad_norm": 1.168129538557138, "learning_rate": 2.489837027348324e-07, "loss": 0.1124, "step": 30924 }, { "epoch": 0.9021821576521384, "grad_norm": 0.8427444648072628, "learning_rate": 2.488364989011544e-07, "loss": 0.1146, "step": 30925 }, { "epoch": 0.9022113308827819, "grad_norm": 0.7314329053327707, "learning_rate": 2.486893374847299e-07, "loss": 0.1171, "step": 30926 }, { "epoch": 0.9022405041134255, "grad_norm": 0.9614666321424571, "learning_rate": 2.4854221848687245e-07, "loss": 0.1134, "step": 30927 }, { "epoch": 0.902269677344069, "grad_norm": 0.8172701848878543, "learning_rate": 2.4839514190889534e-07, "loss": 0.0922, "step": 30928 }, { "epoch": 0.9022988505747126, "grad_norm": 0.8324606832777022, "learning_rate": 2.482481077521126e-07, "loss": 0.1075, "step": 30929 }, { "epoch": 0.9023280238053561, "grad_norm": 0.7730009621892097, "learning_rate": 2.481011160178365e-07, "loss": 0.1299, "step": 30930 }, { "epoch": 0.9023571970359998, "grad_norm": 1.0540633963722275, "learning_rate": 2.4795416670737925e-07, "loss": 0.1041, "step": 30931 }, { "epoch": 0.9023863702666434, "grad_norm": 0.7690313082884614, "learning_rate": 2.478072598220532e-07, "loss": 0.1178, "step": 30932 }, { "epoch": 0.9024155434972869, "grad_norm": 0.9766291913066324, "learning_rate": 2.4766039536316843e-07, "loss": 0.1105, "step": 30933 }, { "epoch": 0.9024447167279305, "grad_norm": 0.8108029336761172, "learning_rate": 2.475135733320372e-07, "loss": 0.1208, "step": 30934 }, { "epoch": 0.902473889958574, "grad_norm": 0.8216731486601332, "learning_rate": 2.473667937299712e-07, "loss": 0.1149, "step": 30935 }, { "epoch": 0.9025030631892176, "grad_norm": 0.7140596228755002, "learning_rate": 2.4722005655827995e-07, "loss": 0.0882, "step": 30936 }, { "epoch": 0.9025322364198611, "grad_norm": 0.9323876320571479, "learning_rate": 2.47073361818273e-07, "loss": 0.1222, "step": 30937 }, { "epoch": 0.9025614096505047, "grad_norm": 1.040450051990931, "learning_rate": 2.4692670951126043e-07, "loss": 0.1355, "step": 30938 }, { "epoch": 0.9025905828811482, "grad_norm": 1.0706327905908033, "learning_rate": 2.4678009963855165e-07, "loss": 0.0996, "step": 30939 }, { "epoch": 0.9026197561117918, "grad_norm": 0.8522603385455392, "learning_rate": 2.466335322014557e-07, "loss": 0.1033, "step": 30940 }, { "epoch": 0.9026489293424353, "grad_norm": 0.7507943013091795, "learning_rate": 2.4648700720128036e-07, "loss": 0.116, "step": 30941 }, { "epoch": 0.9026781025730789, "grad_norm": 0.6786135439008352, "learning_rate": 2.4634052463933466e-07, "loss": 0.1013, "step": 30942 }, { "epoch": 0.9027072758037225, "grad_norm": 0.8883417035087033, "learning_rate": 2.4619408451692584e-07, "loss": 0.1081, "step": 30943 }, { "epoch": 0.9027364490343661, "grad_norm": 0.847454523909641, "learning_rate": 2.460476868353612e-07, "loss": 0.1239, "step": 30944 }, { "epoch": 0.9027656222650097, "grad_norm": 0.6801634304033013, "learning_rate": 2.45901331595948e-07, "loss": 0.1167, "step": 30945 }, { "epoch": 0.9027947954956532, "grad_norm": 0.7889849025518476, "learning_rate": 2.4575501879999295e-07, "loss": 0.101, "step": 30946 }, { "epoch": 0.9028239687262968, "grad_norm": 0.6949769325179381, "learning_rate": 2.456087484488018e-07, "loss": 0.1186, "step": 30947 }, { "epoch": 0.9028531419569403, "grad_norm": 0.8075382518895727, "learning_rate": 2.454625205436817e-07, "loss": 0.1208, "step": 30948 }, { "epoch": 0.9028823151875839, "grad_norm": 0.7407950225445782, "learning_rate": 2.4531633508593665e-07, "loss": 0.1166, "step": 30949 }, { "epoch": 0.9029114884182274, "grad_norm": 0.9062719691739753, "learning_rate": 2.451701920768723e-07, "loss": 0.1167, "step": 30950 }, { "epoch": 0.902940661648871, "grad_norm": 0.6634029421397257, "learning_rate": 2.4502409151779317e-07, "loss": 0.1192, "step": 30951 }, { "epoch": 0.9029698348795145, "grad_norm": 0.9013022547794278, "learning_rate": 2.4487803341000425e-07, "loss": 0.1226, "step": 30952 }, { "epoch": 0.9029990081101581, "grad_norm": 0.8704093365322719, "learning_rate": 2.44732017754809e-07, "loss": 0.0961, "step": 30953 }, { "epoch": 0.9030281813408017, "grad_norm": 0.9566523073249599, "learning_rate": 2.445860445535109e-07, "loss": 0.0978, "step": 30954 }, { "epoch": 0.9030573545714452, "grad_norm": 0.7246687325679024, "learning_rate": 2.4444011380741375e-07, "loss": 0.1432, "step": 30955 }, { "epoch": 0.9030865278020888, "grad_norm": 0.8785881788994038, "learning_rate": 2.4429422551782046e-07, "loss": 0.1089, "step": 30956 }, { "epoch": 0.9031157010327323, "grad_norm": 0.6631408368634061, "learning_rate": 2.4414837968603223e-07, "loss": 0.0962, "step": 30957 }, { "epoch": 0.903144874263376, "grad_norm": 0.7138819774688472, "learning_rate": 2.4400257631335136e-07, "loss": 0.1202, "step": 30958 }, { "epoch": 0.9031740474940195, "grad_norm": 0.8350813516994058, "learning_rate": 2.4385681540108117e-07, "loss": 0.1019, "step": 30959 }, { "epoch": 0.9032032207246631, "grad_norm": 0.8326867081345029, "learning_rate": 2.4371109695052185e-07, "loss": 0.0901, "step": 30960 }, { "epoch": 0.9032323939553066, "grad_norm": 0.9466281671113214, "learning_rate": 2.435654209629745e-07, "loss": 0.1093, "step": 30961 }, { "epoch": 0.9032615671859502, "grad_norm": 0.7263247733108613, "learning_rate": 2.434197874397398e-07, "loss": 0.1239, "step": 30962 }, { "epoch": 0.9032907404165937, "grad_norm": 1.1284205234836768, "learning_rate": 2.432741963821167e-07, "loss": 0.1144, "step": 30963 }, { "epoch": 0.9033199136472373, "grad_norm": 0.8764197879557643, "learning_rate": 2.4312864779140633e-07, "loss": 0.115, "step": 30964 }, { "epoch": 0.9033490868778808, "grad_norm": 0.8498423461680746, "learning_rate": 2.429831416689088e-07, "loss": 0.0999, "step": 30965 }, { "epoch": 0.9033782601085244, "grad_norm": 0.964680119609015, "learning_rate": 2.4283767801592196e-07, "loss": 0.1004, "step": 30966 }, { "epoch": 0.903407433339168, "grad_norm": 0.7881218667683954, "learning_rate": 2.426922568337442e-07, "loss": 0.091, "step": 30967 }, { "epoch": 0.9034366065698115, "grad_norm": 0.7746170126969204, "learning_rate": 2.425468781236745e-07, "loss": 0.0981, "step": 30968 }, { "epoch": 0.9034657798004551, "grad_norm": 0.6896526807509923, "learning_rate": 2.4240154188701013e-07, "loss": 0.1071, "step": 30969 }, { "epoch": 0.9034949530310986, "grad_norm": 0.880851038956034, "learning_rate": 2.422562481250501e-07, "loss": 0.1166, "step": 30970 }, { "epoch": 0.9035241262617423, "grad_norm": 0.8271895146708425, "learning_rate": 2.421109968390895e-07, "loss": 0.112, "step": 30971 }, { "epoch": 0.9035532994923858, "grad_norm": 0.8047694310509017, "learning_rate": 2.419657880304266e-07, "loss": 0.1297, "step": 30972 }, { "epoch": 0.9035824727230294, "grad_norm": 0.7329982488742748, "learning_rate": 2.418206217003577e-07, "loss": 0.1152, "step": 30973 }, { "epoch": 0.9036116459536729, "grad_norm": 0.8524591097579353, "learning_rate": 2.4167549785017676e-07, "loss": 0.102, "step": 30974 }, { "epoch": 0.9036408191843165, "grad_norm": 1.0279475728860563, "learning_rate": 2.415304164811827e-07, "loss": 0.1175, "step": 30975 }, { "epoch": 0.90366999241496, "grad_norm": 0.7631475659126696, "learning_rate": 2.4138537759466894e-07, "loss": 0.1055, "step": 30976 }, { "epoch": 0.9036991656456036, "grad_norm": 1.0890616148085686, "learning_rate": 2.4124038119193006e-07, "loss": 0.1283, "step": 30977 }, { "epoch": 0.9037283388762472, "grad_norm": 0.9686796762385809, "learning_rate": 2.410954272742616e-07, "loss": 0.1085, "step": 30978 }, { "epoch": 0.9037575121068907, "grad_norm": 0.7816770751882689, "learning_rate": 2.4095051584295704e-07, "loss": 0.134, "step": 30979 }, { "epoch": 0.9037866853375343, "grad_norm": 0.891180047793578, "learning_rate": 2.4080564689930974e-07, "loss": 0.1066, "step": 30980 }, { "epoch": 0.9038158585681778, "grad_norm": 0.9566231189326931, "learning_rate": 2.406608204446137e-07, "loss": 0.1225, "step": 30981 }, { "epoch": 0.9038450317988214, "grad_norm": 0.732768771886364, "learning_rate": 2.4051603648016176e-07, "loss": 0.099, "step": 30982 }, { "epoch": 0.9038742050294649, "grad_norm": 0.9489851905210597, "learning_rate": 2.4037129500724675e-07, "loss": 0.1364, "step": 30983 }, { "epoch": 0.9039033782601085, "grad_norm": 0.981945530141147, "learning_rate": 2.402265960271599e-07, "loss": 0.1373, "step": 30984 }, { "epoch": 0.9039325514907521, "grad_norm": 0.8241497624676498, "learning_rate": 2.400819395411946e-07, "loss": 0.1099, "step": 30985 }, { "epoch": 0.9039617247213957, "grad_norm": 0.8656803812664841, "learning_rate": 2.39937325550641e-07, "loss": 0.1164, "step": 30986 }, { "epoch": 0.9039908979520392, "grad_norm": 0.9454769588806509, "learning_rate": 2.397927540567907e-07, "loss": 0.1225, "step": 30987 }, { "epoch": 0.9040200711826828, "grad_norm": 0.9195737657724945, "learning_rate": 2.396482250609339e-07, "loss": 0.1211, "step": 30988 }, { "epoch": 0.9040492444133263, "grad_norm": 0.8605926343562299, "learning_rate": 2.395037385643623e-07, "loss": 0.1167, "step": 30989 }, { "epoch": 0.9040784176439699, "grad_norm": 0.7684792064288665, "learning_rate": 2.393592945683648e-07, "loss": 0.1058, "step": 30990 }, { "epoch": 0.9041075908746135, "grad_norm": 0.9725175925245941, "learning_rate": 2.3921489307422994e-07, "loss": 0.1375, "step": 30991 }, { "epoch": 0.904136764105257, "grad_norm": 0.8903827468080072, "learning_rate": 2.3907053408324885e-07, "loss": 0.1289, "step": 30992 }, { "epoch": 0.9041659373359006, "grad_norm": 0.7438117327272914, "learning_rate": 2.3892621759670943e-07, "loss": 0.1012, "step": 30993 }, { "epoch": 0.9041951105665441, "grad_norm": 0.8033488966088786, "learning_rate": 2.3878194361590003e-07, "loss": 0.1216, "step": 30994 }, { "epoch": 0.9042242837971877, "grad_norm": 0.8563267664831004, "learning_rate": 2.386377121421091e-07, "loss": 0.1022, "step": 30995 }, { "epoch": 0.9042534570278312, "grad_norm": 0.8219721406685765, "learning_rate": 2.3849352317662446e-07, "loss": 0.1125, "step": 30996 }, { "epoch": 0.9042826302584748, "grad_norm": 0.6910867898795853, "learning_rate": 2.3834937672073178e-07, "loss": 0.0953, "step": 30997 }, { "epoch": 0.9043118034891184, "grad_norm": 1.2877588495692245, "learning_rate": 2.3820527277571949e-07, "loss": 0.1343, "step": 30998 }, { "epoch": 0.904340976719762, "grad_norm": 0.9232830015681499, "learning_rate": 2.380612113428743e-07, "loss": 0.1171, "step": 30999 }, { "epoch": 0.9043701499504055, "grad_norm": 0.8927480596637919, "learning_rate": 2.3791719242348188e-07, "loss": 0.0948, "step": 31000 }, { "epoch": 0.9043993231810491, "grad_norm": 0.9465587826538443, "learning_rate": 2.377732160188273e-07, "loss": 0.1136, "step": 31001 }, { "epoch": 0.9044284964116927, "grad_norm": 0.8571432623023012, "learning_rate": 2.3762928213019786e-07, "loss": 0.0976, "step": 31002 }, { "epoch": 0.9044576696423362, "grad_norm": 0.8745611444710717, "learning_rate": 2.3748539075887646e-07, "loss": 0.1145, "step": 31003 }, { "epoch": 0.9044868428729798, "grad_norm": 0.8098827712414928, "learning_rate": 2.3734154190614755e-07, "loss": 0.1167, "step": 31004 }, { "epoch": 0.9045160161036233, "grad_norm": 0.7728558326997578, "learning_rate": 2.3719773557329794e-07, "loss": 0.1036, "step": 31005 }, { "epoch": 0.9045451893342669, "grad_norm": 0.9925806399402782, "learning_rate": 2.3705397176160994e-07, "loss": 0.1134, "step": 31006 }, { "epoch": 0.9045743625649104, "grad_norm": 0.7889660460696515, "learning_rate": 2.3691025047236637e-07, "loss": 0.1048, "step": 31007 }, { "epoch": 0.904603535795554, "grad_norm": 0.7049189496488726, "learning_rate": 2.367665717068518e-07, "loss": 0.1169, "step": 31008 }, { "epoch": 0.9046327090261975, "grad_norm": 0.834736438102681, "learning_rate": 2.3662293546634796e-07, "loss": 0.1024, "step": 31009 }, { "epoch": 0.9046618822568411, "grad_norm": 0.7792846950019244, "learning_rate": 2.364793417521366e-07, "loss": 0.1128, "step": 31010 }, { "epoch": 0.9046910554874846, "grad_norm": 0.8957140921369928, "learning_rate": 2.3633579056550115e-07, "loss": 0.1363, "step": 31011 }, { "epoch": 0.9047202287181283, "grad_norm": 0.6520278082438606, "learning_rate": 2.3619228190772282e-07, "loss": 0.1141, "step": 31012 }, { "epoch": 0.9047494019487718, "grad_norm": 0.8356939446502553, "learning_rate": 2.3604881578008276e-07, "loss": 0.1189, "step": 31013 }, { "epoch": 0.9047785751794154, "grad_norm": 0.7724826162424849, "learning_rate": 2.3590539218386056e-07, "loss": 0.1028, "step": 31014 }, { "epoch": 0.904807748410059, "grad_norm": 0.8208899102690861, "learning_rate": 2.3576201112033903e-07, "loss": 0.0979, "step": 31015 }, { "epoch": 0.9048369216407025, "grad_norm": 0.9358438814407456, "learning_rate": 2.3561867259079607e-07, "loss": 0.117, "step": 31016 }, { "epoch": 0.9048660948713461, "grad_norm": 0.952807885742543, "learning_rate": 2.3547537659651286e-07, "loss": 0.1138, "step": 31017 }, { "epoch": 0.9048952681019896, "grad_norm": 0.765959134979808, "learning_rate": 2.353321231387673e-07, "loss": 0.1204, "step": 31018 }, { "epoch": 0.9049244413326332, "grad_norm": 0.7022546939782963, "learning_rate": 2.3518891221884e-07, "loss": 0.1135, "step": 31019 }, { "epoch": 0.9049536145632767, "grad_norm": 0.831842049537802, "learning_rate": 2.3504574383800825e-07, "loss": 0.1097, "step": 31020 }, { "epoch": 0.9049827877939203, "grad_norm": 0.7555123556790414, "learning_rate": 2.3490261799755e-07, "loss": 0.0981, "step": 31021 }, { "epoch": 0.9050119610245638, "grad_norm": 0.8825737255745918, "learning_rate": 2.3475953469874413e-07, "loss": 0.1375, "step": 31022 }, { "epoch": 0.9050411342552074, "grad_norm": 0.8603160876024761, "learning_rate": 2.34616493942868e-07, "loss": 0.125, "step": 31023 }, { "epoch": 0.9050703074858509, "grad_norm": 0.7903554247217, "learning_rate": 2.3447349573119725e-07, "loss": 0.0977, "step": 31024 }, { "epoch": 0.9050994807164946, "grad_norm": 0.8445792710266081, "learning_rate": 2.3433054006501087e-07, "loss": 0.0881, "step": 31025 }, { "epoch": 0.9051286539471382, "grad_norm": 0.6327029514433683, "learning_rate": 2.341876269455834e-07, "loss": 0.0999, "step": 31026 }, { "epoch": 0.9051578271777817, "grad_norm": 0.8723141299911839, "learning_rate": 2.3404475637419045e-07, "loss": 0.1183, "step": 31027 }, { "epoch": 0.9051870004084253, "grad_norm": 0.8394980661644551, "learning_rate": 2.3390192835210824e-07, "loss": 0.0942, "step": 31028 }, { "epoch": 0.9052161736390688, "grad_norm": 0.776989093761145, "learning_rate": 2.337591428806124e-07, "loss": 0.0981, "step": 31029 }, { "epoch": 0.9052453468697124, "grad_norm": 0.8312850821874578, "learning_rate": 2.3361639996097697e-07, "loss": 0.1182, "step": 31030 }, { "epoch": 0.9052745201003559, "grad_norm": 0.804418812954818, "learning_rate": 2.3347369959447584e-07, "loss": 0.1065, "step": 31031 }, { "epoch": 0.9053036933309995, "grad_norm": 1.229864617067817, "learning_rate": 2.3333104178238475e-07, "loss": 0.0858, "step": 31032 }, { "epoch": 0.905332866561643, "grad_norm": 0.8527797931521814, "learning_rate": 2.3318842652597595e-07, "loss": 0.1266, "step": 31033 }, { "epoch": 0.9053620397922866, "grad_norm": 0.7333503546245682, "learning_rate": 2.3304585382652178e-07, "loss": 0.1215, "step": 31034 }, { "epoch": 0.9053912130229301, "grad_norm": 0.9030188895183978, "learning_rate": 2.3290332368529734e-07, "loss": 0.1002, "step": 31035 }, { "epoch": 0.9054203862535737, "grad_norm": 0.8612737379561668, "learning_rate": 2.3276083610357436e-07, "loss": 0.116, "step": 31036 }, { "epoch": 0.9054495594842172, "grad_norm": 0.8589736419008908, "learning_rate": 2.3261839108262353e-07, "loss": 0.1321, "step": 31037 }, { "epoch": 0.9054787327148608, "grad_norm": 1.0454511600397138, "learning_rate": 2.3247598862371878e-07, "loss": 0.1098, "step": 31038 }, { "epoch": 0.9055079059455045, "grad_norm": 0.8891984577817922, "learning_rate": 2.323336287281297e-07, "loss": 0.1172, "step": 31039 }, { "epoch": 0.905537079176148, "grad_norm": 0.8612033636963312, "learning_rate": 2.3219131139712746e-07, "loss": 0.1037, "step": 31040 }, { "epoch": 0.9055662524067916, "grad_norm": 1.1069143747697723, "learning_rate": 2.320490366319833e-07, "loss": 0.1265, "step": 31041 }, { "epoch": 0.9055954256374351, "grad_norm": 0.9442900554547359, "learning_rate": 2.3190680443396784e-07, "loss": 0.1238, "step": 31042 }, { "epoch": 0.9056245988680787, "grad_norm": 0.7584375799488945, "learning_rate": 2.3176461480434954e-07, "loss": 0.1181, "step": 31043 }, { "epoch": 0.9056537720987222, "grad_norm": 0.8124001834974975, "learning_rate": 2.3162246774439845e-07, "loss": 0.1161, "step": 31044 }, { "epoch": 0.9056829453293658, "grad_norm": 0.8476413654548273, "learning_rate": 2.3148036325538414e-07, "loss": 0.1176, "step": 31045 }, { "epoch": 0.9057121185600093, "grad_norm": 0.9265441993099188, "learning_rate": 2.3133830133857393e-07, "loss": 0.0995, "step": 31046 }, { "epoch": 0.9057412917906529, "grad_norm": 0.8002302248988808, "learning_rate": 2.3119628199523792e-07, "loss": 0.1017, "step": 31047 }, { "epoch": 0.9057704650212964, "grad_norm": 0.8202679698560909, "learning_rate": 2.310543052266423e-07, "loss": 0.1194, "step": 31048 }, { "epoch": 0.90579963825194, "grad_norm": 0.7965327943716995, "learning_rate": 2.3091237103405606e-07, "loss": 0.1057, "step": 31049 }, { "epoch": 0.9058288114825835, "grad_norm": 0.7776205645230267, "learning_rate": 2.3077047941874597e-07, "loss": 0.0935, "step": 31050 }, { "epoch": 0.9058579847132271, "grad_norm": 0.6608344382665353, "learning_rate": 2.306286303819777e-07, "loss": 0.1199, "step": 31051 }, { "epoch": 0.9058871579438708, "grad_norm": 0.8951919596240322, "learning_rate": 2.3048682392501854e-07, "loss": 0.1156, "step": 31052 }, { "epoch": 0.9059163311745143, "grad_norm": 0.9607789177046562, "learning_rate": 2.303450600491347e-07, "loss": 0.1322, "step": 31053 }, { "epoch": 0.9059455044051579, "grad_norm": 0.779468320438791, "learning_rate": 2.3020333875559132e-07, "loss": 0.1013, "step": 31054 }, { "epoch": 0.9059746776358014, "grad_norm": 0.6703479128871883, "learning_rate": 2.3006166004565454e-07, "loss": 0.1241, "step": 31055 }, { "epoch": 0.906003850866445, "grad_norm": 0.8106083888563351, "learning_rate": 2.2992002392058843e-07, "loss": 0.1359, "step": 31056 }, { "epoch": 0.9060330240970885, "grad_norm": 1.0194871420894678, "learning_rate": 2.2977843038165693e-07, "loss": 0.1166, "step": 31057 }, { "epoch": 0.9060621973277321, "grad_norm": 0.6440779035411326, "learning_rate": 2.2963687943012515e-07, "loss": 0.1041, "step": 31058 }, { "epoch": 0.9060913705583756, "grad_norm": 0.7043635813284241, "learning_rate": 2.294953710672565e-07, "loss": 0.0986, "step": 31059 }, { "epoch": 0.9061205437890192, "grad_norm": 0.8523291980476269, "learning_rate": 2.29353905294315e-07, "loss": 0.1145, "step": 31060 }, { "epoch": 0.9061497170196627, "grad_norm": 0.7977194639494615, "learning_rate": 2.2921248211256242e-07, "loss": 0.1101, "step": 31061 }, { "epoch": 0.9061788902503063, "grad_norm": 0.7833624685213701, "learning_rate": 2.2907110152326217e-07, "loss": 0.113, "step": 31062 }, { "epoch": 0.9062080634809498, "grad_norm": 0.9112102627067392, "learning_rate": 2.289297635276766e-07, "loss": 0.1274, "step": 31063 }, { "epoch": 0.9062372367115934, "grad_norm": 0.7919046650217728, "learning_rate": 2.2878846812706524e-07, "loss": 0.0988, "step": 31064 }, { "epoch": 0.906266409942237, "grad_norm": 1.4700491593039289, "learning_rate": 2.2864721532269317e-07, "loss": 0.1413, "step": 31065 }, { "epoch": 0.9062955831728806, "grad_norm": 0.7976698184707087, "learning_rate": 2.2850600511582e-07, "loss": 0.1366, "step": 31066 }, { "epoch": 0.9063247564035242, "grad_norm": 0.8000100874950398, "learning_rate": 2.283648375077052e-07, "loss": 0.0992, "step": 31067 }, { "epoch": 0.9063539296341677, "grad_norm": 0.6631769861970778, "learning_rate": 2.282237124996106e-07, "loss": 0.0908, "step": 31068 }, { "epoch": 0.9063831028648113, "grad_norm": 0.6905358890076473, "learning_rate": 2.2808263009279574e-07, "loss": 0.1244, "step": 31069 }, { "epoch": 0.9064122760954548, "grad_norm": 0.7779457979066686, "learning_rate": 2.2794159028851958e-07, "loss": 0.1234, "step": 31070 }, { "epoch": 0.9064414493260984, "grad_norm": 0.8832020763851377, "learning_rate": 2.2780059308804116e-07, "loss": 0.1196, "step": 31071 }, { "epoch": 0.9064706225567419, "grad_norm": 0.7292286484002232, "learning_rate": 2.2765963849262107e-07, "loss": 0.1031, "step": 31072 }, { "epoch": 0.9064997957873855, "grad_norm": 0.7474740082202941, "learning_rate": 2.2751872650351614e-07, "loss": 0.1218, "step": 31073 }, { "epoch": 0.906528969018029, "grad_norm": 0.9404091708721877, "learning_rate": 2.2737785712198423e-07, "loss": 0.0952, "step": 31074 }, { "epoch": 0.9065581422486726, "grad_norm": 0.8599232304803229, "learning_rate": 2.2723703034928435e-07, "loss": 0.1165, "step": 31075 }, { "epoch": 0.9065873154793161, "grad_norm": 0.6120221323168671, "learning_rate": 2.2709624618667159e-07, "loss": 0.1024, "step": 31076 }, { "epoch": 0.9066164887099597, "grad_norm": 0.7979718073030723, "learning_rate": 2.269555046354055e-07, "loss": 0.1167, "step": 31077 }, { "epoch": 0.9066456619406033, "grad_norm": 1.2851241355542093, "learning_rate": 2.2681480569674007e-07, "loss": 0.1044, "step": 31078 }, { "epoch": 0.9066748351712469, "grad_norm": 0.8137264629751338, "learning_rate": 2.2667414937193378e-07, "loss": 0.1069, "step": 31079 }, { "epoch": 0.9067040084018905, "grad_norm": 0.8026044382238032, "learning_rate": 2.2653353566224058e-07, "loss": 0.0959, "step": 31080 }, { "epoch": 0.906733181632534, "grad_norm": 0.9254360566307075, "learning_rate": 2.2639296456891612e-07, "loss": 0.1094, "step": 31081 }, { "epoch": 0.9067623548631776, "grad_norm": 0.8374644847661314, "learning_rate": 2.262524360932161e-07, "loss": 0.1064, "step": 31082 }, { "epoch": 0.9067915280938211, "grad_norm": 0.8899565088495911, "learning_rate": 2.261119502363951e-07, "loss": 0.1023, "step": 31083 }, { "epoch": 0.9068207013244647, "grad_norm": 0.6607754550107457, "learning_rate": 2.2597150699970594e-07, "loss": 0.1093, "step": 31084 }, { "epoch": 0.9068498745551082, "grad_norm": 0.7933378432633355, "learning_rate": 2.258311063844043e-07, "loss": 0.1014, "step": 31085 }, { "epoch": 0.9068790477857518, "grad_norm": 0.9711117450350107, "learning_rate": 2.256907483917431e-07, "loss": 0.097, "step": 31086 }, { "epoch": 0.9069082210163953, "grad_norm": 0.7130955809590389, "learning_rate": 2.2555043302297464e-07, "loss": 0.1079, "step": 31087 }, { "epoch": 0.9069373942470389, "grad_norm": 0.9837946899650742, "learning_rate": 2.254101602793518e-07, "loss": 0.1113, "step": 31088 }, { "epoch": 0.9069665674776825, "grad_norm": 0.8483504739284345, "learning_rate": 2.252699301621286e-07, "loss": 0.1125, "step": 31089 }, { "epoch": 0.906995740708326, "grad_norm": 0.820332575581785, "learning_rate": 2.2512974267255517e-07, "loss": 0.1033, "step": 31090 }, { "epoch": 0.9070249139389696, "grad_norm": 0.7765796816180711, "learning_rate": 2.2498959781188267e-07, "loss": 0.1264, "step": 31091 }, { "epoch": 0.9070540871696131, "grad_norm": 0.8304216817017167, "learning_rate": 2.2484949558136405e-07, "loss": 0.1119, "step": 31092 }, { "epoch": 0.9070832604002568, "grad_norm": 0.859422112818967, "learning_rate": 2.2470943598224936e-07, "loss": 0.0962, "step": 31093 }, { "epoch": 0.9071124336309003, "grad_norm": 0.6354218403920345, "learning_rate": 2.245694190157871e-07, "loss": 0.1054, "step": 31094 }, { "epoch": 0.9071416068615439, "grad_norm": 0.7047697085873138, "learning_rate": 2.244294446832307e-07, "loss": 0.1197, "step": 31095 }, { "epoch": 0.9071707800921874, "grad_norm": 0.8612226207933252, "learning_rate": 2.24289512985828e-07, "loss": 0.1021, "step": 31096 }, { "epoch": 0.907199953322831, "grad_norm": 0.814602887405063, "learning_rate": 2.241496239248281e-07, "loss": 0.1203, "step": 31097 }, { "epoch": 0.9072291265534745, "grad_norm": 0.8157422395148158, "learning_rate": 2.24009777501481e-07, "loss": 0.1115, "step": 31098 }, { "epoch": 0.9072582997841181, "grad_norm": 0.9433895599957898, "learning_rate": 2.2386997371703413e-07, "loss": 0.1034, "step": 31099 }, { "epoch": 0.9072874730147616, "grad_norm": 0.9194868283420881, "learning_rate": 2.237302125727353e-07, "loss": 0.1002, "step": 31100 }, { "epoch": 0.9073166462454052, "grad_norm": 0.9143603771129105, "learning_rate": 2.2359049406983358e-07, "loss": 0.0983, "step": 31101 }, { "epoch": 0.9073458194760488, "grad_norm": 0.893875061229067, "learning_rate": 2.234508182095757e-07, "loss": 0.125, "step": 31102 }, { "epoch": 0.9073749927066923, "grad_norm": 0.8063523363762451, "learning_rate": 2.2331118499320904e-07, "loss": 0.116, "step": 31103 }, { "epoch": 0.9074041659373359, "grad_norm": 0.9565084545115901, "learning_rate": 2.2317159442197868e-07, "loss": 0.1179, "step": 31104 }, { "epoch": 0.9074333391679794, "grad_norm": 0.8637748883300185, "learning_rate": 2.2303204649713305e-07, "loss": 0.1137, "step": 31105 }, { "epoch": 0.9074625123986231, "grad_norm": 0.8914017938165445, "learning_rate": 2.228925412199162e-07, "loss": 0.1096, "step": 31106 }, { "epoch": 0.9074916856292666, "grad_norm": 0.7612287769685566, "learning_rate": 2.2275307859157546e-07, "loss": 0.0936, "step": 31107 }, { "epoch": 0.9075208588599102, "grad_norm": 0.9977039789906293, "learning_rate": 2.2261365861335372e-07, "loss": 0.12, "step": 31108 }, { "epoch": 0.9075500320905537, "grad_norm": 0.954415893967994, "learning_rate": 2.2247428128649717e-07, "loss": 0.1214, "step": 31109 }, { "epoch": 0.9075792053211973, "grad_norm": 0.7368586790932046, "learning_rate": 2.2233494661225042e-07, "loss": 0.0997, "step": 31110 }, { "epoch": 0.9076083785518408, "grad_norm": 0.8528026718875293, "learning_rate": 2.2219565459185578e-07, "loss": 0.0907, "step": 31111 }, { "epoch": 0.9076375517824844, "grad_norm": 0.7457147550208287, "learning_rate": 2.2205640522655725e-07, "loss": 0.1451, "step": 31112 }, { "epoch": 0.907666725013128, "grad_norm": 0.9140024106994731, "learning_rate": 2.2191719851759996e-07, "loss": 0.1118, "step": 31113 }, { "epoch": 0.9076958982437715, "grad_norm": 0.9136693296214689, "learning_rate": 2.2177803446622404e-07, "loss": 0.0968, "step": 31114 }, { "epoch": 0.9077250714744151, "grad_norm": 0.7244222394257598, "learning_rate": 2.2163891307367457e-07, "loss": 0.101, "step": 31115 }, { "epoch": 0.9077542447050586, "grad_norm": 0.8501133370857192, "learning_rate": 2.214998343411917e-07, "loss": 0.129, "step": 31116 }, { "epoch": 0.9077834179357022, "grad_norm": 0.8108296197082614, "learning_rate": 2.2136079827001666e-07, "loss": 0.1176, "step": 31117 }, { "epoch": 0.9078125911663457, "grad_norm": 0.7676470133077243, "learning_rate": 2.2122180486139232e-07, "loss": 0.1272, "step": 31118 }, { "epoch": 0.9078417643969893, "grad_norm": 0.9231091928417677, "learning_rate": 2.2108285411655938e-07, "loss": 0.1052, "step": 31119 }, { "epoch": 0.9078709376276329, "grad_norm": 0.9125915237622088, "learning_rate": 2.209439460367574e-07, "loss": 0.1269, "step": 31120 }, { "epoch": 0.9079001108582765, "grad_norm": 1.3371222585899003, "learning_rate": 2.2080508062322704e-07, "loss": 0.1045, "step": 31121 }, { "epoch": 0.90792928408892, "grad_norm": 0.7902946209997209, "learning_rate": 2.2066625787720842e-07, "loss": 0.1254, "step": 31122 }, { "epoch": 0.9079584573195636, "grad_norm": 0.6485061366155017, "learning_rate": 2.2052747779994055e-07, "loss": 0.1125, "step": 31123 }, { "epoch": 0.9079876305502071, "grad_norm": 1.104568038545473, "learning_rate": 2.2038874039266077e-07, "loss": 0.1224, "step": 31124 }, { "epoch": 0.9080168037808507, "grad_norm": 0.7622552739176846, "learning_rate": 2.202500456566109e-07, "loss": 0.1066, "step": 31125 }, { "epoch": 0.9080459770114943, "grad_norm": 0.7422091194608011, "learning_rate": 2.201113935930277e-07, "loss": 0.1074, "step": 31126 }, { "epoch": 0.9080751502421378, "grad_norm": 0.8907907529926969, "learning_rate": 2.1997278420314848e-07, "loss": 0.1117, "step": 31127 }, { "epoch": 0.9081043234727814, "grad_norm": 0.9067372482328319, "learning_rate": 2.198342174882112e-07, "loss": 0.1036, "step": 31128 }, { "epoch": 0.9081334967034249, "grad_norm": 0.7713778168392936, "learning_rate": 2.196956934494532e-07, "loss": 0.1138, "step": 31129 }, { "epoch": 0.9081626699340685, "grad_norm": 0.6670136652004125, "learning_rate": 2.1955721208811066e-07, "loss": 0.1027, "step": 31130 }, { "epoch": 0.908191843164712, "grad_norm": 0.8619621585420121, "learning_rate": 2.1941877340541984e-07, "loss": 0.1326, "step": 31131 }, { "epoch": 0.9082210163953556, "grad_norm": 0.8390283872276735, "learning_rate": 2.1928037740261753e-07, "loss": 0.1058, "step": 31132 }, { "epoch": 0.9082501896259992, "grad_norm": 0.7881674467248587, "learning_rate": 2.1914202408093887e-07, "loss": 0.105, "step": 31133 }, { "epoch": 0.9082793628566428, "grad_norm": 0.7892069065864602, "learning_rate": 2.1900371344161787e-07, "loss": 0.0975, "step": 31134 }, { "epoch": 0.9083085360872863, "grad_norm": 0.7709085175835558, "learning_rate": 2.1886544548589184e-07, "loss": 0.1178, "step": 31135 }, { "epoch": 0.9083377093179299, "grad_norm": 0.6235509734500673, "learning_rate": 2.187272202149926e-07, "loss": 0.1023, "step": 31136 }, { "epoch": 0.9083668825485735, "grad_norm": 0.6889896345070166, "learning_rate": 2.1858903763015583e-07, "loss": 0.1144, "step": 31137 }, { "epoch": 0.908396055779217, "grad_norm": 0.8440367631430685, "learning_rate": 2.184508977326144e-07, "loss": 0.1128, "step": 31138 }, { "epoch": 0.9084252290098606, "grad_norm": 0.8555631303594844, "learning_rate": 2.1831280052360238e-07, "loss": 0.1042, "step": 31139 }, { "epoch": 0.9084544022405041, "grad_norm": 0.9960188899937072, "learning_rate": 2.1817474600435262e-07, "loss": 0.1311, "step": 31140 }, { "epoch": 0.9084835754711477, "grad_norm": 0.7604910924176862, "learning_rate": 2.1803673417609584e-07, "loss": 0.1087, "step": 31141 }, { "epoch": 0.9085127487017912, "grad_norm": 0.8788464053917843, "learning_rate": 2.1789876504006601e-07, "loss": 0.1196, "step": 31142 }, { "epoch": 0.9085419219324348, "grad_norm": 0.8429605800895439, "learning_rate": 2.1776083859749498e-07, "loss": 0.0924, "step": 31143 }, { "epoch": 0.9085710951630783, "grad_norm": 0.9128799379517005, "learning_rate": 2.176229548496134e-07, "loss": 0.1232, "step": 31144 }, { "epoch": 0.9086002683937219, "grad_norm": 0.9937007117197812, "learning_rate": 2.1748511379765247e-07, "loss": 0.1039, "step": 31145 }, { "epoch": 0.9086294416243654, "grad_norm": 1.179439316220124, "learning_rate": 2.1734731544284293e-07, "loss": 0.1173, "step": 31146 }, { "epoch": 0.9086586148550091, "grad_norm": 0.8566170913287037, "learning_rate": 2.1720955978641433e-07, "loss": 0.1144, "step": 31147 }, { "epoch": 0.9086877880856526, "grad_norm": 0.8013007543558315, "learning_rate": 2.170718468295968e-07, "loss": 0.1093, "step": 31148 }, { "epoch": 0.9087169613162962, "grad_norm": 1.1102425067230988, "learning_rate": 2.1693417657362048e-07, "loss": 0.1153, "step": 31149 }, { "epoch": 0.9087461345469398, "grad_norm": 0.7486575885918128, "learning_rate": 2.1679654901971436e-07, "loss": 0.1024, "step": 31150 }, { "epoch": 0.9087753077775833, "grad_norm": 0.8329626115948966, "learning_rate": 2.1665896416910638e-07, "loss": 0.1171, "step": 31151 }, { "epoch": 0.9088044810082269, "grad_norm": 0.7603322985587933, "learning_rate": 2.165214220230255e-07, "loss": 0.1066, "step": 31152 }, { "epoch": 0.9088336542388704, "grad_norm": 0.7644941059406194, "learning_rate": 2.163839225826997e-07, "loss": 0.0884, "step": 31153 }, { "epoch": 0.908862827469514, "grad_norm": 0.8187194717508908, "learning_rate": 2.1624646584935515e-07, "loss": 0.1062, "step": 31154 }, { "epoch": 0.9088920007001575, "grad_norm": 0.8938377716105418, "learning_rate": 2.161090518242215e-07, "loss": 0.1072, "step": 31155 }, { "epoch": 0.9089211739308011, "grad_norm": 0.7978155172171842, "learning_rate": 2.159716805085238e-07, "loss": 0.1165, "step": 31156 }, { "epoch": 0.9089503471614446, "grad_norm": 0.7328847864974878, "learning_rate": 2.1583435190348833e-07, "loss": 0.096, "step": 31157 }, { "epoch": 0.9089795203920882, "grad_norm": 0.6593593177615167, "learning_rate": 2.1569706601034246e-07, "loss": 0.1249, "step": 31158 }, { "epoch": 0.9090086936227317, "grad_norm": 0.855227361992073, "learning_rate": 2.155598228303113e-07, "loss": 0.1173, "step": 31159 }, { "epoch": 0.9090378668533754, "grad_norm": 0.813050944243014, "learning_rate": 2.1542262236461887e-07, "loss": 0.0982, "step": 31160 }, { "epoch": 0.909067040084019, "grad_norm": 0.9921805848528199, "learning_rate": 2.152854646144914e-07, "loss": 0.098, "step": 31161 }, { "epoch": 0.9090962133146625, "grad_norm": 0.7507611875558359, "learning_rate": 2.151483495811535e-07, "loss": 0.1068, "step": 31162 }, { "epoch": 0.9091253865453061, "grad_norm": 0.822681609407171, "learning_rate": 2.1501127726582916e-07, "loss": 0.1313, "step": 31163 }, { "epoch": 0.9091545597759496, "grad_norm": 0.976159035189303, "learning_rate": 2.148742476697413e-07, "loss": 0.1231, "step": 31164 }, { "epoch": 0.9091837330065932, "grad_norm": 0.7753825272428042, "learning_rate": 2.1473726079411394e-07, "loss": 0.1147, "step": 31165 }, { "epoch": 0.9092129062372367, "grad_norm": 1.1635609513305534, "learning_rate": 2.1460031664017002e-07, "loss": 0.1184, "step": 31166 }, { "epoch": 0.9092420794678803, "grad_norm": 0.9433089715287637, "learning_rate": 2.1446341520913238e-07, "loss": 0.1118, "step": 31167 }, { "epoch": 0.9092712526985238, "grad_norm": 0.6221398742753601, "learning_rate": 2.1432655650222234e-07, "loss": 0.1106, "step": 31168 }, { "epoch": 0.9093004259291674, "grad_norm": 0.811485942160595, "learning_rate": 2.1418974052066276e-07, "loss": 0.1407, "step": 31169 }, { "epoch": 0.9093295991598109, "grad_norm": 0.8165279238446315, "learning_rate": 2.1405296726567493e-07, "loss": 0.125, "step": 31170 }, { "epoch": 0.9093587723904545, "grad_norm": 0.9310940873584964, "learning_rate": 2.139162367384784e-07, "loss": 0.1085, "step": 31171 }, { "epoch": 0.909387945621098, "grad_norm": 0.6928230019094935, "learning_rate": 2.1377954894029662e-07, "loss": 0.0968, "step": 31172 }, { "epoch": 0.9094171188517416, "grad_norm": 0.7918110263515088, "learning_rate": 2.1364290387234864e-07, "loss": 0.1196, "step": 31173 }, { "epoch": 0.9094462920823853, "grad_norm": 0.8102359101197163, "learning_rate": 2.135063015358535e-07, "loss": 0.1077, "step": 31174 }, { "epoch": 0.9094754653130288, "grad_norm": 0.9933870456354755, "learning_rate": 2.1336974193203185e-07, "loss": 0.0799, "step": 31175 }, { "epoch": 0.9095046385436724, "grad_norm": 0.768122671744198, "learning_rate": 2.132332250621022e-07, "loss": 0.1067, "step": 31176 }, { "epoch": 0.9095338117743159, "grad_norm": 0.8469302239599373, "learning_rate": 2.1309675092728353e-07, "loss": 0.0991, "step": 31177 }, { "epoch": 0.9095629850049595, "grad_norm": 0.8711834960374405, "learning_rate": 2.1296031952879437e-07, "loss": 0.1015, "step": 31178 }, { "epoch": 0.909592158235603, "grad_norm": 0.8359144380449869, "learning_rate": 2.1282393086785313e-07, "loss": 0.108, "step": 31179 }, { "epoch": 0.9096213314662466, "grad_norm": 0.771065559095617, "learning_rate": 2.1268758494567666e-07, "loss": 0.13, "step": 31180 }, { "epoch": 0.9096505046968901, "grad_norm": 0.8667820910889501, "learning_rate": 2.1255128176348283e-07, "loss": 0.1193, "step": 31181 }, { "epoch": 0.9096796779275337, "grad_norm": 0.7451349820479439, "learning_rate": 2.1241502132248848e-07, "loss": 0.1084, "step": 31182 }, { "epoch": 0.9097088511581772, "grad_norm": 0.7770633929395803, "learning_rate": 2.122788036239093e-07, "loss": 0.1314, "step": 31183 }, { "epoch": 0.9097380243888208, "grad_norm": 0.8238839116750369, "learning_rate": 2.1214262866896208e-07, "loss": 0.0927, "step": 31184 }, { "epoch": 0.9097671976194643, "grad_norm": 0.8207904619219264, "learning_rate": 2.1200649645886308e-07, "loss": 0.1215, "step": 31185 }, { "epoch": 0.9097963708501079, "grad_norm": 0.905414138949186, "learning_rate": 2.1187040699482685e-07, "loss": 0.1098, "step": 31186 }, { "epoch": 0.9098255440807514, "grad_norm": 0.8058840126251363, "learning_rate": 2.117343602780686e-07, "loss": 0.1078, "step": 31187 }, { "epoch": 0.9098547173113951, "grad_norm": 0.8401359675866493, "learning_rate": 2.1159835630980286e-07, "loss": 0.1306, "step": 31188 }, { "epoch": 0.9098838905420387, "grad_norm": 0.8153818141633182, "learning_rate": 2.1146239509124365e-07, "loss": 0.097, "step": 31189 }, { "epoch": 0.9099130637726822, "grad_norm": 0.9347981040478424, "learning_rate": 2.1132647662360562e-07, "loss": 0.0835, "step": 31190 }, { "epoch": 0.9099422370033258, "grad_norm": 0.88370779193522, "learning_rate": 2.1119060090810106e-07, "loss": 0.102, "step": 31191 }, { "epoch": 0.9099714102339693, "grad_norm": 0.9322960137650994, "learning_rate": 2.110547679459446e-07, "loss": 0.1082, "step": 31192 }, { "epoch": 0.9100005834646129, "grad_norm": 0.7839507296136591, "learning_rate": 2.1091897773834746e-07, "loss": 0.0998, "step": 31193 }, { "epoch": 0.9100297566952564, "grad_norm": 0.7603850833206659, "learning_rate": 2.1078323028652203e-07, "loss": 0.1055, "step": 31194 }, { "epoch": 0.9100589299259, "grad_norm": 0.6832395959495823, "learning_rate": 2.1064752559168067e-07, "loss": 0.1009, "step": 31195 }, { "epoch": 0.9100881031565435, "grad_norm": 0.8325881847933028, "learning_rate": 2.1051186365503517e-07, "loss": 0.128, "step": 31196 }, { "epoch": 0.9101172763871871, "grad_norm": 0.6845476421445118, "learning_rate": 2.1037624447779682e-07, "loss": 0.1121, "step": 31197 }, { "epoch": 0.9101464496178306, "grad_norm": 0.7073666026472823, "learning_rate": 2.1024066806117515e-07, "loss": 0.1075, "step": 31198 }, { "epoch": 0.9101756228484742, "grad_norm": 1.0179873335506506, "learning_rate": 2.1010513440638203e-07, "loss": 0.1141, "step": 31199 }, { "epoch": 0.9102047960791178, "grad_norm": 0.8199139157129577, "learning_rate": 2.09969643514627e-07, "loss": 0.0954, "step": 31200 }, { "epoch": 0.9102339693097614, "grad_norm": 1.125528827216129, "learning_rate": 2.0983419538711803e-07, "loss": 0.1098, "step": 31201 }, { "epoch": 0.910263142540405, "grad_norm": 0.7962419334239833, "learning_rate": 2.0969879002506742e-07, "loss": 0.1239, "step": 31202 }, { "epoch": 0.9102923157710485, "grad_norm": 0.7826052581298216, "learning_rate": 2.095634274296826e-07, "loss": 0.0953, "step": 31203 }, { "epoch": 0.9103214890016921, "grad_norm": 0.9102591702918901, "learning_rate": 2.0942810760217092e-07, "loss": 0.1033, "step": 31204 }, { "epoch": 0.9103506622323356, "grad_norm": 0.8062521425848791, "learning_rate": 2.0929283054374193e-07, "loss": 0.1047, "step": 31205 }, { "epoch": 0.9103798354629792, "grad_norm": 0.8918295512444327, "learning_rate": 2.0915759625560306e-07, "loss": 0.1228, "step": 31206 }, { "epoch": 0.9104090086936227, "grad_norm": 1.0958800628665835, "learning_rate": 2.0902240473896106e-07, "loss": 0.1021, "step": 31207 }, { "epoch": 0.9104381819242663, "grad_norm": 0.8979327014990276, "learning_rate": 2.0888725599502335e-07, "loss": 0.1171, "step": 31208 }, { "epoch": 0.9104673551549098, "grad_norm": 0.8538666372363959, "learning_rate": 2.0875215002499727e-07, "loss": 0.1012, "step": 31209 }, { "epoch": 0.9104965283855534, "grad_norm": 1.3116497012888577, "learning_rate": 2.0861708683008796e-07, "loss": 0.1114, "step": 31210 }, { "epoch": 0.910525701616197, "grad_norm": 0.8829732928385619, "learning_rate": 2.084820664115006e-07, "loss": 0.1142, "step": 31211 }, { "epoch": 0.9105548748468405, "grad_norm": 0.8813183058437354, "learning_rate": 2.0834708877044252e-07, "loss": 0.1215, "step": 31212 }, { "epoch": 0.910584048077484, "grad_norm": 0.8921581346672733, "learning_rate": 2.0821215390811722e-07, "loss": 0.1125, "step": 31213 }, { "epoch": 0.9106132213081276, "grad_norm": 0.8727611817341433, "learning_rate": 2.0807726182572984e-07, "loss": 0.1025, "step": 31214 }, { "epoch": 0.9106423945387713, "grad_norm": 0.7044576566301238, "learning_rate": 2.0794241252448554e-07, "loss": 0.107, "step": 31215 }, { "epoch": 0.9106715677694148, "grad_norm": 0.7849471879663136, "learning_rate": 2.0780760600558724e-07, "loss": 0.1085, "step": 31216 }, { "epoch": 0.9107007410000584, "grad_norm": 0.8331097134214188, "learning_rate": 2.0767284227023786e-07, "loss": 0.1132, "step": 31217 }, { "epoch": 0.9107299142307019, "grad_norm": 0.6959226899889344, "learning_rate": 2.0753812131964202e-07, "loss": 0.1337, "step": 31218 }, { "epoch": 0.9107590874613455, "grad_norm": 0.967610349999905, "learning_rate": 2.0740344315500093e-07, "loss": 0.1025, "step": 31219 }, { "epoch": 0.910788260691989, "grad_norm": 0.9678259605853304, "learning_rate": 2.0726880777751922e-07, "loss": 0.1306, "step": 31220 }, { "epoch": 0.9108174339226326, "grad_norm": 0.8755955747118267, "learning_rate": 2.0713421518839595e-07, "loss": 0.1215, "step": 31221 }, { "epoch": 0.9108466071532761, "grad_norm": 0.7458522518925815, "learning_rate": 2.0699966538883565e-07, "loss": 0.125, "step": 31222 }, { "epoch": 0.9108757803839197, "grad_norm": 0.9182134613131028, "learning_rate": 2.068651583800374e-07, "loss": 0.1196, "step": 31223 }, { "epoch": 0.9109049536145633, "grad_norm": 0.8866340951449045, "learning_rate": 2.0673069416320303e-07, "loss": 0.1035, "step": 31224 }, { "epoch": 0.9109341268452068, "grad_norm": 0.8933500504473191, "learning_rate": 2.065962727395321e-07, "loss": 0.1116, "step": 31225 }, { "epoch": 0.9109633000758504, "grad_norm": 0.7485970038879802, "learning_rate": 2.0646189411022588e-07, "loss": 0.1031, "step": 31226 }, { "epoch": 0.9109924733064939, "grad_norm": 0.8148759799355173, "learning_rate": 2.0632755827648397e-07, "loss": 0.1201, "step": 31227 }, { "epoch": 0.9110216465371376, "grad_norm": 0.7876042273869697, "learning_rate": 2.061932652395049e-07, "loss": 0.0992, "step": 31228 }, { "epoch": 0.9110508197677811, "grad_norm": 0.9348614192922458, "learning_rate": 2.060590150004882e-07, "loss": 0.1073, "step": 31229 }, { "epoch": 0.9110799929984247, "grad_norm": 0.8014039247808129, "learning_rate": 2.0592480756063237e-07, "loss": 0.1041, "step": 31230 }, { "epoch": 0.9111091662290682, "grad_norm": 0.8874166261027333, "learning_rate": 2.057906429211337e-07, "loss": 0.1228, "step": 31231 }, { "epoch": 0.9111383394597118, "grad_norm": 0.7143287227728006, "learning_rate": 2.0565652108319344e-07, "loss": 0.1181, "step": 31232 }, { "epoch": 0.9111675126903553, "grad_norm": 0.7281203535430113, "learning_rate": 2.055224420480073e-07, "loss": 0.1188, "step": 31233 }, { "epoch": 0.9111966859209989, "grad_norm": 0.763141740829035, "learning_rate": 2.0538840581677156e-07, "loss": 0.0935, "step": 31234 }, { "epoch": 0.9112258591516424, "grad_norm": 0.9045996524010984, "learning_rate": 2.052544123906841e-07, "loss": 0.112, "step": 31235 }, { "epoch": 0.911255032382286, "grad_norm": 0.7547713704952629, "learning_rate": 2.051204617709407e-07, "loss": 0.106, "step": 31236 }, { "epoch": 0.9112842056129296, "grad_norm": 1.0281046979497552, "learning_rate": 2.0498655395873645e-07, "loss": 0.1105, "step": 31237 }, { "epoch": 0.9113133788435731, "grad_norm": 0.9824969572957485, "learning_rate": 2.0485268895526766e-07, "loss": 0.1236, "step": 31238 }, { "epoch": 0.9113425520742167, "grad_norm": 0.8476378577194715, "learning_rate": 2.0471886676173002e-07, "loss": 0.0903, "step": 31239 }, { "epoch": 0.9113717253048602, "grad_norm": 0.7982521157682128, "learning_rate": 2.045850873793176e-07, "loss": 0.1229, "step": 31240 }, { "epoch": 0.9114008985355038, "grad_norm": 0.9620654671412657, "learning_rate": 2.044513508092244e-07, "loss": 0.1272, "step": 31241 }, { "epoch": 0.9114300717661474, "grad_norm": 0.7299604073402126, "learning_rate": 2.0431765705264505e-07, "loss": 0.0799, "step": 31242 }, { "epoch": 0.911459244996791, "grad_norm": 0.7012347569197621, "learning_rate": 2.0418400611077194e-07, "loss": 0.1011, "step": 31243 }, { "epoch": 0.9114884182274345, "grad_norm": 0.7623281519241979, "learning_rate": 2.0405039798479964e-07, "loss": 0.0953, "step": 31244 }, { "epoch": 0.9115175914580781, "grad_norm": 0.8991371714453419, "learning_rate": 2.039168326759211e-07, "loss": 0.1127, "step": 31245 }, { "epoch": 0.9115467646887216, "grad_norm": 1.036221718478377, "learning_rate": 2.0378331018532814e-07, "loss": 0.1039, "step": 31246 }, { "epoch": 0.9115759379193652, "grad_norm": 0.794707890425934, "learning_rate": 2.0364983051421204e-07, "loss": 0.0994, "step": 31247 }, { "epoch": 0.9116051111500088, "grad_norm": 0.9886205039691118, "learning_rate": 2.0351639366376575e-07, "loss": 0.1216, "step": 31248 }, { "epoch": 0.9116342843806523, "grad_norm": 0.9432682229351107, "learning_rate": 2.0338299963517993e-07, "loss": 0.1286, "step": 31249 }, { "epoch": 0.9116634576112959, "grad_norm": 0.8112227455028246, "learning_rate": 2.0324964842964589e-07, "loss": 0.0921, "step": 31250 }, { "epoch": 0.9116926308419394, "grad_norm": 0.7402480177845739, "learning_rate": 2.0311634004835324e-07, "loss": 0.1389, "step": 31251 }, { "epoch": 0.911721804072583, "grad_norm": 0.8635522058155916, "learning_rate": 2.0298307449249377e-07, "loss": 0.1064, "step": 31252 }, { "epoch": 0.9117509773032265, "grad_norm": 0.9832749334854058, "learning_rate": 2.02849851763256e-07, "loss": 0.1093, "step": 31253 }, { "epoch": 0.9117801505338701, "grad_norm": 1.0136663990298818, "learning_rate": 2.0271667186182897e-07, "loss": 0.1018, "step": 31254 }, { "epoch": 0.9118093237645137, "grad_norm": 0.7260314617532732, "learning_rate": 2.025835347894023e-07, "loss": 0.1097, "step": 31255 }, { "epoch": 0.9118384969951573, "grad_norm": 0.9468722254681623, "learning_rate": 2.0245044054716557e-07, "loss": 0.1295, "step": 31256 }, { "epoch": 0.9118676702258008, "grad_norm": 1.0606643474312076, "learning_rate": 2.023173891363056e-07, "loss": 0.1195, "step": 31257 }, { "epoch": 0.9118968434564444, "grad_norm": 0.9831047885398735, "learning_rate": 2.0218438055801038e-07, "loss": 0.1126, "step": 31258 }, { "epoch": 0.911926016687088, "grad_norm": 0.859583230323244, "learning_rate": 2.0205141481346835e-07, "loss": 0.1127, "step": 31259 }, { "epoch": 0.9119551899177315, "grad_norm": 0.8819680571912979, "learning_rate": 2.0191849190386526e-07, "loss": 0.1071, "step": 31260 }, { "epoch": 0.911984363148375, "grad_norm": 0.7757292719689446, "learning_rate": 2.0178561183038793e-07, "loss": 0.1175, "step": 31261 }, { "epoch": 0.9120135363790186, "grad_norm": 0.8075015649567129, "learning_rate": 2.0165277459422428e-07, "loss": 0.1094, "step": 31262 }, { "epoch": 0.9120427096096622, "grad_norm": 0.8592848982920227, "learning_rate": 2.0151998019655895e-07, "loss": 0.1265, "step": 31263 }, { "epoch": 0.9120718828403057, "grad_norm": 1.12269639300866, "learning_rate": 2.0138722863857762e-07, "loss": 0.1202, "step": 31264 }, { "epoch": 0.9121010560709493, "grad_norm": 0.8409779073218775, "learning_rate": 2.0125451992146606e-07, "loss": 0.1012, "step": 31265 }, { "epoch": 0.9121302293015928, "grad_norm": 0.7299165332371558, "learning_rate": 2.0112185404640827e-07, "loss": 0.102, "step": 31266 }, { "epoch": 0.9121594025322364, "grad_norm": 0.8173740844966054, "learning_rate": 2.0098923101458833e-07, "loss": 0.1041, "step": 31267 }, { "epoch": 0.9121885757628799, "grad_norm": 0.6394795081855168, "learning_rate": 2.0085665082719142e-07, "loss": 0.0918, "step": 31268 }, { "epoch": 0.9122177489935236, "grad_norm": 0.7054351889259936, "learning_rate": 2.00724113485401e-07, "loss": 0.1102, "step": 31269 }, { "epoch": 0.9122469222241671, "grad_norm": 0.8141005998471933, "learning_rate": 2.0059161899040001e-07, "loss": 0.1159, "step": 31270 }, { "epoch": 0.9122760954548107, "grad_norm": 0.9182301652023297, "learning_rate": 2.004591673433709e-07, "loss": 0.1069, "step": 31271 }, { "epoch": 0.9123052686854543, "grad_norm": 0.7432654349729967, "learning_rate": 2.003267585454971e-07, "loss": 0.11, "step": 31272 }, { "epoch": 0.9123344419160978, "grad_norm": 0.8057741532574665, "learning_rate": 2.0019439259795935e-07, "loss": 0.1161, "step": 31273 }, { "epoch": 0.9123636151467414, "grad_norm": 1.125519038579545, "learning_rate": 2.0006206950194063e-07, "loss": 0.1175, "step": 31274 }, { "epoch": 0.9123927883773849, "grad_norm": 0.8786397529807448, "learning_rate": 1.9992978925862215e-07, "loss": 0.113, "step": 31275 }, { "epoch": 0.9124219616080285, "grad_norm": 0.8508726654735528, "learning_rate": 1.9979755186918525e-07, "loss": 0.1181, "step": 31276 }, { "epoch": 0.912451134838672, "grad_norm": 0.916797857464992, "learning_rate": 1.9966535733480897e-07, "loss": 0.1113, "step": 31277 }, { "epoch": 0.9124803080693156, "grad_norm": 0.8103247524664272, "learning_rate": 1.9953320565667457e-07, "loss": 0.1079, "step": 31278 }, { "epoch": 0.9125094812999591, "grad_norm": 0.8911431660576439, "learning_rate": 1.9940109683596165e-07, "loss": 0.1151, "step": 31279 }, { "epoch": 0.9125386545306027, "grad_norm": 0.768540094033884, "learning_rate": 1.9926903087385042e-07, "loss": 0.111, "step": 31280 }, { "epoch": 0.9125678277612462, "grad_norm": 0.8326238160989231, "learning_rate": 1.9913700777151823e-07, "loss": 0.1025, "step": 31281 }, { "epoch": 0.9125970009918899, "grad_norm": 1.2324609342680006, "learning_rate": 1.9900502753014584e-07, "loss": 0.0899, "step": 31282 }, { "epoch": 0.9126261742225334, "grad_norm": 0.7990047517751675, "learning_rate": 1.988730901509106e-07, "loss": 0.1146, "step": 31283 }, { "epoch": 0.912655347453177, "grad_norm": 0.7296273030813718, "learning_rate": 1.987411956349894e-07, "loss": 0.1181, "step": 31284 }, { "epoch": 0.9126845206838206, "grad_norm": 0.8432077346000755, "learning_rate": 1.9860934398356013e-07, "loss": 0.117, "step": 31285 }, { "epoch": 0.9127136939144641, "grad_norm": 0.8749049709019081, "learning_rate": 1.9847753519780188e-07, "loss": 0.1075, "step": 31286 }, { "epoch": 0.9127428671451077, "grad_norm": 0.7978980913542358, "learning_rate": 1.983457692788898e-07, "loss": 0.1059, "step": 31287 }, { "epoch": 0.9127720403757512, "grad_norm": 0.7783015059887668, "learning_rate": 1.9821404622799966e-07, "loss": 0.1023, "step": 31288 }, { "epoch": 0.9128012136063948, "grad_norm": 0.7633953813240251, "learning_rate": 1.9808236604630882e-07, "loss": 0.1187, "step": 31289 }, { "epoch": 0.9128303868370383, "grad_norm": 0.6597935910223036, "learning_rate": 1.9795072873499245e-07, "loss": 0.0849, "step": 31290 }, { "epoch": 0.9128595600676819, "grad_norm": 0.8487592715030142, "learning_rate": 1.9781913429522403e-07, "loss": 0.1089, "step": 31291 }, { "epoch": 0.9128887332983254, "grad_norm": 0.9464287978142161, "learning_rate": 1.9768758272818155e-07, "loss": 0.1104, "step": 31292 }, { "epoch": 0.912917906528969, "grad_norm": 1.1129414968464362, "learning_rate": 1.9755607403503797e-07, "loss": 0.1168, "step": 31293 }, { "epoch": 0.9129470797596125, "grad_norm": 0.7808060210399638, "learning_rate": 1.9742460821696674e-07, "loss": 0.1284, "step": 31294 }, { "epoch": 0.9129762529902561, "grad_norm": 0.9637865048798887, "learning_rate": 1.972931852751425e-07, "loss": 0.1215, "step": 31295 }, { "epoch": 0.9130054262208998, "grad_norm": 0.6754609672884183, "learning_rate": 1.9716180521073823e-07, "loss": 0.0873, "step": 31296 }, { "epoch": 0.9130345994515433, "grad_norm": 0.7668595822596354, "learning_rate": 1.9703046802492687e-07, "loss": 0.1092, "step": 31297 }, { "epoch": 0.9130637726821869, "grad_norm": 0.8351857025860321, "learning_rate": 1.9689917371888024e-07, "loss": 0.1223, "step": 31298 }, { "epoch": 0.9130929459128304, "grad_norm": 0.9598557356092702, "learning_rate": 1.9676792229377184e-07, "loss": 0.1364, "step": 31299 }, { "epoch": 0.913122119143474, "grad_norm": 0.799230935536841, "learning_rate": 1.9663671375077298e-07, "loss": 0.1027, "step": 31300 }, { "epoch": 0.9131512923741175, "grad_norm": 0.6777469754416956, "learning_rate": 1.9650554809105438e-07, "loss": 0.1189, "step": 31301 }, { "epoch": 0.9131804656047611, "grad_norm": 0.9142433031010697, "learning_rate": 1.9637442531578787e-07, "loss": 0.1191, "step": 31302 }, { "epoch": 0.9132096388354046, "grad_norm": 0.906970151087242, "learning_rate": 1.962433454261431e-07, "loss": 0.1158, "step": 31303 }, { "epoch": 0.9132388120660482, "grad_norm": 0.8526403777218867, "learning_rate": 1.9611230842329133e-07, "loss": 0.0898, "step": 31304 }, { "epoch": 0.9132679852966917, "grad_norm": 0.5530258071756962, "learning_rate": 1.9598131430840272e-07, "loss": 0.1151, "step": 31305 }, { "epoch": 0.9132971585273353, "grad_norm": 0.7692251461411396, "learning_rate": 1.9585036308264582e-07, "loss": 0.094, "step": 31306 }, { "epoch": 0.9133263317579788, "grad_norm": 0.9758808536373275, "learning_rate": 1.9571945474718967e-07, "loss": 0.0996, "step": 31307 }, { "epoch": 0.9133555049886224, "grad_norm": 0.8248060555101733, "learning_rate": 1.955885893032039e-07, "loss": 0.1169, "step": 31308 }, { "epoch": 0.913384678219266, "grad_norm": 0.7753417412953666, "learning_rate": 1.954577667518559e-07, "loss": 0.1001, "step": 31309 }, { "epoch": 0.9134138514499096, "grad_norm": 0.8813853837993885, "learning_rate": 1.953269870943142e-07, "loss": 0.1028, "step": 31310 }, { "epoch": 0.9134430246805532, "grad_norm": 0.7867172767948929, "learning_rate": 1.9519625033174562e-07, "loss": 0.093, "step": 31311 }, { "epoch": 0.9134721979111967, "grad_norm": 0.6112482339889289, "learning_rate": 1.9506555646531867e-07, "loss": 0.1046, "step": 31312 }, { "epoch": 0.9135013711418403, "grad_norm": 0.9858858148612809, "learning_rate": 1.9493490549619965e-07, "loss": 0.1101, "step": 31313 }, { "epoch": 0.9135305443724838, "grad_norm": 0.8754013133805054, "learning_rate": 1.9480429742555374e-07, "loss": 0.1102, "step": 31314 }, { "epoch": 0.9135597176031274, "grad_norm": 0.9901464111457875, "learning_rate": 1.9467373225454832e-07, "loss": 0.1031, "step": 31315 }, { "epoch": 0.9135888908337709, "grad_norm": 0.7140144003983445, "learning_rate": 1.9454320998434918e-07, "loss": 0.1245, "step": 31316 }, { "epoch": 0.9136180640644145, "grad_norm": 0.9792188827829292, "learning_rate": 1.9441273061612087e-07, "loss": 0.1102, "step": 31317 }, { "epoch": 0.913647237295058, "grad_norm": 0.7464639286634547, "learning_rate": 1.9428229415102807e-07, "loss": 0.1128, "step": 31318 }, { "epoch": 0.9136764105257016, "grad_norm": 0.6927326790382387, "learning_rate": 1.9415190059023647e-07, "loss": 0.1166, "step": 31319 }, { "epoch": 0.9137055837563451, "grad_norm": 0.8408633978323875, "learning_rate": 1.9402154993490962e-07, "loss": 0.1198, "step": 31320 }, { "epoch": 0.9137347569869887, "grad_norm": 1.1110077527922588, "learning_rate": 1.9389124218620937e-07, "loss": 0.1052, "step": 31321 }, { "epoch": 0.9137639302176322, "grad_norm": 1.0907989034999677, "learning_rate": 1.9376097734530196e-07, "loss": 0.1177, "step": 31322 }, { "epoch": 0.9137931034482759, "grad_norm": 0.842923815502877, "learning_rate": 1.9363075541334986e-07, "loss": 0.1135, "step": 31323 }, { "epoch": 0.9138222766789195, "grad_norm": 0.8011246284710944, "learning_rate": 1.9350057639151377e-07, "loss": 0.1004, "step": 31324 }, { "epoch": 0.913851449909563, "grad_norm": 0.8584964658344592, "learning_rate": 1.933704402809583e-07, "loss": 0.113, "step": 31325 }, { "epoch": 0.9138806231402066, "grad_norm": 0.9878084824197102, "learning_rate": 1.9324034708284368e-07, "loss": 0.111, "step": 31326 }, { "epoch": 0.9139097963708501, "grad_norm": 1.0150341445004687, "learning_rate": 1.9311029679833115e-07, "loss": 0.1242, "step": 31327 }, { "epoch": 0.9139389696014937, "grad_norm": 0.749833970785854, "learning_rate": 1.929802894285826e-07, "loss": 0.0944, "step": 31328 }, { "epoch": 0.9139681428321372, "grad_norm": 0.8103429378662382, "learning_rate": 1.9285032497475876e-07, "loss": 0.1373, "step": 31329 }, { "epoch": 0.9139973160627808, "grad_norm": 0.6616439365790598, "learning_rate": 1.927204034380198e-07, "loss": 0.0981, "step": 31330 }, { "epoch": 0.9140264892934243, "grad_norm": 0.7877981710024274, "learning_rate": 1.9259052481952534e-07, "loss": 0.1087, "step": 31331 }, { "epoch": 0.9140556625240679, "grad_norm": 1.1921717562156433, "learning_rate": 1.9246068912043504e-07, "loss": 0.1068, "step": 31332 }, { "epoch": 0.9140848357547114, "grad_norm": 0.674428734976857, "learning_rate": 1.9233089634190794e-07, "loss": 0.1142, "step": 31333 }, { "epoch": 0.914114008985355, "grad_norm": 0.9418225486542654, "learning_rate": 1.9220114648510259e-07, "loss": 0.136, "step": 31334 }, { "epoch": 0.9141431822159986, "grad_norm": 0.819986673302682, "learning_rate": 1.9207143955117858e-07, "loss": 0.0952, "step": 31335 }, { "epoch": 0.9141723554466422, "grad_norm": 0.9348377373039002, "learning_rate": 1.919417755412928e-07, "loss": 0.114, "step": 31336 }, { "epoch": 0.9142015286772858, "grad_norm": 0.833627907377239, "learning_rate": 1.918121544566026e-07, "loss": 0.1363, "step": 31337 }, { "epoch": 0.9142307019079293, "grad_norm": 0.9850077049898679, "learning_rate": 1.9168257629826604e-07, "loss": 0.123, "step": 31338 }, { "epoch": 0.9142598751385729, "grad_norm": 0.6545767772470965, "learning_rate": 1.9155304106743932e-07, "loss": 0.0977, "step": 31339 }, { "epoch": 0.9142890483692164, "grad_norm": 1.2227961436415566, "learning_rate": 1.9142354876527935e-07, "loss": 0.1189, "step": 31340 }, { "epoch": 0.91431822159986, "grad_norm": 0.6897087811078801, "learning_rate": 1.9129409939294185e-07, "loss": 0.1173, "step": 31341 }, { "epoch": 0.9143473948305035, "grad_norm": 1.1786573132138842, "learning_rate": 1.9116469295158312e-07, "loss": 0.1249, "step": 31342 }, { "epoch": 0.9143765680611471, "grad_norm": 0.9441161938170052, "learning_rate": 1.9103532944235781e-07, "loss": 0.0996, "step": 31343 }, { "epoch": 0.9144057412917906, "grad_norm": 0.7107152263994747, "learning_rate": 1.9090600886642109e-07, "loss": 0.1031, "step": 31344 }, { "epoch": 0.9144349145224342, "grad_norm": 0.9142669952677954, "learning_rate": 1.9077673122492702e-07, "loss": 0.107, "step": 31345 }, { "epoch": 0.9144640877530777, "grad_norm": 0.8485615026657264, "learning_rate": 1.906474965190308e-07, "loss": 0.1267, "step": 31346 }, { "epoch": 0.9144932609837213, "grad_norm": 0.6411471885901804, "learning_rate": 1.9051830474988597e-07, "loss": 0.1069, "step": 31347 }, { "epoch": 0.9145224342143649, "grad_norm": 0.8686416439086416, "learning_rate": 1.9038915591864493e-07, "loss": 0.1032, "step": 31348 }, { "epoch": 0.9145516074450084, "grad_norm": 0.8858180694698129, "learning_rate": 1.9026005002646174e-07, "loss": 0.1239, "step": 31349 }, { "epoch": 0.9145807806756521, "grad_norm": 0.7773263938989426, "learning_rate": 1.9013098707448885e-07, "loss": 0.1051, "step": 31350 }, { "epoch": 0.9146099539062956, "grad_norm": 0.9766792669376659, "learning_rate": 1.9000196706387697e-07, "loss": 0.1083, "step": 31351 }, { "epoch": 0.9146391271369392, "grad_norm": 0.8726764601428715, "learning_rate": 1.8987298999578076e-07, "loss": 0.1061, "step": 31352 }, { "epoch": 0.9146683003675827, "grad_norm": 0.8840248440184265, "learning_rate": 1.897440558713498e-07, "loss": 0.0984, "step": 31353 }, { "epoch": 0.9146974735982263, "grad_norm": 0.8373228389039238, "learning_rate": 1.8961516469173547e-07, "loss": 0.1066, "step": 31354 }, { "epoch": 0.9147266468288698, "grad_norm": 0.771490868722007, "learning_rate": 1.89486316458089e-07, "loss": 0.1168, "step": 31355 }, { "epoch": 0.9147558200595134, "grad_norm": 0.894960769756662, "learning_rate": 1.8935751117156008e-07, "loss": 0.1254, "step": 31356 }, { "epoch": 0.914784993290157, "grad_norm": 0.7806313323142715, "learning_rate": 1.8922874883329888e-07, "loss": 0.0965, "step": 31357 }, { "epoch": 0.9148141665208005, "grad_norm": 0.6557312970640437, "learning_rate": 1.8910002944445448e-07, "loss": 0.1062, "step": 31358 }, { "epoch": 0.914843339751444, "grad_norm": 0.6793510244450381, "learning_rate": 1.8897135300617708e-07, "loss": 0.1101, "step": 31359 }, { "epoch": 0.9148725129820876, "grad_norm": 0.9202773589720421, "learning_rate": 1.888427195196152e-07, "loss": 0.1169, "step": 31360 }, { "epoch": 0.9149016862127312, "grad_norm": 0.6585451880768798, "learning_rate": 1.8871412898591678e-07, "loss": 0.1034, "step": 31361 }, { "epoch": 0.9149308594433747, "grad_norm": 0.947702899641979, "learning_rate": 1.8858558140622928e-07, "loss": 0.1189, "step": 31362 }, { "epoch": 0.9149600326740184, "grad_norm": 0.7113266911015266, "learning_rate": 1.8845707678170232e-07, "loss": 0.0929, "step": 31363 }, { "epoch": 0.9149892059046619, "grad_norm": 0.7883772252736441, "learning_rate": 1.883286151134811e-07, "loss": 0.1026, "step": 31364 }, { "epoch": 0.9150183791353055, "grad_norm": 0.6954327366328499, "learning_rate": 1.8820019640271414e-07, "loss": 0.1091, "step": 31365 }, { "epoch": 0.915047552365949, "grad_norm": 0.8511500789027039, "learning_rate": 1.880718206505472e-07, "loss": 0.1197, "step": 31366 }, { "epoch": 0.9150767255965926, "grad_norm": 1.0019832829025392, "learning_rate": 1.8794348785812545e-07, "loss": 0.1044, "step": 31367 }, { "epoch": 0.9151058988272361, "grad_norm": 0.790297852887975, "learning_rate": 1.8781519802659577e-07, "loss": 0.1213, "step": 31368 }, { "epoch": 0.9151350720578797, "grad_norm": 0.6716434249793395, "learning_rate": 1.876869511571039e-07, "loss": 0.1045, "step": 31369 }, { "epoch": 0.9151642452885232, "grad_norm": 0.922704496735113, "learning_rate": 1.8755874725079394e-07, "loss": 0.1276, "step": 31370 }, { "epoch": 0.9151934185191668, "grad_norm": 0.7622788651070335, "learning_rate": 1.8743058630880993e-07, "loss": 0.0853, "step": 31371 }, { "epoch": 0.9152225917498104, "grad_norm": 0.8387169493634814, "learning_rate": 1.8730246833229772e-07, "loss": 0.1099, "step": 31372 }, { "epoch": 0.9152517649804539, "grad_norm": 0.6763488529931732, "learning_rate": 1.8717439332240017e-07, "loss": 0.111, "step": 31373 }, { "epoch": 0.9152809382110975, "grad_norm": 0.8951673166253913, "learning_rate": 1.8704636128025978e-07, "loss": 0.1099, "step": 31374 }, { "epoch": 0.915310111441741, "grad_norm": 0.8297555505125109, "learning_rate": 1.8691837220702113e-07, "loss": 0.1041, "step": 31375 }, { "epoch": 0.9153392846723846, "grad_norm": 0.9665615025615534, "learning_rate": 1.8679042610382613e-07, "loss": 0.1201, "step": 31376 }, { "epoch": 0.9153684579030282, "grad_norm": 0.9289330274306193, "learning_rate": 1.8666252297181776e-07, "loss": 0.1184, "step": 31377 }, { "epoch": 0.9153976311336718, "grad_norm": 1.014049638060475, "learning_rate": 1.865346628121367e-07, "loss": 0.1215, "step": 31378 }, { "epoch": 0.9154268043643153, "grad_norm": 0.9387538042209965, "learning_rate": 1.8640684562592548e-07, "loss": 0.1107, "step": 31379 }, { "epoch": 0.9154559775949589, "grad_norm": 0.7807314757172528, "learning_rate": 1.8627907141432422e-07, "loss": 0.143, "step": 31380 }, { "epoch": 0.9154851508256024, "grad_norm": 0.8032832425950431, "learning_rate": 1.8615134017847426e-07, "loss": 0.1143, "step": 31381 }, { "epoch": 0.915514324056246, "grad_norm": 0.9384815469369953, "learning_rate": 1.8602365191951687e-07, "loss": 0.1098, "step": 31382 }, { "epoch": 0.9155434972868896, "grad_norm": 1.1222528703641497, "learning_rate": 1.858960066385912e-07, "loss": 0.0858, "step": 31383 }, { "epoch": 0.9155726705175331, "grad_norm": 0.7615160881334084, "learning_rate": 1.8576840433683574e-07, "loss": 0.1175, "step": 31384 }, { "epoch": 0.9156018437481767, "grad_norm": 1.0065607394870228, "learning_rate": 1.8564084501539181e-07, "loss": 0.0987, "step": 31385 }, { "epoch": 0.9156310169788202, "grad_norm": 0.9255308678433942, "learning_rate": 1.8551332867539572e-07, "loss": 0.0969, "step": 31386 }, { "epoch": 0.9156601902094638, "grad_norm": 0.930750996718049, "learning_rate": 1.8538585531798881e-07, "loss": 0.1391, "step": 31387 }, { "epoch": 0.9156893634401073, "grad_norm": 0.8228319762684867, "learning_rate": 1.852584249443068e-07, "loss": 0.1052, "step": 31388 }, { "epoch": 0.9157185366707509, "grad_norm": 0.8235112268013984, "learning_rate": 1.8513103755548822e-07, "loss": 0.0928, "step": 31389 }, { "epoch": 0.9157477099013945, "grad_norm": 0.7145745651504304, "learning_rate": 1.8500369315267108e-07, "loss": 0.0915, "step": 31390 }, { "epoch": 0.9157768831320381, "grad_norm": 0.7421971743172168, "learning_rate": 1.8487639173699057e-07, "loss": 0.1101, "step": 31391 }, { "epoch": 0.9158060563626816, "grad_norm": 0.9867879653682757, "learning_rate": 1.847491333095841e-07, "loss": 0.1162, "step": 31392 }, { "epoch": 0.9158352295933252, "grad_norm": 0.843077088935799, "learning_rate": 1.8462191787158855e-07, "loss": 0.0963, "step": 31393 }, { "epoch": 0.9158644028239687, "grad_norm": 0.7220111672319596, "learning_rate": 1.8449474542413858e-07, "loss": 0.1055, "step": 31394 }, { "epoch": 0.9158935760546123, "grad_norm": 0.8945048968438312, "learning_rate": 1.843676159683705e-07, "loss": 0.1162, "step": 31395 }, { "epoch": 0.9159227492852559, "grad_norm": 0.8184157935974352, "learning_rate": 1.8424052950541892e-07, "loss": 0.1001, "step": 31396 }, { "epoch": 0.9159519225158994, "grad_norm": 0.9280777362350853, "learning_rate": 1.8411348603641743e-07, "loss": 0.1094, "step": 31397 }, { "epoch": 0.915981095746543, "grad_norm": 0.7392705056496901, "learning_rate": 1.8398648556250122e-07, "loss": 0.1035, "step": 31398 }, { "epoch": 0.9160102689771865, "grad_norm": 0.8085351305562487, "learning_rate": 1.8385952808480434e-07, "loss": 0.1192, "step": 31399 }, { "epoch": 0.9160394422078301, "grad_norm": 0.927287870661808, "learning_rate": 1.8373261360445983e-07, "loss": 0.1095, "step": 31400 }, { "epoch": 0.9160686154384736, "grad_norm": 0.8250135573400705, "learning_rate": 1.8360574212260063e-07, "loss": 0.121, "step": 31401 }, { "epoch": 0.9160977886691172, "grad_norm": 0.8695358461892482, "learning_rate": 1.8347891364035974e-07, "loss": 0.1178, "step": 31402 }, { "epoch": 0.9161269618997607, "grad_norm": 0.9794220199794262, "learning_rate": 1.833521281588696e-07, "loss": 0.1341, "step": 31403 }, { "epoch": 0.9161561351304044, "grad_norm": 0.9536576826698145, "learning_rate": 1.8322538567926152e-07, "loss": 0.1042, "step": 31404 }, { "epoch": 0.916185308361048, "grad_norm": 0.773398645910214, "learning_rate": 1.830986862026668e-07, "loss": 0.1068, "step": 31405 }, { "epoch": 0.9162144815916915, "grad_norm": 0.834665420479556, "learning_rate": 1.8297202973021787e-07, "loss": 0.1204, "step": 31406 }, { "epoch": 0.916243654822335, "grad_norm": 1.12506311343205, "learning_rate": 1.8284541626304496e-07, "loss": 0.0881, "step": 31407 }, { "epoch": 0.9162728280529786, "grad_norm": 0.7596616311493886, "learning_rate": 1.8271884580227716e-07, "loss": 0.1263, "step": 31408 }, { "epoch": 0.9163020012836222, "grad_norm": 0.7707059374752233, "learning_rate": 1.8259231834904689e-07, "loss": 0.1047, "step": 31409 }, { "epoch": 0.9163311745142657, "grad_norm": 0.7854974420155609, "learning_rate": 1.8246583390448102e-07, "loss": 0.1105, "step": 31410 }, { "epoch": 0.9163603477449093, "grad_norm": 0.8750514815335055, "learning_rate": 1.823393924697109e-07, "loss": 0.0884, "step": 31411 }, { "epoch": 0.9163895209755528, "grad_norm": 0.947681277184962, "learning_rate": 1.8221299404586445e-07, "loss": 0.1129, "step": 31412 }, { "epoch": 0.9164186942061964, "grad_norm": 0.9993209605677023, "learning_rate": 1.8208663863407083e-07, "loss": 0.121, "step": 31413 }, { "epoch": 0.9164478674368399, "grad_norm": 0.8050607414401262, "learning_rate": 1.819603262354569e-07, "loss": 0.0999, "step": 31414 }, { "epoch": 0.9164770406674835, "grad_norm": 0.8008160192839413, "learning_rate": 1.818340568511512e-07, "loss": 0.1305, "step": 31415 }, { "epoch": 0.916506213898127, "grad_norm": 0.875097615447291, "learning_rate": 1.8170783048228057e-07, "loss": 0.1149, "step": 31416 }, { "epoch": 0.9165353871287707, "grad_norm": 0.8675509678375348, "learning_rate": 1.8158164712997306e-07, "loss": 0.1067, "step": 31417 }, { "epoch": 0.9165645603594142, "grad_norm": 0.7910112209571196, "learning_rate": 1.8145550679535329e-07, "loss": 0.1177, "step": 31418 }, { "epoch": 0.9165937335900578, "grad_norm": 0.8016354528718228, "learning_rate": 1.8132940947954924e-07, "loss": 0.1315, "step": 31419 }, { "epoch": 0.9166229068207014, "grad_norm": 0.981834198358148, "learning_rate": 1.8120335518368614e-07, "loss": 0.1057, "step": 31420 }, { "epoch": 0.9166520800513449, "grad_norm": 0.823750186569676, "learning_rate": 1.8107734390888809e-07, "loss": 0.1175, "step": 31421 }, { "epoch": 0.9166812532819885, "grad_norm": 0.9265404485278054, "learning_rate": 1.809513756562814e-07, "loss": 0.1117, "step": 31422 }, { "epoch": 0.916710426512632, "grad_norm": 0.8217818917326766, "learning_rate": 1.808254504269913e-07, "loss": 0.0889, "step": 31423 }, { "epoch": 0.9167395997432756, "grad_norm": 0.7781014107621114, "learning_rate": 1.8069956822214018e-07, "loss": 0.108, "step": 31424 }, { "epoch": 0.9167687729739191, "grad_norm": 0.8778356214948702, "learning_rate": 1.805737290428533e-07, "loss": 0.1091, "step": 31425 }, { "epoch": 0.9167979462045627, "grad_norm": 0.6689752395268365, "learning_rate": 1.804479328902542e-07, "loss": 0.1044, "step": 31426 }, { "epoch": 0.9168271194352062, "grad_norm": 0.9883087329036726, "learning_rate": 1.8032217976546418e-07, "loss": 0.1035, "step": 31427 }, { "epoch": 0.9168562926658498, "grad_norm": 0.928078768675476, "learning_rate": 1.801964696696079e-07, "loss": 0.1163, "step": 31428 }, { "epoch": 0.9168854658964933, "grad_norm": 0.8242058167547271, "learning_rate": 1.8007080260380727e-07, "loss": 0.1364, "step": 31429 }, { "epoch": 0.9169146391271369, "grad_norm": 0.9462811646216613, "learning_rate": 1.7994517856918359e-07, "loss": 0.1061, "step": 31430 }, { "epoch": 0.9169438123577806, "grad_norm": 0.721219548430664, "learning_rate": 1.7981959756685875e-07, "loss": 0.1091, "step": 31431 }, { "epoch": 0.9169729855884241, "grad_norm": 0.848426286099118, "learning_rate": 1.7969405959795404e-07, "loss": 0.1011, "step": 31432 }, { "epoch": 0.9170021588190677, "grad_norm": 0.7847501625730108, "learning_rate": 1.7956856466358974e-07, "loss": 0.1313, "step": 31433 }, { "epoch": 0.9170313320497112, "grad_norm": 0.8439447752271549, "learning_rate": 1.7944311276488656e-07, "loss": 0.1344, "step": 31434 }, { "epoch": 0.9170605052803548, "grad_norm": 0.886942847069556, "learning_rate": 1.7931770390296423e-07, "loss": 0.1318, "step": 31435 }, { "epoch": 0.9170896785109983, "grad_norm": 0.9204156745615503, "learning_rate": 1.7919233807894343e-07, "loss": 0.0984, "step": 31436 }, { "epoch": 0.9171188517416419, "grad_norm": 0.9114936709997368, "learning_rate": 1.7906701529394277e-07, "loss": 0.1084, "step": 31437 }, { "epoch": 0.9171480249722854, "grad_norm": 1.0084742204612493, "learning_rate": 1.7894173554907967e-07, "loss": 0.1289, "step": 31438 }, { "epoch": 0.917177198202929, "grad_norm": 1.079762771034041, "learning_rate": 1.7881649884547492e-07, "loss": 0.0979, "step": 31439 }, { "epoch": 0.9172063714335725, "grad_norm": 0.8832897504243681, "learning_rate": 1.7869130518424538e-07, "loss": 0.1246, "step": 31440 }, { "epoch": 0.9172355446642161, "grad_norm": 0.7904649565451088, "learning_rate": 1.785661545665085e-07, "loss": 0.1275, "step": 31441 }, { "epoch": 0.9172647178948596, "grad_norm": 0.9835395240531951, "learning_rate": 1.7844104699338228e-07, "loss": 0.1032, "step": 31442 }, { "epoch": 0.9172938911255032, "grad_norm": 0.7235054219206365, "learning_rate": 1.783159824659836e-07, "loss": 0.0801, "step": 31443 }, { "epoch": 0.9173230643561467, "grad_norm": 0.829841051846416, "learning_rate": 1.7819096098542876e-07, "loss": 0.1281, "step": 31444 }, { "epoch": 0.9173522375867904, "grad_norm": 0.9330809494992617, "learning_rate": 1.7806598255283415e-07, "loss": 0.1278, "step": 31445 }, { "epoch": 0.917381410817434, "grad_norm": 0.7713121767513006, "learning_rate": 1.7794104716931437e-07, "loss": 0.0967, "step": 31446 }, { "epoch": 0.9174105840480775, "grad_norm": 0.7427712675609687, "learning_rate": 1.778161548359869e-07, "loss": 0.0993, "step": 31447 }, { "epoch": 0.9174397572787211, "grad_norm": 0.7985539646604289, "learning_rate": 1.7769130555396476e-07, "loss": 0.1081, "step": 31448 }, { "epoch": 0.9174689305093646, "grad_norm": 0.745822331716892, "learning_rate": 1.775664993243642e-07, "loss": 0.1267, "step": 31449 }, { "epoch": 0.9174981037400082, "grad_norm": 0.6665221359510427, "learning_rate": 1.7744173614829885e-07, "loss": 0.1091, "step": 31450 }, { "epoch": 0.9175272769706517, "grad_norm": 0.7913045617358165, "learning_rate": 1.7731701602688168e-07, "loss": 0.1176, "step": 31451 }, { "epoch": 0.9175564502012953, "grad_norm": 0.8724016247432931, "learning_rate": 1.7719233896122733e-07, "loss": 0.1168, "step": 31452 }, { "epoch": 0.9175856234319388, "grad_norm": 0.8798424447611202, "learning_rate": 1.7706770495244884e-07, "loss": 0.1, "step": 31453 }, { "epoch": 0.9176147966625824, "grad_norm": 0.9129538205177682, "learning_rate": 1.7694311400165753e-07, "loss": 0.1011, "step": 31454 }, { "epoch": 0.9176439698932259, "grad_norm": 0.7626105906124192, "learning_rate": 1.768185661099675e-07, "loss": 0.1063, "step": 31455 }, { "epoch": 0.9176731431238695, "grad_norm": 0.9591430922037872, "learning_rate": 1.766940612784901e-07, "loss": 0.1225, "step": 31456 }, { "epoch": 0.917702316354513, "grad_norm": 0.8823448331257628, "learning_rate": 1.7656959950833608e-07, "loss": 0.0987, "step": 31457 }, { "epoch": 0.9177314895851567, "grad_norm": 0.6726474082165065, "learning_rate": 1.7644518080061735e-07, "loss": 0.1016, "step": 31458 }, { "epoch": 0.9177606628158003, "grad_norm": 0.9953081672778715, "learning_rate": 1.7632080515644523e-07, "loss": 0.1399, "step": 31459 }, { "epoch": 0.9177898360464438, "grad_norm": 1.0520249333383132, "learning_rate": 1.761964725769294e-07, "loss": 0.0915, "step": 31460 }, { "epoch": 0.9178190092770874, "grad_norm": 0.7312001105015509, "learning_rate": 1.7607218306317896e-07, "loss": 0.09, "step": 31461 }, { "epoch": 0.9178481825077309, "grad_norm": 1.160657903192297, "learning_rate": 1.7594793661630526e-07, "loss": 0.1025, "step": 31462 }, { "epoch": 0.9178773557383745, "grad_norm": 0.8381060018723425, "learning_rate": 1.7582373323741686e-07, "loss": 0.1258, "step": 31463 }, { "epoch": 0.917906528969018, "grad_norm": 0.8742176123686887, "learning_rate": 1.756995729276223e-07, "loss": 0.118, "step": 31464 }, { "epoch": 0.9179357021996616, "grad_norm": 0.6928916489087649, "learning_rate": 1.7557545568803014e-07, "loss": 0.1119, "step": 31465 }, { "epoch": 0.9179648754303051, "grad_norm": 0.7375050529998568, "learning_rate": 1.7545138151974895e-07, "loss": 0.1241, "step": 31466 }, { "epoch": 0.9179940486609487, "grad_norm": 1.665030995723457, "learning_rate": 1.7532735042388617e-07, "loss": 0.1073, "step": 31467 }, { "epoch": 0.9180232218915922, "grad_norm": 0.8717161143773435, "learning_rate": 1.7520336240154867e-07, "loss": 0.095, "step": 31468 }, { "epoch": 0.9180523951222358, "grad_norm": 0.870272092108847, "learning_rate": 1.7507941745384394e-07, "loss": 0.1024, "step": 31469 }, { "epoch": 0.9180815683528794, "grad_norm": 0.940668690315811, "learning_rate": 1.749555155818783e-07, "loss": 0.1045, "step": 31470 }, { "epoch": 0.9181107415835229, "grad_norm": 0.6265336298709263, "learning_rate": 1.748316567867575e-07, "loss": 0.0996, "step": 31471 }, { "epoch": 0.9181399148141666, "grad_norm": 0.8199566455457374, "learning_rate": 1.7470784106958903e-07, "loss": 0.1025, "step": 31472 }, { "epoch": 0.9181690880448101, "grad_norm": 0.8002080741057659, "learning_rate": 1.7458406843147647e-07, "loss": 0.134, "step": 31473 }, { "epoch": 0.9181982612754537, "grad_norm": 0.8333135633377379, "learning_rate": 1.7446033887352498e-07, "loss": 0.121, "step": 31474 }, { "epoch": 0.9182274345060972, "grad_norm": 0.8724163486160376, "learning_rate": 1.7433665239684038e-07, "loss": 0.1135, "step": 31475 }, { "epoch": 0.9182566077367408, "grad_norm": 0.7607951231388181, "learning_rate": 1.742130090025257e-07, "loss": 0.1351, "step": 31476 }, { "epoch": 0.9182857809673843, "grad_norm": 0.866945882665733, "learning_rate": 1.7408940869168556e-07, "loss": 0.1216, "step": 31477 }, { "epoch": 0.9183149541980279, "grad_norm": 0.901991959683693, "learning_rate": 1.7396585146542245e-07, "loss": 0.1126, "step": 31478 }, { "epoch": 0.9183441274286714, "grad_norm": 0.7517972495851978, "learning_rate": 1.73842337324841e-07, "loss": 0.1195, "step": 31479 }, { "epoch": 0.918373300659315, "grad_norm": 0.9077314335051077, "learning_rate": 1.7371886627104317e-07, "loss": 0.1282, "step": 31480 }, { "epoch": 0.9184024738899585, "grad_norm": 0.719193242973939, "learning_rate": 1.7359543830513027e-07, "loss": 0.1065, "step": 31481 }, { "epoch": 0.9184316471206021, "grad_norm": 0.9507072030075335, "learning_rate": 1.734720534282053e-07, "loss": 0.1167, "step": 31482 }, { "epoch": 0.9184608203512457, "grad_norm": 0.8482801160596916, "learning_rate": 1.7334871164137013e-07, "loss": 0.1118, "step": 31483 }, { "epoch": 0.9184899935818892, "grad_norm": 0.7959575808115095, "learning_rate": 1.7322541294572505e-07, "loss": 0.1274, "step": 31484 }, { "epoch": 0.9185191668125329, "grad_norm": 0.8047235547623197, "learning_rate": 1.731021573423719e-07, "loss": 0.093, "step": 31485 }, { "epoch": 0.9185483400431764, "grad_norm": 0.7961735318616792, "learning_rate": 1.7297894483240984e-07, "loss": 0.1374, "step": 31486 }, { "epoch": 0.91857751327382, "grad_norm": 0.6548827610332445, "learning_rate": 1.7285577541693966e-07, "loss": 0.118, "step": 31487 }, { "epoch": 0.9186066865044635, "grad_norm": 0.8131044492909674, "learning_rate": 1.7273264909706043e-07, "loss": 0.117, "step": 31488 }, { "epoch": 0.9186358597351071, "grad_norm": 0.7032682394703215, "learning_rate": 1.7260956587387245e-07, "loss": 0.1158, "step": 31489 }, { "epoch": 0.9186650329657506, "grad_norm": 0.8437393397753546, "learning_rate": 1.7248652574847367e-07, "loss": 0.1176, "step": 31490 }, { "epoch": 0.9186942061963942, "grad_norm": 0.8217591225454703, "learning_rate": 1.7236352872196216e-07, "loss": 0.1155, "step": 31491 }, { "epoch": 0.9187233794270377, "grad_norm": 0.7316706954362365, "learning_rate": 1.72240574795437e-07, "loss": 0.1017, "step": 31492 }, { "epoch": 0.9187525526576813, "grad_norm": 0.7630958289001806, "learning_rate": 1.721176639699962e-07, "loss": 0.1119, "step": 31493 }, { "epoch": 0.9187817258883249, "grad_norm": 0.9153354818383868, "learning_rate": 1.7199479624673498e-07, "loss": 0.1219, "step": 31494 }, { "epoch": 0.9188108991189684, "grad_norm": 0.898372333136174, "learning_rate": 1.7187197162675252e-07, "loss": 0.1242, "step": 31495 }, { "epoch": 0.918840072349612, "grad_norm": 0.9563962679915206, "learning_rate": 1.7174919011114455e-07, "loss": 0.131, "step": 31496 }, { "epoch": 0.9188692455802555, "grad_norm": 0.9190040010802156, "learning_rate": 1.7162645170100746e-07, "loss": 0.1096, "step": 31497 }, { "epoch": 0.9188984188108991, "grad_norm": 0.9572626954655384, "learning_rate": 1.715037563974359e-07, "loss": 0.1085, "step": 31498 }, { "epoch": 0.9189275920415427, "grad_norm": 0.839287077203837, "learning_rate": 1.7138110420152676e-07, "loss": 0.106, "step": 31499 }, { "epoch": 0.9189567652721863, "grad_norm": 0.827734241275159, "learning_rate": 1.712584951143742e-07, "loss": 0.1079, "step": 31500 }, { "epoch": 0.9189859385028298, "grad_norm": 0.8295905862872595, "learning_rate": 1.711359291370729e-07, "loss": 0.1103, "step": 31501 }, { "epoch": 0.9190151117334734, "grad_norm": 0.9027734528844755, "learning_rate": 1.7101340627071804e-07, "loss": 0.0934, "step": 31502 }, { "epoch": 0.9190442849641169, "grad_norm": 0.8794472254828379, "learning_rate": 1.708909265164027e-07, "loss": 0.0978, "step": 31503 }, { "epoch": 0.9190734581947605, "grad_norm": 0.7755228381034948, "learning_rate": 1.7076848987521933e-07, "loss": 0.1121, "step": 31504 }, { "epoch": 0.919102631425404, "grad_norm": 0.8267621166910578, "learning_rate": 1.706460963482631e-07, "loss": 0.1178, "step": 31505 }, { "epoch": 0.9191318046560476, "grad_norm": 0.9177075774316056, "learning_rate": 1.7052374593662492e-07, "loss": 0.1037, "step": 31506 }, { "epoch": 0.9191609778866912, "grad_norm": 1.0129853969583604, "learning_rate": 1.7040143864139825e-07, "loss": 0.1022, "step": 31507 }, { "epoch": 0.9191901511173347, "grad_norm": 0.9014312892017495, "learning_rate": 1.7027917446367447e-07, "loss": 0.1245, "step": 31508 }, { "epoch": 0.9192193243479783, "grad_norm": 0.9386499379140206, "learning_rate": 1.7015695340454552e-07, "loss": 0.0983, "step": 31509 }, { "epoch": 0.9192484975786218, "grad_norm": 1.12692336892604, "learning_rate": 1.7003477546510217e-07, "loss": 0.1137, "step": 31510 }, { "epoch": 0.9192776708092654, "grad_norm": 0.8824618055641689, "learning_rate": 1.699126406464352e-07, "loss": 0.1049, "step": 31511 }, { "epoch": 0.919306844039909, "grad_norm": 0.7877852882881141, "learning_rate": 1.6979054894963486e-07, "loss": 0.1189, "step": 31512 }, { "epoch": 0.9193360172705526, "grad_norm": 1.2274157217371404, "learning_rate": 1.6966850037579196e-07, "loss": 0.1175, "step": 31513 }, { "epoch": 0.9193651905011961, "grad_norm": 0.8035846976351573, "learning_rate": 1.6954649492599507e-07, "loss": 0.1046, "step": 31514 }, { "epoch": 0.9193943637318397, "grad_norm": 0.7122975510823543, "learning_rate": 1.6942453260133497e-07, "loss": 0.1246, "step": 31515 }, { "epoch": 0.9194235369624832, "grad_norm": 0.8803363288778558, "learning_rate": 1.693026134028991e-07, "loss": 0.1143, "step": 31516 }, { "epoch": 0.9194527101931268, "grad_norm": 0.8039546970931579, "learning_rate": 1.6918073733177554e-07, "loss": 0.0965, "step": 31517 }, { "epoch": 0.9194818834237704, "grad_norm": 0.8576721603822425, "learning_rate": 1.6905890438905338e-07, "loss": 0.1258, "step": 31518 }, { "epoch": 0.9195110566544139, "grad_norm": 0.9595800848719139, "learning_rate": 1.6893711457582064e-07, "loss": 0.1138, "step": 31519 }, { "epoch": 0.9195402298850575, "grad_norm": 0.7927836583174125, "learning_rate": 1.6881536789316422e-07, "loss": 0.1252, "step": 31520 }, { "epoch": 0.919569403115701, "grad_norm": 0.7674785520740165, "learning_rate": 1.6869366434216993e-07, "loss": 0.0888, "step": 31521 }, { "epoch": 0.9195985763463446, "grad_norm": 0.8523844486809289, "learning_rate": 1.6857200392392635e-07, "loss": 0.1336, "step": 31522 }, { "epoch": 0.9196277495769881, "grad_norm": 0.8203410314200877, "learning_rate": 1.684503866395182e-07, "loss": 0.104, "step": 31523 }, { "epoch": 0.9196569228076317, "grad_norm": 1.0542733564281603, "learning_rate": 1.683288124900312e-07, "loss": 0.1096, "step": 31524 }, { "epoch": 0.9196860960382752, "grad_norm": 0.8929220501399922, "learning_rate": 1.682072814765512e-07, "loss": 0.1203, "step": 31525 }, { "epoch": 0.9197152692689189, "grad_norm": 0.9651186180879701, "learning_rate": 1.6808579360016343e-07, "loss": 0.1158, "step": 31526 }, { "epoch": 0.9197444424995624, "grad_norm": 0.9016379181013806, "learning_rate": 1.6796434886195256e-07, "loss": 0.1308, "step": 31527 }, { "epoch": 0.919773615730206, "grad_norm": 0.8093931544203599, "learning_rate": 1.6784294726300166e-07, "loss": 0.1066, "step": 31528 }, { "epoch": 0.9198027889608495, "grad_norm": 0.8010758614571243, "learning_rate": 1.6772158880439594e-07, "loss": 0.0962, "step": 31529 }, { "epoch": 0.9198319621914931, "grad_norm": 0.8112238434650944, "learning_rate": 1.6760027348721785e-07, "loss": 0.1079, "step": 31530 }, { "epoch": 0.9198611354221367, "grad_norm": 0.8691568220414676, "learning_rate": 1.6747900131255102e-07, "loss": 0.1323, "step": 31531 }, { "epoch": 0.9198903086527802, "grad_norm": 0.7292816387389669, "learning_rate": 1.6735777228147842e-07, "loss": 0.1101, "step": 31532 }, { "epoch": 0.9199194818834238, "grad_norm": 0.7633244929188836, "learning_rate": 1.6723658639508257e-07, "loss": 0.1122, "step": 31533 }, { "epoch": 0.9199486551140673, "grad_norm": 1.0608089952896051, "learning_rate": 1.6711544365444367e-07, "loss": 0.1312, "step": 31534 }, { "epoch": 0.9199778283447109, "grad_norm": 0.8281099543989686, "learning_rate": 1.669943440606453e-07, "loss": 0.1193, "step": 31535 }, { "epoch": 0.9200070015753544, "grad_norm": 0.7216056971282659, "learning_rate": 1.668732876147666e-07, "loss": 0.1164, "step": 31536 }, { "epoch": 0.920036174805998, "grad_norm": 0.7507557229293694, "learning_rate": 1.6675227431789009e-07, "loss": 0.1079, "step": 31537 }, { "epoch": 0.9200653480366415, "grad_norm": 0.8338918816597148, "learning_rate": 1.666313041710954e-07, "loss": 0.1182, "step": 31538 }, { "epoch": 0.9200945212672852, "grad_norm": 0.7484368235347966, "learning_rate": 1.6651037717546281e-07, "loss": 0.1114, "step": 31539 }, { "epoch": 0.9201236944979287, "grad_norm": 0.95879932611911, "learning_rate": 1.66389493332072e-07, "loss": 0.0989, "step": 31540 }, { "epoch": 0.9201528677285723, "grad_norm": 0.8048620998555469, "learning_rate": 1.6626865264200097e-07, "loss": 0.0904, "step": 31541 }, { "epoch": 0.9201820409592159, "grad_norm": 0.8090812381727864, "learning_rate": 1.6614785510633002e-07, "loss": 0.1112, "step": 31542 }, { "epoch": 0.9202112141898594, "grad_norm": 0.6789774324548513, "learning_rate": 1.6602710072613715e-07, "loss": 0.1107, "step": 31543 }, { "epoch": 0.920240387420503, "grad_norm": 1.1174933592605976, "learning_rate": 1.6590638950249982e-07, "loss": 0.1067, "step": 31544 }, { "epoch": 0.9202695606511465, "grad_norm": 0.7905427998092589, "learning_rate": 1.657857214364972e-07, "loss": 0.1029, "step": 31545 }, { "epoch": 0.9202987338817901, "grad_norm": 0.9453729966948536, "learning_rate": 1.656650965292056e-07, "loss": 0.1135, "step": 31546 }, { "epoch": 0.9203279071124336, "grad_norm": 0.8392599511973942, "learning_rate": 1.6554451478170085e-07, "loss": 0.1242, "step": 31547 }, { "epoch": 0.9203570803430772, "grad_norm": 0.5591915862294311, "learning_rate": 1.65423976195061e-07, "loss": 0.0989, "step": 31548 }, { "epoch": 0.9203862535737207, "grad_norm": 0.9739971293006879, "learning_rate": 1.653034807703624e-07, "loss": 0.1131, "step": 31549 }, { "epoch": 0.9204154268043643, "grad_norm": 0.8296837190875574, "learning_rate": 1.651830285086803e-07, "loss": 0.0911, "step": 31550 }, { "epoch": 0.9204446000350078, "grad_norm": 0.7338766359170078, "learning_rate": 1.650626194110888e-07, "loss": 0.1122, "step": 31551 }, { "epoch": 0.9204737732656514, "grad_norm": 0.9081774787175226, "learning_rate": 1.6494225347866543e-07, "loss": 0.1236, "step": 31552 }, { "epoch": 0.920502946496295, "grad_norm": 1.1705428651744463, "learning_rate": 1.6482193071248264e-07, "loss": 0.1046, "step": 31553 }, { "epoch": 0.9205321197269386, "grad_norm": 0.7857833395439243, "learning_rate": 1.6470165111361514e-07, "loss": 0.1125, "step": 31554 }, { "epoch": 0.9205612929575822, "grad_norm": 0.7386346121884209, "learning_rate": 1.6458141468313705e-07, "loss": 0.1016, "step": 31555 }, { "epoch": 0.9205904661882257, "grad_norm": 0.9834408545286342, "learning_rate": 1.644612214221225e-07, "loss": 0.097, "step": 31556 }, { "epoch": 0.9206196394188693, "grad_norm": 0.8244546780176799, "learning_rate": 1.6434107133164402e-07, "loss": 0.0978, "step": 31557 }, { "epoch": 0.9206488126495128, "grad_norm": 0.7044948386735506, "learning_rate": 1.6422096441277292e-07, "loss": 0.1168, "step": 31558 }, { "epoch": 0.9206779858801564, "grad_norm": 0.820548093941712, "learning_rate": 1.641009006665828e-07, "loss": 0.1092, "step": 31559 }, { "epoch": 0.9207071591107999, "grad_norm": 0.963253671691206, "learning_rate": 1.6398088009414616e-07, "loss": 0.1206, "step": 31560 }, { "epoch": 0.9207363323414435, "grad_norm": 0.7454790088032144, "learning_rate": 1.6386090269653322e-07, "loss": 0.1006, "step": 31561 }, { "epoch": 0.920765505572087, "grad_norm": 0.8767584099696616, "learning_rate": 1.637409684748159e-07, "loss": 0.1263, "step": 31562 }, { "epoch": 0.9207946788027306, "grad_norm": 0.808533787829623, "learning_rate": 1.6362107743006507e-07, "loss": 0.1131, "step": 31563 }, { "epoch": 0.9208238520333741, "grad_norm": 0.8194379113041063, "learning_rate": 1.6350122956335035e-07, "loss": 0.1065, "step": 31564 }, { "epoch": 0.9208530252640177, "grad_norm": 0.6831599856540861, "learning_rate": 1.633814248757415e-07, "loss": 0.1133, "step": 31565 }, { "epoch": 0.9208821984946614, "grad_norm": 0.784619164142305, "learning_rate": 1.6326166336830985e-07, "loss": 0.1008, "step": 31566 }, { "epoch": 0.9209113717253049, "grad_norm": 0.8300106688756127, "learning_rate": 1.6314194504212287e-07, "loss": 0.1122, "step": 31567 }, { "epoch": 0.9209405449559485, "grad_norm": 0.8176939053385948, "learning_rate": 1.6302226989824976e-07, "loss": 0.1037, "step": 31568 }, { "epoch": 0.920969718186592, "grad_norm": 0.6868479781352995, "learning_rate": 1.6290263793775962e-07, "loss": 0.1127, "step": 31569 }, { "epoch": 0.9209988914172356, "grad_norm": 0.7253165730691489, "learning_rate": 1.6278304916171995e-07, "loss": 0.1474, "step": 31570 }, { "epoch": 0.9210280646478791, "grad_norm": 0.7775839000205057, "learning_rate": 1.6266350357119765e-07, "loss": 0.1039, "step": 31571 }, { "epoch": 0.9210572378785227, "grad_norm": 0.6301357172828128, "learning_rate": 1.6254400116726133e-07, "loss": 0.0894, "step": 31572 }, { "epoch": 0.9210864111091662, "grad_norm": 0.8042375168854584, "learning_rate": 1.624245419509779e-07, "loss": 0.109, "step": 31573 }, { "epoch": 0.9211155843398098, "grad_norm": 0.8331649296079149, "learning_rate": 1.6230512592341263e-07, "loss": 0.1196, "step": 31574 }, { "epoch": 0.9211447575704533, "grad_norm": 0.7302033742048548, "learning_rate": 1.62185753085633e-07, "loss": 0.1123, "step": 31575 }, { "epoch": 0.9211739308010969, "grad_norm": 0.9631957716768756, "learning_rate": 1.6206642343870427e-07, "loss": 0.1191, "step": 31576 }, { "epoch": 0.9212031040317404, "grad_norm": 0.9610146717478596, "learning_rate": 1.6194713698369057e-07, "loss": 0.1328, "step": 31577 }, { "epoch": 0.921232277262384, "grad_norm": 0.6558015522080165, "learning_rate": 1.618278937216583e-07, "loss": 0.1077, "step": 31578 }, { "epoch": 0.9212614504930275, "grad_norm": 0.7104860258779933, "learning_rate": 1.6170869365367158e-07, "loss": 0.1055, "step": 31579 }, { "epoch": 0.9212906237236712, "grad_norm": 0.8201677319994595, "learning_rate": 1.6158953678079515e-07, "loss": 0.12, "step": 31580 }, { "epoch": 0.9213197969543148, "grad_norm": 1.1372388083613736, "learning_rate": 1.61470423104092e-07, "loss": 0.1189, "step": 31581 }, { "epoch": 0.9213489701849583, "grad_norm": 2.693507197644251, "learning_rate": 1.6135135262462577e-07, "loss": 0.0907, "step": 31582 }, { "epoch": 0.9213781434156019, "grad_norm": 0.8207127203449859, "learning_rate": 1.6123232534345946e-07, "loss": 0.1374, "step": 31583 }, { "epoch": 0.9214073166462454, "grad_norm": 0.7659081986364706, "learning_rate": 1.6111334126165611e-07, "loss": 0.1311, "step": 31584 }, { "epoch": 0.921436489876889, "grad_norm": 0.8268252639756813, "learning_rate": 1.609944003802777e-07, "loss": 0.1046, "step": 31585 }, { "epoch": 0.9214656631075325, "grad_norm": 0.7053070577774613, "learning_rate": 1.608755027003861e-07, "loss": 0.098, "step": 31586 }, { "epoch": 0.9214948363381761, "grad_norm": 0.7269891485623712, "learning_rate": 1.607566482230427e-07, "loss": 0.104, "step": 31587 }, { "epoch": 0.9215240095688196, "grad_norm": 0.7715076818314276, "learning_rate": 1.606378369493089e-07, "loss": 0.1146, "step": 31588 }, { "epoch": 0.9215531827994632, "grad_norm": 0.9468994735051999, "learning_rate": 1.6051906888024494e-07, "loss": 0.13, "step": 31589 }, { "epoch": 0.9215823560301067, "grad_norm": 0.9145211725139617, "learning_rate": 1.6040034401691163e-07, "loss": 0.1309, "step": 31590 }, { "epoch": 0.9216115292607503, "grad_norm": 0.8534177807613806, "learning_rate": 1.6028166236036868e-07, "loss": 0.1193, "step": 31591 }, { "epoch": 0.9216407024913938, "grad_norm": 0.640869112211262, "learning_rate": 1.601630239116758e-07, "loss": 0.0996, "step": 31592 }, { "epoch": 0.9216698757220375, "grad_norm": 0.8611550427653011, "learning_rate": 1.6004442867189217e-07, "loss": 0.1234, "step": 31593 }, { "epoch": 0.9216990489526811, "grad_norm": 0.8012778753854138, "learning_rate": 1.5992587664207638e-07, "loss": 0.0917, "step": 31594 }, { "epoch": 0.9217282221833246, "grad_norm": 0.7906154408473884, "learning_rate": 1.5980736782328644e-07, "loss": 0.1189, "step": 31595 }, { "epoch": 0.9217573954139682, "grad_norm": 0.9313369846310038, "learning_rate": 1.5968890221658207e-07, "loss": 0.1253, "step": 31596 }, { "epoch": 0.9217865686446117, "grad_norm": 0.73495900928635, "learning_rate": 1.595704798230191e-07, "loss": 0.1123, "step": 31597 }, { "epoch": 0.9218157418752553, "grad_norm": 0.8927840892463856, "learning_rate": 1.5945210064365503e-07, "loss": 0.1566, "step": 31598 }, { "epoch": 0.9218449151058988, "grad_norm": 0.8984768986685986, "learning_rate": 1.593337646795473e-07, "loss": 0.1018, "step": 31599 }, { "epoch": 0.9218740883365424, "grad_norm": 0.8765152866815265, "learning_rate": 1.5921547193175292e-07, "loss": 0.0899, "step": 31600 }, { "epoch": 0.9219032615671859, "grad_norm": 0.7967989330865396, "learning_rate": 1.5909722240132542e-07, "loss": 0.1142, "step": 31601 }, { "epoch": 0.9219324347978295, "grad_norm": 0.7442665459955101, "learning_rate": 1.5897901608932342e-07, "loss": 0.1072, "step": 31602 }, { "epoch": 0.921961608028473, "grad_norm": 0.9868247398129755, "learning_rate": 1.5886085299680166e-07, "loss": 0.1185, "step": 31603 }, { "epoch": 0.9219907812591166, "grad_norm": 0.78649939067307, "learning_rate": 1.5874273312481368e-07, "loss": 0.0949, "step": 31604 }, { "epoch": 0.9220199544897602, "grad_norm": 1.1103592657883514, "learning_rate": 1.5862465647441537e-07, "loss": 0.127, "step": 31605 }, { "epoch": 0.9220491277204037, "grad_norm": 0.8698085582139836, "learning_rate": 1.585066230466603e-07, "loss": 0.1126, "step": 31606 }, { "epoch": 0.9220783009510474, "grad_norm": 0.8990790270924715, "learning_rate": 1.5838863284260208e-07, "loss": 0.1079, "step": 31607 }, { "epoch": 0.9221074741816909, "grad_norm": 0.8297752819122285, "learning_rate": 1.582706858632943e-07, "loss": 0.1154, "step": 31608 }, { "epoch": 0.9221366474123345, "grad_norm": 0.846981054375405, "learning_rate": 1.5815278210979056e-07, "loss": 0.1038, "step": 31609 }, { "epoch": 0.922165820642978, "grad_norm": 0.8276836928239499, "learning_rate": 1.5803492158314283e-07, "loss": 0.1, "step": 31610 }, { "epoch": 0.9221949938736216, "grad_norm": 0.798405376271041, "learning_rate": 1.57917104284403e-07, "loss": 0.1101, "step": 31611 }, { "epoch": 0.9222241671042651, "grad_norm": 0.7716831291840625, "learning_rate": 1.5779933021462357e-07, "loss": 0.0903, "step": 31612 }, { "epoch": 0.9222533403349087, "grad_norm": 0.7221244000074717, "learning_rate": 1.5768159937485538e-07, "loss": 0.0964, "step": 31613 }, { "epoch": 0.9222825135655522, "grad_norm": 0.6758694466963024, "learning_rate": 1.5756391176615092e-07, "loss": 0.1065, "step": 31614 }, { "epoch": 0.9223116867961958, "grad_norm": 0.7512265184986077, "learning_rate": 1.5744626738955883e-07, "loss": 0.0868, "step": 31615 }, { "epoch": 0.9223408600268393, "grad_norm": 0.8501602567042736, "learning_rate": 1.5732866624613152e-07, "loss": 0.1137, "step": 31616 }, { "epoch": 0.9223700332574829, "grad_norm": 0.7045020634778086, "learning_rate": 1.572111083369171e-07, "loss": 0.0969, "step": 31617 }, { "epoch": 0.9223992064881265, "grad_norm": 0.8468483635821392, "learning_rate": 1.5709359366296583e-07, "loss": 0.1157, "step": 31618 }, { "epoch": 0.92242837971877, "grad_norm": 0.8278444364184621, "learning_rate": 1.5697612222532687e-07, "loss": 0.0975, "step": 31619 }, { "epoch": 0.9224575529494137, "grad_norm": 0.7522274086515124, "learning_rate": 1.5685869402504938e-07, "loss": 0.1062, "step": 31620 }, { "epoch": 0.9224867261800572, "grad_norm": 1.0404808666507726, "learning_rate": 1.5674130906318085e-07, "loss": 0.0988, "step": 31621 }, { "epoch": 0.9225158994107008, "grad_norm": 0.9457242979288193, "learning_rate": 1.566239673407699e-07, "loss": 0.106, "step": 31622 }, { "epoch": 0.9225450726413443, "grad_norm": 0.7968748994921456, "learning_rate": 1.5650666885886457e-07, "loss": 0.1583, "step": 31623 }, { "epoch": 0.9225742458719879, "grad_norm": 0.9325826393941028, "learning_rate": 1.5638941361851069e-07, "loss": 0.1074, "step": 31624 }, { "epoch": 0.9226034191026314, "grad_norm": 1.063030955184054, "learning_rate": 1.5627220162075574e-07, "loss": 0.1351, "step": 31625 }, { "epoch": 0.922632592333275, "grad_norm": 1.026518357218933, "learning_rate": 1.5615503286664668e-07, "loss": 0.1059, "step": 31626 }, { "epoch": 0.9226617655639185, "grad_norm": 0.7980818277717988, "learning_rate": 1.5603790735722933e-07, "loss": 0.1176, "step": 31627 }, { "epoch": 0.9226909387945621, "grad_norm": 0.8576736422214318, "learning_rate": 1.5592082509354845e-07, "loss": 0.1167, "step": 31628 }, { "epoch": 0.9227201120252057, "grad_norm": 0.7265952864485458, "learning_rate": 1.5580378607665092e-07, "loss": 0.1081, "step": 31629 }, { "epoch": 0.9227492852558492, "grad_norm": 0.7084197499329727, "learning_rate": 1.5568679030758095e-07, "loss": 0.1181, "step": 31630 }, { "epoch": 0.9227784584864928, "grad_norm": 0.836058832165904, "learning_rate": 1.555698377873821e-07, "loss": 0.1036, "step": 31631 }, { "epoch": 0.9228076317171363, "grad_norm": 0.7518532359470982, "learning_rate": 1.5545292851709915e-07, "loss": 0.0979, "step": 31632 }, { "epoch": 0.9228368049477799, "grad_norm": 0.7655427662612161, "learning_rate": 1.5533606249777677e-07, "loss": 0.1162, "step": 31633 }, { "epoch": 0.9228659781784235, "grad_norm": 0.7174580099324258, "learning_rate": 1.5521923973045694e-07, "loss": 0.1197, "step": 31634 }, { "epoch": 0.9228951514090671, "grad_norm": 0.9234571164611187, "learning_rate": 1.5510246021618325e-07, "loss": 0.1072, "step": 31635 }, { "epoch": 0.9229243246397106, "grad_norm": 1.3094640955070806, "learning_rate": 1.5498572395599877e-07, "loss": 0.133, "step": 31636 }, { "epoch": 0.9229534978703542, "grad_norm": 0.7505909050299393, "learning_rate": 1.548690309509443e-07, "loss": 0.1269, "step": 31637 }, { "epoch": 0.9229826711009977, "grad_norm": 0.6915615289355302, "learning_rate": 1.5475238120206293e-07, "loss": 0.0924, "step": 31638 }, { "epoch": 0.9230118443316413, "grad_norm": 0.9557879590278532, "learning_rate": 1.5463577471039548e-07, "loss": 0.1054, "step": 31639 }, { "epoch": 0.9230410175622848, "grad_norm": 0.9334558976212776, "learning_rate": 1.545192114769839e-07, "loss": 0.1501, "step": 31640 }, { "epoch": 0.9230701907929284, "grad_norm": 0.8468658970243811, "learning_rate": 1.5440269150286734e-07, "loss": 0.1224, "step": 31641 }, { "epoch": 0.923099364023572, "grad_norm": 0.9010276123425714, "learning_rate": 1.5428621478908723e-07, "loss": 0.1194, "step": 31642 }, { "epoch": 0.9231285372542155, "grad_norm": 0.7955231366219253, "learning_rate": 1.5416978133668213e-07, "loss": 0.0969, "step": 31643 }, { "epoch": 0.9231577104848591, "grad_norm": 0.7320602127955743, "learning_rate": 1.5405339114669348e-07, "loss": 0.1338, "step": 31644 }, { "epoch": 0.9231868837155026, "grad_norm": 0.860636207288666, "learning_rate": 1.5393704422015875e-07, "loss": 0.1185, "step": 31645 }, { "epoch": 0.9232160569461462, "grad_norm": 0.7553259095559253, "learning_rate": 1.5382074055811768e-07, "loss": 0.1141, "step": 31646 }, { "epoch": 0.9232452301767898, "grad_norm": 1.1257911236360727, "learning_rate": 1.5370448016160778e-07, "loss": 0.0884, "step": 31647 }, { "epoch": 0.9232744034074334, "grad_norm": 0.764563354961511, "learning_rate": 1.5358826303166764e-07, "loss": 0.1183, "step": 31648 }, { "epoch": 0.9233035766380769, "grad_norm": 0.8925355857901404, "learning_rate": 1.5347208916933366e-07, "loss": 0.097, "step": 31649 }, { "epoch": 0.9233327498687205, "grad_norm": 0.7161358890200636, "learning_rate": 1.5335595857564501e-07, "loss": 0.0823, "step": 31650 }, { "epoch": 0.923361923099364, "grad_norm": 1.0281336572021094, "learning_rate": 1.5323987125163697e-07, "loss": 0.122, "step": 31651 }, { "epoch": 0.9233910963300076, "grad_norm": 0.9173171067427917, "learning_rate": 1.531238271983465e-07, "loss": 0.113, "step": 31652 }, { "epoch": 0.9234202695606512, "grad_norm": 0.8164228517657877, "learning_rate": 1.5300782641680945e-07, "loss": 0.119, "step": 31653 }, { "epoch": 0.9234494427912947, "grad_norm": 0.7005962818840026, "learning_rate": 1.5289186890806108e-07, "loss": 0.1141, "step": 31654 }, { "epoch": 0.9234786160219383, "grad_norm": 0.8436412188491318, "learning_rate": 1.5277595467313723e-07, "loss": 0.1278, "step": 31655 }, { "epoch": 0.9235077892525818, "grad_norm": 1.0043470679257775, "learning_rate": 1.5266008371307262e-07, "loss": 0.0983, "step": 31656 }, { "epoch": 0.9235369624832254, "grad_norm": 0.9269625408708297, "learning_rate": 1.52544256028902e-07, "loss": 0.1022, "step": 31657 }, { "epoch": 0.9235661357138689, "grad_norm": 0.8423703202891744, "learning_rate": 1.5242847162165843e-07, "loss": 0.1178, "step": 31658 }, { "epoch": 0.9235953089445125, "grad_norm": 0.7931191425893267, "learning_rate": 1.523127304923766e-07, "loss": 0.1071, "step": 31659 }, { "epoch": 0.923624482175156, "grad_norm": 0.8677367734275767, "learning_rate": 1.5219703264208963e-07, "loss": 0.1022, "step": 31660 }, { "epoch": 0.9236536554057997, "grad_norm": 0.9929099802499494, "learning_rate": 1.5208137807183e-07, "loss": 0.1114, "step": 31661 }, { "epoch": 0.9236828286364432, "grad_norm": 0.6626511392472205, "learning_rate": 1.519657667826302e-07, "loss": 0.1134, "step": 31662 }, { "epoch": 0.9237120018670868, "grad_norm": 0.8092447279789307, "learning_rate": 1.518501987755233e-07, "loss": 0.1024, "step": 31663 }, { "epoch": 0.9237411750977303, "grad_norm": 1.0462278416088158, "learning_rate": 1.517346740515402e-07, "loss": 0.1233, "step": 31664 }, { "epoch": 0.9237703483283739, "grad_norm": 0.7076361377472974, "learning_rate": 1.5161919261171275e-07, "loss": 0.0986, "step": 31665 }, { "epoch": 0.9237995215590175, "grad_norm": 0.5744799080853549, "learning_rate": 1.5150375445707188e-07, "loss": 0.0979, "step": 31666 }, { "epoch": 0.923828694789661, "grad_norm": 1.062507853611053, "learning_rate": 1.5138835958864728e-07, "loss": 0.0956, "step": 31667 }, { "epoch": 0.9238578680203046, "grad_norm": 0.9327357428416654, "learning_rate": 1.5127300800747036e-07, "loss": 0.1027, "step": 31668 }, { "epoch": 0.9238870412509481, "grad_norm": 0.8754677669518118, "learning_rate": 1.5115769971457084e-07, "loss": 0.1177, "step": 31669 }, { "epoch": 0.9239162144815917, "grad_norm": 0.8856063982246912, "learning_rate": 1.510424347109779e-07, "loss": 0.1191, "step": 31670 }, { "epoch": 0.9239453877122352, "grad_norm": 0.7518185981710238, "learning_rate": 1.5092721299772017e-07, "loss": 0.0836, "step": 31671 }, { "epoch": 0.9239745609428788, "grad_norm": 0.799150001983761, "learning_rate": 1.5081203457582738e-07, "loss": 0.1002, "step": 31672 }, { "epoch": 0.9240037341735223, "grad_norm": 0.9102159400402953, "learning_rate": 1.5069689944632648e-07, "loss": 0.104, "step": 31673 }, { "epoch": 0.9240329074041659, "grad_norm": 0.9043552789685778, "learning_rate": 1.505818076102461e-07, "loss": 0.1379, "step": 31674 }, { "epoch": 0.9240620806348095, "grad_norm": 0.815463861856309, "learning_rate": 1.5046675906861374e-07, "loss": 0.1027, "step": 31675 }, { "epoch": 0.9240912538654531, "grad_norm": 0.7249940916778375, "learning_rate": 1.5035175382245692e-07, "loss": 0.112, "step": 31676 }, { "epoch": 0.9241204270960967, "grad_norm": 1.0881781103280483, "learning_rate": 1.502367918728015e-07, "loss": 0.1342, "step": 31677 }, { "epoch": 0.9241496003267402, "grad_norm": 0.9374697187390513, "learning_rate": 1.5012187322067439e-07, "loss": 0.133, "step": 31678 }, { "epoch": 0.9241787735573838, "grad_norm": 0.9271405452120017, "learning_rate": 1.5000699786710092e-07, "loss": 0.1134, "step": 31679 }, { "epoch": 0.9242079467880273, "grad_norm": 0.8913088739471651, "learning_rate": 1.4989216581310805e-07, "loss": 0.1128, "step": 31680 }, { "epoch": 0.9242371200186709, "grad_norm": 0.8566853906111136, "learning_rate": 1.497773770597194e-07, "loss": 0.0892, "step": 31681 }, { "epoch": 0.9242662932493144, "grad_norm": 0.8363044898995028, "learning_rate": 1.496626316079608e-07, "loss": 0.1206, "step": 31682 }, { "epoch": 0.924295466479958, "grad_norm": 0.8974881530619577, "learning_rate": 1.4954792945885643e-07, "loss": 0.1088, "step": 31683 }, { "epoch": 0.9243246397106015, "grad_norm": 0.8425431127384165, "learning_rate": 1.4943327061342993e-07, "loss": 0.1229, "step": 31684 }, { "epoch": 0.9243538129412451, "grad_norm": 0.9562055036842013, "learning_rate": 1.4931865507270548e-07, "loss": 0.1183, "step": 31685 }, { "epoch": 0.9243829861718886, "grad_norm": 0.7798587627156494, "learning_rate": 1.4920408283770616e-07, "loss": 0.0912, "step": 31686 }, { "epoch": 0.9244121594025322, "grad_norm": 1.0245868531379574, "learning_rate": 1.49089553909455e-07, "loss": 0.1396, "step": 31687 }, { "epoch": 0.9244413326331758, "grad_norm": 0.8113574996612866, "learning_rate": 1.48975068288974e-07, "loss": 0.1302, "step": 31688 }, { "epoch": 0.9244705058638194, "grad_norm": 0.8802965898818487, "learning_rate": 1.4886062597728567e-07, "loss": 0.1198, "step": 31689 }, { "epoch": 0.924499679094463, "grad_norm": 0.8279329466765452, "learning_rate": 1.4874622697541196e-07, "loss": 0.1119, "step": 31690 }, { "epoch": 0.9245288523251065, "grad_norm": 0.957264706814189, "learning_rate": 1.4863187128437317e-07, "loss": 0.1263, "step": 31691 }, { "epoch": 0.9245580255557501, "grad_norm": 0.8328102267929881, "learning_rate": 1.4851755890519125e-07, "loss": 0.0919, "step": 31692 }, { "epoch": 0.9245871987863936, "grad_norm": 0.7208471124902135, "learning_rate": 1.4840328983888653e-07, "loss": 0.0953, "step": 31693 }, { "epoch": 0.9246163720170372, "grad_norm": 0.801460191767315, "learning_rate": 1.482890640864787e-07, "loss": 0.135, "step": 31694 }, { "epoch": 0.9246455452476807, "grad_norm": 0.7721325867798572, "learning_rate": 1.4817488164898863e-07, "loss": 0.1061, "step": 31695 }, { "epoch": 0.9246747184783243, "grad_norm": 0.9239235113400591, "learning_rate": 1.480607425274344e-07, "loss": 0.0998, "step": 31696 }, { "epoch": 0.9247038917089678, "grad_norm": 0.8160326047347986, "learning_rate": 1.4794664672283577e-07, "loss": 0.1242, "step": 31697 }, { "epoch": 0.9247330649396114, "grad_norm": 0.748262937428363, "learning_rate": 1.4783259423621076e-07, "loss": 0.1087, "step": 31698 }, { "epoch": 0.9247622381702549, "grad_norm": 0.8645288476102676, "learning_rate": 1.4771858506857862e-07, "loss": 0.1115, "step": 31699 }, { "epoch": 0.9247914114008985, "grad_norm": 0.7152663028836039, "learning_rate": 1.476046192209568e-07, "loss": 0.0943, "step": 31700 }, { "epoch": 0.924820584631542, "grad_norm": 0.6501551081778637, "learning_rate": 1.4749069669436179e-07, "loss": 0.117, "step": 31701 }, { "epoch": 0.9248497578621857, "grad_norm": 0.6584951160164141, "learning_rate": 1.4737681748981214e-07, "loss": 0.0845, "step": 31702 }, { "epoch": 0.9248789310928293, "grad_norm": 0.7890857304082028, "learning_rate": 1.472629816083232e-07, "loss": 0.103, "step": 31703 }, { "epoch": 0.9249081043234728, "grad_norm": 0.8117915023448221, "learning_rate": 1.4714918905091246e-07, "loss": 0.1131, "step": 31704 }, { "epoch": 0.9249372775541164, "grad_norm": 0.7919583720680227, "learning_rate": 1.4703543981859524e-07, "loss": 0.1245, "step": 31705 }, { "epoch": 0.9249664507847599, "grad_norm": 0.7812810231682721, "learning_rate": 1.4692173391238684e-07, "loss": 0.1107, "step": 31706 }, { "epoch": 0.9249956240154035, "grad_norm": 0.8852469426470593, "learning_rate": 1.4680807133330312e-07, "loss": 0.1178, "step": 31707 }, { "epoch": 0.925024797246047, "grad_norm": 0.7730858926753635, "learning_rate": 1.466944520823582e-07, "loss": 0.0879, "step": 31708 }, { "epoch": 0.9250539704766906, "grad_norm": 0.8014526540941422, "learning_rate": 1.4658087616056582e-07, "loss": 0.1083, "step": 31709 }, { "epoch": 0.9250831437073341, "grad_norm": 0.865674395824994, "learning_rate": 1.4646734356894177e-07, "loss": 0.1022, "step": 31710 }, { "epoch": 0.9251123169379777, "grad_norm": 0.7474366567978811, "learning_rate": 1.4635385430849857e-07, "loss": 0.1113, "step": 31711 }, { "epoch": 0.9251414901686212, "grad_norm": 1.0057173331831166, "learning_rate": 1.4624040838024933e-07, "loss": 0.1215, "step": 31712 }, { "epoch": 0.9251706633992648, "grad_norm": 0.7728605512356185, "learning_rate": 1.461270057852071e-07, "loss": 0.1333, "step": 31713 }, { "epoch": 0.9251998366299083, "grad_norm": 0.7762412372580629, "learning_rate": 1.4601364652438387e-07, "loss": 0.1045, "step": 31714 }, { "epoch": 0.925229009860552, "grad_norm": 1.0697619655151576, "learning_rate": 1.4590033059879216e-07, "loss": 0.1314, "step": 31715 }, { "epoch": 0.9252581830911956, "grad_norm": 0.7180033330029212, "learning_rate": 1.4578705800944392e-07, "loss": 0.102, "step": 31716 }, { "epoch": 0.9252873563218391, "grad_norm": 0.9335884476519792, "learning_rate": 1.4567382875735002e-07, "loss": 0.1018, "step": 31717 }, { "epoch": 0.9253165295524827, "grad_norm": 0.6776272690764072, "learning_rate": 1.4556064284352135e-07, "loss": 0.0834, "step": 31718 }, { "epoch": 0.9253457027831262, "grad_norm": 0.6304707389915937, "learning_rate": 1.4544750026896814e-07, "loss": 0.1015, "step": 31719 }, { "epoch": 0.9253748760137698, "grad_norm": 0.8153619968939354, "learning_rate": 1.4533440103470132e-07, "loss": 0.1043, "step": 31720 }, { "epoch": 0.9254040492444133, "grad_norm": 0.9327875910475767, "learning_rate": 1.4522134514172948e-07, "loss": 0.1093, "step": 31721 }, { "epoch": 0.9254332224750569, "grad_norm": 0.9585100548820297, "learning_rate": 1.451083325910624e-07, "loss": 0.1055, "step": 31722 }, { "epoch": 0.9254623957057004, "grad_norm": 0.8694365073157463, "learning_rate": 1.449953633837098e-07, "loss": 0.1133, "step": 31723 }, { "epoch": 0.925491568936344, "grad_norm": 0.8312840869919882, "learning_rate": 1.448824375206792e-07, "loss": 0.1024, "step": 31724 }, { "epoch": 0.9255207421669875, "grad_norm": 0.7670830538040962, "learning_rate": 1.447695550029793e-07, "loss": 0.0885, "step": 31725 }, { "epoch": 0.9255499153976311, "grad_norm": 0.7136171008459236, "learning_rate": 1.4465671583161755e-07, "loss": 0.1015, "step": 31726 }, { "epoch": 0.9255790886282746, "grad_norm": 0.7312121747966311, "learning_rate": 1.4454392000760154e-07, "loss": 0.1108, "step": 31727 }, { "epoch": 0.9256082618589182, "grad_norm": 1.0217869543902838, "learning_rate": 1.444311675319382e-07, "loss": 0.0977, "step": 31728 }, { "epoch": 0.9256374350895619, "grad_norm": 0.734487254337658, "learning_rate": 1.4431845840563508e-07, "loss": 0.1235, "step": 31729 }, { "epoch": 0.9256666083202054, "grad_norm": 0.8334042837425122, "learning_rate": 1.4420579262969748e-07, "loss": 0.1168, "step": 31730 }, { "epoch": 0.925695781550849, "grad_norm": 0.7957090527856719, "learning_rate": 1.440931702051307e-07, "loss": 0.1004, "step": 31731 }, { "epoch": 0.9257249547814925, "grad_norm": 0.8122803162626553, "learning_rate": 1.439805911329417e-07, "loss": 0.108, "step": 31732 }, { "epoch": 0.9257541280121361, "grad_norm": 0.722345553644814, "learning_rate": 1.4386805541413361e-07, "loss": 0.1185, "step": 31733 }, { "epoch": 0.9257833012427796, "grad_norm": 0.8192274853642497, "learning_rate": 1.4375556304971338e-07, "loss": 0.1207, "step": 31734 }, { "epoch": 0.9258124744734232, "grad_norm": 0.8564909861136769, "learning_rate": 1.4364311404068355e-07, "loss": 0.0914, "step": 31735 }, { "epoch": 0.9258416477040667, "grad_norm": 1.0361703726960494, "learning_rate": 1.435307083880494e-07, "loss": 0.0932, "step": 31736 }, { "epoch": 0.9258708209347103, "grad_norm": 0.9041171044957181, "learning_rate": 1.4341834609281346e-07, "loss": 0.1138, "step": 31737 }, { "epoch": 0.9258999941653538, "grad_norm": 0.8281386330147424, "learning_rate": 1.4330602715597886e-07, "loss": 0.1159, "step": 31738 }, { "epoch": 0.9259291673959974, "grad_norm": 0.855771458799659, "learning_rate": 1.431937515785481e-07, "loss": 0.096, "step": 31739 }, { "epoch": 0.925958340626641, "grad_norm": 0.7884953687926183, "learning_rate": 1.4308151936152537e-07, "loss": 0.1097, "step": 31740 }, { "epoch": 0.9259875138572845, "grad_norm": 0.8772604667798529, "learning_rate": 1.4296933050591043e-07, "loss": 0.1147, "step": 31741 }, { "epoch": 0.9260166870879282, "grad_norm": 1.0410341867643633, "learning_rate": 1.428571850127064e-07, "loss": 0.121, "step": 31742 }, { "epoch": 0.9260458603185717, "grad_norm": 1.055490587638585, "learning_rate": 1.4274508288291411e-07, "loss": 0.109, "step": 31743 }, { "epoch": 0.9260750335492153, "grad_norm": 0.6718414874251398, "learning_rate": 1.4263302411753388e-07, "loss": 0.1284, "step": 31744 }, { "epoch": 0.9261042067798588, "grad_norm": 0.8776701654134024, "learning_rate": 1.42521008717566e-07, "loss": 0.1138, "step": 31745 }, { "epoch": 0.9261333800105024, "grad_norm": 0.8780409718093165, "learning_rate": 1.424090366840114e-07, "loss": 0.1165, "step": 31746 }, { "epoch": 0.9261625532411459, "grad_norm": 0.8087318675701313, "learning_rate": 1.422971080178698e-07, "loss": 0.1235, "step": 31747 }, { "epoch": 0.9261917264717895, "grad_norm": 0.8035138993306693, "learning_rate": 1.4218522272013924e-07, "loss": 0.1323, "step": 31748 }, { "epoch": 0.926220899702433, "grad_norm": 1.3027535291669212, "learning_rate": 1.420733807918201e-07, "loss": 0.1018, "step": 31749 }, { "epoch": 0.9262500729330766, "grad_norm": 0.7610153643861877, "learning_rate": 1.4196158223390987e-07, "loss": 0.1081, "step": 31750 }, { "epoch": 0.9262792461637201, "grad_norm": 0.8453250451507696, "learning_rate": 1.4184982704740668e-07, "loss": 0.1146, "step": 31751 }, { "epoch": 0.9263084193943637, "grad_norm": 0.9790357627712215, "learning_rate": 1.4173811523330804e-07, "loss": 0.1124, "step": 31752 }, { "epoch": 0.9263375926250073, "grad_norm": 0.9046145975244893, "learning_rate": 1.4162644679261262e-07, "loss": 0.0958, "step": 31753 }, { "epoch": 0.9263667658556508, "grad_norm": 1.1355467678113773, "learning_rate": 1.4151482172631627e-07, "loss": 0.1199, "step": 31754 }, { "epoch": 0.9263959390862944, "grad_norm": 1.057588745451631, "learning_rate": 1.4140324003541538e-07, "loss": 0.1414, "step": 31755 }, { "epoch": 0.926425112316938, "grad_norm": 0.9310083548881187, "learning_rate": 1.4129170172090645e-07, "loss": 0.1246, "step": 31756 }, { "epoch": 0.9264542855475816, "grad_norm": 0.6704608895088169, "learning_rate": 1.411802067837864e-07, "loss": 0.0913, "step": 31757 }, { "epoch": 0.9264834587782251, "grad_norm": 0.7102930648508062, "learning_rate": 1.4106875522504836e-07, "loss": 0.1242, "step": 31758 }, { "epoch": 0.9265126320088687, "grad_norm": 0.7892150966461705, "learning_rate": 1.409573470456893e-07, "loss": 0.1254, "step": 31759 }, { "epoch": 0.9265418052395122, "grad_norm": 0.890289439329342, "learning_rate": 1.4084598224670343e-07, "loss": 0.1324, "step": 31760 }, { "epoch": 0.9265709784701558, "grad_norm": 0.9709724081723901, "learning_rate": 1.4073466082908382e-07, "loss": 0.1005, "step": 31761 }, { "epoch": 0.9266001517007993, "grad_norm": 0.8535624983542714, "learning_rate": 1.406233827938247e-07, "loss": 0.1012, "step": 31762 }, { "epoch": 0.9266293249314429, "grad_norm": 0.830784851460545, "learning_rate": 1.405121481419214e-07, "loss": 0.109, "step": 31763 }, { "epoch": 0.9266584981620865, "grad_norm": 0.7923320312471503, "learning_rate": 1.4040095687436473e-07, "loss": 0.1017, "step": 31764 }, { "epoch": 0.92668767139273, "grad_norm": 0.8768415708836962, "learning_rate": 1.402898089921484e-07, "loss": 0.1191, "step": 31765 }, { "epoch": 0.9267168446233736, "grad_norm": 0.8864873670478928, "learning_rate": 1.4017870449626492e-07, "loss": 0.1401, "step": 31766 }, { "epoch": 0.9267460178540171, "grad_norm": 1.097276375567561, "learning_rate": 1.4006764338770573e-07, "loss": 0.1342, "step": 31767 }, { "epoch": 0.9267751910846607, "grad_norm": 0.7614307100585425, "learning_rate": 1.3995662566746115e-07, "loss": 0.0982, "step": 31768 }, { "epoch": 0.9268043643153043, "grad_norm": 0.6780695007409673, "learning_rate": 1.3984565133652484e-07, "loss": 0.0997, "step": 31769 }, { "epoch": 0.9268335375459479, "grad_norm": 0.88304632227594, "learning_rate": 1.3973472039588654e-07, "loss": 0.1148, "step": 31770 }, { "epoch": 0.9268627107765914, "grad_norm": 0.8076494746165436, "learning_rate": 1.39623832846536e-07, "loss": 0.0999, "step": 31771 }, { "epoch": 0.926891884007235, "grad_norm": 0.9200410378993642, "learning_rate": 1.395129886894636e-07, "loss": 0.106, "step": 31772 }, { "epoch": 0.9269210572378785, "grad_norm": 0.8777988146574237, "learning_rate": 1.3940218792565964e-07, "loss": 0.1227, "step": 31773 }, { "epoch": 0.9269502304685221, "grad_norm": 0.8797267955743507, "learning_rate": 1.3929143055611162e-07, "loss": 0.1173, "step": 31774 }, { "epoch": 0.9269794036991656, "grad_norm": 0.8029341141164189, "learning_rate": 1.391807165818093e-07, "loss": 0.1108, "step": 31775 }, { "epoch": 0.9270085769298092, "grad_norm": 0.8464957889930641, "learning_rate": 1.3907004600374198e-07, "loss": 0.106, "step": 31776 }, { "epoch": 0.9270377501604528, "grad_norm": 0.7510489981223771, "learning_rate": 1.389594188228971e-07, "loss": 0.1138, "step": 31777 }, { "epoch": 0.9270669233910963, "grad_norm": 0.7993096754734151, "learning_rate": 1.3884883504026116e-07, "loss": 0.0964, "step": 31778 }, { "epoch": 0.9270960966217399, "grad_norm": 0.7978983676680128, "learning_rate": 1.3873829465682277e-07, "loss": 0.098, "step": 31779 }, { "epoch": 0.9271252698523834, "grad_norm": 0.8653884600183934, "learning_rate": 1.3862779767356838e-07, "loss": 0.1233, "step": 31780 }, { "epoch": 0.927154443083027, "grad_norm": 0.7238549802724723, "learning_rate": 1.3851734409148443e-07, "loss": 0.1054, "step": 31781 }, { "epoch": 0.9271836163136705, "grad_norm": 0.9286978961897876, "learning_rate": 1.3840693391155735e-07, "loss": 0.0951, "step": 31782 }, { "epoch": 0.9272127895443142, "grad_norm": 0.6889075601075261, "learning_rate": 1.3829656713477247e-07, "loss": 0.1067, "step": 31783 }, { "epoch": 0.9272419627749577, "grad_norm": 0.8273047894290693, "learning_rate": 1.3818624376211564e-07, "loss": 0.1265, "step": 31784 }, { "epoch": 0.9272711360056013, "grad_norm": 0.8189016133973054, "learning_rate": 1.3807596379457056e-07, "loss": 0.099, "step": 31785 }, { "epoch": 0.9273003092362448, "grad_norm": 0.925306070894806, "learning_rate": 1.3796572723312308e-07, "loss": 0.1255, "step": 31786 }, { "epoch": 0.9273294824668884, "grad_norm": 0.7317591662671944, "learning_rate": 1.3785553407875685e-07, "loss": 0.1076, "step": 31787 }, { "epoch": 0.927358655697532, "grad_norm": 0.8200400377904487, "learning_rate": 1.3774538433245555e-07, "loss": 0.0947, "step": 31788 }, { "epoch": 0.9273878289281755, "grad_norm": 0.8756006166518264, "learning_rate": 1.376352779952034e-07, "loss": 0.1294, "step": 31789 }, { "epoch": 0.9274170021588191, "grad_norm": 0.6485610255946412, "learning_rate": 1.3752521506798233e-07, "loss": 0.0814, "step": 31790 }, { "epoch": 0.9274461753894626, "grad_norm": 0.9764972503010176, "learning_rate": 1.374151955517755e-07, "loss": 0.1108, "step": 31791 }, { "epoch": 0.9274753486201062, "grad_norm": 0.8023872679716414, "learning_rate": 1.3730521944756437e-07, "loss": 0.1228, "step": 31792 }, { "epoch": 0.9275045218507497, "grad_norm": 0.7619632234424019, "learning_rate": 1.3719528675633254e-07, "loss": 0.1042, "step": 31793 }, { "epoch": 0.9275336950813933, "grad_norm": 1.0058169030589184, "learning_rate": 1.370853974790598e-07, "loss": 0.1115, "step": 31794 }, { "epoch": 0.9275628683120368, "grad_norm": 0.8253121798893776, "learning_rate": 1.369755516167276e-07, "loss": 0.1266, "step": 31795 }, { "epoch": 0.9275920415426805, "grad_norm": 1.0321014996611595, "learning_rate": 1.368657491703168e-07, "loss": 0.0953, "step": 31796 }, { "epoch": 0.927621214773324, "grad_norm": 0.9970188652347979, "learning_rate": 1.3675599014080832e-07, "loss": 0.0953, "step": 31797 }, { "epoch": 0.9276503880039676, "grad_norm": 0.909214366606785, "learning_rate": 1.3664627452918021e-07, "loss": 0.0975, "step": 31798 }, { "epoch": 0.9276795612346111, "grad_norm": 0.8532918532153793, "learning_rate": 1.3653660233641397e-07, "loss": 0.0843, "step": 31799 }, { "epoch": 0.9277087344652547, "grad_norm": 0.9876479818329683, "learning_rate": 1.364269735634882e-07, "loss": 0.1055, "step": 31800 }, { "epoch": 0.9277379076958983, "grad_norm": 0.6862723268211314, "learning_rate": 1.363173882113805e-07, "loss": 0.0961, "step": 31801 }, { "epoch": 0.9277670809265418, "grad_norm": 0.8376938152866861, "learning_rate": 1.3620784628107065e-07, "loss": 0.1088, "step": 31802 }, { "epoch": 0.9277962541571854, "grad_norm": 0.8994906102792165, "learning_rate": 1.3609834777353669e-07, "loss": 0.1069, "step": 31803 }, { "epoch": 0.9278254273878289, "grad_norm": 1.007395043983961, "learning_rate": 1.3598889268975457e-07, "loss": 0.1073, "step": 31804 }, { "epoch": 0.9278546006184725, "grad_norm": 0.7227518113123437, "learning_rate": 1.3587948103070237e-07, "loss": 0.1094, "step": 31805 }, { "epoch": 0.927883773849116, "grad_norm": 0.849188319709201, "learning_rate": 1.3577011279735763e-07, "loss": 0.0976, "step": 31806 }, { "epoch": 0.9279129470797596, "grad_norm": 0.7660060966761473, "learning_rate": 1.3566078799069625e-07, "loss": 0.1007, "step": 31807 }, { "epoch": 0.9279421203104031, "grad_norm": 0.703551761846541, "learning_rate": 1.3555150661169358e-07, "loss": 0.1146, "step": 31808 }, { "epoch": 0.9279712935410467, "grad_norm": 0.9981733909666994, "learning_rate": 1.3544226866132658e-07, "loss": 0.1049, "step": 31809 }, { "epoch": 0.9280004667716903, "grad_norm": 0.8990268865529819, "learning_rate": 1.3533307414056894e-07, "loss": 0.1067, "step": 31810 }, { "epoch": 0.9280296400023339, "grad_norm": 0.8876133612278219, "learning_rate": 1.3522392305039656e-07, "loss": 0.1215, "step": 31811 }, { "epoch": 0.9280588132329775, "grad_norm": 0.8881889152314952, "learning_rate": 1.3511481539178362e-07, "loss": 0.1132, "step": 31812 }, { "epoch": 0.928087986463621, "grad_norm": 0.9820246173766248, "learning_rate": 1.350057511657049e-07, "loss": 0.1052, "step": 31813 }, { "epoch": 0.9281171596942646, "grad_norm": 0.868547077141067, "learning_rate": 1.34896730373133e-07, "loss": 0.1432, "step": 31814 }, { "epoch": 0.9281463329249081, "grad_norm": 0.7482346363401683, "learning_rate": 1.3478775301504154e-07, "loss": 0.1048, "step": 31815 }, { "epoch": 0.9281755061555517, "grad_norm": 0.7306615085695191, "learning_rate": 1.346788190924031e-07, "loss": 0.1037, "step": 31816 }, { "epoch": 0.9282046793861952, "grad_norm": 0.8326001612957701, "learning_rate": 1.3456992860619188e-07, "loss": 0.112, "step": 31817 }, { "epoch": 0.9282338526168388, "grad_norm": 0.9366844088472931, "learning_rate": 1.3446108155737826e-07, "loss": 0.1194, "step": 31818 }, { "epoch": 0.9282630258474823, "grad_norm": 0.872850129913893, "learning_rate": 1.3435227794693472e-07, "loss": 0.107, "step": 31819 }, { "epoch": 0.9282921990781259, "grad_norm": 0.8333367549240172, "learning_rate": 1.3424351777583278e-07, "loss": 0.1209, "step": 31820 }, { "epoch": 0.9283213723087694, "grad_norm": 0.7771479190120988, "learning_rate": 1.3413480104504272e-07, "loss": 0.1027, "step": 31821 }, { "epoch": 0.928350545539413, "grad_norm": 0.790493073916453, "learning_rate": 1.3402612775553546e-07, "loss": 0.107, "step": 31822 }, { "epoch": 0.9283797187700567, "grad_norm": 0.900113995987599, "learning_rate": 1.339174979082819e-07, "loss": 0.1121, "step": 31823 }, { "epoch": 0.9284088920007002, "grad_norm": 0.7251102759118273, "learning_rate": 1.3380891150425068e-07, "loss": 0.0845, "step": 31824 }, { "epoch": 0.9284380652313438, "grad_norm": 1.0832926473263882, "learning_rate": 1.3370036854441216e-07, "loss": 0.1111, "step": 31825 }, { "epoch": 0.9284672384619873, "grad_norm": 0.7568245320348015, "learning_rate": 1.3359186902973554e-07, "loss": 0.1318, "step": 31826 }, { "epoch": 0.9284964116926309, "grad_norm": 0.8860534980102193, "learning_rate": 1.334834129611884e-07, "loss": 0.1185, "step": 31827 }, { "epoch": 0.9285255849232744, "grad_norm": 0.8883106338757215, "learning_rate": 1.3337500033973882e-07, "loss": 0.1006, "step": 31828 }, { "epoch": 0.928554758153918, "grad_norm": 0.9404115406259522, "learning_rate": 1.3326663116635717e-07, "loss": 0.1163, "step": 31829 }, { "epoch": 0.9285839313845615, "grad_norm": 0.7562662765938616, "learning_rate": 1.3315830544200826e-07, "loss": 0.1209, "step": 31830 }, { "epoch": 0.9286131046152051, "grad_norm": 0.7330848003186264, "learning_rate": 1.3305002316766013e-07, "loss": 0.1243, "step": 31831 }, { "epoch": 0.9286422778458486, "grad_norm": 0.9121372954925305, "learning_rate": 1.329417843442804e-07, "loss": 0.1208, "step": 31832 }, { "epoch": 0.9286714510764922, "grad_norm": 0.8451668065558846, "learning_rate": 1.3283358897283438e-07, "loss": 0.1244, "step": 31833 }, { "epoch": 0.9287006243071357, "grad_norm": 0.8449992401771372, "learning_rate": 1.3272543705428742e-07, "loss": 0.1137, "step": 31834 }, { "epoch": 0.9287297975377793, "grad_norm": 0.9208350676847564, "learning_rate": 1.3261732858960598e-07, "loss": 0.0831, "step": 31835 }, { "epoch": 0.9287589707684228, "grad_norm": 0.6965852645334017, "learning_rate": 1.3250926357975537e-07, "loss": 0.1007, "step": 31836 }, { "epoch": 0.9287881439990665, "grad_norm": 0.72587681724953, "learning_rate": 1.3240124202570038e-07, "loss": 0.1049, "step": 31837 }, { "epoch": 0.9288173172297101, "grad_norm": 0.6481746470484258, "learning_rate": 1.3229326392840468e-07, "loss": 0.1028, "step": 31838 }, { "epoch": 0.9288464904603536, "grad_norm": 0.8006667847317643, "learning_rate": 1.321853292888331e-07, "loss": 0.099, "step": 31839 }, { "epoch": 0.9288756636909972, "grad_norm": 0.7546421097337805, "learning_rate": 1.3207743810794815e-07, "loss": 0.1228, "step": 31840 }, { "epoch": 0.9289048369216407, "grad_norm": 0.6913794747741638, "learning_rate": 1.3196959038671464e-07, "loss": 0.1136, "step": 31841 }, { "epoch": 0.9289340101522843, "grad_norm": 0.9952210474587667, "learning_rate": 1.3186178612609346e-07, "loss": 0.1047, "step": 31842 }, { "epoch": 0.9289631833829278, "grad_norm": 0.9197226240457339, "learning_rate": 1.317540253270494e-07, "loss": 0.1214, "step": 31843 }, { "epoch": 0.9289923566135714, "grad_norm": 0.7698174611805969, "learning_rate": 1.316463079905428e-07, "loss": 0.1196, "step": 31844 }, { "epoch": 0.9290215298442149, "grad_norm": 0.9122035865888464, "learning_rate": 1.3153863411753508e-07, "loss": 0.1147, "step": 31845 }, { "epoch": 0.9290507030748585, "grad_norm": 0.7490563563043272, "learning_rate": 1.3143100370898886e-07, "loss": 0.0893, "step": 31846 }, { "epoch": 0.929079876305502, "grad_norm": 0.8431775605464544, "learning_rate": 1.3132341676586447e-07, "loss": 0.1234, "step": 31847 }, { "epoch": 0.9291090495361456, "grad_norm": 0.7153092145860344, "learning_rate": 1.3121587328912222e-07, "loss": 0.1118, "step": 31848 }, { "epoch": 0.9291382227667891, "grad_norm": 1.19776123598946, "learning_rate": 1.3110837327972248e-07, "loss": 0.1406, "step": 31849 }, { "epoch": 0.9291673959974328, "grad_norm": 0.8648257833374993, "learning_rate": 1.3100091673862502e-07, "loss": 0.1015, "step": 31850 }, { "epoch": 0.9291965692280764, "grad_norm": 0.6255037728739352, "learning_rate": 1.3089350366678855e-07, "loss": 0.1013, "step": 31851 }, { "epoch": 0.9292257424587199, "grad_norm": 0.8270467120913118, "learning_rate": 1.3078613406517228e-07, "loss": 0.095, "step": 31852 }, { "epoch": 0.9292549156893635, "grad_norm": 0.9490322085209174, "learning_rate": 1.3067880793473597e-07, "loss": 0.1041, "step": 31853 }, { "epoch": 0.929284088920007, "grad_norm": 0.7924175106467733, "learning_rate": 1.3057152527643668e-07, "loss": 0.1159, "step": 31854 }, { "epoch": 0.9293132621506506, "grad_norm": 0.8274527911865962, "learning_rate": 1.3046428609123196e-07, "loss": 0.1125, "step": 31855 }, { "epoch": 0.9293424353812941, "grad_norm": 0.9026162693608782, "learning_rate": 1.3035709038007993e-07, "loss": 0.1152, "step": 31856 }, { "epoch": 0.9293716086119377, "grad_norm": 0.780596824956173, "learning_rate": 1.302499381439376e-07, "loss": 0.1127, "step": 31857 }, { "epoch": 0.9294007818425812, "grad_norm": 0.5554970680070257, "learning_rate": 1.3014282938376034e-07, "loss": 0.1046, "step": 31858 }, { "epoch": 0.9294299550732248, "grad_norm": 0.7311824802092963, "learning_rate": 1.3003576410050623e-07, "loss": 0.1093, "step": 31859 }, { "epoch": 0.9294591283038683, "grad_norm": 0.8817108602131031, "learning_rate": 1.299287422951301e-07, "loss": 0.096, "step": 31860 }, { "epoch": 0.9294883015345119, "grad_norm": 0.7418745675867275, "learning_rate": 1.2982176396858725e-07, "loss": 0.0966, "step": 31861 }, { "epoch": 0.9295174747651554, "grad_norm": 0.8576244592350082, "learning_rate": 1.2971482912183363e-07, "loss": 0.1137, "step": 31862 }, { "epoch": 0.929546647995799, "grad_norm": 0.9123038104314924, "learning_rate": 1.2960793775582347e-07, "loss": 0.1107, "step": 31863 }, { "epoch": 0.9295758212264427, "grad_norm": 0.6741393165961056, "learning_rate": 1.2950108987151045e-07, "loss": 0.0852, "step": 31864 }, { "epoch": 0.9296049944570862, "grad_norm": 0.7158006914295666, "learning_rate": 1.2939428546984878e-07, "loss": 0.099, "step": 31865 }, { "epoch": 0.9296341676877298, "grad_norm": 0.8480768058484065, "learning_rate": 1.292875245517927e-07, "loss": 0.104, "step": 31866 }, { "epoch": 0.9296633409183733, "grad_norm": 1.2313390211905544, "learning_rate": 1.2918080711829483e-07, "loss": 0.1127, "step": 31867 }, { "epoch": 0.9296925141490169, "grad_norm": 0.8906095587323312, "learning_rate": 1.2907413317030771e-07, "loss": 0.1576, "step": 31868 }, { "epoch": 0.9297216873796604, "grad_norm": 0.6460838578639841, "learning_rate": 1.2896750270878445e-07, "loss": 0.101, "step": 31869 }, { "epoch": 0.929750860610304, "grad_norm": 0.9412761702099182, "learning_rate": 1.2886091573467597e-07, "loss": 0.1133, "step": 31870 }, { "epoch": 0.9297800338409475, "grad_norm": 0.8029726891533026, "learning_rate": 1.2875437224893485e-07, "loss": 0.1114, "step": 31871 }, { "epoch": 0.9298092070715911, "grad_norm": 0.7234335704268551, "learning_rate": 1.2864787225251141e-07, "loss": 0.1216, "step": 31872 }, { "epoch": 0.9298383803022346, "grad_norm": 0.8342462671527551, "learning_rate": 1.2854141574635714e-07, "loss": 0.1079, "step": 31873 }, { "epoch": 0.9298675535328782, "grad_norm": 0.8613265968178566, "learning_rate": 1.284350027314224e-07, "loss": 0.1159, "step": 31874 }, { "epoch": 0.9298967267635218, "grad_norm": 0.8961476307437986, "learning_rate": 1.2832863320865696e-07, "loss": 0.1247, "step": 31875 }, { "epoch": 0.9299258999941653, "grad_norm": 0.7027444420307065, "learning_rate": 1.282223071790101e-07, "loss": 0.1247, "step": 31876 }, { "epoch": 0.929955073224809, "grad_norm": 0.9564352845338436, "learning_rate": 1.2811602464343155e-07, "loss": 0.1157, "step": 31877 }, { "epoch": 0.9299842464554525, "grad_norm": 1.1018576709758896, "learning_rate": 1.2800978560287002e-07, "loss": 0.1266, "step": 31878 }, { "epoch": 0.9300134196860961, "grad_norm": 0.8748241148358271, "learning_rate": 1.279035900582748e-07, "loss": 0.1055, "step": 31879 }, { "epoch": 0.9300425929167396, "grad_norm": 0.9006780298953522, "learning_rate": 1.2779743801059285e-07, "loss": 0.1082, "step": 31880 }, { "epoch": 0.9300717661473832, "grad_norm": 0.8733716203451908, "learning_rate": 1.2769132946077235e-07, "loss": 0.1543, "step": 31881 }, { "epoch": 0.9301009393780267, "grad_norm": 0.7921478917179678, "learning_rate": 1.2758526440976028e-07, "loss": 0.1141, "step": 31882 }, { "epoch": 0.9301301126086703, "grad_norm": 0.716895583888058, "learning_rate": 1.274792428585042e-07, "loss": 0.093, "step": 31883 }, { "epoch": 0.9301592858393138, "grad_norm": 0.9013773962196722, "learning_rate": 1.273732648079501e-07, "loss": 0.118, "step": 31884 }, { "epoch": 0.9301884590699574, "grad_norm": 0.8308549976620867, "learning_rate": 1.2726733025904436e-07, "loss": 0.1038, "step": 31885 }, { "epoch": 0.930217632300601, "grad_norm": 0.796908700369008, "learning_rate": 1.271614392127324e-07, "loss": 0.1128, "step": 31886 }, { "epoch": 0.9302468055312445, "grad_norm": 0.7871456170316263, "learning_rate": 1.2705559166996063e-07, "loss": 0.1173, "step": 31887 }, { "epoch": 0.9302759787618881, "grad_norm": 0.9454585672221922, "learning_rate": 1.2694978763167165e-07, "loss": 0.1364, "step": 31888 }, { "epoch": 0.9303051519925316, "grad_norm": 1.0150272764416546, "learning_rate": 1.2684402709881305e-07, "loss": 0.1072, "step": 31889 }, { "epoch": 0.9303343252231752, "grad_norm": 0.7229796377824613, "learning_rate": 1.2673831007232795e-07, "loss": 0.1134, "step": 31890 }, { "epoch": 0.9303634984538188, "grad_norm": 0.8158135053267521, "learning_rate": 1.2663263655315894e-07, "loss": 0.1112, "step": 31891 }, { "epoch": 0.9303926716844624, "grad_norm": 0.8555090184417553, "learning_rate": 1.265270065422508e-07, "loss": 0.1061, "step": 31892 }, { "epoch": 0.9304218449151059, "grad_norm": 0.771408734266717, "learning_rate": 1.2642142004054615e-07, "loss": 0.1042, "step": 31893 }, { "epoch": 0.9304510181457495, "grad_norm": 0.8500817337899947, "learning_rate": 1.2631587704898752e-07, "loss": 0.1167, "step": 31894 }, { "epoch": 0.930480191376393, "grad_norm": 0.8788507385964561, "learning_rate": 1.2621037756851695e-07, "loss": 0.1069, "step": 31895 }, { "epoch": 0.9305093646070366, "grad_norm": 0.8389933634147309, "learning_rate": 1.261049216000776e-07, "loss": 0.1039, "step": 31896 }, { "epoch": 0.9305385378376801, "grad_norm": 0.6556630091108445, "learning_rate": 1.259995091446098e-07, "loss": 0.1062, "step": 31897 }, { "epoch": 0.9305677110683237, "grad_norm": 0.7419564977416073, "learning_rate": 1.258941402030539e-07, "loss": 0.1197, "step": 31898 }, { "epoch": 0.9305968842989673, "grad_norm": 0.9155257068684064, "learning_rate": 1.2578881477635252e-07, "loss": 0.1234, "step": 31899 }, { "epoch": 0.9306260575296108, "grad_norm": 0.9316605553815461, "learning_rate": 1.2568353286544432e-07, "loss": 0.1127, "step": 31900 }, { "epoch": 0.9306552307602544, "grad_norm": 0.7153606449528723, "learning_rate": 1.2557829447127078e-07, "loss": 0.1421, "step": 31901 }, { "epoch": 0.9306844039908979, "grad_norm": 0.8417139113878996, "learning_rate": 1.2547309959477006e-07, "loss": 0.121, "step": 31902 }, { "epoch": 0.9307135772215415, "grad_norm": 0.8498931817356064, "learning_rate": 1.253679482368819e-07, "loss": 0.1217, "step": 31903 }, { "epoch": 0.9307427504521851, "grad_norm": 0.8064080262598519, "learning_rate": 1.2526284039854563e-07, "loss": 0.1281, "step": 31904 }, { "epoch": 0.9307719236828287, "grad_norm": 0.6760580647127779, "learning_rate": 1.2515777608069823e-07, "loss": 0.1054, "step": 31905 }, { "epoch": 0.9308010969134722, "grad_norm": 0.8039425443445063, "learning_rate": 1.250527552842784e-07, "loss": 0.1087, "step": 31906 }, { "epoch": 0.9308302701441158, "grad_norm": 0.954453858923193, "learning_rate": 1.2494777801022427e-07, "loss": 0.0989, "step": 31907 }, { "epoch": 0.9308594433747593, "grad_norm": 0.9314655885064037, "learning_rate": 1.2484284425947236e-07, "loss": 0.1338, "step": 31908 }, { "epoch": 0.9308886166054029, "grad_norm": 1.0844910436201352, "learning_rate": 1.2473795403296018e-07, "loss": 0.1432, "step": 31909 }, { "epoch": 0.9309177898360464, "grad_norm": 0.8381180465942429, "learning_rate": 1.2463310733162371e-07, "loss": 0.1106, "step": 31910 }, { "epoch": 0.93094696306669, "grad_norm": 0.6859338086230792, "learning_rate": 1.2452830415639882e-07, "loss": 0.1147, "step": 31911 }, { "epoch": 0.9309761362973336, "grad_norm": 0.7367652410515695, "learning_rate": 1.2442354450822092e-07, "loss": 0.1113, "step": 31912 }, { "epoch": 0.9310053095279771, "grad_norm": 0.9388791727098107, "learning_rate": 1.2431882838802646e-07, "loss": 0.0942, "step": 31913 }, { "epoch": 0.9310344827586207, "grad_norm": 1.6551175408790229, "learning_rate": 1.242141557967491e-07, "loss": 0.1083, "step": 31914 }, { "epoch": 0.9310636559892642, "grad_norm": 0.9551090036043243, "learning_rate": 1.2410952673532372e-07, "loss": 0.1214, "step": 31915 }, { "epoch": 0.9310928292199078, "grad_norm": 1.22625024778241, "learning_rate": 1.240049412046851e-07, "loss": 0.1086, "step": 31916 }, { "epoch": 0.9311220024505513, "grad_norm": 0.8530652157131269, "learning_rate": 1.2390039920576636e-07, "loss": 0.112, "step": 31917 }, { "epoch": 0.931151175681195, "grad_norm": 0.8429285448486027, "learning_rate": 1.2379590073949953e-07, "loss": 0.1048, "step": 31918 }, { "epoch": 0.9311803489118385, "grad_norm": 0.8755016187817724, "learning_rate": 1.2369144580682002e-07, "loss": 0.11, "step": 31919 }, { "epoch": 0.9312095221424821, "grad_norm": 0.826503120525939, "learning_rate": 1.2358703440865928e-07, "loss": 0.1269, "step": 31920 }, { "epoch": 0.9312386953731256, "grad_norm": 0.8628695192756125, "learning_rate": 1.2348266654594932e-07, "loss": 0.1065, "step": 31921 }, { "epoch": 0.9312678686037692, "grad_norm": 0.8469502563510405, "learning_rate": 1.2337834221962165e-07, "loss": 0.1195, "step": 31922 }, { "epoch": 0.9312970418344128, "grad_norm": 1.1229549960110636, "learning_rate": 1.2327406143060826e-07, "loss": 0.1244, "step": 31923 }, { "epoch": 0.9313262150650563, "grad_norm": 0.7639869690161986, "learning_rate": 1.2316982417983958e-07, "loss": 0.1116, "step": 31924 }, { "epoch": 0.9313553882956999, "grad_norm": 0.7455086304433893, "learning_rate": 1.230656304682465e-07, "loss": 0.1109, "step": 31925 }, { "epoch": 0.9313845615263434, "grad_norm": 1.0352930780386584, "learning_rate": 1.229614802967599e-07, "loss": 0.1054, "step": 31926 }, { "epoch": 0.931413734756987, "grad_norm": 0.7285119701188629, "learning_rate": 1.2285737366630857e-07, "loss": 0.1099, "step": 31927 }, { "epoch": 0.9314429079876305, "grad_norm": 0.8975598205261752, "learning_rate": 1.2275331057782224e-07, "loss": 0.0959, "step": 31928 }, { "epoch": 0.9314720812182741, "grad_norm": 0.870180636556917, "learning_rate": 1.226492910322302e-07, "loss": 0.1271, "step": 31929 }, { "epoch": 0.9315012544489176, "grad_norm": 0.8956178959689346, "learning_rate": 1.2254531503046062e-07, "loss": 0.1095, "step": 31930 }, { "epoch": 0.9315304276795612, "grad_norm": 0.6840812003072003, "learning_rate": 1.2244138257344275e-07, "loss": 0.0945, "step": 31931 }, { "epoch": 0.9315596009102048, "grad_norm": 0.7504785332715114, "learning_rate": 1.22337493662103e-07, "loss": 0.1007, "step": 31932 }, { "epoch": 0.9315887741408484, "grad_norm": 0.7472896064001563, "learning_rate": 1.2223364829737072e-07, "loss": 0.1053, "step": 31933 }, { "epoch": 0.931617947371492, "grad_norm": 0.8041613913350402, "learning_rate": 1.221298464801718e-07, "loss": 0.104, "step": 31934 }, { "epoch": 0.9316471206021355, "grad_norm": 0.915643845045573, "learning_rate": 1.220260882114327e-07, "loss": 0.0918, "step": 31935 }, { "epoch": 0.9316762938327791, "grad_norm": 0.6798992345577578, "learning_rate": 1.2192237349207993e-07, "loss": 0.1321, "step": 31936 }, { "epoch": 0.9317054670634226, "grad_norm": 0.8095701910911953, "learning_rate": 1.218187023230405e-07, "loss": 0.1049, "step": 31937 }, { "epoch": 0.9317346402940662, "grad_norm": 0.7939759489021841, "learning_rate": 1.2171507470523868e-07, "loss": 0.1138, "step": 31938 }, { "epoch": 0.9317638135247097, "grad_norm": 0.9562131842452575, "learning_rate": 1.2161149063960042e-07, "loss": 0.1215, "step": 31939 }, { "epoch": 0.9317929867553533, "grad_norm": 0.6817020416031795, "learning_rate": 1.2150795012705053e-07, "loss": 0.0997, "step": 31940 }, { "epoch": 0.9318221599859968, "grad_norm": 0.8691699055196034, "learning_rate": 1.2140445316851212e-07, "loss": 0.1235, "step": 31941 }, { "epoch": 0.9318513332166404, "grad_norm": 0.956361779604108, "learning_rate": 1.2130099976491062e-07, "loss": 0.1195, "step": 31942 }, { "epoch": 0.9318805064472839, "grad_norm": 0.7365849996540791, "learning_rate": 1.2119758991716912e-07, "loss": 0.1021, "step": 31943 }, { "epoch": 0.9319096796779275, "grad_norm": 0.6974657489834551, "learning_rate": 1.2109422362621138e-07, "loss": 0.1101, "step": 31944 }, { "epoch": 0.9319388529085711, "grad_norm": 0.9246905293246134, "learning_rate": 1.2099090089295884e-07, "loss": 0.13, "step": 31945 }, { "epoch": 0.9319680261392147, "grad_norm": 0.8507907797179879, "learning_rate": 1.2088762171833579e-07, "loss": 0.1279, "step": 31946 }, { "epoch": 0.9319971993698583, "grad_norm": 0.7888337363170564, "learning_rate": 1.207843861032626e-07, "loss": 0.1046, "step": 31947 }, { "epoch": 0.9320263726005018, "grad_norm": 0.7420549042870341, "learning_rate": 1.206811940486613e-07, "loss": 0.1101, "step": 31948 }, { "epoch": 0.9320555458311454, "grad_norm": 0.7910105412932991, "learning_rate": 1.205780455554545e-07, "loss": 0.1016, "step": 31949 }, { "epoch": 0.9320847190617889, "grad_norm": 0.8889477829657815, "learning_rate": 1.2047494062456199e-07, "loss": 0.1001, "step": 31950 }, { "epoch": 0.9321138922924325, "grad_norm": 0.7223779251255619, "learning_rate": 1.2037187925690364e-07, "loss": 0.1186, "step": 31951 }, { "epoch": 0.932143065523076, "grad_norm": 0.7825001685835803, "learning_rate": 1.2026886145340088e-07, "loss": 0.1219, "step": 31952 }, { "epoch": 0.9321722387537196, "grad_norm": 1.058982674765109, "learning_rate": 1.2016588721497247e-07, "loss": 0.1181, "step": 31953 }, { "epoch": 0.9322014119843631, "grad_norm": 0.9374541064444769, "learning_rate": 1.200629565425382e-07, "loss": 0.11, "step": 31954 }, { "epoch": 0.9322305852150067, "grad_norm": 0.9975085468401156, "learning_rate": 1.1996006943701676e-07, "loss": 0.1447, "step": 31955 }, { "epoch": 0.9322597584456502, "grad_norm": 0.8064543591715934, "learning_rate": 1.1985722589932747e-07, "loss": 0.1047, "step": 31956 }, { "epoch": 0.9322889316762938, "grad_norm": 0.7057703760831878, "learning_rate": 1.1975442593038788e-07, "loss": 0.1044, "step": 31957 }, { "epoch": 0.9323181049069373, "grad_norm": 0.6704747895430312, "learning_rate": 1.1965166953111508e-07, "loss": 0.0972, "step": 31958 }, { "epoch": 0.932347278137581, "grad_norm": 0.7856997979761003, "learning_rate": 1.1954895670242717e-07, "loss": 0.1125, "step": 31959 }, { "epoch": 0.9323764513682246, "grad_norm": 0.8636025047379615, "learning_rate": 1.194462874452418e-07, "loss": 0.1028, "step": 31960 }, { "epoch": 0.9324056245988681, "grad_norm": 0.9302947967398764, "learning_rate": 1.193436617604743e-07, "loss": 0.1224, "step": 31961 }, { "epoch": 0.9324347978295117, "grad_norm": 0.8765440605059343, "learning_rate": 1.1924107964904175e-07, "loss": 0.1388, "step": 31962 }, { "epoch": 0.9324639710601552, "grad_norm": 0.852805482050188, "learning_rate": 1.1913854111186008e-07, "loss": 0.133, "step": 31963 }, { "epoch": 0.9324931442907988, "grad_norm": 0.8567529364954466, "learning_rate": 1.190360461498441e-07, "loss": 0.1119, "step": 31964 }, { "epoch": 0.9325223175214423, "grad_norm": 0.8774635317691548, "learning_rate": 1.1893359476390809e-07, "loss": 0.1172, "step": 31965 }, { "epoch": 0.9325514907520859, "grad_norm": 0.7730345767223594, "learning_rate": 1.1883118695496853e-07, "loss": 0.1099, "step": 31966 }, { "epoch": 0.9325806639827294, "grad_norm": 0.954948814954049, "learning_rate": 1.1872882272393915e-07, "loss": 0.1122, "step": 31967 }, { "epoch": 0.932609837213373, "grad_norm": 0.8080294698130455, "learning_rate": 1.1862650207173365e-07, "loss": 0.1199, "step": 31968 }, { "epoch": 0.9326390104440165, "grad_norm": 1.2116619766730552, "learning_rate": 1.1852422499926519e-07, "loss": 0.0941, "step": 31969 }, { "epoch": 0.9326681836746601, "grad_norm": 0.8350327503632, "learning_rate": 1.1842199150744749e-07, "loss": 0.1017, "step": 31970 }, { "epoch": 0.9326973569053036, "grad_norm": 0.8437080265607998, "learning_rate": 1.1831980159719203e-07, "loss": 0.0813, "step": 31971 }, { "epoch": 0.9327265301359473, "grad_norm": 0.8397235501594001, "learning_rate": 1.1821765526941254e-07, "loss": 0.1091, "step": 31972 }, { "epoch": 0.9327557033665909, "grad_norm": 0.7604132266193332, "learning_rate": 1.1811555252502105e-07, "loss": 0.114, "step": 31973 }, { "epoch": 0.9327848765972344, "grad_norm": 0.842547410919885, "learning_rate": 1.1801349336492796e-07, "loss": 0.1102, "step": 31974 }, { "epoch": 0.932814049827878, "grad_norm": 1.0731862014744726, "learning_rate": 1.1791147779004474e-07, "loss": 0.0986, "step": 31975 }, { "epoch": 0.9328432230585215, "grad_norm": 0.696695142887009, "learning_rate": 1.1780950580128292e-07, "loss": 0.1141, "step": 31976 }, { "epoch": 0.9328723962891651, "grad_norm": 0.8698705517419311, "learning_rate": 1.1770757739955174e-07, "loss": 0.1044, "step": 31977 }, { "epoch": 0.9329015695198086, "grad_norm": 0.9605724397268341, "learning_rate": 1.1760569258576215e-07, "loss": 0.1014, "step": 31978 }, { "epoch": 0.9329307427504522, "grad_norm": 0.9189215535029513, "learning_rate": 1.1750385136082343e-07, "loss": 0.1069, "step": 31979 }, { "epoch": 0.9329599159810957, "grad_norm": 0.8198878868640496, "learning_rate": 1.1740205372564484e-07, "loss": 0.1059, "step": 31980 }, { "epoch": 0.9329890892117393, "grad_norm": 0.8415747852264213, "learning_rate": 1.173002996811351e-07, "loss": 0.1285, "step": 31981 }, { "epoch": 0.9330182624423828, "grad_norm": 0.7685357106287352, "learning_rate": 1.1719858922820293e-07, "loss": 0.1131, "step": 31982 }, { "epoch": 0.9330474356730264, "grad_norm": 0.830730054729226, "learning_rate": 1.1709692236775538e-07, "loss": 0.1162, "step": 31983 }, { "epoch": 0.93307660890367, "grad_norm": 0.7540497253570982, "learning_rate": 1.1699529910070173e-07, "loss": 0.1008, "step": 31984 }, { "epoch": 0.9331057821343135, "grad_norm": 0.924277777353587, "learning_rate": 1.1689371942794791e-07, "loss": 0.1048, "step": 31985 }, { "epoch": 0.9331349553649572, "grad_norm": 0.7920417138843862, "learning_rate": 1.1679218335040155e-07, "loss": 0.0971, "step": 31986 }, { "epoch": 0.9331641285956007, "grad_norm": 0.7963482116421964, "learning_rate": 1.1669069086896911e-07, "loss": 0.1026, "step": 31987 }, { "epoch": 0.9331933018262443, "grad_norm": 0.807518314704998, "learning_rate": 1.1658924198455546e-07, "loss": 0.0989, "step": 31988 }, { "epoch": 0.9332224750568878, "grad_norm": 0.9167264203577618, "learning_rate": 1.1648783669806762e-07, "loss": 0.0817, "step": 31989 }, { "epoch": 0.9332516482875314, "grad_norm": 0.7444747618564228, "learning_rate": 1.16386475010411e-07, "loss": 0.1103, "step": 31990 }, { "epoch": 0.9332808215181749, "grad_norm": 0.8915099216220743, "learning_rate": 1.1628515692249042e-07, "loss": 0.1158, "step": 31991 }, { "epoch": 0.9333099947488185, "grad_norm": 0.8595692223499609, "learning_rate": 1.1618388243520906e-07, "loss": 0.1202, "step": 31992 }, { "epoch": 0.933339167979462, "grad_norm": 0.7402023856329026, "learning_rate": 1.1608265154947285e-07, "loss": 0.1016, "step": 31993 }, { "epoch": 0.9333683412101056, "grad_norm": 0.8873259396295621, "learning_rate": 1.1598146426618495e-07, "loss": 0.1211, "step": 31994 }, { "epoch": 0.9333975144407491, "grad_norm": 0.9479931842533931, "learning_rate": 1.1588032058624798e-07, "loss": 0.1532, "step": 31995 }, { "epoch": 0.9334266876713927, "grad_norm": 0.898471466202595, "learning_rate": 1.1577922051056622e-07, "loss": 0.1103, "step": 31996 }, { "epoch": 0.9334558609020362, "grad_norm": 0.7137174191426734, "learning_rate": 1.1567816404004173e-07, "loss": 0.1149, "step": 31997 }, { "epoch": 0.9334850341326798, "grad_norm": 1.0439192802552064, "learning_rate": 1.15577151175576e-07, "loss": 0.1083, "step": 31998 }, { "epoch": 0.9335142073633235, "grad_norm": 0.9115704089747924, "learning_rate": 1.1547618191807164e-07, "loss": 0.0964, "step": 31999 }, { "epoch": 0.933543380593967, "grad_norm": 0.9143799232175939, "learning_rate": 1.1537525626843016e-07, "loss": 0.1159, "step": 32000 }, { "epoch": 0.9335725538246106, "grad_norm": 0.7971847368233306, "learning_rate": 1.1527437422755194e-07, "loss": 0.1197, "step": 32001 }, { "epoch": 0.9336017270552541, "grad_norm": 0.8082456541975708, "learning_rate": 1.1517353579633795e-07, "loss": 0.1226, "step": 32002 }, { "epoch": 0.9336309002858977, "grad_norm": 1.0664364553710668, "learning_rate": 1.150727409756891e-07, "loss": 0.1239, "step": 32003 }, { "epoch": 0.9336600735165412, "grad_norm": 0.7235267375604488, "learning_rate": 1.149719897665047e-07, "loss": 0.0996, "step": 32004 }, { "epoch": 0.9336892467471848, "grad_norm": 0.7508616798731628, "learning_rate": 1.1487128216968346e-07, "loss": 0.1029, "step": 32005 }, { "epoch": 0.9337184199778283, "grad_norm": 0.8663010406096253, "learning_rate": 1.1477061818612634e-07, "loss": 0.1005, "step": 32006 }, { "epoch": 0.9337475932084719, "grad_norm": 0.790146975826344, "learning_rate": 1.1466999781672982e-07, "loss": 0.1071, "step": 32007 }, { "epoch": 0.9337767664391154, "grad_norm": 0.7646811019953647, "learning_rate": 1.1456942106239377e-07, "loss": 0.1154, "step": 32008 }, { "epoch": 0.933805939669759, "grad_norm": 0.8885198983739938, "learning_rate": 1.1446888792401578e-07, "loss": 0.1067, "step": 32009 }, { "epoch": 0.9338351129004026, "grad_norm": 0.8334308513836821, "learning_rate": 1.1436839840249347e-07, "loss": 0.1062, "step": 32010 }, { "epoch": 0.9338642861310461, "grad_norm": 0.7974042968390833, "learning_rate": 1.1426795249872335e-07, "loss": 0.1165, "step": 32011 }, { "epoch": 0.9338934593616897, "grad_norm": 0.844353420312567, "learning_rate": 1.1416755021360304e-07, "loss": 0.1104, "step": 32012 }, { "epoch": 0.9339226325923333, "grad_norm": 1.0480805887133557, "learning_rate": 1.1406719154802848e-07, "loss": 0.1339, "step": 32013 }, { "epoch": 0.9339518058229769, "grad_norm": 0.9778261300554228, "learning_rate": 1.1396687650289561e-07, "loss": 0.1067, "step": 32014 }, { "epoch": 0.9339809790536204, "grad_norm": 0.9799128343500716, "learning_rate": 1.1386660507909986e-07, "loss": 0.1121, "step": 32015 }, { "epoch": 0.934010152284264, "grad_norm": 0.7926375659453858, "learning_rate": 1.1376637727753658e-07, "loss": 0.1157, "step": 32016 }, { "epoch": 0.9340393255149075, "grad_norm": 0.9591995660342114, "learning_rate": 1.136661930991012e-07, "loss": 0.1117, "step": 32017 }, { "epoch": 0.9340684987455511, "grad_norm": 0.8650595079341008, "learning_rate": 1.135660525446869e-07, "loss": 0.1177, "step": 32018 }, { "epoch": 0.9340976719761946, "grad_norm": 0.8103428031610451, "learning_rate": 1.1346595561518848e-07, "loss": 0.1335, "step": 32019 }, { "epoch": 0.9341268452068382, "grad_norm": 1.015601796023541, "learning_rate": 1.1336590231150024e-07, "loss": 0.1404, "step": 32020 }, { "epoch": 0.9341560184374817, "grad_norm": 1.02095304218318, "learning_rate": 1.1326589263451427e-07, "loss": 0.1121, "step": 32021 }, { "epoch": 0.9341851916681253, "grad_norm": 0.7554192565240885, "learning_rate": 1.1316592658512371e-07, "loss": 0.0964, "step": 32022 }, { "epoch": 0.9342143648987689, "grad_norm": 0.6385913709875719, "learning_rate": 1.130660041642212e-07, "loss": 0.1118, "step": 32023 }, { "epoch": 0.9342435381294124, "grad_norm": 0.9340508884192655, "learning_rate": 1.1296612537269935e-07, "loss": 0.1145, "step": 32024 }, { "epoch": 0.934272711360056, "grad_norm": 0.8276418139511901, "learning_rate": 1.1286629021144802e-07, "loss": 0.112, "step": 32025 }, { "epoch": 0.9343018845906996, "grad_norm": 0.8096684738787501, "learning_rate": 1.1276649868136091e-07, "loss": 0.1191, "step": 32026 }, { "epoch": 0.9343310578213432, "grad_norm": 0.9568940920296325, "learning_rate": 1.1266675078332734e-07, "loss": 0.1004, "step": 32027 }, { "epoch": 0.9343602310519867, "grad_norm": 0.7776215396400586, "learning_rate": 1.1256704651823825e-07, "loss": 0.1171, "step": 32028 }, { "epoch": 0.9343894042826303, "grad_norm": 0.7800938299421314, "learning_rate": 1.1246738588698458e-07, "loss": 0.0976, "step": 32029 }, { "epoch": 0.9344185775132738, "grad_norm": 0.6713518889285239, "learning_rate": 1.1236776889045508e-07, "loss": 0.1123, "step": 32030 }, { "epoch": 0.9344477507439174, "grad_norm": 1.3320616497280364, "learning_rate": 1.1226819552953849e-07, "loss": 0.1193, "step": 32031 }, { "epoch": 0.934476923974561, "grad_norm": 0.8440007410344869, "learning_rate": 1.121686658051252e-07, "loss": 0.1149, "step": 32032 }, { "epoch": 0.9345060972052045, "grad_norm": 0.7885423931377564, "learning_rate": 1.1206917971810339e-07, "loss": 0.098, "step": 32033 }, { "epoch": 0.934535270435848, "grad_norm": 1.829464074456043, "learning_rate": 1.1196973726936122e-07, "loss": 0.1248, "step": 32034 }, { "epoch": 0.9345644436664916, "grad_norm": 0.7603648793100546, "learning_rate": 1.1187033845978635e-07, "loss": 0.0902, "step": 32035 }, { "epoch": 0.9345936168971352, "grad_norm": 0.8812911824695407, "learning_rate": 1.1177098329026581e-07, "loss": 0.1017, "step": 32036 }, { "epoch": 0.9346227901277787, "grad_norm": 0.8232236047285875, "learning_rate": 1.1167167176168725e-07, "loss": 0.1251, "step": 32037 }, { "epoch": 0.9346519633584223, "grad_norm": 0.8740217291491303, "learning_rate": 1.1157240387493662e-07, "loss": 0.1219, "step": 32038 }, { "epoch": 0.9346811365890658, "grad_norm": 0.8123527785586346, "learning_rate": 1.1147317963090154e-07, "loss": 0.1116, "step": 32039 }, { "epoch": 0.9347103098197095, "grad_norm": 0.889147925885064, "learning_rate": 1.113739990304663e-07, "loss": 0.1074, "step": 32040 }, { "epoch": 0.934739483050353, "grad_norm": 0.796997302515763, "learning_rate": 1.1127486207451687e-07, "loss": 0.1272, "step": 32041 }, { "epoch": 0.9347686562809966, "grad_norm": 0.866471962490944, "learning_rate": 1.1117576876393921e-07, "loss": 0.1015, "step": 32042 }, { "epoch": 0.9347978295116401, "grad_norm": 0.7993024059883606, "learning_rate": 1.1107671909961648e-07, "loss": 0.0878, "step": 32043 }, { "epoch": 0.9348270027422837, "grad_norm": 0.8144702879758206, "learning_rate": 1.109777130824341e-07, "loss": 0.1101, "step": 32044 }, { "epoch": 0.9348561759729273, "grad_norm": 0.8887016433567662, "learning_rate": 1.1087875071327525e-07, "loss": 0.0909, "step": 32045 }, { "epoch": 0.9348853492035708, "grad_norm": 0.974323575414125, "learning_rate": 1.1077983199302422e-07, "loss": 0.1086, "step": 32046 }, { "epoch": 0.9349145224342144, "grad_norm": 0.700641465039684, "learning_rate": 1.1068095692256364e-07, "loss": 0.1202, "step": 32047 }, { "epoch": 0.9349436956648579, "grad_norm": 0.8344842765064419, "learning_rate": 1.1058212550277558e-07, "loss": 0.1092, "step": 32048 }, { "epoch": 0.9349728688955015, "grad_norm": 0.8276408686580056, "learning_rate": 1.1048333773454378e-07, "loss": 0.1098, "step": 32049 }, { "epoch": 0.935002042126145, "grad_norm": 0.6993079332475102, "learning_rate": 1.103845936187492e-07, "loss": 0.0848, "step": 32050 }, { "epoch": 0.9350312153567886, "grad_norm": 0.779769502283324, "learning_rate": 1.1028589315627448e-07, "loss": 0.1039, "step": 32051 }, { "epoch": 0.9350603885874321, "grad_norm": 1.1898230035142983, "learning_rate": 1.1018723634799888e-07, "loss": 0.1305, "step": 32052 }, { "epoch": 0.9350895618180758, "grad_norm": 0.77744475191222, "learning_rate": 1.1008862319480562e-07, "loss": 0.1017, "step": 32053 }, { "epoch": 0.9351187350487193, "grad_norm": 0.8275057891511748, "learning_rate": 1.0999005369757287e-07, "loss": 0.1094, "step": 32054 }, { "epoch": 0.9351479082793629, "grad_norm": 0.7830985358322079, "learning_rate": 1.098915278571816e-07, "loss": 0.1077, "step": 32055 }, { "epoch": 0.9351770815100064, "grad_norm": 1.038007306517236, "learning_rate": 1.0979304567451166e-07, "loss": 0.1104, "step": 32056 }, { "epoch": 0.93520625474065, "grad_norm": 0.7998792968146583, "learning_rate": 1.0969460715044234e-07, "loss": 0.1038, "step": 32057 }, { "epoch": 0.9352354279712936, "grad_norm": 0.7130960643140338, "learning_rate": 1.0959621228585126e-07, "loss": 0.0875, "step": 32058 }, { "epoch": 0.9352646012019371, "grad_norm": 0.7991544545494476, "learning_rate": 1.0949786108161885e-07, "loss": 0.1205, "step": 32059 }, { "epoch": 0.9352937744325807, "grad_norm": 0.9583770351838974, "learning_rate": 1.0939955353862164e-07, "loss": 0.1215, "step": 32060 }, { "epoch": 0.9353229476632242, "grad_norm": 0.708747277978358, "learning_rate": 1.0930128965773723e-07, "loss": 0.1091, "step": 32061 }, { "epoch": 0.9353521208938678, "grad_norm": 0.6758522069728627, "learning_rate": 1.0920306943984383e-07, "loss": 0.0796, "step": 32062 }, { "epoch": 0.9353812941245113, "grad_norm": 0.7543564942859913, "learning_rate": 1.0910489288581794e-07, "loss": 0.0836, "step": 32063 }, { "epoch": 0.9354104673551549, "grad_norm": 1.2929445917883506, "learning_rate": 1.0900675999653609e-07, "loss": 0.096, "step": 32064 }, { "epoch": 0.9354396405857984, "grad_norm": 0.8529796359826995, "learning_rate": 1.0890867077287425e-07, "loss": 0.0917, "step": 32065 }, { "epoch": 0.935468813816442, "grad_norm": 0.7370111459922979, "learning_rate": 1.088106252157084e-07, "loss": 0.1283, "step": 32066 }, { "epoch": 0.9354979870470856, "grad_norm": 0.9501203927021816, "learning_rate": 1.0871262332591281e-07, "loss": 0.1104, "step": 32067 }, { "epoch": 0.9355271602777292, "grad_norm": 0.8333221115080217, "learning_rate": 1.0861466510436347e-07, "loss": 0.1006, "step": 32068 }, { "epoch": 0.9355563335083728, "grad_norm": 0.7896071035671067, "learning_rate": 1.0851675055193579e-07, "loss": 0.1054, "step": 32069 }, { "epoch": 0.9355855067390163, "grad_norm": 0.8726117427575014, "learning_rate": 1.0841887966950237e-07, "loss": 0.1, "step": 32070 }, { "epoch": 0.9356146799696599, "grad_norm": 0.8472957512982819, "learning_rate": 1.08321052457937e-07, "loss": 0.1025, "step": 32071 }, { "epoch": 0.9356438532003034, "grad_norm": 0.9050049817836707, "learning_rate": 1.0822326891811396e-07, "loss": 0.1128, "step": 32072 }, { "epoch": 0.935673026430947, "grad_norm": 0.7629335989022237, "learning_rate": 1.0812552905090534e-07, "loss": 0.142, "step": 32073 }, { "epoch": 0.9357021996615905, "grad_norm": 0.7569119322303541, "learning_rate": 1.0802783285718488e-07, "loss": 0.0998, "step": 32074 }, { "epoch": 0.9357313728922341, "grad_norm": 0.8736446102508948, "learning_rate": 1.0793018033782355e-07, "loss": 0.107, "step": 32075 }, { "epoch": 0.9357605461228776, "grad_norm": 0.9946571567558669, "learning_rate": 1.0783257149369453e-07, "loss": 0.1203, "step": 32076 }, { "epoch": 0.9357897193535212, "grad_norm": 1.1956383419050371, "learning_rate": 1.0773500632566769e-07, "loss": 0.1192, "step": 32077 }, { "epoch": 0.9358188925841647, "grad_norm": 0.8033344768674102, "learning_rate": 1.0763748483461511e-07, "loss": 0.1095, "step": 32078 }, { "epoch": 0.9358480658148083, "grad_norm": 0.8577610732697442, "learning_rate": 1.0754000702140666e-07, "loss": 0.1083, "step": 32079 }, { "epoch": 0.935877239045452, "grad_norm": 1.2640094165415217, "learning_rate": 1.0744257288691384e-07, "loss": 0.1301, "step": 32080 }, { "epoch": 0.9359064122760955, "grad_norm": 0.88340311691982, "learning_rate": 1.0734518243200598e-07, "loss": 0.1192, "step": 32081 }, { "epoch": 0.935935585506739, "grad_norm": 0.8064643768696518, "learning_rate": 1.0724783565755126e-07, "loss": 0.112, "step": 32082 }, { "epoch": 0.9359647587373826, "grad_norm": 0.8475180988623313, "learning_rate": 1.071505325644212e-07, "loss": 0.1187, "step": 32083 }, { "epoch": 0.9359939319680262, "grad_norm": 0.9446665934044493, "learning_rate": 1.0705327315348235e-07, "loss": 0.1235, "step": 32084 }, { "epoch": 0.9360231051986697, "grad_norm": 0.768170012115103, "learning_rate": 1.0695605742560345e-07, "loss": 0.1149, "step": 32085 }, { "epoch": 0.9360522784293133, "grad_norm": 0.7996103309772392, "learning_rate": 1.0685888538165323e-07, "loss": 0.1085, "step": 32086 }, { "epoch": 0.9360814516599568, "grad_norm": 0.8840239057508736, "learning_rate": 1.0676175702249936e-07, "loss": 0.1163, "step": 32087 }, { "epoch": 0.9361106248906004, "grad_norm": 1.0532790900297067, "learning_rate": 1.0666467234900779e-07, "loss": 0.1052, "step": 32088 }, { "epoch": 0.9361397981212439, "grad_norm": 0.8433485382052007, "learning_rate": 1.0656763136204617e-07, "loss": 0.1038, "step": 32089 }, { "epoch": 0.9361689713518875, "grad_norm": 0.7560474299323872, "learning_rate": 1.0647063406248048e-07, "loss": 0.1131, "step": 32090 }, { "epoch": 0.936198144582531, "grad_norm": 0.7042252536882668, "learning_rate": 1.0637368045117669e-07, "loss": 0.1216, "step": 32091 }, { "epoch": 0.9362273178131746, "grad_norm": 0.9983765184405167, "learning_rate": 1.062767705290002e-07, "loss": 0.1196, "step": 32092 }, { "epoch": 0.9362564910438181, "grad_norm": 0.8783148249243051, "learning_rate": 1.0617990429681702e-07, "loss": 0.1095, "step": 32093 }, { "epoch": 0.9362856642744618, "grad_norm": 0.7706835504619733, "learning_rate": 1.0608308175549142e-07, "loss": 0.1227, "step": 32094 }, { "epoch": 0.9363148375051054, "grad_norm": 0.8939887644792357, "learning_rate": 1.0598630290588718e-07, "loss": 0.1011, "step": 32095 }, { "epoch": 0.9363440107357489, "grad_norm": 0.7309322511998092, "learning_rate": 1.0588956774886971e-07, "loss": 0.1088, "step": 32096 }, { "epoch": 0.9363731839663925, "grad_norm": 0.6661812416048095, "learning_rate": 1.057928762853011e-07, "loss": 0.1095, "step": 32097 }, { "epoch": 0.936402357197036, "grad_norm": 0.8456105650420225, "learning_rate": 1.0569622851604567e-07, "loss": 0.109, "step": 32098 }, { "epoch": 0.9364315304276796, "grad_norm": 0.86400129011861, "learning_rate": 1.0559962444196603e-07, "loss": 0.1139, "step": 32099 }, { "epoch": 0.9364607036583231, "grad_norm": 1.114747802139998, "learning_rate": 1.0550306406392486e-07, "loss": 0.0981, "step": 32100 }, { "epoch": 0.9364898768889667, "grad_norm": 0.8562614775002866, "learning_rate": 1.0540654738278366e-07, "loss": 0.1225, "step": 32101 }, { "epoch": 0.9365190501196102, "grad_norm": 0.8197540439884026, "learning_rate": 1.0531007439940455e-07, "loss": 0.0977, "step": 32102 }, { "epoch": 0.9365482233502538, "grad_norm": 1.1298989345727763, "learning_rate": 1.0521364511464794e-07, "loss": 0.1151, "step": 32103 }, { "epoch": 0.9365773965808973, "grad_norm": 0.833695601396288, "learning_rate": 1.051172595293759e-07, "loss": 0.1046, "step": 32104 }, { "epoch": 0.9366065698115409, "grad_norm": 0.9868865547606661, "learning_rate": 1.0502091764444833e-07, "loss": 0.1126, "step": 32105 }, { "epoch": 0.9366357430421844, "grad_norm": 0.8271959984514432, "learning_rate": 1.0492461946072563e-07, "loss": 0.1155, "step": 32106 }, { "epoch": 0.9366649162728281, "grad_norm": 0.8410155090686664, "learning_rate": 1.0482836497906768e-07, "loss": 0.1013, "step": 32107 }, { "epoch": 0.9366940895034717, "grad_norm": 0.7599988649280618, "learning_rate": 1.0473215420033322e-07, "loss": 0.123, "step": 32108 }, { "epoch": 0.9367232627341152, "grad_norm": 0.7464152785830234, "learning_rate": 1.0463598712538104e-07, "loss": 0.1349, "step": 32109 }, { "epoch": 0.9367524359647588, "grad_norm": 0.9067006560858255, "learning_rate": 1.0453986375507097e-07, "loss": 0.0916, "step": 32110 }, { "epoch": 0.9367816091954023, "grad_norm": 0.7312901611799437, "learning_rate": 1.0444378409026012e-07, "loss": 0.1003, "step": 32111 }, { "epoch": 0.9368107824260459, "grad_norm": 0.9593404712318456, "learning_rate": 1.0434774813180615e-07, "loss": 0.1263, "step": 32112 }, { "epoch": 0.9368399556566894, "grad_norm": 0.9865768206964413, "learning_rate": 1.0425175588056724e-07, "loss": 0.1046, "step": 32113 }, { "epoch": 0.936869128887333, "grad_norm": 0.9644272670639106, "learning_rate": 1.0415580733739994e-07, "loss": 0.0898, "step": 32114 }, { "epoch": 0.9368983021179765, "grad_norm": 0.7781047636323862, "learning_rate": 1.0405990250315967e-07, "loss": 0.0851, "step": 32115 }, { "epoch": 0.9369274753486201, "grad_norm": 0.9933907973999181, "learning_rate": 1.039640413787052e-07, "loss": 0.1071, "step": 32116 }, { "epoch": 0.9369566485792636, "grad_norm": 0.8128984664761882, "learning_rate": 1.0386822396489027e-07, "loss": 0.1204, "step": 32117 }, { "epoch": 0.9369858218099072, "grad_norm": 0.8148280473503925, "learning_rate": 1.0377245026257143e-07, "loss": 0.1088, "step": 32118 }, { "epoch": 0.9370149950405507, "grad_norm": 0.6600777179891715, "learning_rate": 1.0367672027260356e-07, "loss": 0.1235, "step": 32119 }, { "epoch": 0.9370441682711943, "grad_norm": 0.7702166296209475, "learning_rate": 1.0358103399584096e-07, "loss": 0.0957, "step": 32120 }, { "epoch": 0.937073341501838, "grad_norm": 0.8574890822227507, "learning_rate": 1.0348539143313741e-07, "loss": 0.103, "step": 32121 }, { "epoch": 0.9371025147324815, "grad_norm": 0.7744931859645473, "learning_rate": 1.0338979258534776e-07, "loss": 0.102, "step": 32122 }, { "epoch": 0.9371316879631251, "grad_norm": 0.7518278067773781, "learning_rate": 1.0329423745332523e-07, "loss": 0.107, "step": 32123 }, { "epoch": 0.9371608611937686, "grad_norm": 1.0942535864638883, "learning_rate": 1.0319872603792302e-07, "loss": 0.1049, "step": 32124 }, { "epoch": 0.9371900344244122, "grad_norm": 1.0858144600217252, "learning_rate": 1.0310325833999269e-07, "loss": 0.0933, "step": 32125 }, { "epoch": 0.9372192076550557, "grad_norm": 1.026925791389885, "learning_rate": 1.0300783436038852e-07, "loss": 0.125, "step": 32126 }, { "epoch": 0.9372483808856993, "grad_norm": 0.8653665951855168, "learning_rate": 1.0291245409996097e-07, "loss": 0.1069, "step": 32127 }, { "epoch": 0.9372775541163428, "grad_norm": 1.0504917778941396, "learning_rate": 1.0281711755956159e-07, "loss": 0.0888, "step": 32128 }, { "epoch": 0.9373067273469864, "grad_norm": 0.9266732479399925, "learning_rate": 1.0272182474004299e-07, "loss": 0.1189, "step": 32129 }, { "epoch": 0.9373359005776299, "grad_norm": 0.9230162594241422, "learning_rate": 1.0262657564225397e-07, "loss": 0.1184, "step": 32130 }, { "epoch": 0.9373650738082735, "grad_norm": 0.8241398713510739, "learning_rate": 1.0253137026704607e-07, "loss": 0.1142, "step": 32131 }, { "epoch": 0.937394247038917, "grad_norm": 0.8606522985963572, "learning_rate": 1.0243620861526915e-07, "loss": 0.0991, "step": 32132 }, { "epoch": 0.9374234202695606, "grad_norm": 0.8115170765044549, "learning_rate": 1.0234109068777254e-07, "loss": 0.1125, "step": 32133 }, { "epoch": 0.9374525935002043, "grad_norm": 0.7330669259049344, "learning_rate": 1.0224601648540555e-07, "loss": 0.1186, "step": 32134 }, { "epoch": 0.9374817667308478, "grad_norm": 0.9528809566799136, "learning_rate": 1.021509860090164e-07, "loss": 0.1318, "step": 32135 }, { "epoch": 0.9375109399614914, "grad_norm": 1.2634693443948881, "learning_rate": 1.0205599925945442e-07, "loss": 0.1086, "step": 32136 }, { "epoch": 0.9375401131921349, "grad_norm": 0.861775568004273, "learning_rate": 1.0196105623756781e-07, "loss": 0.1107, "step": 32137 }, { "epoch": 0.9375692864227785, "grad_norm": 0.8751065366212872, "learning_rate": 1.0186615694420255e-07, "loss": 0.1303, "step": 32138 }, { "epoch": 0.937598459653422, "grad_norm": 0.7100950551611143, "learning_rate": 1.0177130138020741e-07, "loss": 0.0952, "step": 32139 }, { "epoch": 0.9376276328840656, "grad_norm": 0.8449839806862404, "learning_rate": 1.0167648954642895e-07, "loss": 0.127, "step": 32140 }, { "epoch": 0.9376568061147091, "grad_norm": 0.6529840587223028, "learning_rate": 1.0158172144371369e-07, "loss": 0.1051, "step": 32141 }, { "epoch": 0.9376859793453527, "grad_norm": 0.8914275159657254, "learning_rate": 1.0148699707290711e-07, "loss": 0.1235, "step": 32142 }, { "epoch": 0.9377151525759962, "grad_norm": 0.7751532525943846, "learning_rate": 1.013923164348557e-07, "loss": 0.1017, "step": 32143 }, { "epoch": 0.9377443258066398, "grad_norm": 0.9557705989379754, "learning_rate": 1.0129767953040326e-07, "loss": 0.1229, "step": 32144 }, { "epoch": 0.9377734990372834, "grad_norm": 0.8410134147210956, "learning_rate": 1.0120308636039632e-07, "loss": 0.1123, "step": 32145 }, { "epoch": 0.9378026722679269, "grad_norm": 1.1326334761987327, "learning_rate": 1.0110853692567924e-07, "loss": 0.1247, "step": 32146 }, { "epoch": 0.9378318454985705, "grad_norm": 0.9496660829235316, "learning_rate": 1.0101403122709518e-07, "loss": 0.1071, "step": 32147 }, { "epoch": 0.9378610187292141, "grad_norm": 0.8437905836275701, "learning_rate": 1.0091956926548852e-07, "loss": 0.1181, "step": 32148 }, { "epoch": 0.9378901919598577, "grad_norm": 1.0453297402604065, "learning_rate": 1.0082515104170243e-07, "loss": 0.12, "step": 32149 }, { "epoch": 0.9379193651905012, "grad_norm": 0.8428288523939395, "learning_rate": 1.0073077655657959e-07, "loss": 0.1256, "step": 32150 }, { "epoch": 0.9379485384211448, "grad_norm": 0.7548680701419213, "learning_rate": 1.0063644581096322e-07, "loss": 0.1148, "step": 32151 }, { "epoch": 0.9379777116517883, "grad_norm": 0.9450970919153946, "learning_rate": 1.0054215880569485e-07, "loss": 0.1304, "step": 32152 }, { "epoch": 0.9380068848824319, "grad_norm": 0.8915063403696859, "learning_rate": 1.0044791554161659e-07, "loss": 0.1171, "step": 32153 }, { "epoch": 0.9380360581130754, "grad_norm": 0.8769882247763162, "learning_rate": 1.0035371601957e-07, "loss": 0.0982, "step": 32154 }, { "epoch": 0.938065231343719, "grad_norm": 0.8917244343084209, "learning_rate": 1.0025956024039551e-07, "loss": 0.1008, "step": 32155 }, { "epoch": 0.9380944045743626, "grad_norm": 0.8660089227440425, "learning_rate": 1.0016544820493357e-07, "loss": 0.1061, "step": 32156 }, { "epoch": 0.9381235778050061, "grad_norm": 0.8980029735838663, "learning_rate": 1.0007137991402572e-07, "loss": 0.0905, "step": 32157 }, { "epoch": 0.9381527510356497, "grad_norm": 0.7552683733797441, "learning_rate": 9.997735536851017e-08, "loss": 0.1182, "step": 32158 }, { "epoch": 0.9381819242662932, "grad_norm": 0.9670731861815297, "learning_rate": 9.988337456922737e-08, "loss": 0.1212, "step": 32159 }, { "epoch": 0.9382110974969368, "grad_norm": 0.7434491962365531, "learning_rate": 9.978943751701609e-08, "loss": 0.1103, "step": 32160 }, { "epoch": 0.9382402707275804, "grad_norm": 0.7559641120074799, "learning_rate": 9.969554421271455e-08, "loss": 0.0968, "step": 32161 }, { "epoch": 0.938269443958224, "grad_norm": 0.8034284376231915, "learning_rate": 9.960169465716152e-08, "loss": 0.1308, "step": 32162 }, { "epoch": 0.9382986171888675, "grad_norm": 0.757018958058187, "learning_rate": 9.950788885119522e-08, "loss": 0.1431, "step": 32163 }, { "epoch": 0.9383277904195111, "grad_norm": 1.1414076423319532, "learning_rate": 9.941412679565276e-08, "loss": 0.1374, "step": 32164 }, { "epoch": 0.9383569636501546, "grad_norm": 1.070905765134784, "learning_rate": 9.932040849137014e-08, "loss": 0.0941, "step": 32165 }, { "epoch": 0.9383861368807982, "grad_norm": 0.8973320288044775, "learning_rate": 9.922673393918614e-08, "loss": 0.1241, "step": 32166 }, { "epoch": 0.9384153101114417, "grad_norm": 1.2789932825467665, "learning_rate": 9.913310313993562e-08, "loss": 0.0778, "step": 32167 }, { "epoch": 0.9384444833420853, "grad_norm": 0.8551584840952743, "learning_rate": 9.903951609445406e-08, "loss": 0.0932, "step": 32168 }, { "epoch": 0.9384736565727289, "grad_norm": 0.7892981867974093, "learning_rate": 9.894597280357798e-08, "loss": 0.1129, "step": 32169 }, { "epoch": 0.9385028298033724, "grad_norm": 1.038712545214295, "learning_rate": 9.885247326814285e-08, "loss": 0.1077, "step": 32170 }, { "epoch": 0.938532003034016, "grad_norm": 1.0693996277997444, "learning_rate": 9.875901748898298e-08, "loss": 0.0953, "step": 32171 }, { "epoch": 0.9385611762646595, "grad_norm": 0.8098148128622791, "learning_rate": 9.86656054669316e-08, "loss": 0.1142, "step": 32172 }, { "epoch": 0.9385903494953031, "grad_norm": 0.7900266748948263, "learning_rate": 9.857223720282472e-08, "loss": 0.1078, "step": 32173 }, { "epoch": 0.9386195227259466, "grad_norm": 0.9237403114889481, "learning_rate": 9.847891269749388e-08, "loss": 0.1015, "step": 32174 }, { "epoch": 0.9386486959565903, "grad_norm": 0.8923289365142516, "learning_rate": 9.838563195177342e-08, "loss": 0.1209, "step": 32175 }, { "epoch": 0.9386778691872338, "grad_norm": 0.7569550043956034, "learning_rate": 9.829239496649656e-08, "loss": 0.1195, "step": 32176 }, { "epoch": 0.9387070424178774, "grad_norm": 0.6888915327303003, "learning_rate": 9.819920174249486e-08, "loss": 0.1012, "step": 32177 }, { "epoch": 0.9387362156485209, "grad_norm": 0.7796935344380027, "learning_rate": 9.810605228059988e-08, "loss": 0.0985, "step": 32178 }, { "epoch": 0.9387653888791645, "grad_norm": 0.7837266477355513, "learning_rate": 9.801294658164484e-08, "loss": 0.1067, "step": 32179 }, { "epoch": 0.938794562109808, "grad_norm": 0.8458686648916788, "learning_rate": 9.791988464645907e-08, "loss": 0.096, "step": 32180 }, { "epoch": 0.9388237353404516, "grad_norm": 0.7529063868239642, "learning_rate": 9.782686647587524e-08, "loss": 0.1069, "step": 32181 }, { "epoch": 0.9388529085710952, "grad_norm": 0.8567471399250969, "learning_rate": 9.773389207072214e-08, "loss": 0.104, "step": 32182 }, { "epoch": 0.9388820818017387, "grad_norm": 0.7800869455553334, "learning_rate": 9.764096143183133e-08, "loss": 0.1197, "step": 32183 }, { "epoch": 0.9389112550323823, "grad_norm": 0.9397806084771332, "learning_rate": 9.754807456003157e-08, "loss": 0.1264, "step": 32184 }, { "epoch": 0.9389404282630258, "grad_norm": 0.809745770145711, "learning_rate": 9.745523145615166e-08, "loss": 0.0863, "step": 32185 }, { "epoch": 0.9389696014936694, "grad_norm": 0.9002286358801612, "learning_rate": 9.736243212102147e-08, "loss": 0.1165, "step": 32186 }, { "epoch": 0.9389987747243129, "grad_norm": 0.7805276518654509, "learning_rate": 9.726967655546926e-08, "loss": 0.122, "step": 32187 }, { "epoch": 0.9390279479549565, "grad_norm": 0.9600304009593331, "learning_rate": 9.717696476032267e-08, "loss": 0.1003, "step": 32188 }, { "epoch": 0.9390571211856001, "grad_norm": 0.754286919698732, "learning_rate": 9.708429673640995e-08, "loss": 0.1065, "step": 32189 }, { "epoch": 0.9390862944162437, "grad_norm": 0.8368083610034457, "learning_rate": 9.699167248455876e-08, "loss": 0.1218, "step": 32190 }, { "epoch": 0.9391154676468872, "grad_norm": 0.7588346705004958, "learning_rate": 9.689909200559455e-08, "loss": 0.1006, "step": 32191 }, { "epoch": 0.9391446408775308, "grad_norm": 0.8304193073111201, "learning_rate": 9.6806555300345e-08, "loss": 0.1198, "step": 32192 }, { "epoch": 0.9391738141081744, "grad_norm": 0.7431972213954714, "learning_rate": 9.671406236963666e-08, "loss": 0.1089, "step": 32193 }, { "epoch": 0.9392029873388179, "grad_norm": 0.624978760386684, "learning_rate": 9.662161321429441e-08, "loss": 0.0997, "step": 32194 }, { "epoch": 0.9392321605694615, "grad_norm": 0.6894520225294304, "learning_rate": 9.652920783514319e-08, "loss": 0.0965, "step": 32195 }, { "epoch": 0.939261333800105, "grad_norm": 0.8174341948911126, "learning_rate": 9.643684623300953e-08, "loss": 0.084, "step": 32196 }, { "epoch": 0.9392905070307486, "grad_norm": 1.0582396097798812, "learning_rate": 9.634452840871667e-08, "loss": 0.1178, "step": 32197 }, { "epoch": 0.9393196802613921, "grad_norm": 0.7454065307174922, "learning_rate": 9.625225436308949e-08, "loss": 0.1038, "step": 32198 }, { "epoch": 0.9393488534920357, "grad_norm": 0.8494845543516857, "learning_rate": 9.616002409695069e-08, "loss": 0.1187, "step": 32199 }, { "epoch": 0.9393780267226792, "grad_norm": 0.7343299135462298, "learning_rate": 9.60678376111257e-08, "loss": 0.1044, "step": 32200 }, { "epoch": 0.9394071999533228, "grad_norm": 0.8797948726980744, "learning_rate": 9.59756949064361e-08, "loss": 0.114, "step": 32201 }, { "epoch": 0.9394363731839664, "grad_norm": 0.8113452424525245, "learning_rate": 9.588359598370456e-08, "loss": 0.1128, "step": 32202 }, { "epoch": 0.93946554641461, "grad_norm": 0.7252834619600642, "learning_rate": 9.579154084375375e-08, "loss": 0.1058, "step": 32203 }, { "epoch": 0.9394947196452536, "grad_norm": 0.8894169244713941, "learning_rate": 9.569952948740525e-08, "loss": 0.1201, "step": 32204 }, { "epoch": 0.9395238928758971, "grad_norm": 0.7713508233508065, "learning_rate": 9.560756191548004e-08, "loss": 0.1009, "step": 32205 }, { "epoch": 0.9395530661065407, "grad_norm": 0.860856659261507, "learning_rate": 9.551563812880083e-08, "loss": 0.1411, "step": 32206 }, { "epoch": 0.9395822393371842, "grad_norm": 0.8472523668044764, "learning_rate": 9.542375812818694e-08, "loss": 0.0957, "step": 32207 }, { "epoch": 0.9396114125678278, "grad_norm": 0.8393016543633706, "learning_rate": 9.533192191445828e-08, "loss": 0.1207, "step": 32208 }, { "epoch": 0.9396405857984713, "grad_norm": 0.8120017488468546, "learning_rate": 9.524012948843586e-08, "loss": 0.1046, "step": 32209 }, { "epoch": 0.9396697590291149, "grad_norm": 0.8621657024627788, "learning_rate": 9.514838085093847e-08, "loss": 0.1239, "step": 32210 }, { "epoch": 0.9396989322597584, "grad_norm": 0.84849491023201, "learning_rate": 9.5056676002786e-08, "loss": 0.1414, "step": 32211 }, { "epoch": 0.939728105490402, "grad_norm": 0.6648422318792367, "learning_rate": 9.496501494479615e-08, "loss": 0.1237, "step": 32212 }, { "epoch": 0.9397572787210455, "grad_norm": 0.6795376554667742, "learning_rate": 9.48733976777877e-08, "loss": 0.1037, "step": 32213 }, { "epoch": 0.9397864519516891, "grad_norm": 0.7282466940752219, "learning_rate": 9.478182420257887e-08, "loss": 0.1184, "step": 32214 }, { "epoch": 0.9398156251823326, "grad_norm": 0.7548401752090214, "learning_rate": 9.46902945199868e-08, "loss": 0.0954, "step": 32215 }, { "epoch": 0.9398447984129763, "grad_norm": 0.8676952289040676, "learning_rate": 9.45988086308286e-08, "loss": 0.117, "step": 32216 }, { "epoch": 0.9398739716436199, "grad_norm": 0.8790333954437566, "learning_rate": 9.45073665359214e-08, "loss": 0.1184, "step": 32217 }, { "epoch": 0.9399031448742634, "grad_norm": 0.8883700766774195, "learning_rate": 9.441596823608123e-08, "loss": 0.0971, "step": 32218 }, { "epoch": 0.939932318104907, "grad_norm": 0.7900934281400136, "learning_rate": 9.432461373212465e-08, "loss": 0.1061, "step": 32219 }, { "epoch": 0.9399614913355505, "grad_norm": 0.9932949804585048, "learning_rate": 9.423330302486655e-08, "loss": 0.1203, "step": 32220 }, { "epoch": 0.9399906645661941, "grad_norm": 0.8228353632921792, "learning_rate": 9.41420361151224e-08, "loss": 0.0977, "step": 32221 }, { "epoch": 0.9400198377968376, "grad_norm": 0.7413417391788512, "learning_rate": 9.405081300370712e-08, "loss": 0.1055, "step": 32222 }, { "epoch": 0.9400490110274812, "grad_norm": 0.9451930479248437, "learning_rate": 9.395963369143501e-08, "loss": 0.1083, "step": 32223 }, { "epoch": 0.9400781842581247, "grad_norm": 0.8405664626413575, "learning_rate": 9.386849817912047e-08, "loss": 0.0938, "step": 32224 }, { "epoch": 0.9401073574887683, "grad_norm": 1.0092823366652526, "learning_rate": 9.377740646757616e-08, "loss": 0.1025, "step": 32225 }, { "epoch": 0.9401365307194118, "grad_norm": 0.7185649144012645, "learning_rate": 9.368635855761642e-08, "loss": 0.1294, "step": 32226 }, { "epoch": 0.9401657039500554, "grad_norm": 0.6742166336971404, "learning_rate": 9.35953544500534e-08, "loss": 0.1332, "step": 32227 }, { "epoch": 0.9401948771806989, "grad_norm": 0.9077843544231706, "learning_rate": 9.350439414569978e-08, "loss": 0.1196, "step": 32228 }, { "epoch": 0.9402240504113426, "grad_norm": 1.8356673550242073, "learning_rate": 9.341347764536768e-08, "loss": 0.1123, "step": 32229 }, { "epoch": 0.9402532236419862, "grad_norm": 0.8468451722893325, "learning_rate": 9.332260494986866e-08, "loss": 0.1417, "step": 32230 }, { "epoch": 0.9402823968726297, "grad_norm": 1.4832384056344383, "learning_rate": 9.323177606001433e-08, "loss": 0.1128, "step": 32231 }, { "epoch": 0.9403115701032733, "grad_norm": 0.8977563019204942, "learning_rate": 9.314099097661511e-08, "loss": 0.1274, "step": 32232 }, { "epoch": 0.9403407433339168, "grad_norm": 0.7821348721161093, "learning_rate": 9.30502497004826e-08, "loss": 0.1133, "step": 32233 }, { "epoch": 0.9403699165645604, "grad_norm": 1.015399189609931, "learning_rate": 9.295955223242503e-08, "loss": 0.1313, "step": 32234 }, { "epoch": 0.9403990897952039, "grad_norm": 0.7625042097865166, "learning_rate": 9.286889857325343e-08, "loss": 0.1284, "step": 32235 }, { "epoch": 0.9404282630258475, "grad_norm": 0.7857895787778961, "learning_rate": 9.277828872377714e-08, "loss": 0.1001, "step": 32236 }, { "epoch": 0.940457436256491, "grad_norm": 0.8786706465429232, "learning_rate": 9.268772268480498e-08, "loss": 0.1039, "step": 32237 }, { "epoch": 0.9404866094871346, "grad_norm": 0.8768587824741662, "learning_rate": 9.25972004571446e-08, "loss": 0.0987, "step": 32238 }, { "epoch": 0.9405157827177781, "grad_norm": 0.8281244894290761, "learning_rate": 9.250672204160538e-08, "loss": 0.1132, "step": 32239 }, { "epoch": 0.9405449559484217, "grad_norm": 0.8666201805007784, "learning_rate": 9.241628743899445e-08, "loss": 0.1054, "step": 32240 }, { "epoch": 0.9405741291790652, "grad_norm": 0.9872921626328762, "learning_rate": 9.232589665012004e-08, "loss": 0.1145, "step": 32241 }, { "epoch": 0.9406033024097088, "grad_norm": 0.9099156945262562, "learning_rate": 9.223554967578763e-08, "loss": 0.1263, "step": 32242 }, { "epoch": 0.9406324756403525, "grad_norm": 0.7816016344764367, "learning_rate": 9.214524651680545e-08, "loss": 0.1092, "step": 32243 }, { "epoch": 0.940661648870996, "grad_norm": 0.750991763943585, "learning_rate": 9.205498717397843e-08, "loss": 0.1075, "step": 32244 }, { "epoch": 0.9406908221016396, "grad_norm": 0.8534571402613954, "learning_rate": 9.196477164811313e-08, "loss": 0.1177, "step": 32245 }, { "epoch": 0.9407199953322831, "grad_norm": 0.9121166727582298, "learning_rate": 9.18745999400139e-08, "loss": 0.1059, "step": 32246 }, { "epoch": 0.9407491685629267, "grad_norm": 0.8343823646972929, "learning_rate": 9.178447205048735e-08, "loss": 0.1119, "step": 32247 }, { "epoch": 0.9407783417935702, "grad_norm": 0.8591376981119724, "learning_rate": 9.169438798033725e-08, "loss": 0.1116, "step": 32248 }, { "epoch": 0.9408075150242138, "grad_norm": 0.8586963944195071, "learning_rate": 9.160434773036797e-08, "loss": 0.1219, "step": 32249 }, { "epoch": 0.9408366882548573, "grad_norm": 0.720980567545608, "learning_rate": 9.151435130138331e-08, "loss": 0.0961, "step": 32250 }, { "epoch": 0.9408658614855009, "grad_norm": 0.6589360325697987, "learning_rate": 9.142439869418651e-08, "loss": 0.111, "step": 32251 }, { "epoch": 0.9408950347161444, "grad_norm": 0.953257786619246, "learning_rate": 9.133448990958083e-08, "loss": 0.1047, "step": 32252 }, { "epoch": 0.940924207946788, "grad_norm": 1.0535537006691844, "learning_rate": 9.12446249483695e-08, "loss": 0.1357, "step": 32253 }, { "epoch": 0.9409533811774315, "grad_norm": 0.9181781532984107, "learning_rate": 9.115480381135466e-08, "loss": 0.1141, "step": 32254 }, { "epoch": 0.9409825544080751, "grad_norm": 0.7004570130653078, "learning_rate": 9.106502649933679e-08, "loss": 0.1197, "step": 32255 }, { "epoch": 0.9410117276387188, "grad_norm": 0.7811468517955279, "learning_rate": 9.097529301311969e-08, "loss": 0.1161, "step": 32256 }, { "epoch": 0.9410409008693623, "grad_norm": 0.9753875994681088, "learning_rate": 9.088560335350272e-08, "loss": 0.102, "step": 32257 }, { "epoch": 0.9410700741000059, "grad_norm": 0.7541581719261251, "learning_rate": 9.07959575212869e-08, "loss": 0.12, "step": 32258 }, { "epoch": 0.9410992473306494, "grad_norm": 0.8599202291508046, "learning_rate": 9.07063555172727e-08, "loss": 0.1118, "step": 32259 }, { "epoch": 0.941128420561293, "grad_norm": 0.8262972867905722, "learning_rate": 9.061679734226115e-08, "loss": 0.0981, "step": 32260 }, { "epoch": 0.9411575937919365, "grad_norm": 0.8200818163966893, "learning_rate": 9.05272829970505e-08, "loss": 0.096, "step": 32261 }, { "epoch": 0.9411867670225801, "grad_norm": 0.6948563973969044, "learning_rate": 9.043781248244011e-08, "loss": 0.108, "step": 32262 }, { "epoch": 0.9412159402532236, "grad_norm": 0.9039716304296024, "learning_rate": 9.034838579922878e-08, "loss": 0.125, "step": 32263 }, { "epoch": 0.9412451134838672, "grad_norm": 0.948507507146162, "learning_rate": 9.025900294821533e-08, "loss": 0.1054, "step": 32264 }, { "epoch": 0.9412742867145107, "grad_norm": 0.8100205279020445, "learning_rate": 9.016966393019688e-08, "loss": 0.1169, "step": 32265 }, { "epoch": 0.9413034599451543, "grad_norm": 0.7338423233355329, "learning_rate": 9.008036874597226e-08, "loss": 0.0986, "step": 32266 }, { "epoch": 0.9413326331757979, "grad_norm": 1.026312423149906, "learning_rate": 8.999111739633803e-08, "loss": 0.1079, "step": 32267 }, { "epoch": 0.9413618064064414, "grad_norm": 0.9176621989537525, "learning_rate": 8.990190988209025e-08, "loss": 0.1179, "step": 32268 }, { "epoch": 0.941390979637085, "grad_norm": 0.7518978043031617, "learning_rate": 8.981274620402713e-08, "loss": 0.1076, "step": 32269 }, { "epoch": 0.9414201528677286, "grad_norm": 0.7782733248428133, "learning_rate": 8.972362636294307e-08, "loss": 0.1037, "step": 32270 }, { "epoch": 0.9414493260983722, "grad_norm": 0.88650877935467, "learning_rate": 8.963455035963409e-08, "loss": 0.1086, "step": 32271 }, { "epoch": 0.9414784993290157, "grad_norm": 0.8043260987467993, "learning_rate": 8.954551819489565e-08, "loss": 0.1127, "step": 32272 }, { "epoch": 0.9415076725596593, "grad_norm": 0.7528719477366806, "learning_rate": 8.945652986952325e-08, "loss": 0.0967, "step": 32273 }, { "epoch": 0.9415368457903028, "grad_norm": 0.9861613255844213, "learning_rate": 8.936758538431067e-08, "loss": 0.1141, "step": 32274 }, { "epoch": 0.9415660190209464, "grad_norm": 0.8871444967746114, "learning_rate": 8.92786847400512e-08, "loss": 0.1181, "step": 32275 }, { "epoch": 0.9415951922515899, "grad_norm": 0.7471736057557377, "learning_rate": 8.918982793753972e-08, "loss": 0.1081, "step": 32276 }, { "epoch": 0.9416243654822335, "grad_norm": 0.7884487385782475, "learning_rate": 8.910101497756951e-08, "loss": 0.0975, "step": 32277 }, { "epoch": 0.941653538712877, "grad_norm": 1.1634976749767227, "learning_rate": 8.901224586093271e-08, "loss": 0.1253, "step": 32278 }, { "epoch": 0.9416827119435206, "grad_norm": 0.8013935716951578, "learning_rate": 8.892352058842258e-08, "loss": 0.113, "step": 32279 }, { "epoch": 0.9417118851741642, "grad_norm": 0.908155959134173, "learning_rate": 8.883483916083068e-08, "loss": 0.1102, "step": 32280 }, { "epoch": 0.9417410584048077, "grad_norm": 0.926664533373589, "learning_rate": 8.874620157894864e-08, "loss": 0.1175, "step": 32281 }, { "epoch": 0.9417702316354513, "grad_norm": 0.9466495200848368, "learning_rate": 8.865760784356859e-08, "loss": 0.105, "step": 32282 }, { "epoch": 0.9417994048660949, "grad_norm": 0.9163070272330989, "learning_rate": 8.856905795548098e-08, "loss": 0.1314, "step": 32283 }, { "epoch": 0.9418285780967385, "grad_norm": 0.7883809830136383, "learning_rate": 8.848055191547633e-08, "loss": 0.1055, "step": 32284 }, { "epoch": 0.941857751327382, "grad_norm": 0.8342912192103017, "learning_rate": 8.839208972434455e-08, "loss": 0.1023, "step": 32285 }, { "epoch": 0.9418869245580256, "grad_norm": 0.8348360942403166, "learning_rate": 8.830367138287555e-08, "loss": 0.1087, "step": 32286 }, { "epoch": 0.9419160977886691, "grad_norm": 1.0003868136548781, "learning_rate": 8.821529689185981e-08, "loss": 0.1174, "step": 32287 }, { "epoch": 0.9419452710193127, "grad_norm": 0.8001076929016344, "learning_rate": 8.81269662520845e-08, "loss": 0.1091, "step": 32288 }, { "epoch": 0.9419744442499562, "grad_norm": 1.2193951438076844, "learning_rate": 8.803867946433897e-08, "loss": 0.115, "step": 32289 }, { "epoch": 0.9420036174805998, "grad_norm": 0.8640853572218443, "learning_rate": 8.795043652941204e-08, "loss": 0.1199, "step": 32290 }, { "epoch": 0.9420327907112434, "grad_norm": 0.7240457643107918, "learning_rate": 8.786223744809085e-08, "loss": 0.1039, "step": 32291 }, { "epoch": 0.9420619639418869, "grad_norm": 0.9594373398207939, "learning_rate": 8.777408222116257e-08, "loss": 0.0993, "step": 32292 }, { "epoch": 0.9420911371725305, "grad_norm": 0.7836878391748856, "learning_rate": 8.768597084941543e-08, "loss": 0.1092, "step": 32293 }, { "epoch": 0.942120310403174, "grad_norm": 0.7213913717550353, "learning_rate": 8.759790333363439e-08, "loss": 0.1087, "step": 32294 }, { "epoch": 0.9421494836338176, "grad_norm": 0.7591685255798649, "learning_rate": 8.750987967460711e-08, "loss": 0.11, "step": 32295 }, { "epoch": 0.9421786568644611, "grad_norm": 0.8033655210901949, "learning_rate": 8.74218998731191e-08, "loss": 0.112, "step": 32296 }, { "epoch": 0.9422078300951048, "grad_norm": 1.0044959807320812, "learning_rate": 8.733396392995531e-08, "loss": 0.1237, "step": 32297 }, { "epoch": 0.9422370033257483, "grad_norm": 0.7833893556060869, "learning_rate": 8.724607184590117e-08, "loss": 0.1222, "step": 32298 }, { "epoch": 0.9422661765563919, "grad_norm": 0.8650228097079342, "learning_rate": 8.715822362174165e-08, "loss": 0.1107, "step": 32299 }, { "epoch": 0.9422953497870354, "grad_norm": 0.8301736997589824, "learning_rate": 8.707041925826054e-08, "loss": 0.1245, "step": 32300 }, { "epoch": 0.942324523017679, "grad_norm": 0.7275716008408162, "learning_rate": 8.698265875624168e-08, "loss": 0.1235, "step": 32301 }, { "epoch": 0.9423536962483225, "grad_norm": 0.8993784203191924, "learning_rate": 8.689494211646887e-08, "loss": 0.1119, "step": 32302 }, { "epoch": 0.9423828694789661, "grad_norm": 1.1175801904530618, "learning_rate": 8.680726933972538e-08, "loss": 0.1101, "step": 32303 }, { "epoch": 0.9424120427096097, "grad_norm": 0.8399352229791115, "learning_rate": 8.671964042679392e-08, "loss": 0.1215, "step": 32304 }, { "epoch": 0.9424412159402532, "grad_norm": 0.7335315001881796, "learning_rate": 8.663205537845609e-08, "loss": 0.1098, "step": 32305 }, { "epoch": 0.9424703891708968, "grad_norm": 0.861878034998344, "learning_rate": 8.654451419549459e-08, "loss": 0.1251, "step": 32306 }, { "epoch": 0.9424995624015403, "grad_norm": 0.7872442627576178, "learning_rate": 8.645701687869046e-08, "loss": 0.1145, "step": 32307 }, { "epoch": 0.9425287356321839, "grad_norm": 0.7276279017712955, "learning_rate": 8.63695634288253e-08, "loss": 0.1241, "step": 32308 }, { "epoch": 0.9425579088628274, "grad_norm": 0.8494474455200676, "learning_rate": 8.628215384668015e-08, "loss": 0.0962, "step": 32309 }, { "epoch": 0.9425870820934711, "grad_norm": 0.9091544186677172, "learning_rate": 8.61947881330344e-08, "loss": 0.1053, "step": 32310 }, { "epoch": 0.9426162553241146, "grad_norm": 0.800220103593855, "learning_rate": 8.610746628866851e-08, "loss": 0.1024, "step": 32311 }, { "epoch": 0.9426454285547582, "grad_norm": 0.6635900744825833, "learning_rate": 8.602018831436243e-08, "loss": 0.0982, "step": 32312 }, { "epoch": 0.9426746017854017, "grad_norm": 0.8273136038765367, "learning_rate": 8.593295421089498e-08, "loss": 0.1251, "step": 32313 }, { "epoch": 0.9427037750160453, "grad_norm": 0.9357943378239129, "learning_rate": 8.584576397904498e-08, "loss": 0.1015, "step": 32314 }, { "epoch": 0.9427329482466889, "grad_norm": 0.7543237702459774, "learning_rate": 8.57586176195907e-08, "loss": 0.1262, "step": 32315 }, { "epoch": 0.9427621214773324, "grad_norm": 0.8876534827741925, "learning_rate": 8.567151513331096e-08, "loss": 0.1259, "step": 32316 }, { "epoch": 0.942791294707976, "grad_norm": 0.8748683143361243, "learning_rate": 8.558445652098291e-08, "loss": 0.1071, "step": 32317 }, { "epoch": 0.9428204679386195, "grad_norm": 1.019682043259005, "learning_rate": 8.549744178338259e-08, "loss": 0.1214, "step": 32318 }, { "epoch": 0.9428496411692631, "grad_norm": 0.7724503288375518, "learning_rate": 8.54104709212883e-08, "loss": 0.1062, "step": 32319 }, { "epoch": 0.9428788143999066, "grad_norm": 0.8884411022332633, "learning_rate": 8.53235439354766e-08, "loss": 0.0994, "step": 32320 }, { "epoch": 0.9429079876305502, "grad_norm": 0.9568008653287678, "learning_rate": 8.5236660826723e-08, "loss": 0.1107, "step": 32321 }, { "epoch": 0.9429371608611937, "grad_norm": 0.7016210509757499, "learning_rate": 8.514982159580298e-08, "loss": 0.1078, "step": 32322 }, { "epoch": 0.9429663340918373, "grad_norm": 0.9077299274976988, "learning_rate": 8.506302624349205e-08, "loss": 0.1139, "step": 32323 }, { "epoch": 0.9429955073224809, "grad_norm": 0.7983976891298535, "learning_rate": 8.497627477056514e-08, "loss": 0.1156, "step": 32324 }, { "epoch": 0.9430246805531245, "grad_norm": 0.887561043756048, "learning_rate": 8.488956717779661e-08, "loss": 0.104, "step": 32325 }, { "epoch": 0.943053853783768, "grad_norm": 0.8654210067457109, "learning_rate": 8.480290346596087e-08, "loss": 0.1238, "step": 32326 }, { "epoch": 0.9430830270144116, "grad_norm": 0.7727612534057382, "learning_rate": 8.471628363583174e-08, "loss": 0.1094, "step": 32327 }, { "epoch": 0.9431122002450552, "grad_norm": 0.8980402081435425, "learning_rate": 8.46297076881819e-08, "loss": 0.1149, "step": 32328 }, { "epoch": 0.9431413734756987, "grad_norm": 0.6740917503354988, "learning_rate": 8.454317562378467e-08, "loss": 0.1058, "step": 32329 }, { "epoch": 0.9431705467063423, "grad_norm": 0.7988787708196102, "learning_rate": 8.445668744341274e-08, "loss": 0.1105, "step": 32330 }, { "epoch": 0.9431997199369858, "grad_norm": 0.7638095409849585, "learning_rate": 8.43702431478377e-08, "loss": 0.1008, "step": 32331 }, { "epoch": 0.9432288931676294, "grad_norm": 0.8684555261019707, "learning_rate": 8.428384273783175e-08, "loss": 0.1109, "step": 32332 }, { "epoch": 0.9432580663982729, "grad_norm": 0.6328075299142163, "learning_rate": 8.419748621416646e-08, "loss": 0.0915, "step": 32333 }, { "epoch": 0.9432872396289165, "grad_norm": 0.9430103200458374, "learning_rate": 8.411117357761289e-08, "loss": 0.1011, "step": 32334 }, { "epoch": 0.94331641285956, "grad_norm": 0.8249591915840404, "learning_rate": 8.402490482893988e-08, "loss": 0.1203, "step": 32335 }, { "epoch": 0.9433455860902036, "grad_norm": 0.7291717954263409, "learning_rate": 8.393867996892014e-08, "loss": 0.0926, "step": 32336 }, { "epoch": 0.9433747593208472, "grad_norm": 0.8703143796152049, "learning_rate": 8.385249899832249e-08, "loss": 0.1244, "step": 32337 }, { "epoch": 0.9434039325514908, "grad_norm": 0.779070003302947, "learning_rate": 8.37663619179152e-08, "loss": 0.1155, "step": 32338 }, { "epoch": 0.9434331057821344, "grad_norm": 1.0629336366988282, "learning_rate": 8.36802687284688e-08, "loss": 0.1194, "step": 32339 }, { "epoch": 0.9434622790127779, "grad_norm": 0.7835093401142775, "learning_rate": 8.359421943075153e-08, "loss": 0.0926, "step": 32340 }, { "epoch": 0.9434914522434215, "grad_norm": 0.9044756377979435, "learning_rate": 8.350821402553111e-08, "loss": 0.1051, "step": 32341 }, { "epoch": 0.943520625474065, "grad_norm": 0.8804094413338565, "learning_rate": 8.342225251357527e-08, "loss": 0.1142, "step": 32342 }, { "epoch": 0.9435497987047086, "grad_norm": 1.3066257204772769, "learning_rate": 8.333633489565284e-08, "loss": 0.137, "step": 32343 }, { "epoch": 0.9435789719353521, "grad_norm": 0.7875741132318617, "learning_rate": 8.325046117252933e-08, "loss": 0.1035, "step": 32344 }, { "epoch": 0.9436081451659957, "grad_norm": 0.8731378998190231, "learning_rate": 8.316463134497188e-08, "loss": 0.1231, "step": 32345 }, { "epoch": 0.9436373183966392, "grad_norm": 0.8524985718756968, "learning_rate": 8.30788454137471e-08, "loss": 0.0967, "step": 32346 }, { "epoch": 0.9436664916272828, "grad_norm": 0.7584908542335126, "learning_rate": 8.299310337962052e-08, "loss": 0.1058, "step": 32347 }, { "epoch": 0.9436956648579263, "grad_norm": 0.6681495845564815, "learning_rate": 8.290740524335817e-08, "loss": 0.0966, "step": 32348 }, { "epoch": 0.9437248380885699, "grad_norm": 0.8429285285254013, "learning_rate": 8.282175100572387e-08, "loss": 0.1175, "step": 32349 }, { "epoch": 0.9437540113192134, "grad_norm": 0.8027082932232955, "learning_rate": 8.27361406674837e-08, "loss": 0.0911, "step": 32350 }, { "epoch": 0.9437831845498571, "grad_norm": 0.8567381679743021, "learning_rate": 8.265057422940148e-08, "loss": 0.1255, "step": 32351 }, { "epoch": 0.9438123577805007, "grad_norm": 0.8541403054887776, "learning_rate": 8.256505169224105e-08, "loss": 0.0995, "step": 32352 }, { "epoch": 0.9438415310111442, "grad_norm": 0.7698180543207574, "learning_rate": 8.247957305676568e-08, "loss": 0.0865, "step": 32353 }, { "epoch": 0.9438707042417878, "grad_norm": 1.0897995547477701, "learning_rate": 8.239413832373865e-08, "loss": 0.1105, "step": 32354 }, { "epoch": 0.9438998774724313, "grad_norm": 0.894631470770573, "learning_rate": 8.230874749392326e-08, "loss": 0.1217, "step": 32355 }, { "epoch": 0.9439290507030749, "grad_norm": 0.8897706786415958, "learning_rate": 8.222340056808109e-08, "loss": 0.1373, "step": 32356 }, { "epoch": 0.9439582239337184, "grad_norm": 0.8475899700246209, "learning_rate": 8.213809754697489e-08, "loss": 0.1133, "step": 32357 }, { "epoch": 0.943987397164362, "grad_norm": 0.7119139509831721, "learning_rate": 8.205283843136513e-08, "loss": 0.1076, "step": 32358 }, { "epoch": 0.9440165703950055, "grad_norm": 0.8062916356226331, "learning_rate": 8.196762322201401e-08, "loss": 0.1006, "step": 32359 }, { "epoch": 0.9440457436256491, "grad_norm": 1.0753529891432876, "learning_rate": 8.188245191968202e-08, "loss": 0.101, "step": 32360 }, { "epoch": 0.9440749168562926, "grad_norm": 0.8753470313202542, "learning_rate": 8.179732452512911e-08, "loss": 0.0974, "step": 32361 }, { "epoch": 0.9441040900869362, "grad_norm": 0.7564879364630781, "learning_rate": 8.17122410391158e-08, "loss": 0.1147, "step": 32362 }, { "epoch": 0.9441332633175797, "grad_norm": 0.7842977363139345, "learning_rate": 8.162720146240144e-08, "loss": 0.1151, "step": 32363 }, { "epoch": 0.9441624365482234, "grad_norm": 0.7961969065277302, "learning_rate": 8.154220579574601e-08, "loss": 0.1064, "step": 32364 }, { "epoch": 0.944191609778867, "grad_norm": 0.9942200818097617, "learning_rate": 8.145725403990668e-08, "loss": 0.1206, "step": 32365 }, { "epoch": 0.9442207830095105, "grad_norm": 0.8811670485871332, "learning_rate": 8.137234619564282e-08, "loss": 0.1039, "step": 32366 }, { "epoch": 0.9442499562401541, "grad_norm": 0.8439337869759281, "learning_rate": 8.12874822637133e-08, "loss": 0.1075, "step": 32367 }, { "epoch": 0.9442791294707976, "grad_norm": 0.9234172909187949, "learning_rate": 8.120266224487416e-08, "loss": 0.0889, "step": 32368 }, { "epoch": 0.9443083027014412, "grad_norm": 0.7791893482755741, "learning_rate": 8.111788613988369e-08, "loss": 0.1555, "step": 32369 }, { "epoch": 0.9443374759320847, "grad_norm": 0.8860145963518655, "learning_rate": 8.103315394949906e-08, "loss": 0.1119, "step": 32370 }, { "epoch": 0.9443666491627283, "grad_norm": 0.8118565613192408, "learning_rate": 8.094846567447523e-08, "loss": 0.0869, "step": 32371 }, { "epoch": 0.9443958223933718, "grad_norm": 1.0808754968211483, "learning_rate": 8.086382131556935e-08, "loss": 0.1027, "step": 32372 }, { "epoch": 0.9444249956240154, "grad_norm": 0.7209412532372016, "learning_rate": 8.077922087353751e-08, "loss": 0.114, "step": 32373 }, { "epoch": 0.9444541688546589, "grad_norm": 1.3381069972839603, "learning_rate": 8.069466434913464e-08, "loss": 0.1124, "step": 32374 }, { "epoch": 0.9444833420853025, "grad_norm": 0.750155217556688, "learning_rate": 8.06101517431146e-08, "loss": 0.0897, "step": 32375 }, { "epoch": 0.944512515315946, "grad_norm": 0.7369771329983555, "learning_rate": 8.052568305623342e-08, "loss": 0.1086, "step": 32376 }, { "epoch": 0.9445416885465896, "grad_norm": 0.801568927073826, "learning_rate": 8.044125828924442e-08, "loss": 0.0983, "step": 32377 }, { "epoch": 0.9445708617772333, "grad_norm": 0.781518255797064, "learning_rate": 8.035687744290143e-08, "loss": 0.1041, "step": 32378 }, { "epoch": 0.9446000350078768, "grad_norm": 0.7125332917030004, "learning_rate": 8.027254051795774e-08, "loss": 0.0979, "step": 32379 }, { "epoch": 0.9446292082385204, "grad_norm": 0.6843526691826908, "learning_rate": 8.018824751516663e-08, "loss": 0.1184, "step": 32380 }, { "epoch": 0.9446583814691639, "grad_norm": 1.002080672606367, "learning_rate": 8.010399843528083e-08, "loss": 0.0985, "step": 32381 }, { "epoch": 0.9446875546998075, "grad_norm": 0.7824026578557902, "learning_rate": 8.00197932790514e-08, "loss": 0.113, "step": 32382 }, { "epoch": 0.944716727930451, "grad_norm": 0.7873377395419179, "learning_rate": 7.993563204723054e-08, "loss": 0.113, "step": 32383 }, { "epoch": 0.9447459011610946, "grad_norm": 0.9239821817302963, "learning_rate": 7.98515147405704e-08, "loss": 0.1215, "step": 32384 }, { "epoch": 0.9447750743917381, "grad_norm": 0.8012849692276517, "learning_rate": 7.976744135982095e-08, "loss": 0.1125, "step": 32385 }, { "epoch": 0.9448042476223817, "grad_norm": 0.87859202581409, "learning_rate": 7.968341190573325e-08, "loss": 0.1211, "step": 32386 }, { "epoch": 0.9448334208530252, "grad_norm": 0.8184684077296053, "learning_rate": 7.959942637905783e-08, "loss": 0.1036, "step": 32387 }, { "epoch": 0.9448625940836688, "grad_norm": 0.8394996941678644, "learning_rate": 7.951548478054405e-08, "loss": 0.1164, "step": 32388 }, { "epoch": 0.9448917673143123, "grad_norm": 0.7561225631203814, "learning_rate": 7.943158711094079e-08, "loss": 0.0907, "step": 32389 }, { "epoch": 0.9449209405449559, "grad_norm": 0.6961283322428043, "learning_rate": 7.934773337099855e-08, "loss": 0.0987, "step": 32390 }, { "epoch": 0.9449501137755996, "grad_norm": 0.7230837570967884, "learning_rate": 7.926392356146507e-08, "loss": 0.105, "step": 32391 }, { "epoch": 0.9449792870062431, "grad_norm": 0.892162969730835, "learning_rate": 7.918015768308806e-08, "loss": 0.1104, "step": 32392 }, { "epoch": 0.9450084602368867, "grad_norm": 0.7914370249735397, "learning_rate": 7.90964357366164e-08, "loss": 0.1058, "step": 32393 }, { "epoch": 0.9450376334675302, "grad_norm": 0.7030963808324242, "learning_rate": 7.90127577227967e-08, "loss": 0.1102, "step": 32394 }, { "epoch": 0.9450668066981738, "grad_norm": 0.9540850690225302, "learning_rate": 7.89291236423767e-08, "loss": 0.1207, "step": 32395 }, { "epoch": 0.9450959799288173, "grad_norm": 1.2345368165618769, "learning_rate": 7.884553349610191e-08, "loss": 0.1145, "step": 32396 }, { "epoch": 0.9451251531594609, "grad_norm": 0.7108277104669307, "learning_rate": 7.876198728472062e-08, "loss": 0.1173, "step": 32397 }, { "epoch": 0.9451543263901044, "grad_norm": 0.7955082892917396, "learning_rate": 7.867848500897668e-08, "loss": 0.113, "step": 32398 }, { "epoch": 0.945183499620748, "grad_norm": 0.8259253011058484, "learning_rate": 7.859502666961672e-08, "loss": 0.1283, "step": 32399 }, { "epoch": 0.9452126728513915, "grad_norm": 0.7176344232509159, "learning_rate": 7.851161226738569e-08, "loss": 0.0807, "step": 32400 }, { "epoch": 0.9452418460820351, "grad_norm": 0.7244473039419826, "learning_rate": 7.842824180302743e-08, "loss": 0.1229, "step": 32401 }, { "epoch": 0.9452710193126787, "grad_norm": 0.763476028012814, "learning_rate": 7.834491527728694e-08, "loss": 0.1113, "step": 32402 }, { "epoch": 0.9453001925433222, "grad_norm": 0.9109140432203537, "learning_rate": 7.826163269090914e-08, "loss": 0.1065, "step": 32403 }, { "epoch": 0.9453293657739658, "grad_norm": 0.747528977273796, "learning_rate": 7.817839404463623e-08, "loss": 0.1216, "step": 32404 }, { "epoch": 0.9453585390046094, "grad_norm": 0.7952431788295623, "learning_rate": 7.809519933921095e-08, "loss": 0.1179, "step": 32405 }, { "epoch": 0.945387712235253, "grad_norm": 0.8155826659945792, "learning_rate": 7.80120485753777e-08, "loss": 0.11, "step": 32406 }, { "epoch": 0.9454168854658965, "grad_norm": 0.8149750360445821, "learning_rate": 7.792894175387755e-08, "loss": 0.096, "step": 32407 }, { "epoch": 0.9454460586965401, "grad_norm": 0.5960639511586834, "learning_rate": 7.784587887545269e-08, "loss": 0.098, "step": 32408 }, { "epoch": 0.9454752319271836, "grad_norm": 0.9003230157312117, "learning_rate": 7.776285994084476e-08, "loss": 0.1141, "step": 32409 }, { "epoch": 0.9455044051578272, "grad_norm": 0.6997698470257007, "learning_rate": 7.767988495079536e-08, "loss": 0.1029, "step": 32410 }, { "epoch": 0.9455335783884707, "grad_norm": 0.7786886267937401, "learning_rate": 7.759695390604505e-08, "loss": 0.0877, "step": 32411 }, { "epoch": 0.9455627516191143, "grad_norm": 0.8070390944776786, "learning_rate": 7.75140668073332e-08, "loss": 0.1096, "step": 32412 }, { "epoch": 0.9455919248497578, "grad_norm": 0.6548227331117534, "learning_rate": 7.74312236554009e-08, "loss": 0.1166, "step": 32413 }, { "epoch": 0.9456210980804014, "grad_norm": 0.7864888096709484, "learning_rate": 7.734842445098811e-08, "loss": 0.1104, "step": 32414 }, { "epoch": 0.945650271311045, "grad_norm": 0.7404766707515956, "learning_rate": 7.726566919483313e-08, "loss": 0.1091, "step": 32415 }, { "epoch": 0.9456794445416885, "grad_norm": 1.1420709390800285, "learning_rate": 7.718295788767537e-08, "loss": 0.1208, "step": 32416 }, { "epoch": 0.9457086177723321, "grad_norm": 1.083427221237561, "learning_rate": 7.710029053025258e-08, "loss": 0.1335, "step": 32417 }, { "epoch": 0.9457377910029756, "grad_norm": 0.8615196675754264, "learning_rate": 7.701766712330305e-08, "loss": 0.0932, "step": 32418 }, { "epoch": 0.9457669642336193, "grad_norm": 0.8327881297641448, "learning_rate": 7.693508766756508e-08, "loss": 0.128, "step": 32419 }, { "epoch": 0.9457961374642628, "grad_norm": 0.7653280774890531, "learning_rate": 7.68525521637753e-08, "loss": 0.113, "step": 32420 }, { "epoch": 0.9458253106949064, "grad_norm": 0.8654162343913903, "learning_rate": 7.677006061267089e-08, "loss": 0.1037, "step": 32421 }, { "epoch": 0.9458544839255499, "grad_norm": 0.8712620590015554, "learning_rate": 7.668761301498739e-08, "loss": 0.0953, "step": 32422 }, { "epoch": 0.9458836571561935, "grad_norm": 0.7792027222281702, "learning_rate": 7.660520937146199e-08, "loss": 0.1309, "step": 32423 }, { "epoch": 0.945912830386837, "grad_norm": 0.939320024063705, "learning_rate": 7.65228496828302e-08, "loss": 0.1182, "step": 32424 }, { "epoch": 0.9459420036174806, "grad_norm": 0.7470944219962071, "learning_rate": 7.644053394982698e-08, "loss": 0.1234, "step": 32425 }, { "epoch": 0.9459711768481242, "grad_norm": 0.7567899105879465, "learning_rate": 7.635826217318676e-08, "loss": 0.1142, "step": 32426 }, { "epoch": 0.9460003500787677, "grad_norm": 0.7022270603143397, "learning_rate": 7.627603435364562e-08, "loss": 0.1059, "step": 32427 }, { "epoch": 0.9460295233094113, "grad_norm": 0.8755207850523682, "learning_rate": 7.619385049193573e-08, "loss": 0.115, "step": 32428 }, { "epoch": 0.9460586965400548, "grad_norm": 0.8289761925866534, "learning_rate": 7.611171058879208e-08, "loss": 0.1208, "step": 32429 }, { "epoch": 0.9460878697706984, "grad_norm": 0.7441816679164663, "learning_rate": 7.602961464494796e-08, "loss": 0.1055, "step": 32430 }, { "epoch": 0.9461170430013419, "grad_norm": 0.8571158621900724, "learning_rate": 7.594756266113556e-08, "loss": 0.1038, "step": 32431 }, { "epoch": 0.9461462162319856, "grad_norm": 0.9005891761133941, "learning_rate": 7.586555463808765e-08, "loss": 0.1166, "step": 32432 }, { "epoch": 0.9461753894626291, "grad_norm": 0.7337949228878811, "learning_rate": 7.578359057653751e-08, "loss": 0.119, "step": 32433 }, { "epoch": 0.9462045626932727, "grad_norm": 0.8261839710203674, "learning_rate": 7.57016704772151e-08, "loss": 0.1355, "step": 32434 }, { "epoch": 0.9462337359239162, "grad_norm": 0.7690019357159564, "learning_rate": 7.56197943408532e-08, "loss": 0.1031, "step": 32435 }, { "epoch": 0.9462629091545598, "grad_norm": 0.8164878148238821, "learning_rate": 7.553796216818177e-08, "loss": 0.1049, "step": 32436 }, { "epoch": 0.9462920823852033, "grad_norm": 0.790209351407258, "learning_rate": 7.545617395993188e-08, "loss": 0.1319, "step": 32437 }, { "epoch": 0.9463212556158469, "grad_norm": 0.7399463395526849, "learning_rate": 7.537442971683406e-08, "loss": 0.113, "step": 32438 }, { "epoch": 0.9463504288464905, "grad_norm": 0.8608838647635368, "learning_rate": 7.529272943961774e-08, "loss": 0.1114, "step": 32439 }, { "epoch": 0.946379602077134, "grad_norm": 0.7811986672017879, "learning_rate": 7.521107312901177e-08, "loss": 0.1439, "step": 32440 }, { "epoch": 0.9464087753077776, "grad_norm": 0.8592101554393792, "learning_rate": 7.512946078574667e-08, "loss": 0.1024, "step": 32441 }, { "epoch": 0.9464379485384211, "grad_norm": 0.868918578391751, "learning_rate": 7.50478924105491e-08, "loss": 0.1281, "step": 32442 }, { "epoch": 0.9464671217690647, "grad_norm": 0.9483448154147353, "learning_rate": 7.496636800414847e-08, "loss": 0.122, "step": 32443 }, { "epoch": 0.9464962949997082, "grad_norm": 0.7123911422266416, "learning_rate": 7.488488756727252e-08, "loss": 0.1023, "step": 32444 }, { "epoch": 0.9465254682303518, "grad_norm": 0.9491089059453062, "learning_rate": 7.480345110064846e-08, "loss": 0.0922, "step": 32445 }, { "epoch": 0.9465546414609954, "grad_norm": 0.8656337906944751, "learning_rate": 7.472205860500403e-08, "loss": 0.1105, "step": 32446 }, { "epoch": 0.946583814691639, "grad_norm": 0.9254558721488838, "learning_rate": 7.464071008106477e-08, "loss": 0.1273, "step": 32447 }, { "epoch": 0.9466129879222825, "grad_norm": 1.1120337824344588, "learning_rate": 7.455940552955732e-08, "loss": 0.0938, "step": 32448 }, { "epoch": 0.9466421611529261, "grad_norm": 0.8734464676563862, "learning_rate": 7.447814495120775e-08, "loss": 0.1188, "step": 32449 }, { "epoch": 0.9466713343835697, "grad_norm": 0.932221433076052, "learning_rate": 7.439692834674217e-08, "loss": 0.0833, "step": 32450 }, { "epoch": 0.9467005076142132, "grad_norm": 0.771003383833281, "learning_rate": 7.431575571688443e-08, "loss": 0.1447, "step": 32451 }, { "epoch": 0.9467296808448568, "grad_norm": 0.7799731886129633, "learning_rate": 7.42346270623595e-08, "loss": 0.138, "step": 32452 }, { "epoch": 0.9467588540755003, "grad_norm": 0.8575818353583003, "learning_rate": 7.415354238389239e-08, "loss": 0.1272, "step": 32453 }, { "epoch": 0.9467880273061439, "grad_norm": 0.8350146255677302, "learning_rate": 7.407250168220692e-08, "loss": 0.1115, "step": 32454 }, { "epoch": 0.9468172005367874, "grad_norm": 1.2155046907044247, "learning_rate": 7.399150495802532e-08, "loss": 0.1331, "step": 32455 }, { "epoch": 0.946846373767431, "grad_norm": 0.7861259438910083, "learning_rate": 7.391055221207199e-08, "loss": 0.1078, "step": 32456 }, { "epoch": 0.9468755469980745, "grad_norm": 0.7929531775979544, "learning_rate": 7.382964344506971e-08, "loss": 0.0894, "step": 32457 }, { "epoch": 0.9469047202287181, "grad_norm": 0.7207911773731474, "learning_rate": 7.374877865774011e-08, "loss": 0.1174, "step": 32458 }, { "epoch": 0.9469338934593617, "grad_norm": 0.8473804329915969, "learning_rate": 7.366795785080538e-08, "loss": 0.1043, "step": 32459 }, { "epoch": 0.9469630666900053, "grad_norm": 0.9293679818654809, "learning_rate": 7.358718102498718e-08, "loss": 0.0955, "step": 32460 }, { "epoch": 0.9469922399206488, "grad_norm": 0.9925557625259872, "learning_rate": 7.350644818100605e-08, "loss": 0.1084, "step": 32461 }, { "epoch": 0.9470214131512924, "grad_norm": 0.8357931404349741, "learning_rate": 7.342575931958362e-08, "loss": 0.1226, "step": 32462 }, { "epoch": 0.947050586381936, "grad_norm": 0.8516494057558605, "learning_rate": 7.334511444144043e-08, "loss": 0.1135, "step": 32463 }, { "epoch": 0.9470797596125795, "grad_norm": 0.9690824340711388, "learning_rate": 7.326451354729591e-08, "loss": 0.1019, "step": 32464 }, { "epoch": 0.9471089328432231, "grad_norm": 0.8434389326351772, "learning_rate": 7.318395663786892e-08, "loss": 0.1279, "step": 32465 }, { "epoch": 0.9471381060738666, "grad_norm": 0.7854950540974656, "learning_rate": 7.310344371388057e-08, "loss": 0.1118, "step": 32466 }, { "epoch": 0.9471672793045102, "grad_norm": 0.6897796201625724, "learning_rate": 7.302297477604747e-08, "loss": 0.0937, "step": 32467 }, { "epoch": 0.9471964525351537, "grad_norm": 0.8238705776744283, "learning_rate": 7.294254982508963e-08, "loss": 0.1019, "step": 32468 }, { "epoch": 0.9472256257657973, "grad_norm": 0.680516572844136, "learning_rate": 7.286216886172425e-08, "loss": 0.1055, "step": 32469 }, { "epoch": 0.9472547989964408, "grad_norm": 0.8786545586463291, "learning_rate": 7.278183188666965e-08, "loss": 0.1434, "step": 32470 }, { "epoch": 0.9472839722270844, "grad_norm": 1.0483298869582072, "learning_rate": 7.270153890064246e-08, "loss": 0.1187, "step": 32471 }, { "epoch": 0.9473131454577279, "grad_norm": 0.7640401178064328, "learning_rate": 7.262128990435934e-08, "loss": 0.12, "step": 32472 }, { "epoch": 0.9473423186883716, "grad_norm": 0.7435605455000042, "learning_rate": 7.25410848985375e-08, "loss": 0.1075, "step": 32473 }, { "epoch": 0.9473714919190152, "grad_norm": 0.839756218587077, "learning_rate": 7.246092388389247e-08, "loss": 0.117, "step": 32474 }, { "epoch": 0.9474006651496587, "grad_norm": 0.9486867625872087, "learning_rate": 7.23808068611398e-08, "loss": 0.1018, "step": 32475 }, { "epoch": 0.9474298383803023, "grad_norm": 0.686994518444723, "learning_rate": 7.230073383099556e-08, "loss": 0.1132, "step": 32476 }, { "epoch": 0.9474590116109458, "grad_norm": 1.194465992391673, "learning_rate": 7.222070479417365e-08, "loss": 0.1322, "step": 32477 }, { "epoch": 0.9474881848415894, "grad_norm": 0.9061250797428757, "learning_rate": 7.214071975138847e-08, "loss": 0.0982, "step": 32478 }, { "epoch": 0.9475173580722329, "grad_norm": 0.8336272683284411, "learning_rate": 7.206077870335504e-08, "loss": 0.1363, "step": 32479 }, { "epoch": 0.9475465313028765, "grad_norm": 0.7735124687475878, "learning_rate": 7.198088165078664e-08, "loss": 0.1225, "step": 32480 }, { "epoch": 0.94757570453352, "grad_norm": 1.0237110014291062, "learning_rate": 7.190102859439662e-08, "loss": 0.1145, "step": 32481 }, { "epoch": 0.9476048777641636, "grad_norm": 0.7648542443407397, "learning_rate": 7.182121953489718e-08, "loss": 0.0958, "step": 32482 }, { "epoch": 0.9476340509948071, "grad_norm": 1.0863584681681098, "learning_rate": 7.174145447300218e-08, "loss": 0.1251, "step": 32483 }, { "epoch": 0.9476632242254507, "grad_norm": 0.8585402501507573, "learning_rate": 7.166173340942273e-08, "loss": 0.1177, "step": 32484 }, { "epoch": 0.9476923974560942, "grad_norm": 0.791338627216461, "learning_rate": 7.158205634487103e-08, "loss": 0.0855, "step": 32485 }, { "epoch": 0.9477215706867379, "grad_norm": 0.8626801997444347, "learning_rate": 7.150242328005763e-08, "loss": 0.0975, "step": 32486 }, { "epoch": 0.9477507439173815, "grad_norm": 0.7039490546372619, "learning_rate": 7.142283421569474e-08, "loss": 0.1164, "step": 32487 }, { "epoch": 0.947779917148025, "grad_norm": 0.7765559387927986, "learning_rate": 7.134328915249177e-08, "loss": 0.1068, "step": 32488 }, { "epoch": 0.9478090903786686, "grad_norm": 0.8814504468730606, "learning_rate": 7.126378809115931e-08, "loss": 0.129, "step": 32489 }, { "epoch": 0.9478382636093121, "grad_norm": 0.608087582565708, "learning_rate": 7.118433103240729e-08, "loss": 0.1025, "step": 32490 }, { "epoch": 0.9478674368399557, "grad_norm": 0.7717572647366848, "learning_rate": 7.110491797694519e-08, "loss": 0.1095, "step": 32491 }, { "epoch": 0.9478966100705992, "grad_norm": 0.9897115468888162, "learning_rate": 7.10255489254813e-08, "loss": 0.1052, "step": 32492 }, { "epoch": 0.9479257833012428, "grad_norm": 0.8760283595412774, "learning_rate": 7.094622387872508e-08, "loss": 0.1139, "step": 32493 }, { "epoch": 0.9479549565318863, "grad_norm": 0.6954293766892239, "learning_rate": 7.086694283738427e-08, "loss": 0.1182, "step": 32494 }, { "epoch": 0.9479841297625299, "grad_norm": 0.9198009490309836, "learning_rate": 7.078770580216664e-08, "loss": 0.1093, "step": 32495 }, { "epoch": 0.9480133029931734, "grad_norm": 1.0179819067131206, "learning_rate": 7.070851277377944e-08, "loss": 0.1204, "step": 32496 }, { "epoch": 0.948042476223817, "grad_norm": 0.8157738612589095, "learning_rate": 7.062936375293039e-08, "loss": 0.1359, "step": 32497 }, { "epoch": 0.9480716494544605, "grad_norm": 0.8817424538251957, "learning_rate": 7.055025874032562e-08, "loss": 0.1249, "step": 32498 }, { "epoch": 0.9481008226851041, "grad_norm": 0.8186664338244846, "learning_rate": 7.047119773667066e-08, "loss": 0.1045, "step": 32499 }, { "epoch": 0.9481299959157478, "grad_norm": 0.8767729803975891, "learning_rate": 7.039218074267273e-08, "loss": 0.1211, "step": 32500 }, { "epoch": 0.9481591691463913, "grad_norm": 0.768487745352673, "learning_rate": 7.031320775903682e-08, "loss": 0.1096, "step": 32501 }, { "epoch": 0.9481883423770349, "grad_norm": 0.8039052449936003, "learning_rate": 7.023427878646739e-08, "loss": 0.104, "step": 32502 }, { "epoch": 0.9482175156076784, "grad_norm": 0.8063128935223344, "learning_rate": 7.015539382566882e-08, "loss": 0.0907, "step": 32503 }, { "epoch": 0.948246688838322, "grad_norm": 0.9271385773683889, "learning_rate": 7.007655287734727e-08, "loss": 0.1003, "step": 32504 }, { "epoch": 0.9482758620689655, "grad_norm": 0.9647932622383132, "learning_rate": 6.999775594220437e-08, "loss": 0.119, "step": 32505 }, { "epoch": 0.9483050352996091, "grad_norm": 0.8090997577971532, "learning_rate": 6.991900302094567e-08, "loss": 0.109, "step": 32506 }, { "epoch": 0.9483342085302526, "grad_norm": 0.8575302787877651, "learning_rate": 6.984029411427285e-08, "loss": 0.1288, "step": 32507 }, { "epoch": 0.9483633817608962, "grad_norm": 1.143824229557474, "learning_rate": 6.976162922288865e-08, "loss": 0.118, "step": 32508 }, { "epoch": 0.9483925549915397, "grad_norm": 0.8649502905621248, "learning_rate": 6.968300834749531e-08, "loss": 0.1038, "step": 32509 }, { "epoch": 0.9484217282221833, "grad_norm": 0.9273763814160385, "learning_rate": 6.960443148879559e-08, "loss": 0.1177, "step": 32510 }, { "epoch": 0.9484509014528268, "grad_norm": 0.7928934445398926, "learning_rate": 6.952589864749115e-08, "loss": 0.1143, "step": 32511 }, { "epoch": 0.9484800746834704, "grad_norm": 0.9374873688596449, "learning_rate": 6.944740982428144e-08, "loss": 0.1088, "step": 32512 }, { "epoch": 0.9485092479141141, "grad_norm": 0.8220838038955269, "learning_rate": 6.936896501986868e-08, "loss": 0.1247, "step": 32513 }, { "epoch": 0.9485384211447576, "grad_norm": 0.7494913221019011, "learning_rate": 6.929056423495285e-08, "loss": 0.1118, "step": 32514 }, { "epoch": 0.9485675943754012, "grad_norm": 0.574077662448829, "learning_rate": 6.921220747023394e-08, "loss": 0.0967, "step": 32515 }, { "epoch": 0.9485967676060447, "grad_norm": 0.9191293324336272, "learning_rate": 6.913389472641085e-08, "loss": 0.1119, "step": 32516 }, { "epoch": 0.9486259408366883, "grad_norm": 0.8234192992220227, "learning_rate": 6.905562600418359e-08, "loss": 0.1148, "step": 32517 }, { "epoch": 0.9486551140673318, "grad_norm": 0.8801974156793452, "learning_rate": 6.897740130425046e-08, "loss": 0.1079, "step": 32518 }, { "epoch": 0.9486842872979754, "grad_norm": 0.7527567196852354, "learning_rate": 6.88992206273098e-08, "loss": 0.1209, "step": 32519 }, { "epoch": 0.9487134605286189, "grad_norm": 0.7828147462334349, "learning_rate": 6.882108397406051e-08, "loss": 0.1172, "step": 32520 }, { "epoch": 0.9487426337592625, "grad_norm": 0.7652612727291339, "learning_rate": 6.874299134519868e-08, "loss": 0.0962, "step": 32521 }, { "epoch": 0.948771806989906, "grad_norm": 0.9264724715795395, "learning_rate": 6.86649427414221e-08, "loss": 0.1053, "step": 32522 }, { "epoch": 0.9488009802205496, "grad_norm": 0.7727918440017685, "learning_rate": 6.858693816342854e-08, "loss": 0.0987, "step": 32523 }, { "epoch": 0.9488301534511931, "grad_norm": 0.8662458394639372, "learning_rate": 6.8508977611913e-08, "loss": 0.1073, "step": 32524 }, { "epoch": 0.9488593266818367, "grad_norm": 0.9801777045089599, "learning_rate": 6.843106108757214e-08, "loss": 0.1183, "step": 32525 }, { "epoch": 0.9488884999124803, "grad_norm": 0.8325529827915136, "learning_rate": 6.835318859110152e-08, "loss": 0.1045, "step": 32526 }, { "epoch": 0.9489176731431239, "grad_norm": 0.8014491100772806, "learning_rate": 6.827536012319613e-08, "loss": 0.1235, "step": 32527 }, { "epoch": 0.9489468463737675, "grad_norm": 0.9160593276523509, "learning_rate": 6.819757568455155e-08, "loss": 0.0946, "step": 32528 }, { "epoch": 0.948976019604411, "grad_norm": 0.8850257788412628, "learning_rate": 6.811983527586108e-08, "loss": 0.1158, "step": 32529 }, { "epoch": 0.9490051928350546, "grad_norm": 0.7254581291890501, "learning_rate": 6.804213889781974e-08, "loss": 0.121, "step": 32530 }, { "epoch": 0.9490343660656981, "grad_norm": 0.8242057641046956, "learning_rate": 6.796448655112142e-08, "loss": 0.1055, "step": 32531 }, { "epoch": 0.9490635392963417, "grad_norm": 0.7271872053019198, "learning_rate": 6.788687823645723e-08, "loss": 0.095, "step": 32532 }, { "epoch": 0.9490927125269852, "grad_norm": 0.7964237689345448, "learning_rate": 6.780931395452273e-08, "loss": 0.1194, "step": 32533 }, { "epoch": 0.9491218857576288, "grad_norm": 0.7695145596993271, "learning_rate": 6.773179370600958e-08, "loss": 0.1105, "step": 32534 }, { "epoch": 0.9491510589882723, "grad_norm": 0.8957846772447188, "learning_rate": 6.765431749160889e-08, "loss": 0.1241, "step": 32535 }, { "epoch": 0.9491802322189159, "grad_norm": 0.8944661895065943, "learning_rate": 6.75768853120129e-08, "loss": 0.1188, "step": 32536 }, { "epoch": 0.9492094054495595, "grad_norm": 0.7907215058520678, "learning_rate": 6.749949716791382e-08, "loss": 0.1327, "step": 32537 }, { "epoch": 0.949238578680203, "grad_norm": 0.8652602143387097, "learning_rate": 6.742215306000055e-08, "loss": 0.0997, "step": 32538 }, { "epoch": 0.9492677519108466, "grad_norm": 0.9278490960321242, "learning_rate": 6.734485298896531e-08, "loss": 0.1197, "step": 32539 }, { "epoch": 0.9492969251414902, "grad_norm": 0.8093230324059786, "learning_rate": 6.726759695549812e-08, "loss": 0.1298, "step": 32540 }, { "epoch": 0.9493260983721338, "grad_norm": 0.8335422554079511, "learning_rate": 6.71903849602884e-08, "loss": 0.1049, "step": 32541 }, { "epoch": 0.9493552716027773, "grad_norm": 0.8902666678634433, "learning_rate": 6.711321700402451e-08, "loss": 0.1042, "step": 32542 }, { "epoch": 0.9493844448334209, "grad_norm": 0.7495172320445483, "learning_rate": 6.7036093087397e-08, "loss": 0.0962, "step": 32543 }, { "epoch": 0.9494136180640644, "grad_norm": 0.6637903822240693, "learning_rate": 6.695901321109311e-08, "loss": 0.1163, "step": 32544 }, { "epoch": 0.949442791294708, "grad_norm": 0.8944716687778668, "learning_rate": 6.688197737580226e-08, "loss": 0.1148, "step": 32545 }, { "epoch": 0.9494719645253515, "grad_norm": 0.7032948601866934, "learning_rate": 6.68049855822106e-08, "loss": 0.0942, "step": 32546 }, { "epoch": 0.9495011377559951, "grad_norm": 0.6992534994997684, "learning_rate": 6.672803783100701e-08, "loss": 0.1009, "step": 32547 }, { "epoch": 0.9495303109866386, "grad_norm": 0.7429958521862423, "learning_rate": 6.66511341228776e-08, "loss": 0.1014, "step": 32548 }, { "epoch": 0.9495594842172822, "grad_norm": 0.7998772670532653, "learning_rate": 6.657427445850906e-08, "loss": 0.1069, "step": 32549 }, { "epoch": 0.9495886574479258, "grad_norm": 0.8214812360205656, "learning_rate": 6.64974588385875e-08, "loss": 0.111, "step": 32550 }, { "epoch": 0.9496178306785693, "grad_norm": 0.6227033556614411, "learning_rate": 6.642068726379958e-08, "loss": 0.1119, "step": 32551 }, { "epoch": 0.9496470039092129, "grad_norm": 0.8738627469696575, "learning_rate": 6.634395973482976e-08, "loss": 0.1482, "step": 32552 }, { "epoch": 0.9496761771398564, "grad_norm": 0.8448002130865022, "learning_rate": 6.626727625236307e-08, "loss": 0.1065, "step": 32553 }, { "epoch": 0.9497053503705001, "grad_norm": 0.6625229258472054, "learning_rate": 6.619063681708504e-08, "loss": 0.1054, "step": 32554 }, { "epoch": 0.9497345236011436, "grad_norm": 0.7563557777917117, "learning_rate": 6.611404142967847e-08, "loss": 0.1074, "step": 32555 }, { "epoch": 0.9497636968317872, "grad_norm": 0.9927561808730692, "learning_rate": 6.603749009082782e-08, "loss": 0.1136, "step": 32556 }, { "epoch": 0.9497928700624307, "grad_norm": 0.7893414017567375, "learning_rate": 6.596098280121699e-08, "loss": 0.1051, "step": 32557 }, { "epoch": 0.9498220432930743, "grad_norm": 0.8773759445176594, "learning_rate": 6.588451956152875e-08, "loss": 0.1216, "step": 32558 }, { "epoch": 0.9498512165237178, "grad_norm": 0.8107183958581707, "learning_rate": 6.580810037244533e-08, "loss": 0.1146, "step": 32559 }, { "epoch": 0.9498803897543614, "grad_norm": 0.7921634542797843, "learning_rate": 6.573172523464954e-08, "loss": 0.0983, "step": 32560 }, { "epoch": 0.949909562985005, "grad_norm": 0.7370843643468035, "learning_rate": 6.56553941488236e-08, "loss": 0.0908, "step": 32561 }, { "epoch": 0.9499387362156485, "grad_norm": 0.6442301494912659, "learning_rate": 6.557910711564697e-08, "loss": 0.0944, "step": 32562 }, { "epoch": 0.9499679094462921, "grad_norm": 0.7498354563584554, "learning_rate": 6.550286413580298e-08, "loss": 0.117, "step": 32563 }, { "epoch": 0.9499970826769356, "grad_norm": 0.7483228818550821, "learning_rate": 6.542666520997166e-08, "loss": 0.0957, "step": 32564 }, { "epoch": 0.9500262559075792, "grad_norm": 0.8375602683930373, "learning_rate": 6.535051033883245e-08, "loss": 0.1179, "step": 32565 }, { "epoch": 0.9500554291382227, "grad_norm": 0.7275852938497513, "learning_rate": 6.527439952306647e-08, "loss": 0.1083, "step": 32566 }, { "epoch": 0.9500846023688664, "grad_norm": 0.9878603165436323, "learning_rate": 6.519833276335263e-08, "loss": 0.1156, "step": 32567 }, { "epoch": 0.9501137755995099, "grad_norm": 0.7871433797833636, "learning_rate": 6.512231006036984e-08, "loss": 0.1064, "step": 32568 }, { "epoch": 0.9501429488301535, "grad_norm": 0.6917231232985578, "learning_rate": 6.504633141479644e-08, "loss": 0.1166, "step": 32569 }, { "epoch": 0.950172122060797, "grad_norm": 0.7962815841372336, "learning_rate": 6.497039682731243e-08, "loss": 0.107, "step": 32570 }, { "epoch": 0.9502012952914406, "grad_norm": 0.8131996014082601, "learning_rate": 6.489450629859394e-08, "loss": 0.1028, "step": 32571 }, { "epoch": 0.9502304685220841, "grad_norm": 0.7680218242347662, "learning_rate": 6.481865982931934e-08, "loss": 0.0984, "step": 32572 }, { "epoch": 0.9502596417527277, "grad_norm": 0.7015601473634733, "learning_rate": 6.474285742016583e-08, "loss": 0.1083, "step": 32573 }, { "epoch": 0.9502888149833713, "grad_norm": 0.8884200993336463, "learning_rate": 6.466709907180957e-08, "loss": 0.1146, "step": 32574 }, { "epoch": 0.9503179882140148, "grad_norm": 0.9475049563156215, "learning_rate": 6.459138478492721e-08, "loss": 0.1122, "step": 32575 }, { "epoch": 0.9503471614446584, "grad_norm": 0.7885576938871349, "learning_rate": 6.45157145601949e-08, "loss": 0.1029, "step": 32576 }, { "epoch": 0.9503763346753019, "grad_norm": 0.7762302852183923, "learning_rate": 6.444008839828875e-08, "loss": 0.1176, "step": 32577 }, { "epoch": 0.9504055079059455, "grad_norm": 0.8350150433801837, "learning_rate": 6.436450629988267e-08, "loss": 0.1091, "step": 32578 }, { "epoch": 0.950434681136589, "grad_norm": 0.7493533951830512, "learning_rate": 6.428896826565223e-08, "loss": 0.1181, "step": 32579 }, { "epoch": 0.9504638543672326, "grad_norm": 0.991159279510208, "learning_rate": 6.421347429627134e-08, "loss": 0.109, "step": 32580 }, { "epoch": 0.9504930275978762, "grad_norm": 0.8734027167661529, "learning_rate": 6.413802439241445e-08, "loss": 0.1119, "step": 32581 }, { "epoch": 0.9505222008285198, "grad_norm": 0.9273288398776804, "learning_rate": 6.406261855475492e-08, "loss": 0.1043, "step": 32582 }, { "epoch": 0.9505513740591633, "grad_norm": 0.8732606294444948, "learning_rate": 6.398725678396611e-08, "loss": 0.1099, "step": 32583 }, { "epoch": 0.9505805472898069, "grad_norm": 0.6687579571720464, "learning_rate": 6.39119390807208e-08, "loss": 0.1276, "step": 32584 }, { "epoch": 0.9506097205204505, "grad_norm": 0.8621781554936837, "learning_rate": 6.383666544569122e-08, "loss": 0.112, "step": 32585 }, { "epoch": 0.950638893751094, "grad_norm": 0.7539262707182163, "learning_rate": 6.376143587954964e-08, "loss": 0.1071, "step": 32586 }, { "epoch": 0.9506680669817376, "grad_norm": 0.7066981580124291, "learning_rate": 6.368625038296772e-08, "loss": 0.1211, "step": 32587 }, { "epoch": 0.9506972402123811, "grad_norm": 0.6900478942678211, "learning_rate": 6.36111089566166e-08, "loss": 0.1042, "step": 32588 }, { "epoch": 0.9507264134430247, "grad_norm": 0.7289498956348031, "learning_rate": 6.353601160116685e-08, "loss": 0.1204, "step": 32589 }, { "epoch": 0.9507555866736682, "grad_norm": 0.8394279801179323, "learning_rate": 6.34609583172896e-08, "loss": 0.1201, "step": 32590 }, { "epoch": 0.9507847599043118, "grad_norm": 0.8028538442739547, "learning_rate": 6.338594910565376e-08, "loss": 0.0991, "step": 32591 }, { "epoch": 0.9508139331349553, "grad_norm": 1.0028204581743745, "learning_rate": 6.331098396692991e-08, "loss": 0.1242, "step": 32592 }, { "epoch": 0.9508431063655989, "grad_norm": 0.7777868754168333, "learning_rate": 6.323606290178697e-08, "loss": 0.1065, "step": 32593 }, { "epoch": 0.9508722795962425, "grad_norm": 0.6508464250057778, "learning_rate": 6.316118591089493e-08, "loss": 0.1047, "step": 32594 }, { "epoch": 0.9509014528268861, "grad_norm": 0.9154484747314856, "learning_rate": 6.308635299491994e-08, "loss": 0.1295, "step": 32595 }, { "epoch": 0.9509306260575296, "grad_norm": 0.8207156517485412, "learning_rate": 6.301156415453257e-08, "loss": 0.1293, "step": 32596 }, { "epoch": 0.9509597992881732, "grad_norm": 0.715485299643818, "learning_rate": 6.293681939039898e-08, "loss": 0.11, "step": 32597 }, { "epoch": 0.9509889725188168, "grad_norm": 0.8233722592986634, "learning_rate": 6.286211870318693e-08, "loss": 0.1142, "step": 32598 }, { "epoch": 0.9510181457494603, "grad_norm": 0.8366408034701655, "learning_rate": 6.278746209356313e-08, "loss": 0.1298, "step": 32599 }, { "epoch": 0.9510473189801039, "grad_norm": 0.8882187416155616, "learning_rate": 6.271284956219425e-08, "loss": 0.1018, "step": 32600 }, { "epoch": 0.9510764922107474, "grad_norm": 0.7522134535995358, "learning_rate": 6.263828110974645e-08, "loss": 0.1109, "step": 32601 }, { "epoch": 0.951105665441391, "grad_norm": 0.86579167965794, "learning_rate": 6.256375673688586e-08, "loss": 0.1142, "step": 32602 }, { "epoch": 0.9511348386720345, "grad_norm": 1.0030382824630497, "learning_rate": 6.248927644427694e-08, "loss": 0.1024, "step": 32603 }, { "epoch": 0.9511640119026781, "grad_norm": 0.6475231820740863, "learning_rate": 6.241484023258526e-08, "loss": 0.1105, "step": 32604 }, { "epoch": 0.9511931851333216, "grad_norm": 0.976756384760913, "learning_rate": 6.23404481024753e-08, "loss": 0.1011, "step": 32605 }, { "epoch": 0.9512223583639652, "grad_norm": 0.9550830639701129, "learning_rate": 6.226610005461043e-08, "loss": 0.1185, "step": 32606 }, { "epoch": 0.9512515315946087, "grad_norm": 0.9693759171969529, "learning_rate": 6.219179608965564e-08, "loss": 0.0972, "step": 32607 }, { "epoch": 0.9512807048252524, "grad_norm": 0.8450852425709949, "learning_rate": 6.211753620827377e-08, "loss": 0.1223, "step": 32608 }, { "epoch": 0.951309878055896, "grad_norm": 0.9628134473348807, "learning_rate": 6.204332041112759e-08, "loss": 0.1071, "step": 32609 }, { "epoch": 0.9513390512865395, "grad_norm": 0.7988154187396046, "learning_rate": 6.196914869887993e-08, "loss": 0.0981, "step": 32610 }, { "epoch": 0.9513682245171831, "grad_norm": 0.8331880469684507, "learning_rate": 6.189502107219302e-08, "loss": 0.1174, "step": 32611 }, { "epoch": 0.9513973977478266, "grad_norm": 0.7796078562984378, "learning_rate": 6.182093753172858e-08, "loss": 0.1269, "step": 32612 }, { "epoch": 0.9514265709784702, "grad_norm": 0.8244154041719207, "learning_rate": 6.174689807814771e-08, "loss": 0.1081, "step": 32613 }, { "epoch": 0.9514557442091137, "grad_norm": 0.8878633346593348, "learning_rate": 6.167290271211213e-08, "loss": 0.1021, "step": 32614 }, { "epoch": 0.9514849174397573, "grad_norm": 0.7331652320317426, "learning_rate": 6.15989514342813e-08, "loss": 0.1258, "step": 32615 }, { "epoch": 0.9515140906704008, "grad_norm": 0.8022422341246551, "learning_rate": 6.152504424531636e-08, "loss": 0.1125, "step": 32616 }, { "epoch": 0.9515432639010444, "grad_norm": 0.9996228397094764, "learning_rate": 6.145118114587733e-08, "loss": 0.1084, "step": 32617 }, { "epoch": 0.9515724371316879, "grad_norm": 1.0537827190381488, "learning_rate": 6.137736213662316e-08, "loss": 0.1023, "step": 32618 }, { "epoch": 0.9516016103623315, "grad_norm": 0.7684435751201382, "learning_rate": 6.130358721821272e-08, "loss": 0.101, "step": 32619 }, { "epoch": 0.951630783592975, "grad_norm": 0.8037522029007881, "learning_rate": 6.122985639130497e-08, "loss": 0.1156, "step": 32620 }, { "epoch": 0.9516599568236187, "grad_norm": 0.8974018603279343, "learning_rate": 6.115616965655824e-08, "loss": 0.1245, "step": 32621 }, { "epoch": 0.9516891300542623, "grad_norm": 0.7059823710551251, "learning_rate": 6.108252701462925e-08, "loss": 0.1014, "step": 32622 }, { "epoch": 0.9517183032849058, "grad_norm": 1.0761115786263042, "learning_rate": 6.100892846617745e-08, "loss": 0.1448, "step": 32623 }, { "epoch": 0.9517474765155494, "grad_norm": 1.0273670520567701, "learning_rate": 6.093537401185901e-08, "loss": 0.1133, "step": 32624 }, { "epoch": 0.9517766497461929, "grad_norm": 0.9418891085534057, "learning_rate": 6.086186365233005e-08, "loss": 0.0893, "step": 32625 }, { "epoch": 0.9518058229768365, "grad_norm": 0.6973764481163859, "learning_rate": 6.078839738824782e-08, "loss": 0.1099, "step": 32626 }, { "epoch": 0.95183499620748, "grad_norm": 0.6891166173220694, "learning_rate": 6.071497522026737e-08, "loss": 0.1131, "step": 32627 }, { "epoch": 0.9518641694381236, "grad_norm": 0.9490350493604558, "learning_rate": 6.064159714904428e-08, "loss": 0.1425, "step": 32628 }, { "epoch": 0.9518933426687671, "grad_norm": 0.8951531329085967, "learning_rate": 6.056826317523357e-08, "loss": 0.1325, "step": 32629 }, { "epoch": 0.9519225158994107, "grad_norm": 0.7920240975475742, "learning_rate": 6.049497329949139e-08, "loss": 0.1008, "step": 32630 }, { "epoch": 0.9519516891300542, "grad_norm": 0.8504101902469182, "learning_rate": 6.042172752247e-08, "loss": 0.1079, "step": 32631 }, { "epoch": 0.9519808623606978, "grad_norm": 0.830303144502529, "learning_rate": 6.034852584482442e-08, "loss": 0.0959, "step": 32632 }, { "epoch": 0.9520100355913413, "grad_norm": 0.6676870435088427, "learning_rate": 6.027536826720859e-08, "loss": 0.1203, "step": 32633 }, { "epoch": 0.9520392088219849, "grad_norm": 0.9017591609409177, "learning_rate": 6.020225479027419e-08, "loss": 0.1242, "step": 32634 }, { "epoch": 0.9520683820526286, "grad_norm": 1.08394380267901, "learning_rate": 6.012918541467572e-08, "loss": 0.1333, "step": 32635 }, { "epoch": 0.9520975552832721, "grad_norm": 0.7961625153907718, "learning_rate": 6.005616014106375e-08, "loss": 0.1073, "step": 32636 }, { "epoch": 0.9521267285139157, "grad_norm": 0.6973454160567234, "learning_rate": 5.998317897009165e-08, "loss": 0.1214, "step": 32637 }, { "epoch": 0.9521559017445592, "grad_norm": 0.8946820605255777, "learning_rate": 5.991024190241057e-08, "loss": 0.1193, "step": 32638 }, { "epoch": 0.9521850749752028, "grad_norm": 0.8561616523767539, "learning_rate": 5.983734893867166e-08, "loss": 0.0908, "step": 32639 }, { "epoch": 0.9522142482058463, "grad_norm": 0.8619288451934363, "learning_rate": 5.976450007952495e-08, "loss": 0.1369, "step": 32640 }, { "epoch": 0.9522434214364899, "grad_norm": 1.1506017454184636, "learning_rate": 5.969169532562158e-08, "loss": 0.1303, "step": 32641 }, { "epoch": 0.9522725946671334, "grad_norm": 0.9371048522861081, "learning_rate": 5.96189346776116e-08, "loss": 0.1144, "step": 32642 }, { "epoch": 0.952301767897777, "grad_norm": 0.7567991425407807, "learning_rate": 5.954621813614447e-08, "loss": 0.1135, "step": 32643 }, { "epoch": 0.9523309411284205, "grad_norm": 0.6268691765113534, "learning_rate": 5.9473545701869696e-08, "loss": 0.1092, "step": 32644 }, { "epoch": 0.9523601143590641, "grad_norm": 0.7497910810730019, "learning_rate": 5.940091737543507e-08, "loss": 0.1186, "step": 32645 }, { "epoch": 0.9523892875897076, "grad_norm": 0.8616186187157856, "learning_rate": 5.9328333157489535e-08, "loss": 0.1029, "step": 32646 }, { "epoch": 0.9524184608203512, "grad_norm": 0.9867630838410505, "learning_rate": 5.925579304868201e-08, "loss": 0.1162, "step": 32647 }, { "epoch": 0.9524476340509949, "grad_norm": 0.7842768877345314, "learning_rate": 5.9183297049658637e-08, "loss": 0.1112, "step": 32648 }, { "epoch": 0.9524768072816384, "grad_norm": 0.6791383638777265, "learning_rate": 5.9110845161067245e-08, "loss": 0.0876, "step": 32649 }, { "epoch": 0.952505980512282, "grad_norm": 0.7673675376467145, "learning_rate": 5.9038437383555636e-08, "loss": 0.091, "step": 32650 }, { "epoch": 0.9525351537429255, "grad_norm": 0.8811284227523455, "learning_rate": 5.896607371776886e-08, "loss": 0.1073, "step": 32651 }, { "epoch": 0.9525643269735691, "grad_norm": 0.8780540531852291, "learning_rate": 5.88937541643525e-08, "loss": 0.1318, "step": 32652 }, { "epoch": 0.9525935002042126, "grad_norm": 0.8550725392908367, "learning_rate": 5.882147872395438e-08, "loss": 0.1066, "step": 32653 }, { "epoch": 0.9526226734348562, "grad_norm": 0.9066859606474291, "learning_rate": 5.874924739721843e-08, "loss": 0.1124, "step": 32654 }, { "epoch": 0.9526518466654997, "grad_norm": 0.8223402910741842, "learning_rate": 5.8677060184789134e-08, "loss": 0.1127, "step": 32655 }, { "epoch": 0.9526810198961433, "grad_norm": 0.7837047894990621, "learning_rate": 5.860491708731153e-08, "loss": 0.104, "step": 32656 }, { "epoch": 0.9527101931267868, "grad_norm": 0.8590150222609634, "learning_rate": 5.85328181054301e-08, "loss": 0.0879, "step": 32657 }, { "epoch": 0.9527393663574304, "grad_norm": 0.9602282152252887, "learning_rate": 5.8460763239787666e-08, "loss": 0.1379, "step": 32658 }, { "epoch": 0.952768539588074, "grad_norm": 1.0993961155785237, "learning_rate": 5.83887524910276e-08, "loss": 0.1038, "step": 32659 }, { "epoch": 0.9527977128187175, "grad_norm": 0.811768160733312, "learning_rate": 5.8316785859793836e-08, "loss": 0.1116, "step": 32660 }, { "epoch": 0.952826886049361, "grad_norm": 0.8474323462158622, "learning_rate": 5.824486334672752e-08, "loss": 0.0862, "step": 32661 }, { "epoch": 0.9528560592800047, "grad_norm": 1.2586626991754393, "learning_rate": 5.817298495247148e-08, "loss": 0.1136, "step": 32662 }, { "epoch": 0.9528852325106483, "grad_norm": 0.9847370764031189, "learning_rate": 5.8101150677667975e-08, "loss": 0.1196, "step": 32663 }, { "epoch": 0.9529144057412918, "grad_norm": 0.7908098364741442, "learning_rate": 5.802936052295649e-08, "loss": 0.1057, "step": 32664 }, { "epoch": 0.9529435789719354, "grad_norm": 0.663226266575957, "learning_rate": 5.795761448897985e-08, "loss": 0.1082, "step": 32665 }, { "epoch": 0.9529727522025789, "grad_norm": 0.8961722386481162, "learning_rate": 5.78859125763781e-08, "loss": 0.0796, "step": 32666 }, { "epoch": 0.9530019254332225, "grad_norm": 0.7789305018609528, "learning_rate": 5.7814254785790724e-08, "loss": 0.1054, "step": 32667 }, { "epoch": 0.953031098663866, "grad_norm": 0.8662469204807426, "learning_rate": 5.774264111785832e-08, "loss": 0.1085, "step": 32668 }, { "epoch": 0.9530602718945096, "grad_norm": 0.7783665091937831, "learning_rate": 5.767107157321927e-08, "loss": 0.1043, "step": 32669 }, { "epoch": 0.9530894451251531, "grad_norm": 0.8554149891511343, "learning_rate": 5.759954615251307e-08, "loss": 0.1212, "step": 32670 }, { "epoch": 0.9531186183557967, "grad_norm": 1.116922813535786, "learning_rate": 5.752806485637863e-08, "loss": 0.1013, "step": 32671 }, { "epoch": 0.9531477915864403, "grad_norm": 0.6838342260271995, "learning_rate": 5.745662768545324e-08, "loss": 0.0908, "step": 32672 }, { "epoch": 0.9531769648170838, "grad_norm": 0.6748127117389364, "learning_rate": 5.7385234640375817e-08, "loss": 0.0936, "step": 32673 }, { "epoch": 0.9532061380477274, "grad_norm": 0.8109081455760337, "learning_rate": 5.731388572178309e-08, "loss": 0.1078, "step": 32674 }, { "epoch": 0.9532353112783709, "grad_norm": 0.8996950441897913, "learning_rate": 5.724258093031176e-08, "loss": 0.1237, "step": 32675 }, { "epoch": 0.9532644845090146, "grad_norm": 0.9041151303209126, "learning_rate": 5.717132026659855e-08, "loss": 0.1279, "step": 32676 }, { "epoch": 0.9532936577396581, "grad_norm": 0.9389216356559982, "learning_rate": 5.710010373128016e-08, "loss": 0.1226, "step": 32677 }, { "epoch": 0.9533228309703017, "grad_norm": 2.2230861250735847, "learning_rate": 5.702893132499221e-08, "loss": 0.1248, "step": 32678 }, { "epoch": 0.9533520042009452, "grad_norm": 0.8085138104073984, "learning_rate": 5.695780304836973e-08, "loss": 0.1027, "step": 32679 }, { "epoch": 0.9533811774315888, "grad_norm": 0.8832825819353127, "learning_rate": 5.6886718902048334e-08, "loss": 0.0885, "step": 32680 }, { "epoch": 0.9534103506622323, "grad_norm": 1.217138091986393, "learning_rate": 5.6815678886661953e-08, "loss": 0.1106, "step": 32681 }, { "epoch": 0.9534395238928759, "grad_norm": 1.086105299322099, "learning_rate": 5.674468300284508e-08, "loss": 0.1116, "step": 32682 }, { "epoch": 0.9534686971235194, "grad_norm": 0.8230304666633287, "learning_rate": 5.667373125123166e-08, "loss": 0.1122, "step": 32683 }, { "epoch": 0.953497870354163, "grad_norm": 0.7375332041179844, "learning_rate": 5.660282363245562e-08, "loss": 0.1066, "step": 32684 }, { "epoch": 0.9535270435848066, "grad_norm": 0.811745805945028, "learning_rate": 5.653196014714868e-08, "loss": 0.1065, "step": 32685 }, { "epoch": 0.9535562168154501, "grad_norm": 0.9421793573950739, "learning_rate": 5.646114079594478e-08, "loss": 0.113, "step": 32686 }, { "epoch": 0.9535853900460937, "grad_norm": 0.9071335278536922, "learning_rate": 5.6390365579476195e-08, "loss": 0.1076, "step": 32687 }, { "epoch": 0.9536145632767372, "grad_norm": 0.958546515415864, "learning_rate": 5.631963449837352e-08, "loss": 0.11, "step": 32688 }, { "epoch": 0.9536437365073809, "grad_norm": 0.6393352449910052, "learning_rate": 5.624894755326904e-08, "loss": 0.0829, "step": 32689 }, { "epoch": 0.9536729097380244, "grad_norm": 0.8669815361704764, "learning_rate": 5.617830474479391e-08, "loss": 0.1175, "step": 32690 }, { "epoch": 0.953702082968668, "grad_norm": 0.7695261088984546, "learning_rate": 5.6107706073578735e-08, "loss": 0.1254, "step": 32691 }, { "epoch": 0.9537312561993115, "grad_norm": 0.846695217818394, "learning_rate": 5.6037151540253574e-08, "loss": 0.103, "step": 32692 }, { "epoch": 0.9537604294299551, "grad_norm": 0.72007459617846, "learning_rate": 5.596664114544903e-08, "loss": 0.079, "step": 32693 }, { "epoch": 0.9537896026605986, "grad_norm": 0.7513026332398145, "learning_rate": 5.589617488979349e-08, "loss": 0.1083, "step": 32694 }, { "epoch": 0.9538187758912422, "grad_norm": 0.6366230101102938, "learning_rate": 5.5825752773917e-08, "loss": 0.0907, "step": 32695 }, { "epoch": 0.9538479491218858, "grad_norm": 0.8375176551428908, "learning_rate": 5.5755374798447394e-08, "loss": 0.101, "step": 32696 }, { "epoch": 0.9538771223525293, "grad_norm": 0.8338850696366432, "learning_rate": 5.568504096401417e-08, "loss": 0.1109, "step": 32697 }, { "epoch": 0.9539062955831729, "grad_norm": 0.9244959917031603, "learning_rate": 5.56147512712446e-08, "loss": 0.1138, "step": 32698 }, { "epoch": 0.9539354688138164, "grad_norm": 0.7904081543814052, "learning_rate": 5.5544505720765974e-08, "loss": 0.1074, "step": 32699 }, { "epoch": 0.95396464204446, "grad_norm": 0.71380367927924, "learning_rate": 5.547430431320555e-08, "loss": 0.1079, "step": 32700 }, { "epoch": 0.9539938152751035, "grad_norm": 0.722018876045306, "learning_rate": 5.540414704919006e-08, "loss": 0.0951, "step": 32701 }, { "epoch": 0.9540229885057471, "grad_norm": 0.983592964344662, "learning_rate": 5.533403392934622e-08, "loss": 0.1095, "step": 32702 }, { "epoch": 0.9540521617363907, "grad_norm": 0.7452949862713076, "learning_rate": 5.5263964954299644e-08, "loss": 0.0971, "step": 32703 }, { "epoch": 0.9540813349670343, "grad_norm": 0.9514650723231737, "learning_rate": 5.519394012467649e-08, "loss": 0.1353, "step": 32704 }, { "epoch": 0.9541105081976778, "grad_norm": 0.7965203008504623, "learning_rate": 5.5123959441100713e-08, "loss": 0.109, "step": 32705 }, { "epoch": 0.9541396814283214, "grad_norm": 0.7555129535758409, "learning_rate": 5.505402290419792e-08, "loss": 0.102, "step": 32706 }, { "epoch": 0.954168854658965, "grad_norm": 0.8251394665377272, "learning_rate": 5.498413051459261e-08, "loss": 0.1277, "step": 32707 }, { "epoch": 0.9541980278896085, "grad_norm": 0.7186693398842615, "learning_rate": 5.4914282272908737e-08, "loss": 0.1214, "step": 32708 }, { "epoch": 0.954227201120252, "grad_norm": 0.8534837535293472, "learning_rate": 5.484447817976912e-08, "loss": 0.1309, "step": 32709 }, { "epoch": 0.9542563743508956, "grad_norm": 0.9406564450983651, "learning_rate": 5.477471823579772e-08, "loss": 0.1216, "step": 32710 }, { "epoch": 0.9542855475815392, "grad_norm": 0.6421733732819564, "learning_rate": 5.470500244161736e-08, "loss": 0.1008, "step": 32711 }, { "epoch": 0.9543147208121827, "grad_norm": 0.8149105103919811, "learning_rate": 5.4635330797849217e-08, "loss": 0.1093, "step": 32712 }, { "epoch": 0.9543438940428263, "grad_norm": 0.7854076563447636, "learning_rate": 5.456570330511724e-08, "loss": 0.1234, "step": 32713 }, { "epoch": 0.9543730672734698, "grad_norm": 0.8292307039778413, "learning_rate": 5.449611996404203e-08, "loss": 0.0911, "step": 32714 }, { "epoch": 0.9544022405041134, "grad_norm": 0.847145646906693, "learning_rate": 5.442658077524476e-08, "loss": 0.104, "step": 32715 }, { "epoch": 0.954431413734757, "grad_norm": 0.5969312758250869, "learning_rate": 5.435708573934662e-08, "loss": 0.085, "step": 32716 }, { "epoch": 0.9544605869654006, "grad_norm": 0.8888413792758529, "learning_rate": 5.428763485696764e-08, "loss": 0.128, "step": 32717 }, { "epoch": 0.9544897601960441, "grad_norm": 0.8815023539978275, "learning_rate": 5.4218228128727345e-08, "loss": 0.1126, "step": 32718 }, { "epoch": 0.9545189334266877, "grad_norm": 0.8988664623351925, "learning_rate": 5.4148865555246896e-08, "loss": 0.1305, "step": 32719 }, { "epoch": 0.9545481066573313, "grad_norm": 0.8547614744774444, "learning_rate": 5.407954713714414e-08, "loss": 0.1212, "step": 32720 }, { "epoch": 0.9545772798879748, "grad_norm": 0.9884351548375194, "learning_rate": 5.4010272875039125e-08, "loss": 0.1045, "step": 32721 }, { "epoch": 0.9546064531186184, "grad_norm": 0.7517468882826768, "learning_rate": 5.3941042769549146e-08, "loss": 0.1197, "step": 32722 }, { "epoch": 0.9546356263492619, "grad_norm": 0.7619462510575757, "learning_rate": 5.387185682129259e-08, "loss": 0.1188, "step": 32723 }, { "epoch": 0.9546647995799055, "grad_norm": 0.9094217397506837, "learning_rate": 5.380271503088841e-08, "loss": 0.1104, "step": 32724 }, { "epoch": 0.954693972810549, "grad_norm": 0.8684033543593407, "learning_rate": 5.373361739895222e-08, "loss": 0.1074, "step": 32725 }, { "epoch": 0.9547231460411926, "grad_norm": 0.7737522514610439, "learning_rate": 5.366456392610131e-08, "loss": 0.1272, "step": 32726 }, { "epoch": 0.9547523192718361, "grad_norm": 0.8463556442373639, "learning_rate": 5.3595554612952404e-08, "loss": 0.1081, "step": 32727 }, { "epoch": 0.9547814925024797, "grad_norm": 0.7869651876905523, "learning_rate": 5.352658946012224e-08, "loss": 0.106, "step": 32728 }, { "epoch": 0.9548106657331232, "grad_norm": 1.1556828545657285, "learning_rate": 5.345766846822475e-08, "loss": 0.1097, "step": 32729 }, { "epoch": 0.9548398389637669, "grad_norm": 0.8149670517569663, "learning_rate": 5.3388791637877244e-08, "loss": 0.1278, "step": 32730 }, { "epoch": 0.9548690121944104, "grad_norm": 0.8282747326715098, "learning_rate": 5.3319958969693665e-08, "loss": 0.1176, "step": 32731 }, { "epoch": 0.954898185425054, "grad_norm": 0.853088271351987, "learning_rate": 5.3251170464288516e-08, "loss": 0.099, "step": 32732 }, { "epoch": 0.9549273586556976, "grad_norm": 0.6244398348582837, "learning_rate": 5.3182426122275753e-08, "loss": 0.1136, "step": 32733 }, { "epoch": 0.9549565318863411, "grad_norm": 0.7176252117683201, "learning_rate": 5.311372594426989e-08, "loss": 0.1149, "step": 32734 }, { "epoch": 0.9549857051169847, "grad_norm": 0.7292212616731406, "learning_rate": 5.304506993088321e-08, "loss": 0.1046, "step": 32735 }, { "epoch": 0.9550148783476282, "grad_norm": 0.6841145006758904, "learning_rate": 5.2976458082729666e-08, "loss": 0.098, "step": 32736 }, { "epoch": 0.9550440515782718, "grad_norm": 0.768517442130169, "learning_rate": 5.290789040042099e-08, "loss": 0.1047, "step": 32737 }, { "epoch": 0.9550732248089153, "grad_norm": 0.8851022480303948, "learning_rate": 5.283936688457003e-08, "loss": 0.1126, "step": 32738 }, { "epoch": 0.9551023980395589, "grad_norm": 0.9288174884552797, "learning_rate": 5.277088753578796e-08, "loss": 0.1247, "step": 32739 }, { "epoch": 0.9551315712702024, "grad_norm": 0.9339753629522923, "learning_rate": 5.2702452354687075e-08, "loss": 0.1158, "step": 32740 }, { "epoch": 0.955160744500846, "grad_norm": 0.7427770459169746, "learning_rate": 5.2634061341876874e-08, "loss": 0.1088, "step": 32741 }, { "epoch": 0.9551899177314895, "grad_norm": 0.7796077651791834, "learning_rate": 5.256571449796854e-08, "loss": 0.1001, "step": 32742 }, { "epoch": 0.9552190909621332, "grad_norm": 0.841522036360295, "learning_rate": 5.2497411823573264e-08, "loss": 0.1188, "step": 32743 }, { "epoch": 0.9552482641927768, "grad_norm": 0.6581909732765954, "learning_rate": 5.2429153319299987e-08, "loss": 0.1126, "step": 32744 }, { "epoch": 0.9552774374234203, "grad_norm": 0.8948577551819488, "learning_rate": 5.236093898575767e-08, "loss": 0.1175, "step": 32745 }, { "epoch": 0.9553066106540639, "grad_norm": 0.6663435400942943, "learning_rate": 5.229276882355583e-08, "loss": 0.0963, "step": 32746 }, { "epoch": 0.9553357838847074, "grad_norm": 0.7926324562650706, "learning_rate": 5.222464283330342e-08, "loss": 0.1044, "step": 32747 }, { "epoch": 0.955364957115351, "grad_norm": 0.9649432308644017, "learning_rate": 5.215656101560829e-08, "loss": 0.1203, "step": 32748 }, { "epoch": 0.9553941303459945, "grad_norm": 1.0764631275838574, "learning_rate": 5.2088523371077724e-08, "loss": 0.1174, "step": 32749 }, { "epoch": 0.9554233035766381, "grad_norm": 0.730160856196989, "learning_rate": 5.202052990032014e-08, "loss": 0.099, "step": 32750 }, { "epoch": 0.9554524768072816, "grad_norm": 0.8186530304108599, "learning_rate": 5.1952580603941705e-08, "loss": 0.118, "step": 32751 }, { "epoch": 0.9554816500379252, "grad_norm": 0.7699885973715855, "learning_rate": 5.188467548254972e-08, "loss": 0.0974, "step": 32752 }, { "epoch": 0.9555108232685687, "grad_norm": 0.9613407817142756, "learning_rate": 5.1816814536749804e-08, "loss": 0.095, "step": 32753 }, { "epoch": 0.9555399964992123, "grad_norm": 0.7684494701360869, "learning_rate": 5.174899776714814e-08, "loss": 0.1038, "step": 32754 }, { "epoch": 0.9555691697298558, "grad_norm": 0.715427278530436, "learning_rate": 5.1681225174350926e-08, "loss": 0.1099, "step": 32755 }, { "epoch": 0.9555983429604994, "grad_norm": 0.8722937914171108, "learning_rate": 5.1613496758961545e-08, "loss": 0.1198, "step": 32756 }, { "epoch": 0.9556275161911431, "grad_norm": 0.8399977858132577, "learning_rate": 5.154581252158619e-08, "loss": 0.1371, "step": 32757 }, { "epoch": 0.9556566894217866, "grad_norm": 0.9756169504636132, "learning_rate": 5.147817246282882e-08, "loss": 0.1103, "step": 32758 }, { "epoch": 0.9556858626524302, "grad_norm": 0.9175257322200491, "learning_rate": 5.1410576583291736e-08, "loss": 0.126, "step": 32759 }, { "epoch": 0.9557150358830737, "grad_norm": 1.1429093656478735, "learning_rate": 5.134302488358056e-08, "loss": 0.1195, "step": 32760 }, { "epoch": 0.9557442091137173, "grad_norm": 0.6904741345709744, "learning_rate": 5.127551736429759e-08, "loss": 0.1156, "step": 32761 }, { "epoch": 0.9557733823443608, "grad_norm": 0.6425373426976441, "learning_rate": 5.120805402604512e-08, "loss": 0.1188, "step": 32762 }, { "epoch": 0.9558025555750044, "grad_norm": 0.8153282788925978, "learning_rate": 5.114063486942655e-08, "loss": 0.1034, "step": 32763 }, { "epoch": 0.9558317288056479, "grad_norm": 0.9550712149912414, "learning_rate": 5.1073259895042527e-08, "loss": 0.0952, "step": 32764 }, { "epoch": 0.9558609020362915, "grad_norm": 0.7575655806906811, "learning_rate": 5.100592910349478e-08, "loss": 0.0979, "step": 32765 }, { "epoch": 0.955890075266935, "grad_norm": 0.7274889051662751, "learning_rate": 5.0938642495384495e-08, "loss": 0.102, "step": 32766 }, { "epoch": 0.9559192484975786, "grad_norm": 0.7797081707174183, "learning_rate": 5.087140007131286e-08, "loss": 0.0989, "step": 32767 }, { "epoch": 0.9559484217282221, "grad_norm": 0.7614799380669769, "learning_rate": 5.0804201831880505e-08, "loss": 0.0928, "step": 32768 }, { "epoch": 0.9559775949588657, "grad_norm": 0.8549511031994224, "learning_rate": 5.073704777768584e-08, "loss": 0.1075, "step": 32769 }, { "epoch": 0.9560067681895094, "grad_norm": 0.8778242344808694, "learning_rate": 5.0669937909330056e-08, "loss": 0.1186, "step": 32770 }, { "epoch": 0.9560359414201529, "grad_norm": 0.8661055700702226, "learning_rate": 5.0602872227411e-08, "loss": 0.1228, "step": 32771 }, { "epoch": 0.9560651146507965, "grad_norm": 0.7538873029924936, "learning_rate": 5.053585073252765e-08, "loss": 0.1002, "step": 32772 }, { "epoch": 0.95609428788144, "grad_norm": 0.7249928753886032, "learning_rate": 5.046887342527951e-08, "loss": 0.1145, "step": 32773 }, { "epoch": 0.9561234611120836, "grad_norm": 0.7851065270411969, "learning_rate": 5.0401940306263884e-08, "loss": 0.0956, "step": 32774 }, { "epoch": 0.9561526343427271, "grad_norm": 0.7216456921036118, "learning_rate": 5.0335051376077527e-08, "loss": 0.1069, "step": 32775 }, { "epoch": 0.9561818075733707, "grad_norm": 0.7984319027499761, "learning_rate": 5.026820663531828e-08, "loss": 0.1144, "step": 32776 }, { "epoch": 0.9562109808040142, "grad_norm": 0.7090533146522467, "learning_rate": 5.02014060845829e-08, "loss": 0.117, "step": 32777 }, { "epoch": 0.9562401540346578, "grad_norm": 0.7948516064912404, "learning_rate": 5.013464972446813e-08, "loss": 0.1167, "step": 32778 }, { "epoch": 0.9562693272653013, "grad_norm": 0.9578149005999798, "learning_rate": 5.0067937555569603e-08, "loss": 0.1289, "step": 32779 }, { "epoch": 0.9562985004959449, "grad_norm": 0.7431435145855708, "learning_rate": 5.000126957848239e-08, "loss": 0.1186, "step": 32780 }, { "epoch": 0.9563276737265884, "grad_norm": 0.9385269079835733, "learning_rate": 4.9934645793802696e-08, "loss": 0.1311, "step": 32781 }, { "epoch": 0.956356846957232, "grad_norm": 0.7426492576133975, "learning_rate": 4.9868066202124476e-08, "loss": 0.1035, "step": 32782 }, { "epoch": 0.9563860201878756, "grad_norm": 0.722847872566871, "learning_rate": 4.980153080404227e-08, "loss": 0.1105, "step": 32783 }, { "epoch": 0.9564151934185192, "grad_norm": 0.8650234530628166, "learning_rate": 4.973503960015058e-08, "loss": 0.1222, "step": 32784 }, { "epoch": 0.9564443666491628, "grad_norm": 0.8360374043334173, "learning_rate": 4.9668592591042844e-08, "loss": 0.1185, "step": 32785 }, { "epoch": 0.9564735398798063, "grad_norm": 0.839297362196858, "learning_rate": 4.9602189777311906e-08, "loss": 0.1155, "step": 32786 }, { "epoch": 0.9565027131104499, "grad_norm": 0.6934641601053315, "learning_rate": 4.9535831159551186e-08, "loss": 0.1112, "step": 32787 }, { "epoch": 0.9565318863410934, "grad_norm": 1.53006393883715, "learning_rate": 4.9469516738352986e-08, "loss": 0.1085, "step": 32788 }, { "epoch": 0.956561059571737, "grad_norm": 0.8742586377683601, "learning_rate": 4.94032465143085e-08, "loss": 0.1067, "step": 32789 }, { "epoch": 0.9565902328023805, "grad_norm": 0.8120832384895197, "learning_rate": 4.933702048801003e-08, "loss": 0.1044, "step": 32790 }, { "epoch": 0.9566194060330241, "grad_norm": 0.7112104474890033, "learning_rate": 4.927083866004934e-08, "loss": 0.1008, "step": 32791 }, { "epoch": 0.9566485792636676, "grad_norm": 0.8156216727451591, "learning_rate": 4.920470103101649e-08, "loss": 0.0775, "step": 32792 }, { "epoch": 0.9566777524943112, "grad_norm": 0.9304001213469875, "learning_rate": 4.9138607601502684e-08, "loss": 0.1237, "step": 32793 }, { "epoch": 0.9567069257249547, "grad_norm": 0.7397240385780188, "learning_rate": 4.90725583720969e-08, "loss": 0.1073, "step": 32794 }, { "epoch": 0.9567360989555983, "grad_norm": 0.8207286387182503, "learning_rate": 4.9006553343389774e-08, "loss": 0.1118, "step": 32795 }, { "epoch": 0.9567652721862419, "grad_norm": 0.9117029824346379, "learning_rate": 4.894059251596972e-08, "loss": 0.1349, "step": 32796 }, { "epoch": 0.9567944454168855, "grad_norm": 0.9294226671764082, "learning_rate": 4.887467589042683e-08, "loss": 0.1069, "step": 32797 }, { "epoch": 0.9568236186475291, "grad_norm": 0.9921486183494397, "learning_rate": 4.88088034673484e-08, "loss": 0.1036, "step": 32798 }, { "epoch": 0.9568527918781726, "grad_norm": 0.995969663092959, "learning_rate": 4.874297524732341e-08, "loss": 0.1221, "step": 32799 }, { "epoch": 0.9568819651088162, "grad_norm": 0.7589754390526152, "learning_rate": 4.867719123093917e-08, "loss": 0.1041, "step": 32800 }, { "epoch": 0.9569111383394597, "grad_norm": 0.920959659120988, "learning_rate": 4.861145141878243e-08, "loss": 0.1026, "step": 32801 }, { "epoch": 0.9569403115701033, "grad_norm": 1.0198832879935908, "learning_rate": 4.8545755811441054e-08, "loss": 0.12, "step": 32802 }, { "epoch": 0.9569694848007468, "grad_norm": 0.7420504055522724, "learning_rate": 4.8480104409501236e-08, "loss": 0.1258, "step": 32803 }, { "epoch": 0.9569986580313904, "grad_norm": 0.738726918682796, "learning_rate": 4.8414497213549184e-08, "loss": 0.1081, "step": 32804 }, { "epoch": 0.957027831262034, "grad_norm": 0.6951912015343367, "learning_rate": 4.834893422416997e-08, "loss": 0.1024, "step": 32805 }, { "epoch": 0.9570570044926775, "grad_norm": 0.8459641707126396, "learning_rate": 4.828341544194981e-08, "loss": 0.1158, "step": 32806 }, { "epoch": 0.957086177723321, "grad_norm": 0.7477834215712245, "learning_rate": 4.8217940867473225e-08, "loss": 0.102, "step": 32807 }, { "epoch": 0.9571153509539646, "grad_norm": 0.7300391656927202, "learning_rate": 4.8152510501324745e-08, "loss": 0.1436, "step": 32808 }, { "epoch": 0.9571445241846082, "grad_norm": 1.000883386167395, "learning_rate": 4.8087124344088353e-08, "loss": 0.1075, "step": 32809 }, { "epoch": 0.9571736974152517, "grad_norm": 0.9674040131635351, "learning_rate": 4.8021782396348026e-08, "loss": 0.1262, "step": 32810 }, { "epoch": 0.9572028706458954, "grad_norm": 0.6453790645349463, "learning_rate": 4.795648465868719e-08, "loss": 0.0904, "step": 32811 }, { "epoch": 0.9572320438765389, "grad_norm": 0.7435397773090228, "learning_rate": 4.7891231131688695e-08, "loss": 0.1081, "step": 32812 }, { "epoch": 0.9572612171071825, "grad_norm": 0.7425090442832256, "learning_rate": 4.782602181593488e-08, "loss": 0.1281, "step": 32813 }, { "epoch": 0.957290390337826, "grad_norm": 0.7247203167434828, "learning_rate": 4.7760856712008584e-08, "loss": 0.1165, "step": 32814 }, { "epoch": 0.9573195635684696, "grad_norm": 1.0385829791443835, "learning_rate": 4.769573582049103e-08, "loss": 0.1239, "step": 32815 }, { "epoch": 0.9573487367991131, "grad_norm": 0.8691451414890681, "learning_rate": 4.763065914196341e-08, "loss": 0.132, "step": 32816 }, { "epoch": 0.9573779100297567, "grad_norm": 0.7427551091637435, "learning_rate": 4.756562667700748e-08, "loss": 0.1042, "step": 32817 }, { "epoch": 0.9574070832604002, "grad_norm": 0.9395494879599302, "learning_rate": 4.750063842620389e-08, "loss": 0.128, "step": 32818 }, { "epoch": 0.9574362564910438, "grad_norm": 0.8307875710800233, "learning_rate": 4.743569439013107e-08, "loss": 0.1175, "step": 32819 }, { "epoch": 0.9574654297216874, "grad_norm": 0.788224328384697, "learning_rate": 4.737079456937077e-08, "loss": 0.1032, "step": 32820 }, { "epoch": 0.9574946029523309, "grad_norm": 0.9026167256181064, "learning_rate": 4.730593896450197e-08, "loss": 0.1181, "step": 32821 }, { "epoch": 0.9575237761829745, "grad_norm": 0.7578415303003803, "learning_rate": 4.724112757610311e-08, "loss": 0.1037, "step": 32822 }, { "epoch": 0.957552949413618, "grad_norm": 0.712269092387141, "learning_rate": 4.717636040475315e-08, "loss": 0.1184, "step": 32823 }, { "epoch": 0.9575821226442617, "grad_norm": 0.7880497015147347, "learning_rate": 4.711163745103053e-08, "loss": 0.1004, "step": 32824 }, { "epoch": 0.9576112958749052, "grad_norm": 0.8939136102826749, "learning_rate": 4.704695871551257e-08, "loss": 0.0993, "step": 32825 }, { "epoch": 0.9576404691055488, "grad_norm": 0.5951060923281825, "learning_rate": 4.698232419877658e-08, "loss": 0.094, "step": 32826 }, { "epoch": 0.9576696423361923, "grad_norm": 0.7474564157472834, "learning_rate": 4.6917733901400976e-08, "loss": 0.1207, "step": 32827 }, { "epoch": 0.9576988155668359, "grad_norm": 0.8172784909372025, "learning_rate": 4.685318782396087e-08, "loss": 0.1042, "step": 32828 }, { "epoch": 0.9577279887974794, "grad_norm": 0.8084277300051048, "learning_rate": 4.678868596703301e-08, "loss": 0.0943, "step": 32829 }, { "epoch": 0.957757162028123, "grad_norm": 0.8381053554024123, "learning_rate": 4.6724228331194166e-08, "loss": 0.1208, "step": 32830 }, { "epoch": 0.9577863352587666, "grad_norm": 0.7795954897951408, "learning_rate": 4.665981491701776e-08, "loss": 0.1105, "step": 32831 }, { "epoch": 0.9578155084894101, "grad_norm": 1.1263082644142337, "learning_rate": 4.6595445725080566e-08, "loss": 0.1193, "step": 32832 }, { "epoch": 0.9578446817200537, "grad_norm": 0.7691307823628832, "learning_rate": 4.653112075595711e-08, "loss": 0.1138, "step": 32833 }, { "epoch": 0.9578738549506972, "grad_norm": 0.9599208035477919, "learning_rate": 4.6466840010221395e-08, "loss": 0.1025, "step": 32834 }, { "epoch": 0.9579030281813408, "grad_norm": 0.9485995240003718, "learning_rate": 4.640260348844683e-08, "loss": 0.0876, "step": 32835 }, { "epoch": 0.9579322014119843, "grad_norm": 0.7077820665793446, "learning_rate": 4.6338411191207414e-08, "loss": 0.1085, "step": 32836 }, { "epoch": 0.9579613746426279, "grad_norm": 0.827331988072541, "learning_rate": 4.627426311907601e-08, "loss": 0.1123, "step": 32837 }, { "epoch": 0.9579905478732715, "grad_norm": 0.8156878645710716, "learning_rate": 4.621015927262551e-08, "loss": 0.1085, "step": 32838 }, { "epoch": 0.9580197211039151, "grad_norm": 0.7015158744487017, "learning_rate": 4.614609965242822e-08, "loss": 0.129, "step": 32839 }, { "epoch": 0.9580488943345586, "grad_norm": 0.7925884216061743, "learning_rate": 4.608208425905592e-08, "loss": 0.1252, "step": 32840 }, { "epoch": 0.9580780675652022, "grad_norm": 0.9877298809925094, "learning_rate": 4.601811309308035e-08, "loss": 0.1214, "step": 32841 }, { "epoch": 0.9581072407958457, "grad_norm": 0.81919534179844, "learning_rate": 4.595418615507219e-08, "loss": 0.1239, "step": 32842 }, { "epoch": 0.9581364140264893, "grad_norm": 0.7967418626689056, "learning_rate": 4.589030344560208e-08, "loss": 0.0949, "step": 32843 }, { "epoch": 0.9581655872571329, "grad_norm": 0.8079560103900926, "learning_rate": 4.582646496524124e-08, "loss": 0.1193, "step": 32844 }, { "epoch": 0.9581947604877764, "grad_norm": 0.9559706878467896, "learning_rate": 4.5762670714559196e-08, "loss": 0.104, "step": 32845 }, { "epoch": 0.95822393371842, "grad_norm": 0.684763539678176, "learning_rate": 4.569892069412496e-08, "loss": 0.0884, "step": 32846 }, { "epoch": 0.9582531069490635, "grad_norm": 0.8668853365655103, "learning_rate": 4.563521490450862e-08, "loss": 0.0919, "step": 32847 }, { "epoch": 0.9582822801797071, "grad_norm": 0.9643167870716441, "learning_rate": 4.557155334627805e-08, "loss": 0.0941, "step": 32848 }, { "epoch": 0.9583114534103506, "grad_norm": 0.873714390428912, "learning_rate": 4.550793602000114e-08, "loss": 0.0918, "step": 32849 }, { "epoch": 0.9583406266409942, "grad_norm": 0.893234930076437, "learning_rate": 4.544436292624743e-08, "loss": 0.1033, "step": 32850 }, { "epoch": 0.9583697998716378, "grad_norm": 0.7652129907452041, "learning_rate": 4.538083406558425e-08, "loss": 0.1277, "step": 32851 }, { "epoch": 0.9583989731022814, "grad_norm": 0.7577640621083667, "learning_rate": 4.531734943857724e-08, "loss": 0.1071, "step": 32852 }, { "epoch": 0.958428146332925, "grad_norm": 1.1307995167316394, "learning_rate": 4.525390904579485e-08, "loss": 0.0948, "step": 32853 }, { "epoch": 0.9584573195635685, "grad_norm": 0.7003890147655835, "learning_rate": 4.5190512887802186e-08, "loss": 0.1044, "step": 32854 }, { "epoch": 0.958486492794212, "grad_norm": 0.8369313551339875, "learning_rate": 4.512716096516601e-08, "loss": 0.1015, "step": 32855 }, { "epoch": 0.9585156660248556, "grad_norm": 0.7204840338085293, "learning_rate": 4.506385327845197e-08, "loss": 0.1002, "step": 32856 }, { "epoch": 0.9585448392554992, "grad_norm": 1.4576364723203665, "learning_rate": 4.500058982822464e-08, "loss": 0.1044, "step": 32857 }, { "epoch": 0.9585740124861427, "grad_norm": 0.9758919669163466, "learning_rate": 4.493737061504966e-08, "loss": 0.099, "step": 32858 }, { "epoch": 0.9586031857167863, "grad_norm": 0.8193843089787872, "learning_rate": 4.487419563949047e-08, "loss": 0.1227, "step": 32859 }, { "epoch": 0.9586323589474298, "grad_norm": 0.8757366667764627, "learning_rate": 4.4811064902112175e-08, "loss": 0.1016, "step": 32860 }, { "epoch": 0.9586615321780734, "grad_norm": 0.6686091926794362, "learning_rate": 4.474797840347711e-08, "loss": 0.0864, "step": 32861 }, { "epoch": 0.9586907054087169, "grad_norm": 0.8385389414468926, "learning_rate": 4.468493614414926e-08, "loss": 0.1282, "step": 32862 }, { "epoch": 0.9587198786393605, "grad_norm": 1.046296384758437, "learning_rate": 4.462193812469151e-08, "loss": 0.1227, "step": 32863 }, { "epoch": 0.958749051870004, "grad_norm": 0.9308343716101727, "learning_rate": 4.4558984345666745e-08, "loss": 0.0847, "step": 32864 }, { "epoch": 0.9587782251006477, "grad_norm": 0.6667854690095036, "learning_rate": 4.4496074807635626e-08, "loss": 0.1114, "step": 32865 }, { "epoch": 0.9588073983312912, "grad_norm": 0.7987377356130329, "learning_rate": 4.443320951116103e-08, "loss": 0.1207, "step": 32866 }, { "epoch": 0.9588365715619348, "grad_norm": 1.01976480586201, "learning_rate": 4.437038845680308e-08, "loss": 0.0965, "step": 32867 }, { "epoch": 0.9588657447925784, "grad_norm": 0.8932694195104837, "learning_rate": 4.4307611645124096e-08, "loss": 0.1133, "step": 32868 }, { "epoch": 0.9588949180232219, "grad_norm": 0.7346521941143019, "learning_rate": 4.4244879076683065e-08, "loss": 0.1083, "step": 32869 }, { "epoch": 0.9589240912538655, "grad_norm": 0.7835186886056975, "learning_rate": 4.418219075204122e-08, "loss": 0.1083, "step": 32870 }, { "epoch": 0.958953264484509, "grad_norm": 0.8545462903301045, "learning_rate": 4.411954667175811e-08, "loss": 0.1427, "step": 32871 }, { "epoch": 0.9589824377151526, "grad_norm": 0.7472910583556739, "learning_rate": 4.405694683639161e-08, "loss": 0.1214, "step": 32872 }, { "epoch": 0.9590116109457961, "grad_norm": 0.967913088083774, "learning_rate": 4.3994391246501846e-08, "loss": 0.1058, "step": 32873 }, { "epoch": 0.9590407841764397, "grad_norm": 0.7862975461298798, "learning_rate": 4.39318799026478e-08, "loss": 0.1173, "step": 32874 }, { "epoch": 0.9590699574070832, "grad_norm": 0.9360038111422968, "learning_rate": 4.3869412805386256e-08, "loss": 0.1114, "step": 32875 }, { "epoch": 0.9590991306377268, "grad_norm": 0.7903278081066042, "learning_rate": 4.380698995527566e-08, "loss": 0.1173, "step": 32876 }, { "epoch": 0.9591283038683703, "grad_norm": 0.905513339696466, "learning_rate": 4.374461135287278e-08, "loss": 0.1239, "step": 32877 }, { "epoch": 0.959157477099014, "grad_norm": 0.8878514363182609, "learning_rate": 4.3682276998735505e-08, "loss": 0.1103, "step": 32878 }, { "epoch": 0.9591866503296576, "grad_norm": 0.9682928562932697, "learning_rate": 4.361998689341895e-08, "loss": 0.1312, "step": 32879 }, { "epoch": 0.9592158235603011, "grad_norm": 0.6971320156321881, "learning_rate": 4.355774103748045e-08, "loss": 0.1155, "step": 32880 }, { "epoch": 0.9592449967909447, "grad_norm": 0.93114582243866, "learning_rate": 4.3495539431475106e-08, "loss": 0.1156, "step": 32881 }, { "epoch": 0.9592741700215882, "grad_norm": 1.006106115889474, "learning_rate": 4.343338207595804e-08, "loss": 0.1183, "step": 32882 }, { "epoch": 0.9593033432522318, "grad_norm": 0.7048743503820754, "learning_rate": 4.3371268971484915e-08, "loss": 0.1177, "step": 32883 }, { "epoch": 0.9593325164828753, "grad_norm": 0.9426349108180363, "learning_rate": 4.330920011860973e-08, "loss": 0.11, "step": 32884 }, { "epoch": 0.9593616897135189, "grad_norm": 0.9077580728160185, "learning_rate": 4.3247175517887044e-08, "loss": 0.1293, "step": 32885 }, { "epoch": 0.9593908629441624, "grad_norm": 1.0798663308631007, "learning_rate": 4.318519516986974e-08, "loss": 0.1283, "step": 32886 }, { "epoch": 0.959420036174806, "grad_norm": 0.7957556625772834, "learning_rate": 4.312325907511183e-08, "loss": 0.1151, "step": 32887 }, { "epoch": 0.9594492094054495, "grad_norm": 1.0294127171563132, "learning_rate": 4.3061367234166764e-08, "loss": 0.1007, "step": 32888 }, { "epoch": 0.9594783826360931, "grad_norm": 0.8257449616386338, "learning_rate": 4.2999519647585755e-08, "loss": 0.0825, "step": 32889 }, { "epoch": 0.9595075558667366, "grad_norm": 0.7546505037339957, "learning_rate": 4.293771631592225e-08, "loss": 0.1035, "step": 32890 }, { "epoch": 0.9595367290973802, "grad_norm": 0.7245457104184709, "learning_rate": 4.287595723972693e-08, "loss": 0.1109, "step": 32891 }, { "epoch": 0.9595659023280239, "grad_norm": 0.8459373428090952, "learning_rate": 4.281424241955212e-08, "loss": 0.0971, "step": 32892 }, { "epoch": 0.9595950755586674, "grad_norm": 0.8149609414731143, "learning_rate": 4.2752571855948496e-08, "loss": 0.1057, "step": 32893 }, { "epoch": 0.959624248789311, "grad_norm": 0.7574602051965764, "learning_rate": 4.269094554946618e-08, "loss": 0.1307, "step": 32894 }, { "epoch": 0.9596534220199545, "grad_norm": 0.84237972087483, "learning_rate": 4.262936350065583e-08, "loss": 0.0923, "step": 32895 }, { "epoch": 0.9596825952505981, "grad_norm": 0.8154954755563415, "learning_rate": 4.256782571006701e-08, "loss": 0.0889, "step": 32896 }, { "epoch": 0.9597117684812416, "grad_norm": 0.7796157232550976, "learning_rate": 4.2506332178249286e-08, "loss": 0.1263, "step": 32897 }, { "epoch": 0.9597409417118852, "grad_norm": 0.6344972446368206, "learning_rate": 4.244488290575166e-08, "loss": 0.0886, "step": 32898 }, { "epoch": 0.9597701149425287, "grad_norm": 1.2740887915383508, "learning_rate": 4.2383477893122584e-08, "loss": 0.093, "step": 32899 }, { "epoch": 0.9597992881731723, "grad_norm": 0.8093674068745108, "learning_rate": 4.23221171409105e-08, "loss": 0.11, "step": 32900 }, { "epoch": 0.9598284614038158, "grad_norm": 0.9556726021725885, "learning_rate": 4.2260800649662756e-08, "loss": 0.1429, "step": 32901 }, { "epoch": 0.9598576346344594, "grad_norm": 0.7988374231433764, "learning_rate": 4.219952841992725e-08, "loss": 0.1224, "step": 32902 }, { "epoch": 0.9598868078651029, "grad_norm": 0.9391374602952318, "learning_rate": 4.2138300452250756e-08, "loss": 0.1175, "step": 32903 }, { "epoch": 0.9599159810957465, "grad_norm": 0.7569434105553122, "learning_rate": 4.207711674718007e-08, "loss": 0.115, "step": 32904 }, { "epoch": 0.9599451543263902, "grad_norm": 0.7241375268849807, "learning_rate": 4.201597730526141e-08, "loss": 0.1069, "step": 32905 }, { "epoch": 0.9599743275570337, "grad_norm": 0.7945752981885397, "learning_rate": 4.1954882127040466e-08, "loss": 0.0892, "step": 32906 }, { "epoch": 0.9600035007876773, "grad_norm": 0.8453466298444707, "learning_rate": 4.18938312130629e-08, "loss": 0.1165, "step": 32907 }, { "epoch": 0.9600326740183208, "grad_norm": 0.7038362290638049, "learning_rate": 4.183282456387327e-08, "loss": 0.1226, "step": 32908 }, { "epoch": 0.9600618472489644, "grad_norm": 0.8187939030680478, "learning_rate": 4.177186218001617e-08, "loss": 0.1085, "step": 32909 }, { "epoch": 0.9600910204796079, "grad_norm": 0.9455942275266978, "learning_rate": 4.171094406203724e-08, "loss": 0.1064, "step": 32910 }, { "epoch": 0.9601201937102515, "grad_norm": 0.8317222060420671, "learning_rate": 4.165007021047884e-08, "loss": 0.0955, "step": 32911 }, { "epoch": 0.960149366940895, "grad_norm": 0.7933141621435059, "learning_rate": 4.1589240625884986e-08, "loss": 0.1097, "step": 32912 }, { "epoch": 0.9601785401715386, "grad_norm": 0.8913368630929362, "learning_rate": 4.152845530879912e-08, "loss": 0.1138, "step": 32913 }, { "epoch": 0.9602077134021821, "grad_norm": 0.7858133652973975, "learning_rate": 4.1467714259763034e-08, "loss": 0.077, "step": 32914 }, { "epoch": 0.9602368866328257, "grad_norm": 0.8070321620835452, "learning_rate": 4.1407017479319636e-08, "loss": 0.1108, "step": 32915 }, { "epoch": 0.9602660598634692, "grad_norm": 0.7260421751741842, "learning_rate": 4.13463649680107e-08, "loss": 0.0944, "step": 32916 }, { "epoch": 0.9602952330941128, "grad_norm": 0.8073658116030934, "learning_rate": 4.128575672637747e-08, "loss": 0.1107, "step": 32917 }, { "epoch": 0.9603244063247564, "grad_norm": 0.7818934109657987, "learning_rate": 4.122519275496173e-08, "loss": 0.1207, "step": 32918 }, { "epoch": 0.9603535795554, "grad_norm": 0.6977574966582254, "learning_rate": 4.11646730543036e-08, "loss": 0.1011, "step": 32919 }, { "epoch": 0.9603827527860436, "grad_norm": 0.7836466621465713, "learning_rate": 4.110419762494322e-08, "loss": 0.1056, "step": 32920 }, { "epoch": 0.9604119260166871, "grad_norm": 0.7855120714498354, "learning_rate": 4.10437664674207e-08, "loss": 0.11, "step": 32921 }, { "epoch": 0.9604410992473307, "grad_norm": 0.7335995347301505, "learning_rate": 4.098337958227561e-08, "loss": 0.1251, "step": 32922 }, { "epoch": 0.9604702724779742, "grad_norm": 0.8376774119876726, "learning_rate": 4.0923036970047516e-08, "loss": 0.1322, "step": 32923 }, { "epoch": 0.9604994457086178, "grad_norm": 0.8464667864558919, "learning_rate": 4.086273863127488e-08, "loss": 0.1078, "step": 32924 }, { "epoch": 0.9605286189392613, "grad_norm": 0.7911772461653852, "learning_rate": 4.08024845664956e-08, "loss": 0.1028, "step": 32925 }, { "epoch": 0.9605577921699049, "grad_norm": 0.9011527629033576, "learning_rate": 4.074227477624759e-08, "loss": 0.1104, "step": 32926 }, { "epoch": 0.9605869654005484, "grad_norm": 0.9795115911375706, "learning_rate": 4.068210926106875e-08, "loss": 0.1424, "step": 32927 }, { "epoch": 0.960616138631192, "grad_norm": 0.6949332747096366, "learning_rate": 4.062198802149642e-08, "loss": 0.1111, "step": 32928 }, { "epoch": 0.9606453118618355, "grad_norm": 0.8964322759490889, "learning_rate": 4.056191105806684e-08, "loss": 0.1154, "step": 32929 }, { "epoch": 0.9606744850924791, "grad_norm": 0.8493600943026116, "learning_rate": 4.0501878371316806e-08, "loss": 0.1171, "step": 32930 }, { "epoch": 0.9607036583231227, "grad_norm": 0.7911199460032626, "learning_rate": 4.044188996178255e-08, "loss": 0.1151, "step": 32931 }, { "epoch": 0.9607328315537662, "grad_norm": 0.9322881469754698, "learning_rate": 4.0381945829998105e-08, "loss": 0.0967, "step": 32932 }, { "epoch": 0.9607620047844099, "grad_norm": 0.7760666350108234, "learning_rate": 4.0322045976500246e-08, "loss": 0.1201, "step": 32933 }, { "epoch": 0.9607911780150534, "grad_norm": 0.9476825495695969, "learning_rate": 4.0262190401822995e-08, "loss": 0.1226, "step": 32934 }, { "epoch": 0.960820351245697, "grad_norm": 0.8372849613729361, "learning_rate": 4.0202379106501486e-08, "loss": 0.1042, "step": 32935 }, { "epoch": 0.9608495244763405, "grad_norm": 0.8716121342632084, "learning_rate": 4.014261209106862e-08, "loss": 0.1012, "step": 32936 }, { "epoch": 0.9608786977069841, "grad_norm": 0.8071127617666869, "learning_rate": 4.0082889356058416e-08, "loss": 0.1087, "step": 32937 }, { "epoch": 0.9609078709376276, "grad_norm": 1.0736021576704553, "learning_rate": 4.002321090200434e-08, "loss": 0.1343, "step": 32938 }, { "epoch": 0.9609370441682712, "grad_norm": 1.272409731944864, "learning_rate": 3.996357672943874e-08, "loss": 0.1102, "step": 32939 }, { "epoch": 0.9609662173989147, "grad_norm": 0.8475898315710867, "learning_rate": 3.990398683889507e-08, "loss": 0.121, "step": 32940 }, { "epoch": 0.9609953906295583, "grad_norm": 0.7365683823754445, "learning_rate": 3.984444123090403e-08, "loss": 0.114, "step": 32941 }, { "epoch": 0.9610245638602019, "grad_norm": 0.8605090063585837, "learning_rate": 3.978493990599741e-08, "loss": 0.1141, "step": 32942 }, { "epoch": 0.9610537370908454, "grad_norm": 0.717129922732362, "learning_rate": 3.972548286470701e-08, "loss": 0.1042, "step": 32943 }, { "epoch": 0.961082910321489, "grad_norm": 0.7521501690881692, "learning_rate": 3.966607010756351e-08, "loss": 0.1269, "step": 32944 }, { "epoch": 0.9611120835521325, "grad_norm": 0.6593434180891637, "learning_rate": 3.960670163509706e-08, "loss": 0.0992, "step": 32945 }, { "epoch": 0.9611412567827762, "grad_norm": 0.8931219923621592, "learning_rate": 3.954737744783776e-08, "loss": 0.1182, "step": 32946 }, { "epoch": 0.9611704300134197, "grad_norm": 0.79771928209597, "learning_rate": 3.9488097546315774e-08, "loss": 0.1016, "step": 32947 }, { "epoch": 0.9611996032440633, "grad_norm": 0.8599903754100809, "learning_rate": 3.942886193105955e-08, "loss": 0.1198, "step": 32948 }, { "epoch": 0.9612287764747068, "grad_norm": 0.7881054768386069, "learning_rate": 3.936967060259811e-08, "loss": 0.1202, "step": 32949 }, { "epoch": 0.9612579497053504, "grad_norm": 0.8539346892614871, "learning_rate": 3.931052356145992e-08, "loss": 0.0911, "step": 32950 }, { "epoch": 0.9612871229359939, "grad_norm": 0.7723839977611351, "learning_rate": 3.925142080817346e-08, "loss": 0.0975, "step": 32951 }, { "epoch": 0.9613162961666375, "grad_norm": 1.1751985653196537, "learning_rate": 3.9192362343266065e-08, "loss": 0.1021, "step": 32952 }, { "epoch": 0.961345469397281, "grad_norm": 1.1151891842326118, "learning_rate": 3.913334816726511e-08, "loss": 0.1307, "step": 32953 }, { "epoch": 0.9613746426279246, "grad_norm": 1.0173224641863299, "learning_rate": 3.907437828069738e-08, "loss": 0.1285, "step": 32954 }, { "epoch": 0.9614038158585682, "grad_norm": 1.1297874265647203, "learning_rate": 3.901545268408913e-08, "loss": 0.1072, "step": 32955 }, { "epoch": 0.9614329890892117, "grad_norm": 0.7802241055611593, "learning_rate": 3.8956571377966603e-08, "loss": 0.1209, "step": 32956 }, { "epoch": 0.9614621623198553, "grad_norm": 0.8205962512299297, "learning_rate": 3.889773436285604e-08, "loss": 0.1092, "step": 32957 }, { "epoch": 0.9614913355504988, "grad_norm": 0.8153359881409391, "learning_rate": 3.8838941639282036e-08, "loss": 0.143, "step": 32958 }, { "epoch": 0.9615205087811424, "grad_norm": 0.8393741738916831, "learning_rate": 3.8780193207769154e-08, "loss": 0.1295, "step": 32959 }, { "epoch": 0.961549682011786, "grad_norm": 0.8312326704942081, "learning_rate": 3.8721489068842543e-08, "loss": 0.1003, "step": 32960 }, { "epoch": 0.9615788552424296, "grad_norm": 0.7485737538452321, "learning_rate": 3.866282922302622e-08, "loss": 0.1245, "step": 32961 }, { "epoch": 0.9616080284730731, "grad_norm": 0.6920980023177091, "learning_rate": 3.860421367084366e-08, "loss": 0.1222, "step": 32962 }, { "epoch": 0.9616372017037167, "grad_norm": 0.8393656485189316, "learning_rate": 3.8545642412818327e-08, "loss": 0.1136, "step": 32963 }, { "epoch": 0.9616663749343602, "grad_norm": 0.6426028953976453, "learning_rate": 3.84871154494737e-08, "loss": 0.0787, "step": 32964 }, { "epoch": 0.9616955481650038, "grad_norm": 0.9316557809494685, "learning_rate": 3.842863278133102e-08, "loss": 0.1054, "step": 32965 }, { "epoch": 0.9617247213956474, "grad_norm": 0.8161632030829696, "learning_rate": 3.837019440891321e-08, "loss": 0.1011, "step": 32966 }, { "epoch": 0.9617538946262909, "grad_norm": 0.86914766144398, "learning_rate": 3.8311800332742065e-08, "loss": 0.1156, "step": 32967 }, { "epoch": 0.9617830678569345, "grad_norm": 0.9001803427227271, "learning_rate": 3.825345055333829e-08, "loss": 0.1143, "step": 32968 }, { "epoch": 0.961812241087578, "grad_norm": 0.6701753188413643, "learning_rate": 3.819514507122368e-08, "loss": 0.1106, "step": 32969 }, { "epoch": 0.9618414143182216, "grad_norm": 0.8365762014485906, "learning_rate": 3.813688388691783e-08, "loss": 0.124, "step": 32970 }, { "epoch": 0.9618705875488651, "grad_norm": 1.013591248087509, "learning_rate": 3.807866700094198e-08, "loss": 0.119, "step": 32971 }, { "epoch": 0.9618997607795087, "grad_norm": 0.7875181488612599, "learning_rate": 3.8020494413815165e-08, "loss": 0.0823, "step": 32972 }, { "epoch": 0.9619289340101523, "grad_norm": 0.7823356857316066, "learning_rate": 3.796236612605641e-08, "loss": 0.1174, "step": 32973 }, { "epoch": 0.9619581072407959, "grad_norm": 0.7649462781377928, "learning_rate": 3.790428213818531e-08, "loss": 0.122, "step": 32974 }, { "epoch": 0.9619872804714394, "grad_norm": 0.7209496697471285, "learning_rate": 3.784624245072088e-08, "loss": 0.0914, "step": 32975 }, { "epoch": 0.962016453702083, "grad_norm": 0.8375459597140754, "learning_rate": 3.778824706417994e-08, "loss": 0.1241, "step": 32976 }, { "epoch": 0.9620456269327265, "grad_norm": 0.6895641257485305, "learning_rate": 3.7730295979080956e-08, "loss": 0.0936, "step": 32977 }, { "epoch": 0.9620748001633701, "grad_norm": 0.9472593070816314, "learning_rate": 3.767238919594185e-08, "loss": 0.0892, "step": 32978 }, { "epoch": 0.9621039733940137, "grad_norm": 0.7789037981698359, "learning_rate": 3.761452671527832e-08, "loss": 0.1096, "step": 32979 }, { "epoch": 0.9621331466246572, "grad_norm": 0.7157690630040628, "learning_rate": 3.755670853760773e-08, "loss": 0.1179, "step": 32980 }, { "epoch": 0.9621623198553008, "grad_norm": 0.9952490367997867, "learning_rate": 3.7498934663446897e-08, "loss": 0.1266, "step": 32981 }, { "epoch": 0.9621914930859443, "grad_norm": 0.78846643130223, "learning_rate": 3.7441205093310394e-08, "loss": 0.0923, "step": 32982 }, { "epoch": 0.9622206663165879, "grad_norm": 0.7882133989258244, "learning_rate": 3.738351982771449e-08, "loss": 0.1077, "step": 32983 }, { "epoch": 0.9622498395472314, "grad_norm": 0.8372383457356164, "learning_rate": 3.7325878867173757e-08, "loss": 0.113, "step": 32984 }, { "epoch": 0.962279012777875, "grad_norm": 0.8303243025515532, "learning_rate": 3.72682822122028e-08, "loss": 0.1028, "step": 32985 }, { "epoch": 0.9623081860085185, "grad_norm": 0.7177457152597371, "learning_rate": 3.7210729863315645e-08, "loss": 0.0972, "step": 32986 }, { "epoch": 0.9623373592391622, "grad_norm": 0.6882980232675601, "learning_rate": 3.7153221821026875e-08, "loss": 0.1213, "step": 32987 }, { "epoch": 0.9623665324698057, "grad_norm": 0.8717886244412169, "learning_rate": 3.709575808584942e-08, "loss": 0.1317, "step": 32988 }, { "epoch": 0.9623957057004493, "grad_norm": 0.7009483093016554, "learning_rate": 3.703833865829565e-08, "loss": 0.105, "step": 32989 }, { "epoch": 0.9624248789310929, "grad_norm": 0.7956738157981716, "learning_rate": 3.6980963538879585e-08, "loss": 0.1406, "step": 32990 }, { "epoch": 0.9624540521617364, "grad_norm": 1.178436146550977, "learning_rate": 3.69236327281125e-08, "loss": 0.1005, "step": 32991 }, { "epoch": 0.96248322539238, "grad_norm": 1.0159017263716346, "learning_rate": 3.68663462265062e-08, "loss": 0.0968, "step": 32992 }, { "epoch": 0.9625123986230235, "grad_norm": 0.7600469535711079, "learning_rate": 3.680910403457194e-08, "loss": 0.1155, "step": 32993 }, { "epoch": 0.9625415718536671, "grad_norm": 0.6000076446619093, "learning_rate": 3.6751906152822095e-08, "loss": 0.1024, "step": 32994 }, { "epoch": 0.9625707450843106, "grad_norm": 0.8937829675335617, "learning_rate": 3.669475258176625e-08, "loss": 0.1128, "step": 32995 }, { "epoch": 0.9625999183149542, "grad_norm": 0.8659494459743594, "learning_rate": 3.663764332191455e-08, "loss": 0.116, "step": 32996 }, { "epoch": 0.9626290915455977, "grad_norm": 1.0176679697840387, "learning_rate": 3.658057837377716e-08, "loss": 0.0958, "step": 32997 }, { "epoch": 0.9626582647762413, "grad_norm": 0.77596651910336, "learning_rate": 3.6523557737863646e-08, "loss": 0.1245, "step": 32998 }, { "epoch": 0.9626874380068848, "grad_norm": 0.7578318550528443, "learning_rate": 3.646658141468251e-08, "loss": 0.1087, "step": 32999 }, { "epoch": 0.9627166112375285, "grad_norm": 0.9924055428571892, "learning_rate": 3.640964940474334e-08, "loss": 0.1082, "step": 33000 }, { "epoch": 0.962745784468172, "grad_norm": 0.7900169952493743, "learning_rate": 3.635276170855351e-08, "loss": 0.1185, "step": 33001 }, { "epoch": 0.9627749576988156, "grad_norm": 0.9883778430563414, "learning_rate": 3.629591832662149e-08, "loss": 0.1256, "step": 33002 }, { "epoch": 0.9628041309294592, "grad_norm": 0.8830936674417562, "learning_rate": 3.623911925945467e-08, "loss": 0.1038, "step": 33003 }, { "epoch": 0.9628333041601027, "grad_norm": 0.9489308384871062, "learning_rate": 3.618236450755985e-08, "loss": 0.1226, "step": 33004 }, { "epoch": 0.9628624773907463, "grad_norm": 0.7970359065891596, "learning_rate": 3.6125654071444414e-08, "loss": 0.1031, "step": 33005 }, { "epoch": 0.9628916506213898, "grad_norm": 0.8478636430615173, "learning_rate": 3.606898795161351e-08, "loss": 0.1215, "step": 33006 }, { "epoch": 0.9629208238520334, "grad_norm": 1.1068734345463336, "learning_rate": 3.6012366148574505e-08, "loss": 0.1089, "step": 33007 }, { "epoch": 0.9629499970826769, "grad_norm": 0.8433289126861183, "learning_rate": 3.5955788662831445e-08, "loss": 0.1157, "step": 33008 }, { "epoch": 0.9629791703133205, "grad_norm": 0.743744804801746, "learning_rate": 3.589925549489004e-08, "loss": 0.0957, "step": 33009 }, { "epoch": 0.963008343543964, "grad_norm": 0.9682467808318107, "learning_rate": 3.5842766645255436e-08, "loss": 0.1065, "step": 33010 }, { "epoch": 0.9630375167746076, "grad_norm": 0.8089201034724288, "learning_rate": 3.578632211443112e-08, "loss": 0.0963, "step": 33011 }, { "epoch": 0.9630666900052511, "grad_norm": 0.7571672096064874, "learning_rate": 3.5729921902921684e-08, "loss": 0.1121, "step": 33012 }, { "epoch": 0.9630958632358947, "grad_norm": 0.8249523443104517, "learning_rate": 3.567356601123062e-08, "loss": 0.1084, "step": 33013 }, { "epoch": 0.9631250364665384, "grad_norm": 0.9194602558745402, "learning_rate": 3.561725443986086e-08, "loss": 0.1028, "step": 33014 }, { "epoch": 0.9631542096971819, "grad_norm": 0.7845212325979251, "learning_rate": 3.556098718931478e-08, "loss": 0.1123, "step": 33015 }, { "epoch": 0.9631833829278255, "grad_norm": 0.8637188543436544, "learning_rate": 3.55047642600953e-08, "loss": 0.1098, "step": 33016 }, { "epoch": 0.963212556158469, "grad_norm": 0.7449570557338919, "learning_rate": 3.544858565270426e-08, "loss": 0.0923, "step": 33017 }, { "epoch": 0.9632417293891126, "grad_norm": 0.8004000862337594, "learning_rate": 3.5392451367643466e-08, "loss": 0.0861, "step": 33018 }, { "epoch": 0.9632709026197561, "grad_norm": 0.6309363146460676, "learning_rate": 3.5336361405413076e-08, "loss": 0.1095, "step": 33019 }, { "epoch": 0.9633000758503997, "grad_norm": 0.7282028087219455, "learning_rate": 3.5280315766514915e-08, "loss": 0.102, "step": 33020 }, { "epoch": 0.9633292490810432, "grad_norm": 0.7912197510853144, "learning_rate": 3.522431445144858e-08, "loss": 0.0866, "step": 33021 }, { "epoch": 0.9633584223116868, "grad_norm": 0.9651112176832288, "learning_rate": 3.5168357460714785e-08, "loss": 0.1145, "step": 33022 }, { "epoch": 0.9633875955423303, "grad_norm": 0.8764524239134768, "learning_rate": 3.5112444794812016e-08, "loss": 0.119, "step": 33023 }, { "epoch": 0.9634167687729739, "grad_norm": 0.7404967584345311, "learning_rate": 3.5056576454240984e-08, "loss": 0.1232, "step": 33024 }, { "epoch": 0.9634459420036174, "grad_norm": 0.9301669890657671, "learning_rate": 3.5000752439499076e-08, "loss": 0.1166, "step": 33025 }, { "epoch": 0.963475115234261, "grad_norm": 0.9760219893991673, "learning_rate": 3.494497275108533e-08, "loss": 0.1369, "step": 33026 }, { "epoch": 0.9635042884649047, "grad_norm": 0.8939962196058359, "learning_rate": 3.4889237389497673e-08, "loss": 0.1372, "step": 33027 }, { "epoch": 0.9635334616955482, "grad_norm": 0.8861108558928846, "learning_rate": 3.48335463552335e-08, "loss": 0.0916, "step": 33028 }, { "epoch": 0.9635626349261918, "grad_norm": 0.6805670338444785, "learning_rate": 3.477789964879019e-08, "loss": 0.0971, "step": 33029 }, { "epoch": 0.9635918081568353, "grad_norm": 0.7093956552409721, "learning_rate": 3.4722297270664564e-08, "loss": 0.1201, "step": 33030 }, { "epoch": 0.9636209813874789, "grad_norm": 0.8640660654636393, "learning_rate": 3.4666739221352885e-08, "loss": 0.1015, "step": 33031 }, { "epoch": 0.9636501546181224, "grad_norm": 1.0140748355593368, "learning_rate": 3.461122550135143e-08, "loss": 0.1179, "step": 33032 }, { "epoch": 0.963679327848766, "grad_norm": 0.7772640268005586, "learning_rate": 3.4555756111155356e-08, "loss": 0.1227, "step": 33033 }, { "epoch": 0.9637085010794095, "grad_norm": 1.1213463004520035, "learning_rate": 3.4500331051260383e-08, "loss": 0.1257, "step": 33034 }, { "epoch": 0.9637376743100531, "grad_norm": 0.8425831603907519, "learning_rate": 3.4444950322161106e-08, "loss": 0.1079, "step": 33035 }, { "epoch": 0.9637668475406966, "grad_norm": 0.6934389390212334, "learning_rate": 3.438961392435158e-08, "loss": 0.114, "step": 33036 }, { "epoch": 0.9637960207713402, "grad_norm": 0.8457633294947993, "learning_rate": 3.433432185832641e-08, "loss": 0.1563, "step": 33037 }, { "epoch": 0.9638251940019837, "grad_norm": 0.8949233028699001, "learning_rate": 3.4279074124579094e-08, "loss": 0.1168, "step": 33038 }, { "epoch": 0.9638543672326273, "grad_norm": 0.6767415914019637, "learning_rate": 3.422387072360256e-08, "loss": 0.095, "step": 33039 }, { "epoch": 0.9638835404632708, "grad_norm": 0.8134499340441764, "learning_rate": 3.4168711655889756e-08, "loss": 0.1106, "step": 33040 }, { "epoch": 0.9639127136939145, "grad_norm": 1.791976430899528, "learning_rate": 3.411359692193361e-08, "loss": 0.0971, "step": 33041 }, { "epoch": 0.9639418869245581, "grad_norm": 0.8628128175687187, "learning_rate": 3.405852652222596e-08, "loss": 0.0912, "step": 33042 }, { "epoch": 0.9639710601552016, "grad_norm": 0.7840498889838119, "learning_rate": 3.400350045725809e-08, "loss": 0.0998, "step": 33043 }, { "epoch": 0.9640002333858452, "grad_norm": 0.6967376898445659, "learning_rate": 3.3948518727521807e-08, "loss": 0.1097, "step": 33044 }, { "epoch": 0.9640294066164887, "grad_norm": 0.9112833338701539, "learning_rate": 3.3893581333507286e-08, "loss": 0.1213, "step": 33045 }, { "epoch": 0.9640585798471323, "grad_norm": 0.8504616077584305, "learning_rate": 3.383868827570524e-08, "loss": 0.1123, "step": 33046 }, { "epoch": 0.9640877530777758, "grad_norm": 0.7451934307086762, "learning_rate": 3.3783839554605845e-08, "loss": 0.0916, "step": 33047 }, { "epoch": 0.9641169263084194, "grad_norm": 0.8904984547388519, "learning_rate": 3.372903517069925e-08, "loss": 0.1235, "step": 33048 }, { "epoch": 0.9641460995390629, "grad_norm": 0.8104490859134511, "learning_rate": 3.3674275124473966e-08, "loss": 0.0947, "step": 33049 }, { "epoch": 0.9641752727697065, "grad_norm": 0.8997780029101679, "learning_rate": 3.361955941641959e-08, "loss": 0.1266, "step": 33050 }, { "epoch": 0.96420444600035, "grad_norm": 0.7583106702966642, "learning_rate": 3.356488804702407e-08, "loss": 0.1317, "step": 33051 }, { "epoch": 0.9642336192309936, "grad_norm": 0.9595040396376152, "learning_rate": 3.351026101677535e-08, "loss": 0.1255, "step": 33052 }, { "epoch": 0.9642627924616372, "grad_norm": 0.8097031344318243, "learning_rate": 3.345567832616137e-08, "loss": 0.092, "step": 33053 }, { "epoch": 0.9642919656922808, "grad_norm": 0.8790094470262768, "learning_rate": 3.3401139975669515e-08, "loss": 0.1251, "step": 33054 }, { "epoch": 0.9643211389229244, "grad_norm": 0.8628659152043027, "learning_rate": 3.334664596578718e-08, "loss": 0.1039, "step": 33055 }, { "epoch": 0.9643503121535679, "grad_norm": 0.8293429931343395, "learning_rate": 3.329219629699954e-08, "loss": 0.1141, "step": 33056 }, { "epoch": 0.9643794853842115, "grad_norm": 0.8531929437695732, "learning_rate": 3.323779096979396e-08, "loss": 0.0961, "step": 33057 }, { "epoch": 0.964408658614855, "grad_norm": 0.721512424405809, "learning_rate": 3.3183429984655626e-08, "loss": 0.098, "step": 33058 }, { "epoch": 0.9644378318454986, "grad_norm": 0.7962545737939721, "learning_rate": 3.312911334207025e-08, "loss": 0.1024, "step": 33059 }, { "epoch": 0.9644670050761421, "grad_norm": 0.8252475799718151, "learning_rate": 3.307484104252245e-08, "loss": 0.1019, "step": 33060 }, { "epoch": 0.9644961783067857, "grad_norm": 0.8107301483904512, "learning_rate": 3.302061308649629e-08, "loss": 0.0977, "step": 33061 }, { "epoch": 0.9645253515374292, "grad_norm": 0.7197481795572213, "learning_rate": 3.296642947447693e-08, "loss": 0.0951, "step": 33062 }, { "epoch": 0.9645545247680728, "grad_norm": 0.8894285379799222, "learning_rate": 3.2912290206947305e-08, "loss": 0.1271, "step": 33063 }, { "epoch": 0.9645836979987163, "grad_norm": 0.6619987909443094, "learning_rate": 3.2858195284390936e-08, "loss": 0.092, "step": 33064 }, { "epoch": 0.9646128712293599, "grad_norm": 0.7370591185472479, "learning_rate": 3.280414470729076e-08, "loss": 0.1097, "step": 33065 }, { "epoch": 0.9646420444600035, "grad_norm": 0.7120999999351729, "learning_rate": 3.2750138476129736e-08, "loss": 0.0959, "step": 33066 }, { "epoch": 0.964671217690647, "grad_norm": 1.7231995176486146, "learning_rate": 3.269617659138968e-08, "loss": 0.1007, "step": 33067 }, { "epoch": 0.9647003909212907, "grad_norm": 0.743186413695021, "learning_rate": 3.264225905355245e-08, "loss": 0.1084, "step": 33068 }, { "epoch": 0.9647295641519342, "grad_norm": 0.6858552204728713, "learning_rate": 3.258838586309876e-08, "loss": 0.1067, "step": 33069 }, { "epoch": 0.9647587373825778, "grad_norm": 0.7390737277577913, "learning_rate": 3.253455702051045e-08, "loss": 0.132, "step": 33070 }, { "epoch": 0.9647879106132213, "grad_norm": 0.7102986768287417, "learning_rate": 3.248077252626769e-08, "loss": 0.1048, "step": 33071 }, { "epoch": 0.9648170838438649, "grad_norm": 0.8894319490504129, "learning_rate": 3.2427032380851206e-08, "loss": 0.1153, "step": 33072 }, { "epoch": 0.9648462570745084, "grad_norm": 1.2816692484979821, "learning_rate": 3.2373336584740066e-08, "loss": 0.1329, "step": 33073 }, { "epoch": 0.964875430305152, "grad_norm": 0.8388061203776557, "learning_rate": 3.231968513841388e-08, "loss": 0.1132, "step": 33074 }, { "epoch": 0.9649046035357955, "grad_norm": 0.8260590103042612, "learning_rate": 3.2266078042351155e-08, "loss": 0.1003, "step": 33075 }, { "epoch": 0.9649337767664391, "grad_norm": 0.8251853949977179, "learning_rate": 3.221251529703151e-08, "loss": 0.1083, "step": 33076 }, { "epoch": 0.9649629499970827, "grad_norm": 0.7431120586565373, "learning_rate": 3.2158996902932896e-08, "loss": 0.1028, "step": 33077 }, { "epoch": 0.9649921232277262, "grad_norm": 1.0436005730416644, "learning_rate": 3.21055228605327e-08, "loss": 0.1054, "step": 33078 }, { "epoch": 0.9650212964583698, "grad_norm": 0.8754435615184252, "learning_rate": 3.2052093170307774e-08, "loss": 0.0967, "step": 33079 }, { "epoch": 0.9650504696890133, "grad_norm": 0.946657646123212, "learning_rate": 3.199870783273662e-08, "loss": 0.1219, "step": 33080 }, { "epoch": 0.965079642919657, "grad_norm": 0.5836997331509567, "learning_rate": 3.194536684829497e-08, "loss": 0.0944, "step": 33081 }, { "epoch": 0.9651088161503005, "grad_norm": 0.8370698797524847, "learning_rate": 3.189207021745855e-08, "loss": 0.1203, "step": 33082 }, { "epoch": 0.9651379893809441, "grad_norm": 0.6781248954224448, "learning_rate": 3.1838817940704206e-08, "loss": 0.1109, "step": 33083 }, { "epoch": 0.9651671626115876, "grad_norm": 0.9151357452442114, "learning_rate": 3.178561001850655e-08, "loss": 0.1234, "step": 33084 }, { "epoch": 0.9651963358422312, "grad_norm": 0.9572796163881453, "learning_rate": 3.1732446451341326e-08, "loss": 0.1005, "step": 33085 }, { "epoch": 0.9652255090728747, "grad_norm": 0.8989409858058658, "learning_rate": 3.167932723968259e-08, "loss": 0.1152, "step": 33086 }, { "epoch": 0.9652546823035183, "grad_norm": 0.9120765378663985, "learning_rate": 3.162625238400496e-08, "loss": 0.1157, "step": 33087 }, { "epoch": 0.9652838555341618, "grad_norm": 0.8992382488484729, "learning_rate": 3.157322188478196e-08, "loss": 0.1051, "step": 33088 }, { "epoch": 0.9653130287648054, "grad_norm": 0.6885380623396845, "learning_rate": 3.1520235742487084e-08, "loss": 0.089, "step": 33089 }, { "epoch": 0.965342201995449, "grad_norm": 0.7241347519213581, "learning_rate": 3.1467293957593846e-08, "loss": 0.1072, "step": 33090 }, { "epoch": 0.9653713752260925, "grad_norm": 0.7173859987060868, "learning_rate": 3.1414396530574655e-08, "loss": 0.1185, "step": 33091 }, { "epoch": 0.9654005484567361, "grad_norm": 0.9488102912576474, "learning_rate": 3.136154346190079e-08, "loss": 0.1123, "step": 33092 }, { "epoch": 0.9654297216873796, "grad_norm": 0.7968865901405182, "learning_rate": 3.1308734752045767e-08, "loss": 0.1124, "step": 33093 }, { "epoch": 0.9654588949180232, "grad_norm": 0.8296046017990707, "learning_rate": 3.125597040147976e-08, "loss": 0.1047, "step": 33094 }, { "epoch": 0.9654880681486668, "grad_norm": 1.1175573532422671, "learning_rate": 3.1203250410674625e-08, "loss": 0.1087, "step": 33095 }, { "epoch": 0.9655172413793104, "grad_norm": 0.8937334867429435, "learning_rate": 3.115057478010053e-08, "loss": 0.0985, "step": 33096 }, { "epoch": 0.9655464146099539, "grad_norm": 1.1154369897194318, "learning_rate": 3.1097943510227657e-08, "loss": 0.1029, "step": 33097 }, { "epoch": 0.9655755878405975, "grad_norm": 0.9569703271745534, "learning_rate": 3.1045356601526746e-08, "loss": 0.1053, "step": 33098 }, { "epoch": 0.965604761071241, "grad_norm": 1.0134480377950137, "learning_rate": 3.09928140544663e-08, "loss": 0.1022, "step": 33099 }, { "epoch": 0.9656339343018846, "grad_norm": 0.969656792964338, "learning_rate": 3.094031586951596e-08, "loss": 0.1002, "step": 33100 }, { "epoch": 0.9656631075325282, "grad_norm": 0.7280858817022674, "learning_rate": 3.0887862047144227e-08, "loss": 0.0964, "step": 33101 }, { "epoch": 0.9656922807631717, "grad_norm": 0.80575138456284, "learning_rate": 3.083545258781961e-08, "loss": 0.1356, "step": 33102 }, { "epoch": 0.9657214539938153, "grad_norm": 0.9630461123158345, "learning_rate": 3.078308749200953e-08, "loss": 0.1038, "step": 33103 }, { "epoch": 0.9657506272244588, "grad_norm": 0.7824225056970916, "learning_rate": 3.0730766760182494e-08, "loss": 0.1202, "step": 33104 }, { "epoch": 0.9657798004551024, "grad_norm": 0.7243585512474819, "learning_rate": 3.067849039280424e-08, "loss": 0.1011, "step": 33105 }, { "epoch": 0.9658089736857459, "grad_norm": 0.8562293576233819, "learning_rate": 3.0626258390342165e-08, "loss": 0.1141, "step": 33106 }, { "epoch": 0.9658381469163895, "grad_norm": 0.7856195383148817, "learning_rate": 3.057407075326258e-08, "loss": 0.107, "step": 33107 }, { "epoch": 0.9658673201470331, "grad_norm": 1.3227329182781364, "learning_rate": 3.052192748203175e-08, "loss": 0.1194, "step": 33108 }, { "epoch": 0.9658964933776767, "grad_norm": 0.8942257656405606, "learning_rate": 3.046982857711434e-08, "loss": 0.1131, "step": 33109 }, { "epoch": 0.9659256666083202, "grad_norm": 0.8446760466420564, "learning_rate": 3.0417774038976614e-08, "loss": 0.1032, "step": 33110 }, { "epoch": 0.9659548398389638, "grad_norm": 0.656919495620798, "learning_rate": 3.0365763868082096e-08, "loss": 0.0995, "step": 33111 }, { "epoch": 0.9659840130696074, "grad_norm": 0.6938332147972859, "learning_rate": 3.031379806489598e-08, "loss": 0.1018, "step": 33112 }, { "epoch": 0.9660131863002509, "grad_norm": 0.8633441793085521, "learning_rate": 3.026187662988178e-08, "loss": 0.1079, "step": 33113 }, { "epoch": 0.9660423595308945, "grad_norm": 0.9591116687070425, "learning_rate": 3.0209999563503015e-08, "loss": 0.1093, "step": 33114 }, { "epoch": 0.966071532761538, "grad_norm": 0.7233070936573648, "learning_rate": 3.015816686622319e-08, "loss": 0.1051, "step": 33115 }, { "epoch": 0.9661007059921816, "grad_norm": 0.8609837590828512, "learning_rate": 3.010637853850473e-08, "loss": 0.1113, "step": 33116 }, { "epoch": 0.9661298792228251, "grad_norm": 0.8313443937140413, "learning_rate": 3.0054634580810594e-08, "loss": 0.0928, "step": 33117 }, { "epoch": 0.9661590524534687, "grad_norm": 0.8196854774424199, "learning_rate": 3.000293499360207e-08, "loss": 0.0932, "step": 33118 }, { "epoch": 0.9661882256841122, "grad_norm": 0.7198436893292882, "learning_rate": 2.995127977734047e-08, "loss": 0.1083, "step": 33119 }, { "epoch": 0.9662173989147558, "grad_norm": 0.9496831099723301, "learning_rate": 2.9899668932487636e-08, "loss": 0.0956, "step": 33120 }, { "epoch": 0.9662465721453993, "grad_norm": 0.7647315277194521, "learning_rate": 2.984810245950431e-08, "loss": 0.0781, "step": 33121 }, { "epoch": 0.966275745376043, "grad_norm": 0.8740475248357543, "learning_rate": 2.9796580358850134e-08, "loss": 0.1247, "step": 33122 }, { "epoch": 0.9663049186066865, "grad_norm": 0.8668125591352983, "learning_rate": 2.9745102630985844e-08, "loss": 0.1056, "step": 33123 }, { "epoch": 0.9663340918373301, "grad_norm": 0.8427167068623925, "learning_rate": 2.9693669276371073e-08, "loss": 0.1337, "step": 33124 }, { "epoch": 0.9663632650679737, "grad_norm": 0.9657409626030692, "learning_rate": 2.96422802954649e-08, "loss": 0.1255, "step": 33125 }, { "epoch": 0.9663924382986172, "grad_norm": 0.8345508028686581, "learning_rate": 2.9590935688725288e-08, "loss": 0.1361, "step": 33126 }, { "epoch": 0.9664216115292608, "grad_norm": 1.0172467489185282, "learning_rate": 2.9539635456611872e-08, "loss": 0.0841, "step": 33127 }, { "epoch": 0.9664507847599043, "grad_norm": 0.8745450174531825, "learning_rate": 2.9488379599581507e-08, "loss": 0.1381, "step": 33128 }, { "epoch": 0.9664799579905479, "grad_norm": 1.157553950817821, "learning_rate": 2.943716811809272e-08, "loss": 0.1123, "step": 33129 }, { "epoch": 0.9665091312211914, "grad_norm": 0.8153465544904627, "learning_rate": 2.9386001012601805e-08, "loss": 0.1208, "step": 33130 }, { "epoch": 0.966538304451835, "grad_norm": 0.7524619022845139, "learning_rate": 2.9334878283566737e-08, "loss": 0.1063, "step": 33131 }, { "epoch": 0.9665674776824785, "grad_norm": 0.8994755021243348, "learning_rate": 2.9283799931442704e-08, "loss": 0.1001, "step": 33132 }, { "epoch": 0.9665966509131221, "grad_norm": 0.7204162617885567, "learning_rate": 2.923276595668656e-08, "loss": 0.1077, "step": 33133 }, { "epoch": 0.9666258241437656, "grad_norm": 0.8019148939981262, "learning_rate": 2.9181776359754054e-08, "loss": 0.1395, "step": 33134 }, { "epoch": 0.9666549973744093, "grad_norm": 0.9545709462728342, "learning_rate": 2.9130831141099268e-08, "loss": 0.0897, "step": 33135 }, { "epoch": 0.9666841706050529, "grad_norm": 0.7268565055547692, "learning_rate": 2.907993030117795e-08, "loss": 0.101, "step": 33136 }, { "epoch": 0.9667133438356964, "grad_norm": 0.7361034516151381, "learning_rate": 2.9029073840444733e-08, "loss": 0.1089, "step": 33137 }, { "epoch": 0.96674251706634, "grad_norm": 1.0953714849920393, "learning_rate": 2.89782617593537e-08, "loss": 0.1053, "step": 33138 }, { "epoch": 0.9667716902969835, "grad_norm": 0.7727801956727839, "learning_rate": 2.8927494058357265e-08, "loss": 0.0999, "step": 33139 }, { "epoch": 0.9668008635276271, "grad_norm": 0.9204105396856075, "learning_rate": 2.887677073790951e-08, "loss": 0.0884, "step": 33140 }, { "epoch": 0.9668300367582706, "grad_norm": 0.7203501760453466, "learning_rate": 2.882609179846341e-08, "loss": 0.0931, "step": 33141 }, { "epoch": 0.9668592099889142, "grad_norm": 0.7463886266732962, "learning_rate": 2.877545724047137e-08, "loss": 0.1241, "step": 33142 }, { "epoch": 0.9668883832195577, "grad_norm": 0.693979411774402, "learning_rate": 2.8724867064385265e-08, "loss": 0.0919, "step": 33143 }, { "epoch": 0.9669175564502013, "grad_norm": 0.7106745996989098, "learning_rate": 2.8674321270656946e-08, "loss": 0.1145, "step": 33144 }, { "epoch": 0.9669467296808448, "grad_norm": 0.8100047697571606, "learning_rate": 2.8623819859737168e-08, "loss": 0.1155, "step": 33145 }, { "epoch": 0.9669759029114884, "grad_norm": 0.7790447099378133, "learning_rate": 2.8573362832077234e-08, "loss": 0.098, "step": 33146 }, { "epoch": 0.9670050761421319, "grad_norm": 0.7495023717099109, "learning_rate": 2.8522950188127342e-08, "loss": 0.1144, "step": 33147 }, { "epoch": 0.9670342493727755, "grad_norm": 0.8861864894397857, "learning_rate": 2.847258192833824e-08, "loss": 0.1241, "step": 33148 }, { "epoch": 0.9670634226034192, "grad_norm": 1.1470730068790005, "learning_rate": 2.8422258053159014e-08, "loss": 0.1134, "step": 33149 }, { "epoch": 0.9670925958340627, "grad_norm": 0.7778296006944421, "learning_rate": 2.8371978563039304e-08, "loss": 0.1124, "step": 33150 }, { "epoch": 0.9671217690647063, "grad_norm": 0.648556325252706, "learning_rate": 2.8321743458427087e-08, "loss": 0.1219, "step": 33151 }, { "epoch": 0.9671509422953498, "grad_norm": 0.8489118138471465, "learning_rate": 2.8271552739772e-08, "loss": 0.1121, "step": 33152 }, { "epoch": 0.9671801155259934, "grad_norm": 1.0260411798809466, "learning_rate": 2.8221406407521466e-08, "loss": 0.1003, "step": 33153 }, { "epoch": 0.9672092887566369, "grad_norm": 0.7700507317507708, "learning_rate": 2.817130446212346e-08, "loss": 0.1063, "step": 33154 }, { "epoch": 0.9672384619872805, "grad_norm": 0.8193315575784067, "learning_rate": 2.81212469040254e-08, "loss": 0.1049, "step": 33155 }, { "epoch": 0.967267635217924, "grad_norm": 0.900771176844274, "learning_rate": 2.8071233733673597e-08, "loss": 0.1234, "step": 33156 }, { "epoch": 0.9672968084485676, "grad_norm": 0.9560339972914657, "learning_rate": 2.8021264951514916e-08, "loss": 0.0932, "step": 33157 }, { "epoch": 0.9673259816792111, "grad_norm": 0.6546265651253206, "learning_rate": 2.7971340557995665e-08, "loss": 0.1132, "step": 33158 }, { "epoch": 0.9673551549098547, "grad_norm": 1.0290057482398285, "learning_rate": 2.7921460553561042e-08, "loss": 0.1238, "step": 33159 }, { "epoch": 0.9673843281404982, "grad_norm": 0.9428544993149429, "learning_rate": 2.7871624938656805e-08, "loss": 0.1157, "step": 33160 }, { "epoch": 0.9674135013711418, "grad_norm": 1.0415995121814428, "learning_rate": 2.7821833713728152e-08, "loss": 0.1169, "step": 33161 }, { "epoch": 0.9674426746017853, "grad_norm": 0.6790913256221224, "learning_rate": 2.7772086879218617e-08, "loss": 0.1206, "step": 33162 }, { "epoch": 0.967471847832429, "grad_norm": 0.7560335334944387, "learning_rate": 2.77223844355734e-08, "loss": 0.0954, "step": 33163 }, { "epoch": 0.9675010210630726, "grad_norm": 0.9423620708381659, "learning_rate": 2.7672726383235482e-08, "loss": 0.105, "step": 33164 }, { "epoch": 0.9675301942937161, "grad_norm": 0.7353544534745028, "learning_rate": 2.7623112722648394e-08, "loss": 0.1118, "step": 33165 }, { "epoch": 0.9675593675243597, "grad_norm": 0.8067071208282636, "learning_rate": 2.757354345425567e-08, "loss": 0.1131, "step": 33166 }, { "epoch": 0.9675885407550032, "grad_norm": 0.9932651772818989, "learning_rate": 2.7524018578498623e-08, "loss": 0.1231, "step": 33167 }, { "epoch": 0.9676177139856468, "grad_norm": 0.8114355437385394, "learning_rate": 2.7474538095820792e-08, "loss": 0.1231, "step": 33168 }, { "epoch": 0.9676468872162903, "grad_norm": 0.8220403344879893, "learning_rate": 2.742510200666293e-08, "loss": 0.1324, "step": 33169 }, { "epoch": 0.9676760604469339, "grad_norm": 0.9237094451487354, "learning_rate": 2.737571031146691e-08, "loss": 0.1029, "step": 33170 }, { "epoch": 0.9677052336775774, "grad_norm": 0.8566931516908737, "learning_rate": 2.732636301067293e-08, "loss": 0.1103, "step": 33171 }, { "epoch": 0.967734406908221, "grad_norm": 0.8089451056696318, "learning_rate": 2.7277060104722865e-08, "loss": 0.1302, "step": 33172 }, { "epoch": 0.9677635801388645, "grad_norm": 0.8801491851632947, "learning_rate": 2.722780159405525e-08, "loss": 0.1215, "step": 33173 }, { "epoch": 0.9677927533695081, "grad_norm": 0.894810803757725, "learning_rate": 2.7178587479111397e-08, "loss": 0.0923, "step": 33174 }, { "epoch": 0.9678219266001516, "grad_norm": 0.920052125297922, "learning_rate": 2.7129417760329846e-08, "loss": 0.1213, "step": 33175 }, { "epoch": 0.9678510998307953, "grad_norm": 0.9199774232586273, "learning_rate": 2.708029243814969e-08, "loss": 0.1189, "step": 33176 }, { "epoch": 0.9678802730614389, "grad_norm": 0.776353475684395, "learning_rate": 2.703121151300947e-08, "loss": 0.1001, "step": 33177 }, { "epoch": 0.9679094462920824, "grad_norm": 0.9131735148224904, "learning_rate": 2.698217498534772e-08, "loss": 0.1248, "step": 33178 }, { "epoch": 0.967938619522726, "grad_norm": 0.7870175820259996, "learning_rate": 2.693318285560187e-08, "loss": 0.1014, "step": 33179 }, { "epoch": 0.9679677927533695, "grad_norm": 0.8996599779335045, "learning_rate": 2.6884235124209345e-08, "loss": 0.1041, "step": 33180 }, { "epoch": 0.9679969659840131, "grad_norm": 0.8984752240465178, "learning_rate": 2.6835331791607023e-08, "loss": 0.134, "step": 33181 }, { "epoch": 0.9680261392146566, "grad_norm": 0.7918699501152622, "learning_rate": 2.6786472858231772e-08, "loss": 0.1004, "step": 33182 }, { "epoch": 0.9680553124453002, "grad_norm": 0.7766932154413836, "learning_rate": 2.673765832451991e-08, "loss": 0.1131, "step": 33183 }, { "epoch": 0.9680844856759437, "grad_norm": 1.3130603270873022, "learning_rate": 2.6688888190906647e-08, "loss": 0.0971, "step": 33184 }, { "epoch": 0.9681136589065873, "grad_norm": 0.9150769599954006, "learning_rate": 2.66401624578283e-08, "loss": 0.1051, "step": 33185 }, { "epoch": 0.9681428321372308, "grad_norm": 0.9161269213580724, "learning_rate": 2.6591481125718967e-08, "loss": 0.1002, "step": 33186 }, { "epoch": 0.9681720053678744, "grad_norm": 0.6980169049739354, "learning_rate": 2.6542844195013297e-08, "loss": 0.1066, "step": 33187 }, { "epoch": 0.968201178598518, "grad_norm": 0.7662191903971364, "learning_rate": 2.6494251666146497e-08, "loss": 0.092, "step": 33188 }, { "epoch": 0.9682303518291615, "grad_norm": 0.9375710003997315, "learning_rate": 2.644570353955156e-08, "loss": 0.1104, "step": 33189 }, { "epoch": 0.9682595250598052, "grad_norm": 0.6504484474349596, "learning_rate": 2.6397199815662022e-08, "loss": 0.1005, "step": 33190 }, { "epoch": 0.9682886982904487, "grad_norm": 0.8032264552360474, "learning_rate": 2.6348740494910875e-08, "loss": 0.1198, "step": 33191 }, { "epoch": 0.9683178715210923, "grad_norm": 0.9963163192859755, "learning_rate": 2.6300325577731102e-08, "loss": 0.11, "step": 33192 }, { "epoch": 0.9683470447517358, "grad_norm": 0.9106261616915791, "learning_rate": 2.625195506455458e-08, "loss": 0.1056, "step": 33193 }, { "epoch": 0.9683762179823794, "grad_norm": 2.154062381770336, "learning_rate": 2.6203628955813188e-08, "loss": 0.1141, "step": 33194 }, { "epoch": 0.9684053912130229, "grad_norm": 1.0892300851194652, "learning_rate": 2.6155347251938247e-08, "loss": 0.1288, "step": 33195 }, { "epoch": 0.9684345644436665, "grad_norm": 0.8808649017784892, "learning_rate": 2.610710995336163e-08, "loss": 0.1041, "step": 33196 }, { "epoch": 0.96846373767431, "grad_norm": 0.875432106744955, "learning_rate": 2.6058917060513002e-08, "loss": 0.0885, "step": 33197 }, { "epoch": 0.9684929109049536, "grad_norm": 0.7705466745780121, "learning_rate": 2.601076857382312e-08, "loss": 0.1057, "step": 33198 }, { "epoch": 0.9685220841355971, "grad_norm": 0.7848633989472745, "learning_rate": 2.5962664493721646e-08, "loss": 0.1077, "step": 33199 }, { "epoch": 0.9685512573662407, "grad_norm": 0.9357134369685914, "learning_rate": 2.5914604820638233e-08, "loss": 0.105, "step": 33200 }, { "epoch": 0.9685804305968843, "grad_norm": 1.0718354077973316, "learning_rate": 2.5866589555001432e-08, "loss": 0.1006, "step": 33201 }, { "epoch": 0.9686096038275278, "grad_norm": 0.9688733295168274, "learning_rate": 2.5818618697240337e-08, "loss": 0.1132, "step": 33202 }, { "epoch": 0.9686387770581715, "grad_norm": 0.8703435809535759, "learning_rate": 2.5770692247783502e-08, "loss": 0.0975, "step": 33203 }, { "epoch": 0.968667950288815, "grad_norm": 0.7218303744110426, "learning_rate": 2.5722810207058356e-08, "loss": 0.0993, "step": 33204 }, { "epoch": 0.9686971235194586, "grad_norm": 0.7759747512927662, "learning_rate": 2.5674972575492896e-08, "loss": 0.1138, "step": 33205 }, { "epoch": 0.9687262967501021, "grad_norm": 0.8570026436756201, "learning_rate": 2.562717935351289e-08, "loss": 0.1186, "step": 33206 }, { "epoch": 0.9687554699807457, "grad_norm": 0.7398037588879186, "learning_rate": 2.5579430541546324e-08, "loss": 0.0901, "step": 33207 }, { "epoch": 0.9687846432113892, "grad_norm": 0.7236714997816808, "learning_rate": 2.553172614001953e-08, "loss": 0.1161, "step": 33208 }, { "epoch": 0.9688138164420328, "grad_norm": 1.2607678151739428, "learning_rate": 2.5484066149357723e-08, "loss": 0.1117, "step": 33209 }, { "epoch": 0.9688429896726763, "grad_norm": 0.9101920600804906, "learning_rate": 2.543645056998667e-08, "loss": 0.0882, "step": 33210 }, { "epoch": 0.9688721629033199, "grad_norm": 0.8108032443512756, "learning_rate": 2.538887940233159e-08, "loss": 0.1143, "step": 33211 }, { "epoch": 0.9689013361339635, "grad_norm": 0.7041602985171656, "learning_rate": 2.5341352646816585e-08, "loss": 0.1145, "step": 33212 }, { "epoch": 0.968930509364607, "grad_norm": 0.8401975422025159, "learning_rate": 2.5293870303866876e-08, "loss": 0.1186, "step": 33213 }, { "epoch": 0.9689596825952506, "grad_norm": 0.8464830955458004, "learning_rate": 2.5246432373906004e-08, "loss": 0.1015, "step": 33214 }, { "epoch": 0.9689888558258941, "grad_norm": 0.8387548712600454, "learning_rate": 2.5199038857357526e-08, "loss": 0.096, "step": 33215 }, { "epoch": 0.9690180290565377, "grad_norm": 0.9199625599619493, "learning_rate": 2.5151689754643883e-08, "loss": 0.1151, "step": 33216 }, { "epoch": 0.9690472022871813, "grad_norm": 1.3089082232519005, "learning_rate": 2.5104385066188618e-08, "loss": 0.1145, "step": 33217 }, { "epoch": 0.9690763755178249, "grad_norm": 0.8277829418479505, "learning_rate": 2.505712479241418e-08, "loss": 0.0957, "step": 33218 }, { "epoch": 0.9691055487484684, "grad_norm": 0.6815338576041906, "learning_rate": 2.5009908933741335e-08, "loss": 0.106, "step": 33219 }, { "epoch": 0.969134721979112, "grad_norm": 0.8492507818011145, "learning_rate": 2.496273749059308e-08, "loss": 0.1094, "step": 33220 }, { "epoch": 0.9691638952097555, "grad_norm": 0.775050926146816, "learning_rate": 2.4915610463389637e-08, "loss": 0.0947, "step": 33221 }, { "epoch": 0.9691930684403991, "grad_norm": 0.7439142650091273, "learning_rate": 2.486852785255178e-08, "loss": 0.1078, "step": 33222 }, { "epoch": 0.9692222416710427, "grad_norm": 0.9900561762145669, "learning_rate": 2.4821489658500286e-08, "loss": 0.126, "step": 33223 }, { "epoch": 0.9692514149016862, "grad_norm": 0.7834785723011627, "learning_rate": 2.4774495881654813e-08, "loss": 0.113, "step": 33224 }, { "epoch": 0.9692805881323298, "grad_norm": 0.7419422618761509, "learning_rate": 2.472754652243503e-08, "loss": 0.0971, "step": 33225 }, { "epoch": 0.9693097613629733, "grad_norm": 1.0690866161387549, "learning_rate": 2.468064158125949e-08, "loss": 0.1132, "step": 33226 }, { "epoch": 0.9693389345936169, "grad_norm": 0.9161898550399602, "learning_rate": 2.463378105854841e-08, "loss": 0.139, "step": 33227 }, { "epoch": 0.9693681078242604, "grad_norm": 0.8379852469233645, "learning_rate": 2.4586964954718683e-08, "loss": 0.1007, "step": 33228 }, { "epoch": 0.969397281054904, "grad_norm": 0.7514237099956552, "learning_rate": 2.454019327018886e-08, "loss": 0.1222, "step": 33229 }, { "epoch": 0.9694264542855476, "grad_norm": 0.8241317805620066, "learning_rate": 2.449346600537639e-08, "loss": 0.1079, "step": 33230 }, { "epoch": 0.9694556275161912, "grad_norm": 0.9168777228764998, "learning_rate": 2.4446783160698152e-08, "loss": 0.1072, "step": 33231 }, { "epoch": 0.9694848007468347, "grad_norm": 0.8124714608903882, "learning_rate": 2.440014473657215e-08, "loss": 0.103, "step": 33232 }, { "epoch": 0.9695139739774783, "grad_norm": 0.7571477527597106, "learning_rate": 2.4353550733413056e-08, "loss": 0.1044, "step": 33233 }, { "epoch": 0.9695431472081218, "grad_norm": 0.75046520235371, "learning_rate": 2.430700115163831e-08, "loss": 0.1065, "step": 33234 }, { "epoch": 0.9695723204387654, "grad_norm": 0.8101537463858147, "learning_rate": 2.426049599166258e-08, "loss": 0.0979, "step": 33235 }, { "epoch": 0.969601493669409, "grad_norm": 0.9014119870855315, "learning_rate": 2.4214035253901093e-08, "loss": 0.1058, "step": 33236 }, { "epoch": 0.9696306669000525, "grad_norm": 1.1299225630003138, "learning_rate": 2.4167618938769066e-08, "loss": 0.1026, "step": 33237 }, { "epoch": 0.9696598401306961, "grad_norm": 0.9588030555201384, "learning_rate": 2.4121247046681174e-08, "loss": 0.1032, "step": 33238 }, { "epoch": 0.9696890133613396, "grad_norm": 1.2022350808963467, "learning_rate": 2.4074919578050415e-08, "loss": 0.0923, "step": 33239 }, { "epoch": 0.9697181865919832, "grad_norm": 0.8357029085217434, "learning_rate": 2.4028636533290904e-08, "loss": 0.1014, "step": 33240 }, { "epoch": 0.9697473598226267, "grad_norm": 0.7351472449121997, "learning_rate": 2.3982397912816203e-08, "loss": 0.1172, "step": 33241 }, { "epoch": 0.9697765330532703, "grad_norm": 0.8882296353447906, "learning_rate": 2.3936203717038753e-08, "loss": 0.1098, "step": 33242 }, { "epoch": 0.9698057062839138, "grad_norm": 0.9327990009193649, "learning_rate": 2.389005394637045e-08, "loss": 0.117, "step": 33243 }, { "epoch": 0.9698348795145575, "grad_norm": 0.8334900163090636, "learning_rate": 2.38439486012243e-08, "loss": 0.1141, "step": 33244 }, { "epoch": 0.969864052745201, "grad_norm": 0.8102328846098208, "learning_rate": 2.3797887682011632e-08, "loss": 0.1138, "step": 33245 }, { "epoch": 0.9698932259758446, "grad_norm": 0.8732129555678707, "learning_rate": 2.375187118914324e-08, "loss": 0.1265, "step": 33246 }, { "epoch": 0.9699223992064882, "grad_norm": 0.9265668905180008, "learning_rate": 2.3705899123030452e-08, "loss": 0.1265, "step": 33247 }, { "epoch": 0.9699515724371317, "grad_norm": 0.7005632848767195, "learning_rate": 2.36599714840835e-08, "loss": 0.1314, "step": 33248 }, { "epoch": 0.9699807456677753, "grad_norm": 0.7864713792294779, "learning_rate": 2.3614088272712055e-08, "loss": 0.1068, "step": 33249 }, { "epoch": 0.9700099188984188, "grad_norm": 0.7727446005676165, "learning_rate": 2.3568249489325788e-08, "loss": 0.1133, "step": 33250 }, { "epoch": 0.9700390921290624, "grad_norm": 0.8746891260098911, "learning_rate": 2.3522455134334932e-08, "loss": 0.1133, "step": 33251 }, { "epoch": 0.9700682653597059, "grad_norm": 0.8062800368374295, "learning_rate": 2.347670520814749e-08, "loss": 0.1235, "step": 33252 }, { "epoch": 0.9700974385903495, "grad_norm": 0.9503210236297273, "learning_rate": 2.3430999711171466e-08, "loss": 0.1366, "step": 33253 }, { "epoch": 0.970126611820993, "grad_norm": 0.7298905343904102, "learning_rate": 2.338533864381598e-08, "loss": 0.1053, "step": 33254 }, { "epoch": 0.9701557850516366, "grad_norm": 0.8383404371548997, "learning_rate": 2.333972200648793e-08, "loss": 0.1275, "step": 33255 }, { "epoch": 0.9701849582822801, "grad_norm": 0.8118394073432936, "learning_rate": 2.329414979959477e-08, "loss": 0.0874, "step": 33256 }, { "epoch": 0.9702141315129238, "grad_norm": 0.7989731379292758, "learning_rate": 2.3248622023543387e-08, "loss": 0.1127, "step": 33257 }, { "epoch": 0.9702433047435673, "grad_norm": 0.6262229387101635, "learning_rate": 2.320313867874069e-08, "loss": 0.1175, "step": 33258 }, { "epoch": 0.9702724779742109, "grad_norm": 0.8163687193739113, "learning_rate": 2.3157699765591902e-08, "loss": 0.1135, "step": 33259 }, { "epoch": 0.9703016512048545, "grad_norm": 0.8569892640654317, "learning_rate": 2.3112305284503365e-08, "loss": 0.1125, "step": 33260 }, { "epoch": 0.970330824435498, "grad_norm": 0.7352684456279495, "learning_rate": 2.3066955235879763e-08, "loss": 0.1159, "step": 33261 }, { "epoch": 0.9703599976661416, "grad_norm": 0.6172889263874495, "learning_rate": 2.3021649620126873e-08, "loss": 0.1172, "step": 33262 }, { "epoch": 0.9703891708967851, "grad_norm": 0.8463945571490848, "learning_rate": 2.2976388437648267e-08, "loss": 0.106, "step": 33263 }, { "epoch": 0.9704183441274287, "grad_norm": 1.0325710207608405, "learning_rate": 2.2931171688848066e-08, "loss": 0.1194, "step": 33264 }, { "epoch": 0.9704475173580722, "grad_norm": 0.78629712982603, "learning_rate": 2.288599937413094e-08, "loss": 0.1122, "step": 33265 }, { "epoch": 0.9704766905887158, "grad_norm": 0.7600775276779111, "learning_rate": 2.2840871493898798e-08, "loss": 0.0942, "step": 33266 }, { "epoch": 0.9705058638193593, "grad_norm": 0.9811926471288643, "learning_rate": 2.27957880485552e-08, "loss": 0.1302, "step": 33267 }, { "epoch": 0.9705350370500029, "grad_norm": 0.8820362620981588, "learning_rate": 2.2750749038503162e-08, "loss": 0.0909, "step": 33268 }, { "epoch": 0.9705642102806464, "grad_norm": 0.6327932055874735, "learning_rate": 2.2705754464144024e-08, "loss": 0.1087, "step": 33269 }, { "epoch": 0.97059338351129, "grad_norm": 0.9295755842950882, "learning_rate": 2.266080432587969e-08, "loss": 0.1172, "step": 33270 }, { "epoch": 0.9706225567419337, "grad_norm": 0.9489213979627705, "learning_rate": 2.261589862411151e-08, "loss": 0.1073, "step": 33271 }, { "epoch": 0.9706517299725772, "grad_norm": 1.1446728499654448, "learning_rate": 2.2571037359240268e-08, "loss": 0.1114, "step": 33272 }, { "epoch": 0.9706809032032208, "grad_norm": 0.767914187023319, "learning_rate": 2.2526220531666752e-08, "loss": 0.1168, "step": 33273 }, { "epoch": 0.9707100764338643, "grad_norm": 0.7252827403882242, "learning_rate": 2.2481448141791206e-08, "loss": 0.1176, "step": 33274 }, { "epoch": 0.9707392496645079, "grad_norm": 0.7670138313431533, "learning_rate": 2.243672019001275e-08, "loss": 0.0991, "step": 33275 }, { "epoch": 0.9707684228951514, "grad_norm": 0.8109237412730789, "learning_rate": 2.2392036676730512e-08, "loss": 0.128, "step": 33276 }, { "epoch": 0.970797596125795, "grad_norm": 0.7587996528575016, "learning_rate": 2.2347397602344722e-08, "loss": 0.1018, "step": 33277 }, { "epoch": 0.9708267693564385, "grad_norm": 1.1152898987839726, "learning_rate": 2.2302802967252847e-08, "loss": 0.101, "step": 33278 }, { "epoch": 0.9708559425870821, "grad_norm": 0.7740685771683296, "learning_rate": 2.225825277185345e-08, "loss": 0.1029, "step": 33279 }, { "epoch": 0.9708851158177256, "grad_norm": 0.858881855407697, "learning_rate": 2.2213747016543442e-08, "loss": 0.1211, "step": 33280 }, { "epoch": 0.9709142890483692, "grad_norm": 0.9201220084377129, "learning_rate": 2.2169285701721388e-08, "loss": 0.1167, "step": 33281 }, { "epoch": 0.9709434622790127, "grad_norm": 0.8283169503953051, "learning_rate": 2.212486882778364e-08, "loss": 0.0877, "step": 33282 }, { "epoch": 0.9709726355096563, "grad_norm": 0.7731137437956862, "learning_rate": 2.208049639512655e-08, "loss": 0.1099, "step": 33283 }, { "epoch": 0.9710018087403, "grad_norm": 0.8886735153467984, "learning_rate": 2.203616840414646e-08, "loss": 0.0937, "step": 33284 }, { "epoch": 0.9710309819709435, "grad_norm": 0.772189292030863, "learning_rate": 2.1991884855239177e-08, "loss": 0.1172, "step": 33285 }, { "epoch": 0.9710601552015871, "grad_norm": 0.9297205871305089, "learning_rate": 2.1947645748799927e-08, "loss": 0.105, "step": 33286 }, { "epoch": 0.9710893284322306, "grad_norm": 0.8003621453355055, "learning_rate": 2.1903451085223958e-08, "loss": 0.1077, "step": 33287 }, { "epoch": 0.9711185016628742, "grad_norm": 0.8251416550823791, "learning_rate": 2.185930086490595e-08, "loss": 0.1399, "step": 33288 }, { "epoch": 0.9711476748935177, "grad_norm": 0.7866508578070678, "learning_rate": 2.1815195088238926e-08, "loss": 0.0948, "step": 33289 }, { "epoch": 0.9711768481241613, "grad_norm": 0.7879612377093205, "learning_rate": 2.1771133755618124e-08, "loss": 0.1088, "step": 33290 }, { "epoch": 0.9712060213548048, "grad_norm": 0.7372331894153694, "learning_rate": 2.172711686743545e-08, "loss": 0.1103, "step": 33291 }, { "epoch": 0.9712351945854484, "grad_norm": 0.9221458662624825, "learning_rate": 2.1683144424085034e-08, "loss": 0.1017, "step": 33292 }, { "epoch": 0.9712643678160919, "grad_norm": 0.6565910677863517, "learning_rate": 2.1639216425959342e-08, "loss": 0.1046, "step": 33293 }, { "epoch": 0.9712935410467355, "grad_norm": 0.9345734592058208, "learning_rate": 2.159533287345028e-08, "loss": 0.1521, "step": 33294 }, { "epoch": 0.971322714277379, "grad_norm": 0.8823190509865528, "learning_rate": 2.155149376694976e-08, "loss": 0.11, "step": 33295 }, { "epoch": 0.9713518875080226, "grad_norm": 0.8445446854319447, "learning_rate": 2.1507699106848577e-08, "loss": 0.1082, "step": 33296 }, { "epoch": 0.9713810607386661, "grad_norm": 0.9642143256794932, "learning_rate": 2.146394889353809e-08, "loss": 0.1057, "step": 33297 }, { "epoch": 0.9714102339693098, "grad_norm": 0.8639220123901701, "learning_rate": 2.1420243127409644e-08, "loss": 0.1068, "step": 33298 }, { "epoch": 0.9714394071999534, "grad_norm": 1.0167976612282208, "learning_rate": 2.137658180885238e-08, "loss": 0.1107, "step": 33299 }, { "epoch": 0.9714685804305969, "grad_norm": 0.81110458572188, "learning_rate": 2.133296493825654e-08, "loss": 0.1066, "step": 33300 }, { "epoch": 0.9714977536612405, "grad_norm": 0.6746057619205598, "learning_rate": 2.1289392516011253e-08, "loss": 0.1113, "step": 33301 }, { "epoch": 0.971526926891884, "grad_norm": 0.7692790261086835, "learning_rate": 2.1245864542506213e-08, "loss": 0.1103, "step": 33302 }, { "epoch": 0.9715561001225276, "grad_norm": 0.9241372568505284, "learning_rate": 2.1202381018129436e-08, "loss": 0.1174, "step": 33303 }, { "epoch": 0.9715852733531711, "grad_norm": 0.7847597273931936, "learning_rate": 2.1158941943268952e-08, "loss": 0.1236, "step": 33304 }, { "epoch": 0.9716144465838147, "grad_norm": 1.1458056422264347, "learning_rate": 2.1115547318313334e-08, "loss": 0.1231, "step": 33305 }, { "epoch": 0.9716436198144582, "grad_norm": 1.265289722213336, "learning_rate": 2.1072197143649497e-08, "loss": 0.1029, "step": 33306 }, { "epoch": 0.9716727930451018, "grad_norm": 0.9444687779410674, "learning_rate": 2.1028891419664354e-08, "loss": 0.1261, "step": 33307 }, { "epoch": 0.9717019662757453, "grad_norm": 0.7195124075247614, "learning_rate": 2.0985630146744264e-08, "loss": 0.1177, "step": 33308 }, { "epoch": 0.9717311395063889, "grad_norm": 1.5685025454870865, "learning_rate": 2.0942413325276688e-08, "loss": 0.0905, "step": 33309 }, { "epoch": 0.9717603127370325, "grad_norm": 0.9524786479041064, "learning_rate": 2.0899240955646326e-08, "loss": 0.1304, "step": 33310 }, { "epoch": 0.9717894859676761, "grad_norm": 1.0465798406007878, "learning_rate": 2.085611303823898e-08, "loss": 0.107, "step": 33311 }, { "epoch": 0.9718186591983197, "grad_norm": 0.7076932035780915, "learning_rate": 2.0813029573439335e-08, "loss": 0.1043, "step": 33312 }, { "epoch": 0.9718478324289632, "grad_norm": 0.8522436171572916, "learning_rate": 2.0769990561632647e-08, "loss": 0.1071, "step": 33313 }, { "epoch": 0.9718770056596068, "grad_norm": 0.7615812205683006, "learning_rate": 2.0726996003202492e-08, "loss": 0.1101, "step": 33314 }, { "epoch": 0.9719061788902503, "grad_norm": 0.755199234780472, "learning_rate": 2.0684045898533566e-08, "loss": 0.1142, "step": 33315 }, { "epoch": 0.9719353521208939, "grad_norm": 0.9259430231150687, "learning_rate": 2.064114024800834e-08, "loss": 0.1327, "step": 33316 }, { "epoch": 0.9719645253515374, "grad_norm": 0.952728399417846, "learning_rate": 2.059827905201095e-08, "loss": 0.1031, "step": 33317 }, { "epoch": 0.971993698582181, "grad_norm": 0.7640032847065388, "learning_rate": 2.055546231092276e-08, "loss": 0.1147, "step": 33318 }, { "epoch": 0.9720228718128245, "grad_norm": 0.7113730249546651, "learning_rate": 2.0512690025127345e-08, "loss": 0.1064, "step": 33319 }, { "epoch": 0.9720520450434681, "grad_norm": 0.9526394956738103, "learning_rate": 2.0469962195005522e-08, "loss": 0.1155, "step": 33320 }, { "epoch": 0.9720812182741116, "grad_norm": 0.8208247646065269, "learning_rate": 2.04272788209392e-08, "loss": 0.0987, "step": 33321 }, { "epoch": 0.9721103915047552, "grad_norm": 0.7365215128034867, "learning_rate": 2.0384639903309744e-08, "loss": 0.1001, "step": 33322 }, { "epoch": 0.9721395647353988, "grad_norm": 0.9058537089768122, "learning_rate": 2.034204544249685e-08, "loss": 0.114, "step": 33323 }, { "epoch": 0.9721687379660423, "grad_norm": 0.9173028845224632, "learning_rate": 2.0299495438881877e-08, "loss": 0.1031, "step": 33324 }, { "epoch": 0.972197911196686, "grad_norm": 0.7301709254330327, "learning_rate": 2.0256989892844526e-08, "loss": 0.0938, "step": 33325 }, { "epoch": 0.9722270844273295, "grad_norm": 0.8623916502247145, "learning_rate": 2.0214528804763377e-08, "loss": 0.1139, "step": 33326 }, { "epoch": 0.9722562576579731, "grad_norm": 0.7493150574255667, "learning_rate": 2.017211217501869e-08, "loss": 0.1281, "step": 33327 }, { "epoch": 0.9722854308886166, "grad_norm": 0.8172134180991346, "learning_rate": 2.0129740003988485e-08, "loss": 0.1104, "step": 33328 }, { "epoch": 0.9723146041192602, "grad_norm": 0.8068921454843423, "learning_rate": 2.00874122920508e-08, "loss": 0.132, "step": 33329 }, { "epoch": 0.9723437773499037, "grad_norm": 0.7895308248497875, "learning_rate": 2.0045129039584222e-08, "loss": 0.1377, "step": 33330 }, { "epoch": 0.9723729505805473, "grad_norm": 0.7257825512227422, "learning_rate": 2.0002890246965666e-08, "loss": 0.1095, "step": 33331 }, { "epoch": 0.9724021238111908, "grad_norm": 0.7815689958219003, "learning_rate": 1.9960695914572613e-08, "loss": 0.1059, "step": 33332 }, { "epoch": 0.9724312970418344, "grad_norm": 0.7451748441768528, "learning_rate": 1.9918546042781982e-08, "loss": 0.1318, "step": 33333 }, { "epoch": 0.972460470272478, "grad_norm": 0.8058696504447004, "learning_rate": 1.9876440631969585e-08, "loss": 0.114, "step": 33334 }, { "epoch": 0.9724896435031215, "grad_norm": 0.9150165019876599, "learning_rate": 1.983437968251123e-08, "loss": 0.1319, "step": 33335 }, { "epoch": 0.9725188167337651, "grad_norm": 0.80739249159237, "learning_rate": 1.9792363194782726e-08, "loss": 0.1131, "step": 33336 }, { "epoch": 0.9725479899644086, "grad_norm": 0.7029138450106285, "learning_rate": 1.9750391169159332e-08, "loss": 0.133, "step": 33337 }, { "epoch": 0.9725771631950523, "grad_norm": 0.8346001268840691, "learning_rate": 1.9708463606015194e-08, "loss": 0.1339, "step": 33338 }, { "epoch": 0.9726063364256958, "grad_norm": 0.8896279040035748, "learning_rate": 1.9666580505725007e-08, "loss": 0.1001, "step": 33339 }, { "epoch": 0.9726355096563394, "grad_norm": 1.0616821086827493, "learning_rate": 1.9624741868662922e-08, "loss": 0.1143, "step": 33340 }, { "epoch": 0.9726646828869829, "grad_norm": 0.7409043448761004, "learning_rate": 1.9582947695202527e-08, "loss": 0.1069, "step": 33341 }, { "epoch": 0.9726938561176265, "grad_norm": 0.9229229172873346, "learning_rate": 1.9541197985716298e-08, "loss": 0.1005, "step": 33342 }, { "epoch": 0.97272302934827, "grad_norm": 0.7304547907080917, "learning_rate": 1.9499492740577273e-08, "loss": 0.1199, "step": 33343 }, { "epoch": 0.9727522025789136, "grad_norm": 0.6824912921749677, "learning_rate": 1.9457831960157937e-08, "loss": 0.1144, "step": 33344 }, { "epoch": 0.9727813758095571, "grad_norm": 0.7877393040686903, "learning_rate": 1.9416215644830204e-08, "loss": 0.1062, "step": 33345 }, { "epoch": 0.9728105490402007, "grad_norm": 0.9844808545510941, "learning_rate": 1.9374643794964897e-08, "loss": 0.0913, "step": 33346 }, { "epoch": 0.9728397222708443, "grad_norm": 1.1689726331096526, "learning_rate": 1.9333116410934493e-08, "loss": 0.0937, "step": 33347 }, { "epoch": 0.9728688955014878, "grad_norm": 0.7126234237969035, "learning_rate": 1.9291633493109254e-08, "loss": 0.1255, "step": 33348 }, { "epoch": 0.9728980687321314, "grad_norm": 0.7489160067019679, "learning_rate": 1.9250195041858876e-08, "loss": 0.1209, "step": 33349 }, { "epoch": 0.9729272419627749, "grad_norm": 0.7490003086713569, "learning_rate": 1.920880105755363e-08, "loss": 0.0771, "step": 33350 }, { "epoch": 0.9729564151934185, "grad_norm": 0.6394620877002085, "learning_rate": 1.9167451540563765e-08, "loss": 0.1051, "step": 33351 }, { "epoch": 0.9729855884240621, "grad_norm": 0.9090195318328302, "learning_rate": 1.9126146491257324e-08, "loss": 0.1023, "step": 33352 }, { "epoch": 0.9730147616547057, "grad_norm": 0.8825669052766255, "learning_rate": 1.908488591000346e-08, "loss": 0.123, "step": 33353 }, { "epoch": 0.9730439348853492, "grad_norm": 0.8893309323915279, "learning_rate": 1.9043669797171316e-08, "loss": 0.1122, "step": 33354 }, { "epoch": 0.9730731081159928, "grad_norm": 0.9490527767267487, "learning_rate": 1.900249815312838e-08, "loss": 0.1222, "step": 33355 }, { "epoch": 0.9731022813466363, "grad_norm": 0.709442625281374, "learning_rate": 1.8961370978241023e-08, "loss": 0.0956, "step": 33356 }, { "epoch": 0.9731314545772799, "grad_norm": 0.978385822187881, "learning_rate": 1.8920288272878396e-08, "loss": 0.1154, "step": 33357 }, { "epoch": 0.9731606278079235, "grad_norm": 0.9637221416511035, "learning_rate": 1.8879250037406315e-08, "loss": 0.1041, "step": 33358 }, { "epoch": 0.973189801038567, "grad_norm": 0.8283305516874159, "learning_rate": 1.8838256272190602e-08, "loss": 0.1059, "step": 33359 }, { "epoch": 0.9732189742692106, "grad_norm": 0.9277834903067519, "learning_rate": 1.8797306977598184e-08, "loss": 0.1024, "step": 33360 }, { "epoch": 0.9732481474998541, "grad_norm": 0.8363224529409721, "learning_rate": 1.8756402153994324e-08, "loss": 0.1092, "step": 33361 }, { "epoch": 0.9732773207304977, "grad_norm": 0.7339383701319554, "learning_rate": 1.8715541801744286e-08, "loss": 0.1196, "step": 33362 }, { "epoch": 0.9733064939611412, "grad_norm": 1.1063275108381208, "learning_rate": 1.8674725921212776e-08, "loss": 0.0935, "step": 33363 }, { "epoch": 0.9733356671917848, "grad_norm": 0.756467841423844, "learning_rate": 1.8633954512763953e-08, "loss": 0.104, "step": 33364 }, { "epoch": 0.9733648404224284, "grad_norm": 0.7192393817105305, "learning_rate": 1.8593227576761962e-08, "loss": 0.1001, "step": 33365 }, { "epoch": 0.973394013653072, "grad_norm": 0.7233696841641885, "learning_rate": 1.8552545113570963e-08, "loss": 0.1177, "step": 33366 }, { "epoch": 0.9734231868837155, "grad_norm": 1.0112742820744531, "learning_rate": 1.851190712355344e-08, "loss": 0.1091, "step": 33367 }, { "epoch": 0.9734523601143591, "grad_norm": 0.7479705079772767, "learning_rate": 1.8471313607071883e-08, "loss": 0.0998, "step": 33368 }, { "epoch": 0.9734815333450026, "grad_norm": 0.6921841689447383, "learning_rate": 1.843076456448989e-08, "loss": 0.1123, "step": 33369 }, { "epoch": 0.9735107065756462, "grad_norm": 0.9172583354982481, "learning_rate": 1.8390259996168835e-08, "loss": 0.1199, "step": 33370 }, { "epoch": 0.9735398798062898, "grad_norm": 0.8822899103914085, "learning_rate": 1.83497999024701e-08, "loss": 0.1334, "step": 33371 }, { "epoch": 0.9735690530369333, "grad_norm": 0.7350092952227788, "learning_rate": 1.830938428375506e-08, "loss": 0.1061, "step": 33372 }, { "epoch": 0.9735982262675769, "grad_norm": 0.7330787897650202, "learning_rate": 1.8269013140385094e-08, "loss": 0.1056, "step": 33373 }, { "epoch": 0.9736273994982204, "grad_norm": 0.8272844833046916, "learning_rate": 1.822868647271936e-08, "loss": 0.1096, "step": 33374 }, { "epoch": 0.973656572728864, "grad_norm": 0.6753441382658865, "learning_rate": 1.818840428111923e-08, "loss": 0.096, "step": 33375 }, { "epoch": 0.9736857459595075, "grad_norm": 0.7612429464049904, "learning_rate": 1.814816656594387e-08, "loss": 0.1336, "step": 33376 }, { "epoch": 0.9737149191901511, "grad_norm": 0.9039727898563927, "learning_rate": 1.8107973327551876e-08, "loss": 0.1213, "step": 33377 }, { "epoch": 0.9737440924207946, "grad_norm": 0.7970470410181474, "learning_rate": 1.8067824566302962e-08, "loss": 0.112, "step": 33378 }, { "epoch": 0.9737732656514383, "grad_norm": 0.8800650514267462, "learning_rate": 1.802772028255517e-08, "loss": 0.0963, "step": 33379 }, { "epoch": 0.9738024388820818, "grad_norm": 0.6846404777296988, "learning_rate": 1.7987660476666556e-08, "loss": 0.1139, "step": 33380 }, { "epoch": 0.9738316121127254, "grad_norm": 0.913059160933428, "learning_rate": 1.7947645148995162e-08, "loss": 0.1212, "step": 33381 }, { "epoch": 0.973860785343369, "grad_norm": 0.8062975208045219, "learning_rate": 1.790767429989737e-08, "loss": 0.1162, "step": 33382 }, { "epoch": 0.9738899585740125, "grad_norm": 1.1223707926612059, "learning_rate": 1.7867747929730673e-08, "loss": 0.1186, "step": 33383 }, { "epoch": 0.9739191318046561, "grad_norm": 1.3384563318411626, "learning_rate": 1.7827866038852005e-08, "loss": 0.1021, "step": 33384 }, { "epoch": 0.9739483050352996, "grad_norm": 0.80109189907296, "learning_rate": 1.7788028627616083e-08, "loss": 0.0966, "step": 33385 }, { "epoch": 0.9739774782659432, "grad_norm": 0.9382834093700994, "learning_rate": 1.774823569637929e-08, "loss": 0.1133, "step": 33386 }, { "epoch": 0.9740066514965867, "grad_norm": 0.7803182073018958, "learning_rate": 1.7708487245497454e-08, "loss": 0.1294, "step": 33387 }, { "epoch": 0.9740358247272303, "grad_norm": 0.9460944671462214, "learning_rate": 1.7668783275324176e-08, "loss": 0.1155, "step": 33388 }, { "epoch": 0.9740649979578738, "grad_norm": 1.0073333152762762, "learning_rate": 1.7629123786215285e-08, "loss": 0.1032, "step": 33389 }, { "epoch": 0.9740941711885174, "grad_norm": 0.737617428990919, "learning_rate": 1.7589508778523833e-08, "loss": 0.1105, "step": 33390 }, { "epoch": 0.9741233444191609, "grad_norm": 0.8700243402153485, "learning_rate": 1.754993825260398e-08, "loss": 0.1224, "step": 33391 }, { "epoch": 0.9741525176498046, "grad_norm": 0.7810096236326348, "learning_rate": 1.751041220880878e-08, "loss": 0.1212, "step": 33392 }, { "epoch": 0.9741816908804481, "grad_norm": 0.6825120453776412, "learning_rate": 1.7470930647490724e-08, "loss": 0.0967, "step": 33393 }, { "epoch": 0.9742108641110917, "grad_norm": 0.7636929463072785, "learning_rate": 1.7431493569003422e-08, "loss": 0.132, "step": 33394 }, { "epoch": 0.9742400373417353, "grad_norm": 0.8319015578493912, "learning_rate": 1.7392100973698257e-08, "loss": 0.1286, "step": 33395 }, { "epoch": 0.9742692105723788, "grad_norm": 0.693362482239197, "learning_rate": 1.7352752861927168e-08, "loss": 0.0962, "step": 33396 }, { "epoch": 0.9742983838030224, "grad_norm": 0.9325020114660988, "learning_rate": 1.7313449234040992e-08, "loss": 0.1056, "step": 33397 }, { "epoch": 0.9743275570336659, "grad_norm": 0.8365567021857802, "learning_rate": 1.7274190090390553e-08, "loss": 0.1164, "step": 33398 }, { "epoch": 0.9743567302643095, "grad_norm": 0.972605134269126, "learning_rate": 1.723497543132724e-08, "loss": 0.1283, "step": 33399 }, { "epoch": 0.974385903494953, "grad_norm": 0.8577117535910984, "learning_rate": 1.719580525719966e-08, "loss": 0.103, "step": 33400 }, { "epoch": 0.9744150767255966, "grad_norm": 0.8280299127440968, "learning_rate": 1.7156679568359203e-08, "loss": 0.1125, "step": 33401 }, { "epoch": 0.9744442499562401, "grad_norm": 0.8463966314981628, "learning_rate": 1.7117598365154477e-08, "loss": 0.1154, "step": 33402 }, { "epoch": 0.9744734231868837, "grad_norm": 0.7877558319809553, "learning_rate": 1.707856164793409e-08, "loss": 0.0971, "step": 33403 }, { "epoch": 0.9745025964175272, "grad_norm": 0.9374016346771774, "learning_rate": 1.7039569417046655e-08, "loss": 0.0881, "step": 33404 }, { "epoch": 0.9745317696481708, "grad_norm": 0.9121423303443583, "learning_rate": 1.7000621672840777e-08, "loss": 0.1252, "step": 33405 }, { "epoch": 0.9745609428788145, "grad_norm": 0.714116311834555, "learning_rate": 1.696171841566341e-08, "loss": 0.1106, "step": 33406 }, { "epoch": 0.974590116109458, "grad_norm": 0.9038189175558847, "learning_rate": 1.69228596458626e-08, "loss": 0.1176, "step": 33407 }, { "epoch": 0.9746192893401016, "grad_norm": 0.837749300781571, "learning_rate": 1.688404536378474e-08, "loss": 0.1421, "step": 33408 }, { "epoch": 0.9746484625707451, "grad_norm": 0.8193194685692583, "learning_rate": 1.6845275569776774e-08, "loss": 0.1134, "step": 33409 }, { "epoch": 0.9746776358013887, "grad_norm": 0.7724165656177046, "learning_rate": 1.680655026418454e-08, "loss": 0.094, "step": 33410 }, { "epoch": 0.9747068090320322, "grad_norm": 0.8670529420332095, "learning_rate": 1.676786944735387e-08, "loss": 0.09, "step": 33411 }, { "epoch": 0.9747359822626758, "grad_norm": 0.6433729522077717, "learning_rate": 1.672923311963004e-08, "loss": 0.0993, "step": 33412 }, { "epoch": 0.9747651554933193, "grad_norm": 1.0027729146225761, "learning_rate": 1.6690641281357778e-08, "loss": 0.0999, "step": 33413 }, { "epoch": 0.9747943287239629, "grad_norm": 0.6992562887457178, "learning_rate": 1.6652093932881807e-08, "loss": 0.092, "step": 33414 }, { "epoch": 0.9748235019546064, "grad_norm": 0.7068301941366186, "learning_rate": 1.6613591074546855e-08, "loss": 0.1141, "step": 33415 }, { "epoch": 0.97485267518525, "grad_norm": 0.814202526508201, "learning_rate": 1.6575132706695417e-08, "loss": 0.1043, "step": 33416 }, { "epoch": 0.9748818484158935, "grad_norm": 0.8187714646032954, "learning_rate": 1.6536718829672227e-08, "loss": 0.1209, "step": 33417 }, { "epoch": 0.9749110216465371, "grad_norm": 0.8708815506151023, "learning_rate": 1.6498349443819227e-08, "loss": 0.1084, "step": 33418 }, { "epoch": 0.9749401948771806, "grad_norm": 0.673224660071052, "learning_rate": 1.6460024549479482e-08, "loss": 0.1007, "step": 33419 }, { "epoch": 0.9749693681078243, "grad_norm": 1.0546626415822506, "learning_rate": 1.6421744146994932e-08, "loss": 0.1018, "step": 33420 }, { "epoch": 0.9749985413384679, "grad_norm": 1.025942351102925, "learning_rate": 1.638350823670698e-08, "loss": 0.1079, "step": 33421 }, { "epoch": 0.9750277145691114, "grad_norm": 0.8179767362394554, "learning_rate": 1.6345316818958123e-08, "loss": 0.1257, "step": 33422 }, { "epoch": 0.975056887799755, "grad_norm": 0.808942523053795, "learning_rate": 1.630716989408754e-08, "loss": 0.1147, "step": 33423 }, { "epoch": 0.9750860610303985, "grad_norm": 1.6266496243270054, "learning_rate": 1.6269067462437727e-08, "loss": 0.1198, "step": 33424 }, { "epoch": 0.9751152342610421, "grad_norm": 0.8222191101815929, "learning_rate": 1.6231009524347862e-08, "loss": 0.101, "step": 33425 }, { "epoch": 0.9751444074916856, "grad_norm": 0.6514665271591409, "learning_rate": 1.6192996080157676e-08, "loss": 0.0949, "step": 33426 }, { "epoch": 0.9751735807223292, "grad_norm": 0.7969046839209064, "learning_rate": 1.615502713020689e-08, "loss": 0.1409, "step": 33427 }, { "epoch": 0.9752027539529727, "grad_norm": 0.8554952062082433, "learning_rate": 1.6117102674833575e-08, "loss": 0.1023, "step": 33428 }, { "epoch": 0.9752319271836163, "grad_norm": 0.7427680985706927, "learning_rate": 1.6079222714378008e-08, "loss": 0.0852, "step": 33429 }, { "epoch": 0.9752611004142598, "grad_norm": 0.7194565309418961, "learning_rate": 1.6041387249176588e-08, "loss": 0.1011, "step": 33430 }, { "epoch": 0.9752902736449034, "grad_norm": 0.9604794209898954, "learning_rate": 1.600359627956849e-08, "loss": 0.1043, "step": 33431 }, { "epoch": 0.975319446875547, "grad_norm": 0.821924743818213, "learning_rate": 1.596584980589011e-08, "loss": 0.0975, "step": 33432 }, { "epoch": 0.9753486201061906, "grad_norm": 0.7024338531347216, "learning_rate": 1.5928147828478958e-08, "loss": 0.1077, "step": 33433 }, { "epoch": 0.9753777933368342, "grad_norm": 0.9182789520795457, "learning_rate": 1.589049034767143e-08, "loss": 0.1035, "step": 33434 }, { "epoch": 0.9754069665674777, "grad_norm": 0.6687812974110965, "learning_rate": 1.585287736380392e-08, "loss": 0.0841, "step": 33435 }, { "epoch": 0.9754361397981213, "grad_norm": 1.0874316139265678, "learning_rate": 1.581530887721172e-08, "loss": 0.1128, "step": 33436 }, { "epoch": 0.9754653130287648, "grad_norm": 0.7284841937281055, "learning_rate": 1.5777784888231228e-08, "loss": 0.1025, "step": 33437 }, { "epoch": 0.9754944862594084, "grad_norm": 0.879325279853439, "learning_rate": 1.574030539719662e-08, "loss": 0.1226, "step": 33438 }, { "epoch": 0.9755236594900519, "grad_norm": 1.013744102609131, "learning_rate": 1.570287040444263e-08, "loss": 0.11, "step": 33439 }, { "epoch": 0.9755528327206955, "grad_norm": 0.7306770282912424, "learning_rate": 1.566547991030343e-08, "loss": 0.1121, "step": 33440 }, { "epoch": 0.975582005951339, "grad_norm": 0.7091404223497363, "learning_rate": 1.5628133915113196e-08, "loss": 0.1157, "step": 33441 }, { "epoch": 0.9756111791819826, "grad_norm": 0.9053919337519989, "learning_rate": 1.5590832419205003e-08, "loss": 0.1161, "step": 33442 }, { "epoch": 0.9756403524126261, "grad_norm": 0.7123748698206179, "learning_rate": 1.5553575422911915e-08, "loss": 0.1046, "step": 33443 }, { "epoch": 0.9756695256432697, "grad_norm": 0.9529840701004273, "learning_rate": 1.5516362926566996e-08, "loss": 0.1205, "step": 33444 }, { "epoch": 0.9756986988739133, "grad_norm": 0.8206795705164966, "learning_rate": 1.5479194930502206e-08, "loss": 0.1173, "step": 33445 }, { "epoch": 0.9757278721045568, "grad_norm": 1.0333538600532497, "learning_rate": 1.544207143504839e-08, "loss": 0.1128, "step": 33446 }, { "epoch": 0.9757570453352005, "grad_norm": 0.7544708164282096, "learning_rate": 1.5404992440538612e-08, "loss": 0.1337, "step": 33447 }, { "epoch": 0.975786218565844, "grad_norm": 0.6724146045830542, "learning_rate": 1.5367957947302615e-08, "loss": 0.1074, "step": 33448 }, { "epoch": 0.9758153917964876, "grad_norm": 0.7946364233903862, "learning_rate": 1.5330967955671794e-08, "loss": 0.1009, "step": 33449 }, { "epoch": 0.9758445650271311, "grad_norm": 0.7718172549434619, "learning_rate": 1.5294022465976444e-08, "loss": 0.0961, "step": 33450 }, { "epoch": 0.9758737382577747, "grad_norm": 0.9088213407755129, "learning_rate": 1.5257121478545744e-08, "loss": 0.1179, "step": 33451 }, { "epoch": 0.9759029114884182, "grad_norm": 0.9327672303193023, "learning_rate": 1.5220264993709988e-08, "loss": 0.1245, "step": 33452 }, { "epoch": 0.9759320847190618, "grad_norm": 0.7753202811566637, "learning_rate": 1.5183453011797243e-08, "loss": 0.1115, "step": 33453 }, { "epoch": 0.9759612579497053, "grad_norm": 0.8000881894874269, "learning_rate": 1.5146685533136697e-08, "loss": 0.0896, "step": 33454 }, { "epoch": 0.9759904311803489, "grad_norm": 0.787279925433227, "learning_rate": 1.510996255805697e-08, "loss": 0.1077, "step": 33455 }, { "epoch": 0.9760196044109924, "grad_norm": 0.9641529645428879, "learning_rate": 1.507328408688502e-08, "loss": 0.132, "step": 33456 }, { "epoch": 0.976048777641636, "grad_norm": 0.9703981020772765, "learning_rate": 1.5036650119948926e-08, "loss": 0.0928, "step": 33457 }, { "epoch": 0.9760779508722796, "grad_norm": 1.1180763624483268, "learning_rate": 1.5000060657575643e-08, "loss": 0.1095, "step": 33458 }, { "epoch": 0.9761071241029231, "grad_norm": 1.32960764059299, "learning_rate": 1.4963515700092135e-08, "loss": 0.1101, "step": 33459 }, { "epoch": 0.9761362973335668, "grad_norm": 0.8027003055346538, "learning_rate": 1.4927015247823695e-08, "loss": 0.1139, "step": 33460 }, { "epoch": 0.9761654705642103, "grad_norm": 0.7467701103652643, "learning_rate": 1.4890559301097284e-08, "loss": 0.1049, "step": 33461 }, { "epoch": 0.9761946437948539, "grad_norm": 0.8576695245988681, "learning_rate": 1.48541478602382e-08, "loss": 0.1041, "step": 33462 }, { "epoch": 0.9762238170254974, "grad_norm": 0.9751636245609486, "learning_rate": 1.4817780925570625e-08, "loss": 0.1221, "step": 33463 }, { "epoch": 0.976252990256141, "grad_norm": 0.8419763689914282, "learning_rate": 1.4781458497419854e-08, "loss": 0.0979, "step": 33464 }, { "epoch": 0.9762821634867845, "grad_norm": 0.7866540633974234, "learning_rate": 1.4745180576110629e-08, "loss": 0.1111, "step": 33465 }, { "epoch": 0.9763113367174281, "grad_norm": 0.8147538835025226, "learning_rate": 1.4708947161966025e-08, "loss": 0.1052, "step": 33466 }, { "epoch": 0.9763405099480716, "grad_norm": 0.7236913976966386, "learning_rate": 1.467275825530967e-08, "loss": 0.1063, "step": 33467 }, { "epoch": 0.9763696831787152, "grad_norm": 0.871967743547231, "learning_rate": 1.4636613856465198e-08, "loss": 0.125, "step": 33468 }, { "epoch": 0.9763988564093588, "grad_norm": 0.8103982140571668, "learning_rate": 1.4600513965755125e-08, "loss": 0.1162, "step": 33469 }, { "epoch": 0.9764280296400023, "grad_norm": 1.1658768722585042, "learning_rate": 1.4564458583500861e-08, "loss": 0.1191, "step": 33470 }, { "epoch": 0.9764572028706459, "grad_norm": 0.7372703729649, "learning_rate": 1.4528447710025484e-08, "loss": 0.0881, "step": 33471 }, { "epoch": 0.9764863761012894, "grad_norm": 0.6513245861024871, "learning_rate": 1.4492481345649844e-08, "loss": 0.0973, "step": 33472 }, { "epoch": 0.976515549331933, "grad_norm": 0.7532321738316657, "learning_rate": 1.4456559490695355e-08, "loss": 0.1037, "step": 33473 }, { "epoch": 0.9765447225625766, "grad_norm": 0.8958885167130327, "learning_rate": 1.4420682145482313e-08, "loss": 0.1079, "step": 33474 }, { "epoch": 0.9765738957932202, "grad_norm": 0.9528026867579396, "learning_rate": 1.4384849310331573e-08, "loss": 0.093, "step": 33475 }, { "epoch": 0.9766030690238637, "grad_norm": 0.9398564865817778, "learning_rate": 1.4349060985562325e-08, "loss": 0.1488, "step": 33476 }, { "epoch": 0.9766322422545073, "grad_norm": 0.7662224821385598, "learning_rate": 1.4313317171494867e-08, "loss": 0.1098, "step": 33477 }, { "epoch": 0.9766614154851508, "grad_norm": 1.0432365376197361, "learning_rate": 1.4277617868447835e-08, "loss": 0.1247, "step": 33478 }, { "epoch": 0.9766905887157944, "grad_norm": 0.9898892779238785, "learning_rate": 1.4241963076739862e-08, "loss": 0.1062, "step": 33479 }, { "epoch": 0.976719761946438, "grad_norm": 0.8288831787117793, "learning_rate": 1.4206352796689582e-08, "loss": 0.1068, "step": 33480 }, { "epoch": 0.9767489351770815, "grad_norm": 1.1654789166534627, "learning_rate": 1.4170787028615074e-08, "loss": 0.1016, "step": 33481 }, { "epoch": 0.976778108407725, "grad_norm": 0.6830434175297825, "learning_rate": 1.4135265772833307e-08, "loss": 0.0871, "step": 33482 }, { "epoch": 0.9768072816383686, "grad_norm": 0.8740675074834651, "learning_rate": 1.4099789029661249e-08, "loss": 0.1071, "step": 33483 }, { "epoch": 0.9768364548690122, "grad_norm": 1.1663422147800555, "learning_rate": 1.4064356799416423e-08, "loss": 0.1158, "step": 33484 }, { "epoch": 0.9768656280996557, "grad_norm": 1.02950746241133, "learning_rate": 1.4028969082415245e-08, "loss": 0.1167, "step": 33485 }, { "epoch": 0.9768948013302993, "grad_norm": 0.8121815245135743, "learning_rate": 1.3993625878972461e-08, "loss": 0.1089, "step": 33486 }, { "epoch": 0.9769239745609429, "grad_norm": 0.7529322717191119, "learning_rate": 1.3958327189404486e-08, "loss": 0.1149, "step": 33487 }, { "epoch": 0.9769531477915865, "grad_norm": 0.8016289552099051, "learning_rate": 1.3923073014026623e-08, "loss": 0.0936, "step": 33488 }, { "epoch": 0.97698232102223, "grad_norm": 0.7566834874084734, "learning_rate": 1.3887863353153064e-08, "loss": 0.1036, "step": 33489 }, { "epoch": 0.9770114942528736, "grad_norm": 0.8066875277382531, "learning_rate": 1.3852698207098004e-08, "loss": 0.1286, "step": 33490 }, { "epoch": 0.9770406674835171, "grad_norm": 1.3215638404967414, "learning_rate": 1.3817577576176744e-08, "loss": 0.127, "step": 33491 }, { "epoch": 0.9770698407141607, "grad_norm": 0.8640221730882329, "learning_rate": 1.3782501460701258e-08, "loss": 0.1046, "step": 33492 }, { "epoch": 0.9770990139448043, "grad_norm": 0.8799891919501298, "learning_rate": 1.3747469860985186e-08, "loss": 0.1058, "step": 33493 }, { "epoch": 0.9771281871754478, "grad_norm": 0.9599150347920247, "learning_rate": 1.3712482777341052e-08, "loss": 0.0938, "step": 33494 }, { "epoch": 0.9771573604060914, "grad_norm": 0.7217733960288283, "learning_rate": 1.3677540210082495e-08, "loss": 0.0943, "step": 33495 }, { "epoch": 0.9771865336367349, "grad_norm": 0.7299157592610854, "learning_rate": 1.3642642159519826e-08, "loss": 0.1087, "step": 33496 }, { "epoch": 0.9772157068673785, "grad_norm": 0.9035806109289037, "learning_rate": 1.3607788625965567e-08, "loss": 0.1467, "step": 33497 }, { "epoch": 0.977244880098022, "grad_norm": 0.8251648123877693, "learning_rate": 1.3572979609730586e-08, "loss": 0.1114, "step": 33498 }, { "epoch": 0.9772740533286656, "grad_norm": 0.8127618790375482, "learning_rate": 1.353821511112574e-08, "loss": 0.1133, "step": 33499 }, { "epoch": 0.9773032265593091, "grad_norm": 1.048735789101215, "learning_rate": 1.3503495130460786e-08, "loss": 0.1259, "step": 33500 }, { "epoch": 0.9773323997899528, "grad_norm": 0.7650104707120872, "learning_rate": 1.346881966804714e-08, "loss": 0.1322, "step": 33501 }, { "epoch": 0.9773615730205963, "grad_norm": 0.9096094626126157, "learning_rate": 1.3434188724192888e-08, "loss": 0.1215, "step": 33502 }, { "epoch": 0.9773907462512399, "grad_norm": 0.7911768998071496, "learning_rate": 1.3399602299208337e-08, "loss": 0.1101, "step": 33503 }, { "epoch": 0.9774199194818834, "grad_norm": 0.8342474729210573, "learning_rate": 1.3365060393401574e-08, "loss": 0.1393, "step": 33504 }, { "epoch": 0.977449092712527, "grad_norm": 0.829972991771532, "learning_rate": 1.3330563007080688e-08, "loss": 0.1439, "step": 33505 }, { "epoch": 0.9774782659431706, "grad_norm": 0.8317439227451584, "learning_rate": 1.3296110140554319e-08, "loss": 0.1196, "step": 33506 }, { "epoch": 0.9775074391738141, "grad_norm": 0.8171054407467253, "learning_rate": 1.326170179413e-08, "loss": 0.1124, "step": 33507 }, { "epoch": 0.9775366124044577, "grad_norm": 0.704629330813467, "learning_rate": 1.3227337968114705e-08, "loss": 0.1217, "step": 33508 }, { "epoch": 0.9775657856351012, "grad_norm": 0.6362952901423882, "learning_rate": 1.3193018662815416e-08, "loss": 0.1042, "step": 33509 }, { "epoch": 0.9775949588657448, "grad_norm": 0.7817879002668029, "learning_rate": 1.315874387853855e-08, "loss": 0.1108, "step": 33510 }, { "epoch": 0.9776241320963883, "grad_norm": 0.7951073224680534, "learning_rate": 1.3124513615589419e-08, "loss": 0.1262, "step": 33511 }, { "epoch": 0.9776533053270319, "grad_norm": 0.678908976733393, "learning_rate": 1.3090327874274445e-08, "loss": 0.1213, "step": 33512 }, { "epoch": 0.9776824785576754, "grad_norm": 0.8913169990854443, "learning_rate": 1.305618665489894e-08, "loss": 0.1038, "step": 33513 }, { "epoch": 0.9777116517883191, "grad_norm": 0.6844177475755414, "learning_rate": 1.3022089957766548e-08, "loss": 0.0742, "step": 33514 }, { "epoch": 0.9777408250189626, "grad_norm": 0.6221148187316252, "learning_rate": 1.2988037783183138e-08, "loss": 0.0928, "step": 33515 }, { "epoch": 0.9777699982496062, "grad_norm": 1.4901746606833184, "learning_rate": 1.29540301314518e-08, "loss": 0.1121, "step": 33516 }, { "epoch": 0.9777991714802498, "grad_norm": 0.7422810677962711, "learning_rate": 1.292006700287618e-08, "loss": 0.1065, "step": 33517 }, { "epoch": 0.9778283447108933, "grad_norm": 0.9316007606678465, "learning_rate": 1.2886148397759923e-08, "loss": 0.148, "step": 33518 }, { "epoch": 0.9778575179415369, "grad_norm": 0.9082058317249178, "learning_rate": 1.2852274316405567e-08, "loss": 0.1225, "step": 33519 }, { "epoch": 0.9778866911721804, "grad_norm": 1.0126244087613379, "learning_rate": 1.2818444759115644e-08, "loss": 0.1195, "step": 33520 }, { "epoch": 0.977915864402824, "grad_norm": 1.0742154430928772, "learning_rate": 1.2784659726192139e-08, "loss": 0.137, "step": 33521 }, { "epoch": 0.9779450376334675, "grad_norm": 0.779363725290733, "learning_rate": 1.2750919217936475e-08, "loss": 0.1077, "step": 33522 }, { "epoch": 0.9779742108641111, "grad_norm": 0.7613057254401159, "learning_rate": 1.2717223234650079e-08, "loss": 0.1029, "step": 33523 }, { "epoch": 0.9780033840947546, "grad_norm": 1.022905821591553, "learning_rate": 1.2683571776633819e-08, "loss": 0.1275, "step": 33524 }, { "epoch": 0.9780325573253982, "grad_norm": 0.8794318217864383, "learning_rate": 1.2649964844188013e-08, "loss": 0.1042, "step": 33525 }, { "epoch": 0.9780617305560417, "grad_norm": 0.9135103344287407, "learning_rate": 1.2616402437612418e-08, "loss": 0.1179, "step": 33526 }, { "epoch": 0.9780909037866853, "grad_norm": 0.8849909720850804, "learning_rate": 1.2582884557207908e-08, "loss": 0.1077, "step": 33527 }, { "epoch": 0.978120077017329, "grad_norm": 0.9713646329248453, "learning_rate": 1.2549411203272021e-08, "loss": 0.1012, "step": 33528 }, { "epoch": 0.9781492502479725, "grad_norm": 0.7887688045716686, "learning_rate": 1.2515982376104518e-08, "loss": 0.104, "step": 33529 }, { "epoch": 0.978178423478616, "grad_norm": 0.8497511414826573, "learning_rate": 1.2482598076003493e-08, "loss": 0.1076, "step": 33530 }, { "epoch": 0.9782075967092596, "grad_norm": 0.8172917817280038, "learning_rate": 1.2449258303267597e-08, "loss": 0.1212, "step": 33531 }, { "epoch": 0.9782367699399032, "grad_norm": 0.8647268169479864, "learning_rate": 1.241596305819437e-08, "loss": 0.1096, "step": 33532 }, { "epoch": 0.9782659431705467, "grad_norm": 0.7647816226522861, "learning_rate": 1.238271234108024e-08, "loss": 0.1131, "step": 33533 }, { "epoch": 0.9782951164011903, "grad_norm": 0.6886995127166381, "learning_rate": 1.2349506152223301e-08, "loss": 0.1095, "step": 33534 }, { "epoch": 0.9783242896318338, "grad_norm": 0.7503952331544292, "learning_rate": 1.231634449191832e-08, "loss": 0.1005, "step": 33535 }, { "epoch": 0.9783534628624774, "grad_norm": 0.6880498841861453, "learning_rate": 1.2283227360462834e-08, "loss": 0.1056, "step": 33536 }, { "epoch": 0.9783826360931209, "grad_norm": 0.708908472219334, "learning_rate": 1.2250154758152167e-08, "loss": 0.1143, "step": 33537 }, { "epoch": 0.9784118093237645, "grad_norm": 0.6950050722908366, "learning_rate": 1.2217126685281633e-08, "loss": 0.0734, "step": 33538 }, { "epoch": 0.978440982554408, "grad_norm": 1.1183797592938378, "learning_rate": 1.2184143142145444e-08, "loss": 0.1104, "step": 33539 }, { "epoch": 0.9784701557850516, "grad_norm": 0.7917304513124189, "learning_rate": 1.2151204129038918e-08, "loss": 0.1236, "step": 33540 }, { "epoch": 0.9784993290156953, "grad_norm": 0.8255660088355159, "learning_rate": 1.211830964625571e-08, "loss": 0.0944, "step": 33541 }, { "epoch": 0.9785285022463388, "grad_norm": 1.0263963579505295, "learning_rate": 1.2085459694089475e-08, "loss": 0.1461, "step": 33542 }, { "epoch": 0.9785576754769824, "grad_norm": 0.8897985538898494, "learning_rate": 1.2052654272833309e-08, "loss": 0.1063, "step": 33543 }, { "epoch": 0.9785868487076259, "grad_norm": 0.6096755265309961, "learning_rate": 1.2019893382780312e-08, "loss": 0.1214, "step": 33544 }, { "epoch": 0.9786160219382695, "grad_norm": 0.8703492366971958, "learning_rate": 1.1987177024223028e-08, "loss": 0.1238, "step": 33545 }, { "epoch": 0.978645195168913, "grad_norm": 0.8568893824806576, "learning_rate": 1.1954505197454002e-08, "loss": 0.1124, "step": 33546 }, { "epoch": 0.9786743683995566, "grad_norm": 0.8984923640720707, "learning_rate": 1.1921877902763557e-08, "loss": 0.0964, "step": 33547 }, { "epoch": 0.9787035416302001, "grad_norm": 0.9315082956857679, "learning_rate": 1.188929514044479e-08, "loss": 0.1107, "step": 33548 }, { "epoch": 0.9787327148608437, "grad_norm": 0.7133434124220804, "learning_rate": 1.1856756910786915e-08, "loss": 0.1127, "step": 33549 }, { "epoch": 0.9787618880914872, "grad_norm": 0.8420504959062934, "learning_rate": 1.1824263214081367e-08, "loss": 0.0958, "step": 33550 }, { "epoch": 0.9787910613221308, "grad_norm": 0.8654764531361201, "learning_rate": 1.179181405061791e-08, "loss": 0.0959, "step": 33551 }, { "epoch": 0.9788202345527743, "grad_norm": 0.880735452510767, "learning_rate": 1.1759409420686873e-08, "loss": 0.1416, "step": 33552 }, { "epoch": 0.9788494077834179, "grad_norm": 0.8464581829556374, "learning_rate": 1.1727049324576355e-08, "loss": 0.0964, "step": 33553 }, { "epoch": 0.9788785810140614, "grad_norm": 0.6674807253957455, "learning_rate": 1.1694733762576127e-08, "loss": 0.0991, "step": 33554 }, { "epoch": 0.9789077542447051, "grad_norm": 1.1879563273991856, "learning_rate": 1.1662462734974845e-08, "loss": 0.1231, "step": 33555 }, { "epoch": 0.9789369274753487, "grad_norm": 0.94763739100455, "learning_rate": 1.1630236242060056e-08, "loss": 0.0921, "step": 33556 }, { "epoch": 0.9789661007059922, "grad_norm": 0.7393453897394201, "learning_rate": 1.1598054284119864e-08, "loss": 0.0784, "step": 33557 }, { "epoch": 0.9789952739366358, "grad_norm": 0.6927939059784717, "learning_rate": 1.1565916861441263e-08, "loss": 0.0906, "step": 33558 }, { "epoch": 0.9790244471672793, "grad_norm": 0.8144790970997765, "learning_rate": 1.1533823974311242e-08, "loss": 0.1138, "step": 33559 }, { "epoch": 0.9790536203979229, "grad_norm": 0.7101226923340795, "learning_rate": 1.1501775623016243e-08, "loss": 0.1004, "step": 33560 }, { "epoch": 0.9790827936285664, "grad_norm": 0.8468378824637969, "learning_rate": 1.14697718078427e-08, "loss": 0.1119, "step": 33561 }, { "epoch": 0.97911196685921, "grad_norm": 0.7868780522659902, "learning_rate": 1.1437812529076498e-08, "loss": 0.1165, "step": 33562 }, { "epoch": 0.9791411400898535, "grad_norm": 0.832739648487524, "learning_rate": 1.1405897787002407e-08, "loss": 0.0972, "step": 33563 }, { "epoch": 0.9791703133204971, "grad_norm": 0.7707445998333764, "learning_rate": 1.1374027581905201e-08, "loss": 0.0913, "step": 33564 }, { "epoch": 0.9791994865511406, "grad_norm": 0.752586600078335, "learning_rate": 1.1342201914070206e-08, "loss": 0.1283, "step": 33565 }, { "epoch": 0.9792286597817842, "grad_norm": 0.8288984065373154, "learning_rate": 1.1310420783781084e-08, "loss": 0.1002, "step": 33566 }, { "epoch": 0.9792578330124277, "grad_norm": 0.882658771823189, "learning_rate": 1.1278684191321499e-08, "loss": 0.1135, "step": 33567 }, { "epoch": 0.9792870062430714, "grad_norm": 0.809954938094631, "learning_rate": 1.124699213697511e-08, "loss": 0.1148, "step": 33568 }, { "epoch": 0.979316179473715, "grad_norm": 0.8110699918505555, "learning_rate": 1.1215344621025026e-08, "loss": 0.1225, "step": 33569 }, { "epoch": 0.9793453527043585, "grad_norm": 0.8564527368341427, "learning_rate": 1.1183741643752688e-08, "loss": 0.1148, "step": 33570 }, { "epoch": 0.9793745259350021, "grad_norm": 0.8592063659683723, "learning_rate": 1.1152183205441202e-08, "loss": 0.109, "step": 33571 }, { "epoch": 0.9794036991656456, "grad_norm": 0.8518428727333377, "learning_rate": 1.1120669306372568e-08, "loss": 0.1218, "step": 33572 }, { "epoch": 0.9794328723962892, "grad_norm": 0.8120187384522541, "learning_rate": 1.1089199946827111e-08, "loss": 0.1312, "step": 33573 }, { "epoch": 0.9794620456269327, "grad_norm": 1.0401449897679538, "learning_rate": 1.1057775127086279e-08, "loss": 0.1517, "step": 33574 }, { "epoch": 0.9794912188575763, "grad_norm": 0.8482366633895007, "learning_rate": 1.1026394847430954e-08, "loss": 0.1105, "step": 33575 }, { "epoch": 0.9795203920882198, "grad_norm": 0.6194503260232692, "learning_rate": 1.0995059108140916e-08, "loss": 0.0999, "step": 33576 }, { "epoch": 0.9795495653188634, "grad_norm": 0.6525009094528953, "learning_rate": 1.0963767909495938e-08, "loss": 0.1031, "step": 33577 }, { "epoch": 0.9795787385495069, "grad_norm": 0.8035432789246304, "learning_rate": 1.0932521251775796e-08, "loss": 0.1117, "step": 33578 }, { "epoch": 0.9796079117801505, "grad_norm": 0.6810473434341285, "learning_rate": 1.0901319135259158e-08, "loss": 0.0903, "step": 33579 }, { "epoch": 0.979637085010794, "grad_norm": 0.8300760745461686, "learning_rate": 1.0870161560224134e-08, "loss": 0.141, "step": 33580 }, { "epoch": 0.9796662582414376, "grad_norm": 0.8687429024153636, "learning_rate": 1.0839048526949391e-08, "loss": 0.1, "step": 33581 }, { "epoch": 0.9796954314720813, "grad_norm": 0.8460977724789748, "learning_rate": 1.080798003571304e-08, "loss": 0.1358, "step": 33582 }, { "epoch": 0.9797246047027248, "grad_norm": 0.716330625236146, "learning_rate": 1.0776956086790968e-08, "loss": 0.1036, "step": 33583 }, { "epoch": 0.9797537779333684, "grad_norm": 0.73467268286258, "learning_rate": 1.07459766804624e-08, "loss": 0.122, "step": 33584 }, { "epoch": 0.9797829511640119, "grad_norm": 0.7435576255141277, "learning_rate": 1.0715041817002114e-08, "loss": 0.1009, "step": 33585 }, { "epoch": 0.9798121243946555, "grad_norm": 0.9354998523865375, "learning_rate": 1.0684151496687112e-08, "loss": 0.0872, "step": 33586 }, { "epoch": 0.979841297625299, "grad_norm": 0.6837934822505343, "learning_rate": 1.0653305719792727e-08, "loss": 0.0985, "step": 33587 }, { "epoch": 0.9798704708559426, "grad_norm": 1.123557878098781, "learning_rate": 1.0622504486594853e-08, "loss": 0.1109, "step": 33588 }, { "epoch": 0.9798996440865861, "grad_norm": 0.8260486070102699, "learning_rate": 1.0591747797367713e-08, "loss": 0.0882, "step": 33589 }, { "epoch": 0.9799288173172297, "grad_norm": 0.8483487365515615, "learning_rate": 1.0561035652386643e-08, "loss": 0.1096, "step": 33590 }, { "epoch": 0.9799579905478732, "grad_norm": 0.899948870894597, "learning_rate": 1.0530368051925865e-08, "loss": 0.1261, "step": 33591 }, { "epoch": 0.9799871637785168, "grad_norm": 0.9432100765013671, "learning_rate": 1.0499744996259054e-08, "loss": 0.1259, "step": 33592 }, { "epoch": 0.9800163370091604, "grad_norm": 0.7118238030219393, "learning_rate": 1.0469166485658766e-08, "loss": 0.0994, "step": 33593 }, { "epoch": 0.9800455102398039, "grad_norm": 0.5866476698926588, "learning_rate": 1.0438632520399227e-08, "loss": 0.0928, "step": 33594 }, { "epoch": 0.9800746834704476, "grad_norm": 0.8024200703226215, "learning_rate": 1.0408143100751888e-08, "loss": 0.1121, "step": 33595 }, { "epoch": 0.9801038567010911, "grad_norm": 0.7835659963475179, "learning_rate": 1.0377698226989863e-08, "loss": 0.0924, "step": 33596 }, { "epoch": 0.9801330299317347, "grad_norm": 0.7460840593951773, "learning_rate": 1.0347297899384601e-08, "loss": 0.1092, "step": 33597 }, { "epoch": 0.9801622031623782, "grad_norm": 0.6389914910112341, "learning_rate": 1.0316942118207551e-08, "loss": 0.1233, "step": 33598 }, { "epoch": 0.9801913763930218, "grad_norm": 0.8388707244957739, "learning_rate": 1.0286630883729608e-08, "loss": 0.1164, "step": 33599 }, { "epoch": 0.9802205496236653, "grad_norm": 0.9032565473013633, "learning_rate": 1.0256364196221669e-08, "loss": 0.1012, "step": 33600 }, { "epoch": 0.9802497228543089, "grad_norm": 0.9149871122702345, "learning_rate": 1.0226142055953515e-08, "loss": 0.1217, "step": 33601 }, { "epoch": 0.9802788960849524, "grad_norm": 0.9215684318390998, "learning_rate": 1.0195964463195485e-08, "loss": 0.1058, "step": 33602 }, { "epoch": 0.980308069315596, "grad_norm": 0.9950604692617956, "learning_rate": 1.0165831418216255e-08, "loss": 0.1126, "step": 33603 }, { "epoch": 0.9803372425462396, "grad_norm": 0.7541798563495795, "learning_rate": 1.0135742921286163e-08, "loss": 0.0971, "step": 33604 }, { "epoch": 0.9803664157768831, "grad_norm": 0.7461091518709655, "learning_rate": 1.0105698972672217e-08, "loss": 0.1101, "step": 33605 }, { "epoch": 0.9803955890075267, "grad_norm": 1.0912559824009103, "learning_rate": 1.0075699572643649e-08, "loss": 0.1261, "step": 33606 }, { "epoch": 0.9804247622381702, "grad_norm": 0.9400767961900052, "learning_rate": 1.0045744721468021e-08, "loss": 0.1261, "step": 33607 }, { "epoch": 0.9804539354688138, "grad_norm": 0.9017659561990901, "learning_rate": 1.0015834419412895e-08, "loss": 0.1264, "step": 33608 }, { "epoch": 0.9804831086994574, "grad_norm": 0.8615105950532248, "learning_rate": 9.985968666745282e-09, "loss": 0.106, "step": 33609 }, { "epoch": 0.980512281930101, "grad_norm": 0.825579417128215, "learning_rate": 9.95614746373108e-09, "loss": 0.1219, "step": 33610 }, { "epoch": 0.9805414551607445, "grad_norm": 0.6876971184436184, "learning_rate": 9.926370810637853e-09, "loss": 0.1029, "step": 33611 }, { "epoch": 0.9805706283913881, "grad_norm": 0.6744817637104469, "learning_rate": 9.896638707730944e-09, "loss": 0.1196, "step": 33612 }, { "epoch": 0.9805998016220316, "grad_norm": 0.86975152342235, "learning_rate": 9.866951155274585e-09, "loss": 0.1182, "step": 33613 }, { "epoch": 0.9806289748526752, "grad_norm": 0.7316605252320606, "learning_rate": 9.837308153535786e-09, "loss": 0.0906, "step": 33614 }, { "epoch": 0.9806581480833187, "grad_norm": 0.8640335590223941, "learning_rate": 9.807709702778223e-09, "loss": 0.1253, "step": 33615 }, { "epoch": 0.9806873213139623, "grad_norm": 0.7699819317838843, "learning_rate": 9.778155803265577e-09, "loss": 0.1125, "step": 33616 }, { "epoch": 0.9807164945446059, "grad_norm": 0.9002660430621313, "learning_rate": 9.748646455262633e-09, "loss": 0.1143, "step": 33617 }, { "epoch": 0.9807456677752494, "grad_norm": 0.688563331347183, "learning_rate": 9.719181659032518e-09, "loss": 0.0892, "step": 33618 }, { "epoch": 0.980774841005893, "grad_norm": 0.7607676172778293, "learning_rate": 9.68976141483835e-09, "loss": 0.1141, "step": 33619 }, { "epoch": 0.9808040142365365, "grad_norm": 0.6857942810864647, "learning_rate": 9.6603857229427e-09, "loss": 0.0977, "step": 33620 }, { "epoch": 0.9808331874671801, "grad_norm": 0.8653116959258328, "learning_rate": 9.63105458360758e-09, "loss": 0.0938, "step": 33621 }, { "epoch": 0.9808623606978237, "grad_norm": 0.8703808503252962, "learning_rate": 9.601767997095556e-09, "loss": 0.1275, "step": 33622 }, { "epoch": 0.9808915339284673, "grad_norm": 0.953380987348052, "learning_rate": 9.572525963666979e-09, "loss": 0.0995, "step": 33623 }, { "epoch": 0.9809207071591108, "grad_norm": 1.0503153488917936, "learning_rate": 9.543328483584412e-09, "loss": 0.1027, "step": 33624 }, { "epoch": 0.9809498803897544, "grad_norm": 0.9197137964180604, "learning_rate": 9.514175557107097e-09, "loss": 0.1037, "step": 33625 }, { "epoch": 0.980979053620398, "grad_norm": 0.879340509246777, "learning_rate": 9.485067184495932e-09, "loss": 0.1313, "step": 33626 }, { "epoch": 0.9810082268510415, "grad_norm": 0.7984548676562049, "learning_rate": 9.456003366010713e-09, "loss": 0.145, "step": 33627 }, { "epoch": 0.981037400081685, "grad_norm": 1.1241025417926034, "learning_rate": 9.42698410191123e-09, "loss": 0.1002, "step": 33628 }, { "epoch": 0.9810665733123286, "grad_norm": 0.7751571980232825, "learning_rate": 9.398009392456165e-09, "loss": 0.1082, "step": 33629 }, { "epoch": 0.9810957465429722, "grad_norm": 0.6539449698680845, "learning_rate": 9.3690792379042e-09, "loss": 0.1209, "step": 33630 }, { "epoch": 0.9811249197736157, "grad_norm": 0.9025010932371627, "learning_rate": 9.340193638514017e-09, "loss": 0.0988, "step": 33631 }, { "epoch": 0.9811540930042593, "grad_norm": 0.9088939761302108, "learning_rate": 9.311352594543188e-09, "loss": 0.0958, "step": 33632 }, { "epoch": 0.9811832662349028, "grad_norm": 0.6746669390137113, "learning_rate": 9.28255610624873e-09, "loss": 0.1222, "step": 33633 }, { "epoch": 0.9812124394655464, "grad_norm": 0.8845037677253808, "learning_rate": 9.25380417388877e-09, "loss": 0.1036, "step": 33634 }, { "epoch": 0.9812416126961899, "grad_norm": 0.8244579628519634, "learning_rate": 9.225096797719213e-09, "loss": 0.1208, "step": 33635 }, { "epoch": 0.9812707859268336, "grad_norm": 0.7776124324380745, "learning_rate": 9.196433977996522e-09, "loss": 0.1214, "step": 33636 }, { "epoch": 0.9812999591574771, "grad_norm": 0.7720265074637314, "learning_rate": 9.167815714977158e-09, "loss": 0.1226, "step": 33637 }, { "epoch": 0.9813291323881207, "grad_norm": 0.7452347521734213, "learning_rate": 9.13924200891536e-09, "loss": 0.1007, "step": 33638 }, { "epoch": 0.9813583056187642, "grad_norm": 0.8160326354493608, "learning_rate": 9.110712860067594e-09, "loss": 0.0961, "step": 33639 }, { "epoch": 0.9813874788494078, "grad_norm": 0.9735477117013972, "learning_rate": 9.082228268688099e-09, "loss": 0.1054, "step": 33640 }, { "epoch": 0.9814166520800514, "grad_norm": 0.753904475086462, "learning_rate": 9.053788235030558e-09, "loss": 0.1179, "step": 33641 }, { "epoch": 0.9814458253106949, "grad_norm": 1.0664785226371116, "learning_rate": 9.025392759349771e-09, "loss": 0.1252, "step": 33642 }, { "epoch": 0.9814749985413385, "grad_norm": 0.7380798782229535, "learning_rate": 8.997041841898312e-09, "loss": 0.0998, "step": 33643 }, { "epoch": 0.981504171771982, "grad_norm": 0.8774971908772576, "learning_rate": 8.968735482929868e-09, "loss": 0.1071, "step": 33644 }, { "epoch": 0.9815333450026256, "grad_norm": 1.0827863457925666, "learning_rate": 8.94047368269757e-09, "loss": 0.1222, "step": 33645 }, { "epoch": 0.9815625182332691, "grad_norm": 0.8178162774695727, "learning_rate": 8.912256441452882e-09, "loss": 0.0873, "step": 33646 }, { "epoch": 0.9815916914639127, "grad_norm": 0.7479260808972571, "learning_rate": 8.884083759448381e-09, "loss": 0.1038, "step": 33647 }, { "epoch": 0.9816208646945562, "grad_norm": 0.9094641469251314, "learning_rate": 8.855955636935531e-09, "loss": 0.1121, "step": 33648 }, { "epoch": 0.9816500379251999, "grad_norm": 0.7544709780358172, "learning_rate": 8.82787207416469e-09, "loss": 0.12, "step": 33649 }, { "epoch": 0.9816792111558434, "grad_norm": 0.7819759218229222, "learning_rate": 8.79983307138732e-09, "loss": 0.1003, "step": 33650 }, { "epoch": 0.981708384386487, "grad_norm": 0.870250345927785, "learning_rate": 8.771838628853225e-09, "loss": 0.1235, "step": 33651 }, { "epoch": 0.9817375576171306, "grad_norm": 0.9524485265012032, "learning_rate": 8.743888746813312e-09, "loss": 0.1528, "step": 33652 }, { "epoch": 0.9817667308477741, "grad_norm": 0.8862812860992284, "learning_rate": 8.715983425515718e-09, "loss": 0.1059, "step": 33653 }, { "epoch": 0.9817959040784177, "grad_norm": 1.0019586128183071, "learning_rate": 8.688122665210796e-09, "loss": 0.1227, "step": 33654 }, { "epoch": 0.9818250773090612, "grad_norm": 1.1203320311115514, "learning_rate": 8.660306466146683e-09, "loss": 0.0952, "step": 33655 }, { "epoch": 0.9818542505397048, "grad_norm": 0.7728779293846545, "learning_rate": 8.632534828571516e-09, "loss": 0.1072, "step": 33656 }, { "epoch": 0.9818834237703483, "grad_norm": 0.7066081730542775, "learning_rate": 8.60480775273398e-09, "loss": 0.1179, "step": 33657 }, { "epoch": 0.9819125970009919, "grad_norm": 0.9057985351791848, "learning_rate": 8.577125238881102e-09, "loss": 0.1114, "step": 33658 }, { "epoch": 0.9819417702316354, "grad_norm": 0.725792279645597, "learning_rate": 8.549487287259906e-09, "loss": 0.1236, "step": 33659 }, { "epoch": 0.981970943462279, "grad_norm": 0.6701349775250318, "learning_rate": 8.521893898117417e-09, "loss": 0.1038, "step": 33660 }, { "epoch": 0.9820001166929225, "grad_norm": 0.7656834595744108, "learning_rate": 8.494345071700105e-09, "loss": 0.1128, "step": 33661 }, { "epoch": 0.9820292899235661, "grad_norm": 0.8793608677890365, "learning_rate": 8.46684080825333e-09, "loss": 0.1264, "step": 33662 }, { "epoch": 0.9820584631542097, "grad_norm": 0.6874765366075883, "learning_rate": 8.439381108023559e-09, "loss": 0.1028, "step": 33663 }, { "epoch": 0.9820876363848533, "grad_norm": 0.802076161968697, "learning_rate": 8.411965971255042e-09, "loss": 0.1213, "step": 33664 }, { "epoch": 0.9821168096154969, "grad_norm": 2.7792084946682474, "learning_rate": 8.38459539819314e-09, "loss": 0.0998, "step": 33665 }, { "epoch": 0.9821459828461404, "grad_norm": 0.8362544481670676, "learning_rate": 8.357269389081547e-09, "loss": 0.1176, "step": 33666 }, { "epoch": 0.982175156076784, "grad_norm": 0.7512691058425112, "learning_rate": 8.329987944165064e-09, "loss": 0.1282, "step": 33667 }, { "epoch": 0.9822043293074275, "grad_norm": 0.845468959005257, "learning_rate": 8.302751063686276e-09, "loss": 0.1196, "step": 33668 }, { "epoch": 0.9822335025380711, "grad_norm": 0.7454117878596589, "learning_rate": 8.275558747889434e-09, "loss": 0.1028, "step": 33669 }, { "epoch": 0.9822626757687146, "grad_norm": 0.815619565420258, "learning_rate": 8.248410997016565e-09, "loss": 0.104, "step": 33670 }, { "epoch": 0.9822918489993582, "grad_norm": 0.9247442920539567, "learning_rate": 8.221307811310808e-09, "loss": 0.1186, "step": 33671 }, { "epoch": 0.9823210222300017, "grad_norm": 0.8804617502901677, "learning_rate": 8.194249191013082e-09, "loss": 0.1227, "step": 33672 }, { "epoch": 0.9823501954606453, "grad_norm": 0.8159412005318231, "learning_rate": 8.167235136365414e-09, "loss": 0.122, "step": 33673 }, { "epoch": 0.9823793686912888, "grad_norm": 1.1741006117092003, "learning_rate": 8.140265647608725e-09, "loss": 0.1087, "step": 33674 }, { "epoch": 0.9824085419219324, "grad_norm": 0.7397438447371917, "learning_rate": 8.113340724985042e-09, "loss": 0.0956, "step": 33675 }, { "epoch": 0.9824377151525759, "grad_norm": 0.8174985130404561, "learning_rate": 8.086460368733062e-09, "loss": 0.1241, "step": 33676 }, { "epoch": 0.9824668883832196, "grad_norm": 0.9351532761366477, "learning_rate": 8.059624579093705e-09, "loss": 0.0985, "step": 33677 }, { "epoch": 0.9824960616138632, "grad_norm": 0.8264677583332484, "learning_rate": 8.032833356306224e-09, "loss": 0.1091, "step": 33678 }, { "epoch": 0.9825252348445067, "grad_norm": 0.8207624477175735, "learning_rate": 8.006086700609872e-09, "loss": 0.1065, "step": 33679 }, { "epoch": 0.9825544080751503, "grad_norm": 0.7723821360481832, "learning_rate": 7.979384612243901e-09, "loss": 0.1062, "step": 33680 }, { "epoch": 0.9825835813057938, "grad_norm": 0.8756408712289196, "learning_rate": 7.9527270914459e-09, "loss": 0.0945, "step": 33681 }, { "epoch": 0.9826127545364374, "grad_norm": 1.0934535005137842, "learning_rate": 7.926114138454566e-09, "loss": 0.1074, "step": 33682 }, { "epoch": 0.9826419277670809, "grad_norm": 0.70889898196579, "learning_rate": 7.899545753506933e-09, "loss": 0.1137, "step": 33683 }, { "epoch": 0.9826711009977245, "grad_norm": 0.7487615325736632, "learning_rate": 7.873021936840585e-09, "loss": 0.0959, "step": 33684 }, { "epoch": 0.982700274228368, "grad_norm": 0.6615837681059235, "learning_rate": 7.846542688692005e-09, "loss": 0.099, "step": 33685 }, { "epoch": 0.9827294474590116, "grad_norm": 0.763631584549047, "learning_rate": 7.820108009297667e-09, "loss": 0.1192, "step": 33686 }, { "epoch": 0.9827586206896551, "grad_norm": 0.8010618778523452, "learning_rate": 7.79371789889405e-09, "loss": 0.1097, "step": 33687 }, { "epoch": 0.9827877939202987, "grad_norm": 0.9549698425565081, "learning_rate": 7.767372357715964e-09, "loss": 0.1429, "step": 33688 }, { "epoch": 0.9828169671509422, "grad_norm": 0.7716348496530502, "learning_rate": 7.741071385999332e-09, "loss": 0.0939, "step": 33689 }, { "epoch": 0.9828461403815859, "grad_norm": 1.1080247987326997, "learning_rate": 7.714814983978414e-09, "loss": 0.1007, "step": 33690 }, { "epoch": 0.9828753136122295, "grad_norm": 0.8263190894010536, "learning_rate": 7.688603151888019e-09, "loss": 0.0956, "step": 33691 }, { "epoch": 0.982904486842873, "grad_norm": 0.9552939698584892, "learning_rate": 7.662435889962406e-09, "loss": 0.1035, "step": 33692 }, { "epoch": 0.9829336600735166, "grad_norm": 0.8852238599522226, "learning_rate": 7.636313198434164e-09, "loss": 0.1089, "step": 33693 }, { "epoch": 0.9829628333041601, "grad_norm": 0.7441534776528622, "learning_rate": 7.610235077537554e-09, "loss": 0.1068, "step": 33694 }, { "epoch": 0.9829920065348037, "grad_norm": 0.8998278979857637, "learning_rate": 7.584201527505163e-09, "loss": 0.1273, "step": 33695 }, { "epoch": 0.9830211797654472, "grad_norm": 0.9221953547531013, "learning_rate": 7.558212548568478e-09, "loss": 0.1212, "step": 33696 }, { "epoch": 0.9830503529960908, "grad_norm": 0.9391561379043512, "learning_rate": 7.532268140961197e-09, "loss": 0.0911, "step": 33697 }, { "epoch": 0.9830795262267343, "grad_norm": 0.7395787939323956, "learning_rate": 7.506368304913136e-09, "loss": 0.1265, "step": 33698 }, { "epoch": 0.9831086994573779, "grad_norm": 0.7795398518213585, "learning_rate": 7.48051304065689e-09, "loss": 0.1114, "step": 33699 }, { "epoch": 0.9831378726880214, "grad_norm": 0.7745135194126502, "learning_rate": 7.454702348422826e-09, "loss": 0.1247, "step": 33700 }, { "epoch": 0.983167045918665, "grad_norm": 0.8725922866159622, "learning_rate": 7.428936228441319e-09, "loss": 0.1271, "step": 33701 }, { "epoch": 0.9831962191493085, "grad_norm": 0.7601226929595027, "learning_rate": 7.403214680942739e-09, "loss": 0.0777, "step": 33702 }, { "epoch": 0.9832253923799521, "grad_norm": 0.9176273069054299, "learning_rate": 7.377537706155791e-09, "loss": 0.1291, "step": 33703 }, { "epoch": 0.9832545656105958, "grad_norm": 0.7472549936756512, "learning_rate": 7.351905304310847e-09, "loss": 0.0944, "step": 33704 }, { "epoch": 0.9832837388412393, "grad_norm": 0.9094716647789562, "learning_rate": 7.326317475636058e-09, "loss": 0.1141, "step": 33705 }, { "epoch": 0.9833129120718829, "grad_norm": 0.956582694642723, "learning_rate": 7.30077422036013e-09, "loss": 0.1215, "step": 33706 }, { "epoch": 0.9833420853025264, "grad_norm": 0.7160709952785085, "learning_rate": 7.275275538711213e-09, "loss": 0.099, "step": 33707 }, { "epoch": 0.98337125853317, "grad_norm": 0.7956117148602124, "learning_rate": 7.249821430916348e-09, "loss": 0.1043, "step": 33708 }, { "epoch": 0.9834004317638135, "grad_norm": 1.0359450303438382, "learning_rate": 7.224411897203687e-09, "loss": 0.1045, "step": 33709 }, { "epoch": 0.9834296049944571, "grad_norm": 0.842684944587351, "learning_rate": 7.199046937799159e-09, "loss": 0.093, "step": 33710 }, { "epoch": 0.9834587782251006, "grad_norm": 0.8383679980030517, "learning_rate": 7.173726552929805e-09, "loss": 0.1224, "step": 33711 }, { "epoch": 0.9834879514557442, "grad_norm": 0.7657670068051833, "learning_rate": 7.148450742821556e-09, "loss": 0.1023, "step": 33712 }, { "epoch": 0.9835171246863877, "grad_norm": 0.8992060937948977, "learning_rate": 7.123219507700341e-09, "loss": 0.1017, "step": 33713 }, { "epoch": 0.9835462979170313, "grad_norm": 0.8077556941158505, "learning_rate": 7.098032847790426e-09, "loss": 0.1071, "step": 33714 }, { "epoch": 0.9835754711476749, "grad_norm": 0.7106926564942275, "learning_rate": 7.072890763317742e-09, "loss": 0.1024, "step": 33715 }, { "epoch": 0.9836046443783184, "grad_norm": 0.8228047962547985, "learning_rate": 7.047793254506552e-09, "loss": 0.1239, "step": 33716 }, { "epoch": 0.9836338176089621, "grad_norm": 1.2068928857190815, "learning_rate": 7.0227403215805675e-09, "loss": 0.1149, "step": 33717 }, { "epoch": 0.9836629908396056, "grad_norm": 0.6898694648620306, "learning_rate": 6.997731964764054e-09, "loss": 0.093, "step": 33718 }, { "epoch": 0.9836921640702492, "grad_norm": 0.6336542462070724, "learning_rate": 6.97276818427961e-09, "loss": 0.104, "step": 33719 }, { "epoch": 0.9837213373008927, "grad_norm": 0.7946128929906376, "learning_rate": 6.947848980349836e-09, "loss": 0.1118, "step": 33720 }, { "epoch": 0.9837505105315363, "grad_norm": 0.8331790764768273, "learning_rate": 6.922974353198441e-09, "loss": 0.1047, "step": 33721 }, { "epoch": 0.9837796837621798, "grad_norm": 0.9639134480924492, "learning_rate": 6.898144303046361e-09, "loss": 0.1134, "step": 33722 }, { "epoch": 0.9838088569928234, "grad_norm": 0.9261810124901682, "learning_rate": 6.873358830116194e-09, "loss": 0.1219, "step": 33723 }, { "epoch": 0.9838380302234669, "grad_norm": 0.9425190641302295, "learning_rate": 6.848617934628321e-09, "loss": 0.1167, "step": 33724 }, { "epoch": 0.9838672034541105, "grad_norm": 1.023442998022823, "learning_rate": 6.82392161680423e-09, "loss": 0.1337, "step": 33725 }, { "epoch": 0.983896376684754, "grad_norm": 0.793750959786204, "learning_rate": 6.799269876863745e-09, "loss": 0.1188, "step": 33726 }, { "epoch": 0.9839255499153976, "grad_norm": 0.9351271772309842, "learning_rate": 6.7746627150278024e-09, "loss": 0.116, "step": 33727 }, { "epoch": 0.9839547231460412, "grad_norm": 0.7967768431673874, "learning_rate": 6.750100131515669e-09, "loss": 0.1201, "step": 33728 }, { "epoch": 0.9839838963766847, "grad_norm": 0.8303397987610552, "learning_rate": 6.725582126546615e-09, "loss": 0.1087, "step": 33729 }, { "epoch": 0.9840130696073283, "grad_norm": 0.7244863809147774, "learning_rate": 6.701108700339354e-09, "loss": 0.1062, "step": 33730 }, { "epoch": 0.9840422428379719, "grad_norm": 0.8722186341634288, "learning_rate": 6.6766798531126e-09, "loss": 0.1254, "step": 33731 }, { "epoch": 0.9840714160686155, "grad_norm": 0.7624681886954229, "learning_rate": 6.652295585085066e-09, "loss": 0.1193, "step": 33732 }, { "epoch": 0.984100589299259, "grad_norm": 0.8032247254944517, "learning_rate": 6.627955896473248e-09, "loss": 0.1081, "step": 33733 }, { "epoch": 0.9841297625299026, "grad_norm": 0.9768125206862693, "learning_rate": 6.603660787495303e-09, "loss": 0.1346, "step": 33734 }, { "epoch": 0.9841589357605461, "grad_norm": 0.8058024424483172, "learning_rate": 6.579410258367724e-09, "loss": 0.0867, "step": 33735 }, { "epoch": 0.9841881089911897, "grad_norm": 0.7482135268712199, "learning_rate": 6.5552043093070065e-09, "loss": 0.0987, "step": 33736 }, { "epoch": 0.9842172822218332, "grad_norm": 0.9304102671489258, "learning_rate": 6.531042940529642e-09, "loss": 0.1318, "step": 33737 }, { "epoch": 0.9842464554524768, "grad_norm": 0.7830598836781537, "learning_rate": 6.5069261522510145e-09, "loss": 0.1001, "step": 33738 }, { "epoch": 0.9842756286831204, "grad_norm": 0.7684164285739562, "learning_rate": 6.482853944686507e-09, "loss": 0.0917, "step": 33739 }, { "epoch": 0.9843048019137639, "grad_norm": 0.7536747962239854, "learning_rate": 6.458826318050948e-09, "loss": 0.1106, "step": 33740 }, { "epoch": 0.9843339751444075, "grad_norm": 0.7836223399991963, "learning_rate": 6.434843272558611e-09, "loss": 0.1087, "step": 33741 }, { "epoch": 0.984363148375051, "grad_norm": 0.8938947438845976, "learning_rate": 6.410904808424878e-09, "loss": 0.123, "step": 33742 }, { "epoch": 0.9843923216056946, "grad_norm": 0.7465379618144085, "learning_rate": 6.387010925861803e-09, "loss": 0.0952, "step": 33743 }, { "epoch": 0.9844214948363382, "grad_norm": 0.7405949789405286, "learning_rate": 6.363161625083103e-09, "loss": 0.1137, "step": 33744 }, { "epoch": 0.9844506680669818, "grad_norm": 0.8374986638280276, "learning_rate": 6.339356906303051e-09, "loss": 0.1294, "step": 33745 }, { "epoch": 0.9844798412976253, "grad_norm": 0.9245977154510832, "learning_rate": 6.315596769732035e-09, "loss": 0.1078, "step": 33746 }, { "epoch": 0.9845090145282689, "grad_norm": 0.78849944130388, "learning_rate": 6.291881215584328e-09, "loss": 0.1185, "step": 33747 }, { "epoch": 0.9845381877589124, "grad_norm": 0.8158094863158458, "learning_rate": 6.268210244069761e-09, "loss": 0.1102, "step": 33748 }, { "epoch": 0.984567360989556, "grad_norm": 0.8965222529472734, "learning_rate": 6.244583855400943e-09, "loss": 0.1218, "step": 33749 }, { "epoch": 0.9845965342201995, "grad_norm": 0.7050811801846438, "learning_rate": 6.2210020497882605e-09, "loss": 0.0927, "step": 33750 }, { "epoch": 0.9846257074508431, "grad_norm": 0.8155386673770202, "learning_rate": 6.197464827442657e-09, "loss": 0.1248, "step": 33751 }, { "epoch": 0.9846548806814867, "grad_norm": 0.8699865058644309, "learning_rate": 6.173972188573407e-09, "loss": 0.1034, "step": 33752 }, { "epoch": 0.9846840539121302, "grad_norm": 0.7230642282293623, "learning_rate": 6.1505241333909e-09, "loss": 0.0969, "step": 33753 }, { "epoch": 0.9847132271427738, "grad_norm": 0.7383807593153091, "learning_rate": 6.127120662104968e-09, "loss": 0.1072, "step": 33754 }, { "epoch": 0.9847424003734173, "grad_norm": 0.7064889880173199, "learning_rate": 6.103761774923778e-09, "loss": 0.1239, "step": 33755 }, { "epoch": 0.9847715736040609, "grad_norm": 0.7074108063353488, "learning_rate": 6.080447472055495e-09, "loss": 0.1309, "step": 33756 }, { "epoch": 0.9848007468347044, "grad_norm": 0.9665701456235283, "learning_rate": 6.057177753709398e-09, "loss": 0.1446, "step": 33757 }, { "epoch": 0.9848299200653481, "grad_norm": 0.7167502735280957, "learning_rate": 6.033952620092542e-09, "loss": 0.1068, "step": 33758 }, { "epoch": 0.9848590932959916, "grad_norm": 1.018252063516159, "learning_rate": 6.010772071412541e-09, "loss": 0.1252, "step": 33759 }, { "epoch": 0.9848882665266352, "grad_norm": 0.8126773964569709, "learning_rate": 5.987636107875894e-09, "loss": 0.0858, "step": 33760 }, { "epoch": 0.9849174397572787, "grad_norm": 0.7460523752464587, "learning_rate": 5.964544729689658e-09, "loss": 0.1034, "step": 33761 }, { "epoch": 0.9849466129879223, "grad_norm": 0.7929453814306174, "learning_rate": 5.941497937059227e-09, "loss": 0.1156, "step": 33762 }, { "epoch": 0.9849757862185659, "grad_norm": 0.8474348784984916, "learning_rate": 5.918495730191654e-09, "loss": 0.1034, "step": 33763 }, { "epoch": 0.9850049594492094, "grad_norm": 0.845453547210441, "learning_rate": 5.895538109291221e-09, "loss": 0.1046, "step": 33764 }, { "epoch": 0.985034132679853, "grad_norm": 0.8228342380688106, "learning_rate": 5.8726250745633205e-09, "loss": 0.1169, "step": 33765 }, { "epoch": 0.9850633059104965, "grad_norm": 0.8642893571375703, "learning_rate": 5.849756626212788e-09, "loss": 0.1035, "step": 33766 }, { "epoch": 0.9850924791411401, "grad_norm": 0.873641406793772, "learning_rate": 5.826932764442794e-09, "loss": 0.1319, "step": 33767 }, { "epoch": 0.9851216523717836, "grad_norm": 0.7918134576248088, "learning_rate": 5.804153489458175e-09, "loss": 0.0969, "step": 33768 }, { "epoch": 0.9851508256024272, "grad_norm": 0.7837560319809252, "learning_rate": 5.781418801461547e-09, "loss": 0.1002, "step": 33769 }, { "epoch": 0.9851799988330707, "grad_norm": 0.8508208071336113, "learning_rate": 5.758728700656635e-09, "loss": 0.1048, "step": 33770 }, { "epoch": 0.9852091720637144, "grad_norm": 0.8732390898780681, "learning_rate": 5.736083187244945e-09, "loss": 0.111, "step": 33771 }, { "epoch": 0.9852383452943579, "grad_norm": 0.726612294000801, "learning_rate": 5.713482261429648e-09, "loss": 0.1099, "step": 33772 }, { "epoch": 0.9852675185250015, "grad_norm": 0.6977549401593138, "learning_rate": 5.690925923412249e-09, "loss": 0.0882, "step": 33773 }, { "epoch": 0.985296691755645, "grad_norm": 1.0404520066601548, "learning_rate": 5.6684141733936996e-09, "loss": 0.1109, "step": 33774 }, { "epoch": 0.9853258649862886, "grad_norm": 0.8411315454281804, "learning_rate": 5.645947011576059e-09, "loss": 0.1122, "step": 33775 }, { "epoch": 0.9853550382169322, "grad_norm": 0.7566714896192657, "learning_rate": 5.623524438158612e-09, "loss": 0.1029, "step": 33776 }, { "epoch": 0.9853842114475757, "grad_norm": 0.7386793368375577, "learning_rate": 5.601146453341755e-09, "loss": 0.1045, "step": 33777 }, { "epoch": 0.9854133846782193, "grad_norm": 0.8420695324047094, "learning_rate": 5.578813057325883e-09, "loss": 0.1153, "step": 33778 }, { "epoch": 0.9854425579088628, "grad_norm": 0.8975325100795434, "learning_rate": 5.55652425031028e-09, "loss": 0.1079, "step": 33779 }, { "epoch": 0.9854717311395064, "grad_norm": 0.7724228338656928, "learning_rate": 5.534280032493678e-09, "loss": 0.0951, "step": 33780 }, { "epoch": 0.9855009043701499, "grad_norm": 0.9173868650710375, "learning_rate": 5.512080404074804e-09, "loss": 0.1233, "step": 33781 }, { "epoch": 0.9855300776007935, "grad_norm": 0.9017049037987892, "learning_rate": 5.489925365251836e-09, "loss": 0.1235, "step": 33782 }, { "epoch": 0.985559250831437, "grad_norm": 0.6661587110680808, "learning_rate": 5.467814916222392e-09, "loss": 0.1097, "step": 33783 }, { "epoch": 0.9855884240620806, "grad_norm": 0.6911047352843254, "learning_rate": 5.445749057184091e-09, "loss": 0.099, "step": 33784 }, { "epoch": 0.9856175972927242, "grad_norm": 0.8754468376270351, "learning_rate": 5.423727788333444e-09, "loss": 0.1084, "step": 33785 }, { "epoch": 0.9856467705233678, "grad_norm": 0.8638236782689099, "learning_rate": 5.40175110986807e-09, "loss": 0.1234, "step": 33786 }, { "epoch": 0.9856759437540114, "grad_norm": 0.8495487146954932, "learning_rate": 5.379819021982813e-09, "loss": 0.1126, "step": 33787 }, { "epoch": 0.9857051169846549, "grad_norm": 0.774478508037016, "learning_rate": 5.3579315248747376e-09, "loss": 0.1149, "step": 33788 }, { "epoch": 0.9857342902152985, "grad_norm": 0.8827195113424046, "learning_rate": 5.336088618738688e-09, "loss": 0.1014, "step": 33789 }, { "epoch": 0.985763463445942, "grad_norm": 0.6734589417483602, "learning_rate": 5.314290303770065e-09, "loss": 0.1233, "step": 33790 }, { "epoch": 0.9857926366765856, "grad_norm": 0.7914457591354623, "learning_rate": 5.292536580162599e-09, "loss": 0.0978, "step": 33791 }, { "epoch": 0.9858218099072291, "grad_norm": 0.7155567007460567, "learning_rate": 5.270827448111137e-09, "loss": 0.0885, "step": 33792 }, { "epoch": 0.9858509831378727, "grad_norm": 0.809195209325187, "learning_rate": 5.249162907809413e-09, "loss": 0.1077, "step": 33793 }, { "epoch": 0.9858801563685162, "grad_norm": 0.7076078580788532, "learning_rate": 5.227542959450604e-09, "loss": 0.1372, "step": 33794 }, { "epoch": 0.9859093295991598, "grad_norm": 0.9310975834524328, "learning_rate": 5.2059676032284454e-09, "loss": 0.1441, "step": 33795 }, { "epoch": 0.9859385028298033, "grad_norm": 0.9107781975811486, "learning_rate": 5.1844368393350054e-09, "loss": 0.1368, "step": 33796 }, { "epoch": 0.9859676760604469, "grad_norm": 0.8790947077131462, "learning_rate": 5.162950667962352e-09, "loss": 0.1132, "step": 33797 }, { "epoch": 0.9859968492910905, "grad_norm": 0.8364424603862561, "learning_rate": 5.141509089301999e-09, "loss": 0.1085, "step": 33798 }, { "epoch": 0.9860260225217341, "grad_norm": 1.0138809314595514, "learning_rate": 5.120112103546571e-09, "loss": 0.1121, "step": 33799 }, { "epoch": 0.9860551957523777, "grad_norm": 0.9221749366125264, "learning_rate": 5.09875971088647e-09, "loss": 0.1234, "step": 33800 }, { "epoch": 0.9860843689830212, "grad_norm": 0.6832511862452569, "learning_rate": 5.077451911512099e-09, "loss": 0.1254, "step": 33801 }, { "epoch": 0.9861135422136648, "grad_norm": 0.8844021458838913, "learning_rate": 5.056188705613863e-09, "loss": 0.1065, "step": 33802 }, { "epoch": 0.9861427154443083, "grad_norm": 0.8742626585704845, "learning_rate": 5.0349700933810534e-09, "loss": 0.0935, "step": 33803 }, { "epoch": 0.9861718886749519, "grad_norm": 0.6718860110054, "learning_rate": 5.013796075004074e-09, "loss": 0.1122, "step": 33804 }, { "epoch": 0.9862010619055954, "grad_norm": 0.7714586732886123, "learning_rate": 4.9926666506716624e-09, "loss": 0.1059, "step": 33805 }, { "epoch": 0.986230235136239, "grad_norm": 0.83949546271214, "learning_rate": 4.971581820572002e-09, "loss": 0.105, "step": 33806 }, { "epoch": 0.9862594083668825, "grad_norm": 0.8939256990744707, "learning_rate": 4.950541584893831e-09, "loss": 0.0902, "step": 33807 }, { "epoch": 0.9862885815975261, "grad_norm": 0.7161276479660431, "learning_rate": 4.929545943825331e-09, "loss": 0.1202, "step": 33808 }, { "epoch": 0.9863177548281696, "grad_norm": 0.9380570904591177, "learning_rate": 4.9085948975524654e-09, "loss": 0.1125, "step": 33809 }, { "epoch": 0.9863469280588132, "grad_norm": 0.8168481495001629, "learning_rate": 4.887688446263971e-09, "loss": 0.1038, "step": 33810 }, { "epoch": 0.9863761012894567, "grad_norm": 0.860385922459585, "learning_rate": 4.866826590145257e-09, "loss": 0.0977, "step": 33811 }, { "epoch": 0.9864052745201004, "grad_norm": 1.0703660474635481, "learning_rate": 4.846009329383394e-09, "loss": 0.1178, "step": 33812 }, { "epoch": 0.986434447750744, "grad_norm": 0.907132889092109, "learning_rate": 4.825236664163791e-09, "loss": 0.1323, "step": 33813 }, { "epoch": 0.9864636209813875, "grad_norm": 1.175990114333469, "learning_rate": 4.804508594671853e-09, "loss": 0.0998, "step": 33814 }, { "epoch": 0.9864927942120311, "grad_norm": 0.9312263874009149, "learning_rate": 4.783825121093544e-09, "loss": 0.1324, "step": 33815 }, { "epoch": 0.9865219674426746, "grad_norm": 0.8647671220568324, "learning_rate": 4.7631862436120506e-09, "loss": 0.1164, "step": 33816 }, { "epoch": 0.9865511406733182, "grad_norm": 0.933902115857926, "learning_rate": 4.7425919624122244e-09, "loss": 0.1037, "step": 33817 }, { "epoch": 0.9865803139039617, "grad_norm": 0.8014744857424474, "learning_rate": 4.722042277678918e-09, "loss": 0.1086, "step": 33818 }, { "epoch": 0.9866094871346053, "grad_norm": 0.7676682128542434, "learning_rate": 4.701537189594207e-09, "loss": 0.1056, "step": 33819 }, { "epoch": 0.9866386603652488, "grad_norm": 0.9070406343868757, "learning_rate": 4.681076698341836e-09, "loss": 0.1154, "step": 33820 }, { "epoch": 0.9866678335958924, "grad_norm": 0.8643679289367764, "learning_rate": 4.6606608041038785e-09, "loss": 0.1106, "step": 33821 }, { "epoch": 0.9866970068265359, "grad_norm": 0.8080790414757442, "learning_rate": 4.640289507063522e-09, "loss": 0.1027, "step": 33822 }, { "epoch": 0.9867261800571795, "grad_norm": 0.8425880321963215, "learning_rate": 4.6199628074022895e-09, "loss": 0.1249, "step": 33823 }, { "epoch": 0.986755353287823, "grad_norm": 1.1170594980059778, "learning_rate": 4.599680705301146e-09, "loss": 0.1184, "step": 33824 }, { "epoch": 0.9867845265184667, "grad_norm": 0.780067863775467, "learning_rate": 4.5794432009416134e-09, "loss": 0.0808, "step": 33825 }, { "epoch": 0.9868136997491103, "grad_norm": 0.6142058142011302, "learning_rate": 4.559250294504658e-09, "loss": 0.1092, "step": 33826 }, { "epoch": 0.9868428729797538, "grad_norm": 0.841668590756161, "learning_rate": 4.539101986170136e-09, "loss": 0.1227, "step": 33827 }, { "epoch": 0.9868720462103974, "grad_norm": 1.0237218215407604, "learning_rate": 4.518998276117903e-09, "loss": 0.1022, "step": 33828 }, { "epoch": 0.9869012194410409, "grad_norm": 0.7902743064688862, "learning_rate": 4.498939164527261e-09, "loss": 0.11, "step": 33829 }, { "epoch": 0.9869303926716845, "grad_norm": 0.7704266705221741, "learning_rate": 4.4789246515780645e-09, "loss": 0.097, "step": 33830 }, { "epoch": 0.986959565902328, "grad_norm": 1.0580313645697441, "learning_rate": 4.458954737447951e-09, "loss": 0.1272, "step": 33831 }, { "epoch": 0.9869887391329716, "grad_norm": 0.7736702002583923, "learning_rate": 4.4390294223162215e-09, "loss": 0.1096, "step": 33832 }, { "epoch": 0.9870179123636151, "grad_norm": 0.8535642495482743, "learning_rate": 4.419148706359955e-09, "loss": 0.1002, "step": 33833 }, { "epoch": 0.9870470855942587, "grad_norm": 0.8200675228157727, "learning_rate": 4.399312589757343e-09, "loss": 0.1245, "step": 33834 }, { "epoch": 0.9870762588249022, "grad_norm": 0.7445801999459258, "learning_rate": 4.379521072684911e-09, "loss": 0.0944, "step": 33835 }, { "epoch": 0.9871054320555458, "grad_norm": 0.953599559050052, "learning_rate": 4.3597741553191856e-09, "loss": 0.1251, "step": 33836 }, { "epoch": 0.9871346052861893, "grad_norm": 0.7392225373935208, "learning_rate": 4.3400718378372455e-09, "loss": 0.1225, "step": 33837 }, { "epoch": 0.9871637785168329, "grad_norm": 1.169378435526558, "learning_rate": 4.320414120415062e-09, "loss": 0.1198, "step": 33838 }, { "epoch": 0.9871929517474766, "grad_norm": 0.9150936407751873, "learning_rate": 4.30080100322694e-09, "loss": 0.0919, "step": 33839 }, { "epoch": 0.9872221249781201, "grad_norm": 0.6768329207289796, "learning_rate": 4.281232486448849e-09, "loss": 0.1148, "step": 33840 }, { "epoch": 0.9872512982087637, "grad_norm": 0.8517050775068293, "learning_rate": 4.2617085702556515e-09, "loss": 0.1131, "step": 33841 }, { "epoch": 0.9872804714394072, "grad_norm": 0.842801122120863, "learning_rate": 4.242229254821095e-09, "loss": 0.1756, "step": 33842 }, { "epoch": 0.9873096446700508, "grad_norm": 0.7570353160226188, "learning_rate": 4.222794540318931e-09, "loss": 0.1001, "step": 33843 }, { "epoch": 0.9873388179006943, "grad_norm": 0.8437804409050115, "learning_rate": 4.203404426924018e-09, "loss": 0.1161, "step": 33844 }, { "epoch": 0.9873679911313379, "grad_norm": 0.9523134795250229, "learning_rate": 4.184058914807887e-09, "loss": 0.108, "step": 33845 }, { "epoch": 0.9873971643619814, "grad_norm": 0.9045730184233152, "learning_rate": 4.164758004143732e-09, "loss": 0.1057, "step": 33846 }, { "epoch": 0.987426337592625, "grad_norm": 0.7367454289036565, "learning_rate": 4.145501695104193e-09, "loss": 0.1353, "step": 33847 }, { "epoch": 0.9874555108232685, "grad_norm": 2.4213463654490845, "learning_rate": 4.1262899878613535e-09, "loss": 0.1337, "step": 33848 }, { "epoch": 0.9874846840539121, "grad_norm": 0.7595899548234707, "learning_rate": 4.10712288258619e-09, "loss": 0.1229, "step": 33849 }, { "epoch": 0.9875138572845557, "grad_norm": 0.7828957073264476, "learning_rate": 4.088000379449675e-09, "loss": 0.1158, "step": 33850 }, { "epoch": 0.9875430305151992, "grad_norm": 0.9058873105421239, "learning_rate": 4.0689224786233385e-09, "loss": 0.1097, "step": 33851 }, { "epoch": 0.9875722037458429, "grad_norm": 0.9725555610164482, "learning_rate": 4.04988918027649e-09, "loss": 0.129, "step": 33852 }, { "epoch": 0.9876013769764864, "grad_norm": 0.8101288708307562, "learning_rate": 4.030900484580102e-09, "loss": 0.1048, "step": 33853 }, { "epoch": 0.98763055020713, "grad_norm": 0.9430845170879232, "learning_rate": 4.011956391702932e-09, "loss": 0.1083, "step": 33854 }, { "epoch": 0.9876597234377735, "grad_norm": 0.7060511064029318, "learning_rate": 3.9930569018148406e-09, "loss": 0.117, "step": 33855 }, { "epoch": 0.9876888966684171, "grad_norm": 0.9708052015115435, "learning_rate": 3.974202015083473e-09, "loss": 0.1143, "step": 33856 }, { "epoch": 0.9877180698990606, "grad_norm": 0.8593164344162948, "learning_rate": 3.955391731678138e-09, "loss": 0.1169, "step": 33857 }, { "epoch": 0.9877472431297042, "grad_norm": 0.6501196245436692, "learning_rate": 3.936626051766479e-09, "loss": 0.0969, "step": 33858 }, { "epoch": 0.9877764163603477, "grad_norm": 0.875014284433436, "learning_rate": 3.917904975515585e-09, "loss": 0.1012, "step": 33859 }, { "epoch": 0.9878055895909913, "grad_norm": 0.7746246360939394, "learning_rate": 3.8992285030930995e-09, "loss": 0.1375, "step": 33860 }, { "epoch": 0.9878347628216348, "grad_norm": 0.7526757611316315, "learning_rate": 3.880596634666112e-09, "loss": 0.1088, "step": 33861 }, { "epoch": 0.9878639360522784, "grad_norm": 0.6386235396886358, "learning_rate": 3.862009370400044e-09, "loss": 0.1131, "step": 33862 }, { "epoch": 0.987893109282922, "grad_norm": 0.7178575161832981, "learning_rate": 3.84346671046143e-09, "loss": 0.101, "step": 33863 }, { "epoch": 0.9879222825135655, "grad_norm": 0.8271433401077359, "learning_rate": 3.824968655015138e-09, "loss": 0.0895, "step": 33864 }, { "epoch": 0.9879514557442091, "grad_norm": 1.0395034701488943, "learning_rate": 3.806515204227701e-09, "loss": 0.1209, "step": 33865 }, { "epoch": 0.9879806289748527, "grad_norm": 0.8263482701477021, "learning_rate": 3.788106358262322e-09, "loss": 0.1164, "step": 33866 }, { "epoch": 0.9880098022054963, "grad_norm": 0.8350036899593295, "learning_rate": 3.769742117284425e-09, "loss": 0.0985, "step": 33867 }, { "epoch": 0.9880389754361398, "grad_norm": 0.8196861699741413, "learning_rate": 3.751422481457212e-09, "loss": 0.0894, "step": 33868 }, { "epoch": 0.9880681486667834, "grad_norm": 0.7286077158393757, "learning_rate": 3.733147450944996e-09, "loss": 0.1221, "step": 33869 }, { "epoch": 0.9880973218974269, "grad_norm": 1.1734134072376297, "learning_rate": 3.714917025910425e-09, "loss": 0.1434, "step": 33870 }, { "epoch": 0.9881264951280705, "grad_norm": 0.7874856809417258, "learning_rate": 3.6967312065161466e-09, "loss": 0.0946, "step": 33871 }, { "epoch": 0.988155668358714, "grad_norm": 0.8778410972133301, "learning_rate": 3.678589992925363e-09, "loss": 0.1083, "step": 33872 }, { "epoch": 0.9881848415893576, "grad_norm": 1.598198545475215, "learning_rate": 3.6604933852985023e-09, "loss": 0.1026, "step": 33873 }, { "epoch": 0.9882140148200012, "grad_norm": 0.8277714856641315, "learning_rate": 3.642441383798767e-09, "loss": 0.0936, "step": 33874 }, { "epoch": 0.9882431880506447, "grad_norm": 0.833821378908683, "learning_rate": 3.6244339885865843e-09, "loss": 0.0986, "step": 33875 }, { "epoch": 0.9882723612812883, "grad_norm": 0.6860716202291299, "learning_rate": 3.606471199822381e-09, "loss": 0.1097, "step": 33876 }, { "epoch": 0.9883015345119318, "grad_norm": 0.9047231834060151, "learning_rate": 3.588553017666585e-09, "loss": 0.1197, "step": 33877 }, { "epoch": 0.9883307077425754, "grad_norm": 0.8772978706130129, "learning_rate": 3.5706794422801783e-09, "loss": 0.1137, "step": 33878 }, { "epoch": 0.988359880973219, "grad_norm": 1.0249440848752416, "learning_rate": 3.5528504738213676e-09, "loss": 0.1081, "step": 33879 }, { "epoch": 0.9883890542038626, "grad_norm": 0.7789866297536088, "learning_rate": 3.535066112450025e-09, "loss": 0.0979, "step": 33880 }, { "epoch": 0.9884182274345061, "grad_norm": 0.7906564491597248, "learning_rate": 3.5173263583254678e-09, "loss": 0.0884, "step": 33881 }, { "epoch": 0.9884474006651497, "grad_norm": 1.0393986718624706, "learning_rate": 3.4996312116047925e-09, "loss": 0.1049, "step": 33882 }, { "epoch": 0.9884765738957932, "grad_norm": 0.7313767163172219, "learning_rate": 3.481980672446761e-09, "loss": 0.0933, "step": 33883 }, { "epoch": 0.9885057471264368, "grad_norm": 0.85539088527848, "learning_rate": 3.4643747410090244e-09, "loss": 0.1167, "step": 33884 }, { "epoch": 0.9885349203570803, "grad_norm": 0.8256152225779988, "learning_rate": 3.44681341744868e-09, "loss": 0.1062, "step": 33885 }, { "epoch": 0.9885640935877239, "grad_norm": 0.7836846465773687, "learning_rate": 3.429296701922269e-09, "loss": 0.1139, "step": 33886 }, { "epoch": 0.9885932668183675, "grad_norm": 0.8079916059808888, "learning_rate": 3.4118245945863326e-09, "loss": 0.1251, "step": 33887 }, { "epoch": 0.988622440049011, "grad_norm": 1.049243222210774, "learning_rate": 3.3943970955968573e-09, "loss": 0.1053, "step": 33888 }, { "epoch": 0.9886516132796546, "grad_norm": 0.8836078354958317, "learning_rate": 3.377014205109275e-09, "loss": 0.1203, "step": 33889 }, { "epoch": 0.9886807865102981, "grad_norm": 0.6035320234324897, "learning_rate": 3.3596759232790156e-09, "loss": 0.1092, "step": 33890 }, { "epoch": 0.9887099597409417, "grad_norm": 0.9012561199472223, "learning_rate": 3.342382250260956e-09, "loss": 0.1408, "step": 33891 }, { "epoch": 0.9887391329715852, "grad_norm": 0.8166263530626607, "learning_rate": 3.325133186209417e-09, "loss": 0.1165, "step": 33892 }, { "epoch": 0.9887683062022289, "grad_norm": 0.8651909650123857, "learning_rate": 3.30792873127761e-09, "loss": 0.1001, "step": 33893 }, { "epoch": 0.9887974794328724, "grad_norm": 0.7425522189351392, "learning_rate": 3.29076888562041e-09, "loss": 0.1255, "step": 33894 }, { "epoch": 0.988826652663516, "grad_norm": 1.1253179179711035, "learning_rate": 3.2736536493904734e-09, "loss": 0.1039, "step": 33895 }, { "epoch": 0.9888558258941595, "grad_norm": 0.7628373725503663, "learning_rate": 3.256583022739901e-09, "loss": 0.1015, "step": 33896 }, { "epoch": 0.9888849991248031, "grad_norm": 0.8887192641238454, "learning_rate": 3.239557005822458e-09, "loss": 0.1296, "step": 33897 }, { "epoch": 0.9889141723554467, "grad_norm": 0.801320497822789, "learning_rate": 3.222575598789135e-09, "loss": 0.1055, "step": 33898 }, { "epoch": 0.9889433455860902, "grad_norm": 0.7180598202244861, "learning_rate": 3.2056388017914773e-09, "loss": 0.1054, "step": 33899 }, { "epoch": 0.9889725188167338, "grad_norm": 0.7580971729199544, "learning_rate": 3.188746614981586e-09, "loss": 0.1161, "step": 33900 }, { "epoch": 0.9890016920473773, "grad_norm": 0.8562341518330403, "learning_rate": 3.1718990385093408e-09, "loss": 0.115, "step": 33901 }, { "epoch": 0.9890308652780209, "grad_norm": 0.7894581821337003, "learning_rate": 3.155096072525732e-09, "loss": 0.1185, "step": 33902 }, { "epoch": 0.9890600385086644, "grad_norm": 0.8305931487248671, "learning_rate": 3.1383377171806396e-09, "loss": 0.1109, "step": 33903 }, { "epoch": 0.989089211739308, "grad_norm": 0.710003354133609, "learning_rate": 3.1216239726233888e-09, "loss": 0.0931, "step": 33904 }, { "epoch": 0.9891183849699515, "grad_norm": 0.945893025959921, "learning_rate": 3.1049548390038596e-09, "loss": 0.1062, "step": 33905 }, { "epoch": 0.9891475582005951, "grad_norm": 0.8633522359130263, "learning_rate": 3.0883303164702673e-09, "loss": 0.1167, "step": 33906 }, { "epoch": 0.9891767314312387, "grad_norm": 0.8842800265805906, "learning_rate": 3.071750405170826e-09, "loss": 0.0941, "step": 33907 }, { "epoch": 0.9892059046618823, "grad_norm": 0.8391003658343408, "learning_rate": 3.0552151052543057e-09, "loss": 0.0986, "step": 33908 }, { "epoch": 0.9892350778925258, "grad_norm": 0.840982645146856, "learning_rate": 3.038724416867811e-09, "loss": 0.1361, "step": 33909 }, { "epoch": 0.9892642511231694, "grad_norm": 0.7738698120393673, "learning_rate": 3.0222783401590016e-09, "loss": 0.092, "step": 33910 }, { "epoch": 0.989293424353813, "grad_norm": 0.734905388385287, "learning_rate": 3.0058768752738723e-09, "loss": 0.1124, "step": 33911 }, { "epoch": 0.9893225975844565, "grad_norm": 0.8107093289877106, "learning_rate": 2.989520022360082e-09, "loss": 0.1281, "step": 33912 }, { "epoch": 0.9893517708151001, "grad_norm": 0.8404966074340152, "learning_rate": 2.9732077815625148e-09, "loss": 0.1082, "step": 33913 }, { "epoch": 0.9893809440457436, "grad_norm": 0.8900145930179061, "learning_rate": 2.956940153027166e-09, "loss": 0.1088, "step": 33914 }, { "epoch": 0.9894101172763872, "grad_norm": 0.839804287940214, "learning_rate": 2.9407171368994738e-09, "loss": 0.1132, "step": 33915 }, { "epoch": 0.9894392905070307, "grad_norm": 0.7373948432771645, "learning_rate": 2.9245387333243225e-09, "loss": 0.1297, "step": 33916 }, { "epoch": 0.9894684637376743, "grad_norm": 0.6906506282981626, "learning_rate": 2.9084049424460414e-09, "loss": 0.127, "step": 33917 }, { "epoch": 0.9894976369683178, "grad_norm": 0.8405123828936328, "learning_rate": 2.8923157644084044e-09, "loss": 0.1146, "step": 33918 }, { "epoch": 0.9895268101989614, "grad_norm": 0.8178881314785621, "learning_rate": 2.876271199355185e-09, "loss": 0.1149, "step": 33919 }, { "epoch": 0.989555983429605, "grad_norm": 0.7996511022800938, "learning_rate": 2.8602712474301575e-09, "loss": 0.1123, "step": 33920 }, { "epoch": 0.9895851566602486, "grad_norm": 0.7828003158065426, "learning_rate": 2.8443159087754304e-09, "loss": 0.1056, "step": 33921 }, { "epoch": 0.9896143298908922, "grad_norm": 0.6241510006751408, "learning_rate": 2.828405183533667e-09, "loss": 0.1072, "step": 33922 }, { "epoch": 0.9896435031215357, "grad_norm": 0.9955227614630167, "learning_rate": 2.8125390718469757e-09, "loss": 0.1123, "step": 33923 }, { "epoch": 0.9896726763521793, "grad_norm": 0.7808665715637741, "learning_rate": 2.7967175738569107e-09, "loss": 0.0969, "step": 33924 }, { "epoch": 0.9897018495828228, "grad_norm": 0.7306766518997789, "learning_rate": 2.780940689705025e-09, "loss": 0.0956, "step": 33925 }, { "epoch": 0.9897310228134664, "grad_norm": 0.6744496999579083, "learning_rate": 2.765208419531762e-09, "loss": 0.1179, "step": 33926 }, { "epoch": 0.9897601960441099, "grad_norm": 0.7961666872297498, "learning_rate": 2.7495207634781194e-09, "loss": 0.1129, "step": 33927 }, { "epoch": 0.9897893692747535, "grad_norm": 0.8423488570580006, "learning_rate": 2.733877721683986e-09, "loss": 0.1114, "step": 33928 }, { "epoch": 0.989818542505397, "grad_norm": 0.7405981694754284, "learning_rate": 2.7182792942881396e-09, "loss": 0.0894, "step": 33929 }, { "epoch": 0.9898477157360406, "grad_norm": 0.8079617588290383, "learning_rate": 2.7027254814310232e-09, "loss": 0.1029, "step": 33930 }, { "epoch": 0.9898768889666841, "grad_norm": 0.6998068980562837, "learning_rate": 2.6872162832508596e-09, "loss": 0.1023, "step": 33931 }, { "epoch": 0.9899060621973277, "grad_norm": 1.3290014117581306, "learning_rate": 2.671751699886427e-09, "loss": 0.1042, "step": 33932 }, { "epoch": 0.9899352354279712, "grad_norm": 0.8834074317425398, "learning_rate": 2.656331731475392e-09, "loss": 0.1083, "step": 33933 }, { "epoch": 0.9899644086586149, "grad_norm": 0.8090366186211602, "learning_rate": 2.640956378155979e-09, "loss": 0.1065, "step": 33934 }, { "epoch": 0.9899935818892585, "grad_norm": 0.8713800432799568, "learning_rate": 2.625625640064744e-09, "loss": 0.1172, "step": 33935 }, { "epoch": 0.990022755119902, "grad_norm": 0.9535086899752458, "learning_rate": 2.610339517339355e-09, "loss": 0.1052, "step": 33936 }, { "epoch": 0.9900519283505456, "grad_norm": 0.8555863263579646, "learning_rate": 2.595098010115815e-09, "loss": 0.1121, "step": 33937 }, { "epoch": 0.9900811015811891, "grad_norm": 0.9805459250371799, "learning_rate": 2.579901118530126e-09, "loss": 0.1012, "step": 33938 }, { "epoch": 0.9901102748118327, "grad_norm": 0.7345744517421733, "learning_rate": 2.5647488427182897e-09, "loss": 0.0979, "step": 33939 }, { "epoch": 0.9901394480424762, "grad_norm": 0.8835896243778248, "learning_rate": 2.549641182815199e-09, "loss": 0.1323, "step": 33940 }, { "epoch": 0.9901686212731198, "grad_norm": 0.7344805468122377, "learning_rate": 2.5345781389557454e-09, "loss": 0.1114, "step": 33941 }, { "epoch": 0.9901977945037633, "grad_norm": 0.809929318413112, "learning_rate": 2.5195597112748215e-09, "loss": 0.1137, "step": 33942 }, { "epoch": 0.9902269677344069, "grad_norm": 0.8039203377031532, "learning_rate": 2.5045858999062087e-09, "loss": 0.0856, "step": 33943 }, { "epoch": 0.9902561409650504, "grad_norm": 0.779359803495864, "learning_rate": 2.4896567049836896e-09, "loss": 0.1144, "step": 33944 }, { "epoch": 0.990285314195694, "grad_norm": 0.790561357947606, "learning_rate": 2.4747721266404902e-09, "loss": 0.1029, "step": 33945 }, { "epoch": 0.9903144874263375, "grad_norm": 0.908733578638023, "learning_rate": 2.4599321650098375e-09, "loss": 0.106, "step": 33946 }, { "epoch": 0.9903436606569812, "grad_norm": 0.765747348911198, "learning_rate": 2.445136820223293e-09, "loss": 0.0964, "step": 33947 }, { "epoch": 0.9903728338876248, "grad_norm": 0.7649930331908215, "learning_rate": 2.4303860924140833e-09, "loss": 0.1098, "step": 33948 }, { "epoch": 0.9904020071182683, "grad_norm": 0.9362867907583359, "learning_rate": 2.4156799817132147e-09, "loss": 0.1468, "step": 33949 }, { "epoch": 0.9904311803489119, "grad_norm": 0.8898718990061929, "learning_rate": 2.401018488251694e-09, "loss": 0.1186, "step": 33950 }, { "epoch": 0.9904603535795554, "grad_norm": 0.8409265066223599, "learning_rate": 2.3864016121616375e-09, "loss": 0.1167, "step": 33951 }, { "epoch": 0.990489526810199, "grad_norm": 0.7395345854650033, "learning_rate": 2.3718293535723857e-09, "loss": 0.1166, "step": 33952 }, { "epoch": 0.9905187000408425, "grad_norm": 1.015248079779123, "learning_rate": 2.3573017126143904e-09, "loss": 0.1145, "step": 33953 }, { "epoch": 0.9905478732714861, "grad_norm": 0.8702693898561817, "learning_rate": 2.3428186894169925e-09, "loss": 0.1175, "step": 33954 }, { "epoch": 0.9905770465021296, "grad_norm": 0.6597727036415594, "learning_rate": 2.328380284110643e-09, "loss": 0.1121, "step": 33955 }, { "epoch": 0.9906062197327732, "grad_norm": 0.7357749183320261, "learning_rate": 2.3139864968230175e-09, "loss": 0.1052, "step": 33956 }, { "epoch": 0.9906353929634167, "grad_norm": 0.7268518988741933, "learning_rate": 2.299637327682902e-09, "loss": 0.1021, "step": 33957 }, { "epoch": 0.9906645661940603, "grad_norm": 0.6513487882001722, "learning_rate": 2.2853327768190823e-09, "loss": 0.11, "step": 33958 }, { "epoch": 0.9906937394247038, "grad_norm": 0.7685793890117875, "learning_rate": 2.2710728443586793e-09, "loss": 0.1185, "step": 33959 }, { "epoch": 0.9907229126553474, "grad_norm": 0.8430703910225354, "learning_rate": 2.2568575304288133e-09, "loss": 0.1157, "step": 33960 }, { "epoch": 0.9907520858859911, "grad_norm": 0.7342130147692949, "learning_rate": 2.2426868351566046e-09, "loss": 0.1079, "step": 33961 }, { "epoch": 0.9907812591166346, "grad_norm": 0.9779710870850913, "learning_rate": 2.2285607586686186e-09, "loss": 0.1297, "step": 33962 }, { "epoch": 0.9908104323472782, "grad_norm": 1.01625764127674, "learning_rate": 2.214479301091421e-09, "loss": 0.0999, "step": 33963 }, { "epoch": 0.9908396055779217, "grad_norm": 0.7672220115317753, "learning_rate": 2.200442462549912e-09, "loss": 0.0967, "step": 33964 }, { "epoch": 0.9908687788085653, "grad_norm": 0.7909276021297114, "learning_rate": 2.1864502431701017e-09, "loss": 0.1148, "step": 33965 }, { "epoch": 0.9908979520392088, "grad_norm": 0.7674713643682542, "learning_rate": 2.172502643076335e-09, "loss": 0.1228, "step": 33966 }, { "epoch": 0.9909271252698524, "grad_norm": 1.086493672651407, "learning_rate": 2.158599662392957e-09, "loss": 0.1008, "step": 33967 }, { "epoch": 0.9909562985004959, "grad_norm": 0.9305529523729485, "learning_rate": 2.144741301245423e-09, "loss": 0.1152, "step": 33968 }, { "epoch": 0.9909854717311395, "grad_norm": 0.691799122538135, "learning_rate": 2.1309275597558577e-09, "loss": 0.1137, "step": 33969 }, { "epoch": 0.991014644961783, "grad_norm": 0.8863295115076664, "learning_rate": 2.1171584380486055e-09, "loss": 0.1317, "step": 33970 }, { "epoch": 0.9910438181924266, "grad_norm": 0.9360840751832611, "learning_rate": 2.1034339362463464e-09, "loss": 0.0951, "step": 33971 }, { "epoch": 0.9910729914230701, "grad_norm": 1.0275039878745016, "learning_rate": 2.0897540544712046e-09, "loss": 0.1065, "step": 33972 }, { "epoch": 0.9911021646537137, "grad_norm": 0.7275166231646761, "learning_rate": 2.0761187928458606e-09, "loss": 0.1172, "step": 33973 }, { "epoch": 0.9911313378843574, "grad_norm": 0.8463512795866125, "learning_rate": 2.062528151491883e-09, "loss": 0.1153, "step": 33974 }, { "epoch": 0.9911605111150009, "grad_norm": 0.8650386158175781, "learning_rate": 2.048982130530286e-09, "loss": 0.1101, "step": 33975 }, { "epoch": 0.9911896843456445, "grad_norm": 0.6942209457183107, "learning_rate": 2.0354807300826397e-09, "loss": 0.1337, "step": 33976 }, { "epoch": 0.991218857576288, "grad_norm": 0.8645080700651425, "learning_rate": 2.0220239502688478e-09, "loss": 0.1056, "step": 33977 }, { "epoch": 0.9912480308069316, "grad_norm": 0.8021416694734592, "learning_rate": 2.0086117912093696e-09, "loss": 0.1084, "step": 33978 }, { "epoch": 0.9912772040375751, "grad_norm": 0.8155716191224088, "learning_rate": 1.995244253024109e-09, "loss": 0.1047, "step": 33979 }, { "epoch": 0.9913063772682187, "grad_norm": 1.0162152574593812, "learning_rate": 1.98192133583186e-09, "loss": 0.1124, "step": 33980 }, { "epoch": 0.9913355504988622, "grad_norm": 0.899792569027654, "learning_rate": 1.9686430397519718e-09, "loss": 0.099, "step": 33981 }, { "epoch": 0.9913647237295058, "grad_norm": 1.0551539411187696, "learning_rate": 1.955409364902683e-09, "loss": 0.1034, "step": 33982 }, { "epoch": 0.9913938969601493, "grad_norm": 0.6042602662003457, "learning_rate": 1.942220311402787e-09, "loss": 0.1029, "step": 33983 }, { "epoch": 0.9914230701907929, "grad_norm": 0.9069850981424438, "learning_rate": 1.929075879369413e-09, "loss": 0.1101, "step": 33984 }, { "epoch": 0.9914522434214365, "grad_norm": 0.9710260042606573, "learning_rate": 1.9159760689202447e-09, "loss": 0.0983, "step": 33985 }, { "epoch": 0.99148141665208, "grad_norm": 1.0138261162568778, "learning_rate": 1.9029208801718547e-09, "loss": 0.1077, "step": 33986 }, { "epoch": 0.9915105898827236, "grad_norm": 0.8687092665657794, "learning_rate": 1.8899103132413722e-09, "loss": 0.0993, "step": 33987 }, { "epoch": 0.9915397631133672, "grad_norm": 0.8323700945122122, "learning_rate": 1.87694436824426e-09, "loss": 0.1134, "step": 33988 }, { "epoch": 0.9915689363440108, "grad_norm": 1.1966702473090336, "learning_rate": 1.864023045297092e-09, "loss": 0.1147, "step": 33989 }, { "epoch": 0.9915981095746543, "grad_norm": 0.6946174160030281, "learning_rate": 1.851146344514776e-09, "loss": 0.1128, "step": 33990 }, { "epoch": 0.9916272828052979, "grad_norm": 0.6748738127061272, "learning_rate": 1.8383142660116647e-09, "loss": 0.1011, "step": 33991 }, { "epoch": 0.9916564560359414, "grad_norm": 0.7796551964750823, "learning_rate": 1.825526809903222e-09, "loss": 0.1007, "step": 33992 }, { "epoch": 0.991685629266585, "grad_norm": 0.7387923943880071, "learning_rate": 1.8127839763038003e-09, "loss": 0.0923, "step": 33993 }, { "epoch": 0.9917148024972285, "grad_norm": 0.7548773703453022, "learning_rate": 1.8000857653260872e-09, "loss": 0.1067, "step": 33994 }, { "epoch": 0.9917439757278721, "grad_norm": 0.8038998741498162, "learning_rate": 1.787432177083881e-09, "loss": 0.1052, "step": 33995 }, { "epoch": 0.9917731489585156, "grad_norm": 0.7773612754787679, "learning_rate": 1.7748232116909792e-09, "loss": 0.105, "step": 33996 }, { "epoch": 0.9918023221891592, "grad_norm": 1.0876727764687901, "learning_rate": 1.7622588692589593e-09, "loss": 0.1251, "step": 33997 }, { "epoch": 0.9918314954198028, "grad_norm": 0.9704825951939312, "learning_rate": 1.749739149900509e-09, "loss": 0.109, "step": 33998 }, { "epoch": 0.9918606686504463, "grad_norm": 0.7351005748774584, "learning_rate": 1.7372640537266506e-09, "loss": 0.1007, "step": 33999 }, { "epoch": 0.9918898418810899, "grad_norm": 0.7330137275697113, "learning_rate": 1.7248335808500715e-09, "loss": 0.0972, "step": 34000 }, { "epoch": 0.9919190151117335, "grad_norm": 1.1005420867252866, "learning_rate": 1.7124477313801292e-09, "loss": 0.1002, "step": 34001 }, { "epoch": 0.9919481883423771, "grad_norm": 0.7729468141397632, "learning_rate": 1.7001065054289557e-09, "loss": 0.1141, "step": 34002 }, { "epoch": 0.9919773615730206, "grad_norm": 0.7921442542462045, "learning_rate": 1.687809903105908e-09, "loss": 0.1244, "step": 34003 }, { "epoch": 0.9920065348036642, "grad_norm": 0.8050298833471278, "learning_rate": 1.6755579245208986e-09, "loss": 0.1251, "step": 34004 }, { "epoch": 0.9920357080343077, "grad_norm": 0.7913452179414457, "learning_rate": 1.6633505697832842e-09, "loss": 0.1231, "step": 34005 }, { "epoch": 0.9920648812649513, "grad_norm": 0.8524388524735569, "learning_rate": 1.6511878390018664e-09, "loss": 0.1131, "step": 34006 }, { "epoch": 0.9920940544955948, "grad_norm": 0.7207385234256999, "learning_rate": 1.6390697322854476e-09, "loss": 0.1034, "step": 34007 }, { "epoch": 0.9921232277262384, "grad_norm": 0.892106125992658, "learning_rate": 1.6269962497422742e-09, "loss": 0.1023, "step": 34008 }, { "epoch": 0.992152400956882, "grad_norm": 0.8920521212411752, "learning_rate": 1.6149673914800379e-09, "loss": 0.1108, "step": 34009 }, { "epoch": 0.9921815741875255, "grad_norm": 0.8646521052733187, "learning_rate": 1.6029831576064303e-09, "loss": 0.1082, "step": 34010 }, { "epoch": 0.9922107474181691, "grad_norm": 0.9169541937022215, "learning_rate": 1.591043548228033e-09, "loss": 0.1195, "step": 34011 }, { "epoch": 0.9922399206488126, "grad_norm": 0.7139755905377889, "learning_rate": 1.5791485634514269e-09, "loss": 0.0878, "step": 34012 }, { "epoch": 0.9922690938794562, "grad_norm": 0.9725231084367686, "learning_rate": 1.5672982033831941e-09, "loss": 0.1181, "step": 34013 }, { "epoch": 0.9922982671100997, "grad_norm": 0.7994798428439593, "learning_rate": 1.5554924681288052e-09, "loss": 0.1043, "step": 34014 }, { "epoch": 0.9923274403407434, "grad_norm": 0.7636802346871505, "learning_rate": 1.543731357793732e-09, "loss": 0.1302, "step": 34015 }, { "epoch": 0.9923566135713869, "grad_norm": 0.7368600712341854, "learning_rate": 1.532014872483445e-09, "loss": 0.1101, "step": 34016 }, { "epoch": 0.9923857868020305, "grad_norm": 0.7183991162784704, "learning_rate": 1.5203430123011953e-09, "loss": 0.0941, "step": 34017 }, { "epoch": 0.992414960032674, "grad_norm": 0.8763857078528436, "learning_rate": 1.5087157773530092e-09, "loss": 0.1088, "step": 34018 }, { "epoch": 0.9924441332633176, "grad_norm": 0.7429516001128845, "learning_rate": 1.4971331677410273e-09, "loss": 0.1064, "step": 34019 }, { "epoch": 0.9924733064939611, "grad_norm": 0.7613951574229304, "learning_rate": 1.4855951835696102e-09, "loss": 0.1174, "step": 34020 }, { "epoch": 0.9925024797246047, "grad_norm": 0.7740688048253014, "learning_rate": 1.4741018249420091e-09, "loss": 0.098, "step": 34021 }, { "epoch": 0.9925316529552483, "grad_norm": 0.8195838171484119, "learning_rate": 1.4626530919598093e-09, "loss": 0.1332, "step": 34022 }, { "epoch": 0.9925608261858918, "grad_norm": 1.2659801867341571, "learning_rate": 1.4512489847262612e-09, "loss": 0.085, "step": 34023 }, { "epoch": 0.9925899994165354, "grad_norm": 0.885384170892487, "learning_rate": 1.4398895033423954e-09, "loss": 0.1159, "step": 34024 }, { "epoch": 0.9926191726471789, "grad_norm": 0.9042474170601904, "learning_rate": 1.4285746479097973e-09, "loss": 0.1043, "step": 34025 }, { "epoch": 0.9926483458778225, "grad_norm": 0.7205575069692667, "learning_rate": 1.4173044185300522e-09, "loss": 0.1019, "step": 34026 }, { "epoch": 0.992677519108466, "grad_norm": 0.7391088876462139, "learning_rate": 1.4060788153030802e-09, "loss": 0.0908, "step": 34027 }, { "epoch": 0.9927066923391097, "grad_norm": 0.8612764153386636, "learning_rate": 1.3948978383293565e-09, "loss": 0.121, "step": 34028 }, { "epoch": 0.9927358655697532, "grad_norm": 0.6940616724774372, "learning_rate": 1.3837614877088013e-09, "loss": 0.1133, "step": 34029 }, { "epoch": 0.9927650388003968, "grad_norm": 0.8593610006395457, "learning_rate": 1.3726697635407792e-09, "loss": 0.1256, "step": 34030 }, { "epoch": 0.9927942120310403, "grad_norm": 0.7378808583037828, "learning_rate": 1.3616226659246557e-09, "loss": 0.1249, "step": 34031 }, { "epoch": 0.9928233852616839, "grad_norm": 1.4935313770643186, "learning_rate": 1.35062019495813e-09, "loss": 0.1145, "step": 34032 }, { "epoch": 0.9928525584923275, "grad_norm": 0.7204585551341403, "learning_rate": 1.339662350740012e-09, "loss": 0.0982, "step": 34033 }, { "epoch": 0.992881731722971, "grad_norm": 0.6170457621250698, "learning_rate": 1.3287491333685564e-09, "loss": 0.1038, "step": 34034 }, { "epoch": 0.9929109049536146, "grad_norm": 0.7836008503839021, "learning_rate": 1.317880542940353e-09, "loss": 0.1004, "step": 34035 }, { "epoch": 0.9929400781842581, "grad_norm": 0.7978269990252792, "learning_rate": 1.3070565795531009e-09, "loss": 0.128, "step": 34036 }, { "epoch": 0.9929692514149017, "grad_norm": 1.0070737496571687, "learning_rate": 1.2962772433028347e-09, "loss": 0.0966, "step": 34037 }, { "epoch": 0.9929984246455452, "grad_norm": 1.0961206417910445, "learning_rate": 1.2855425342861439e-09, "loss": 0.1446, "step": 34038 }, { "epoch": 0.9930275978761888, "grad_norm": 0.8363924212235179, "learning_rate": 1.2748524525990624e-09, "loss": 0.1109, "step": 34039 }, { "epoch": 0.9930567711068323, "grad_norm": 0.6959908934163784, "learning_rate": 1.2642069983370698e-09, "loss": 0.1214, "step": 34040 }, { "epoch": 0.9930859443374759, "grad_norm": 0.66674851157858, "learning_rate": 1.2536061715945346e-09, "loss": 0.0952, "step": 34041 }, { "epoch": 0.9931151175681195, "grad_norm": 0.9546778122469106, "learning_rate": 1.2430499724663813e-09, "loss": 0.1113, "step": 34042 }, { "epoch": 0.9931442907987631, "grad_norm": 0.6442948551810961, "learning_rate": 1.232538401047534e-09, "loss": 0.1114, "step": 34043 }, { "epoch": 0.9931734640294066, "grad_norm": 0.6836414226523234, "learning_rate": 1.2220714574306957e-09, "loss": 0.0965, "step": 34044 }, { "epoch": 0.9932026372600502, "grad_norm": 0.779165312058414, "learning_rate": 1.211649141710236e-09, "loss": 0.1001, "step": 34045 }, { "epoch": 0.9932318104906938, "grad_norm": 0.8468336953767481, "learning_rate": 1.2012714539788585e-09, "loss": 0.1185, "step": 34046 }, { "epoch": 0.9932609837213373, "grad_norm": 0.6729363274783582, "learning_rate": 1.190938394328711e-09, "loss": 0.1008, "step": 34047 }, { "epoch": 0.9932901569519809, "grad_norm": 0.7754525527136992, "learning_rate": 1.1806499628530531e-09, "loss": 0.1276, "step": 34048 }, { "epoch": 0.9933193301826244, "grad_norm": 0.8506307230635818, "learning_rate": 1.1704061596434779e-09, "loss": 0.1208, "step": 34049 }, { "epoch": 0.993348503413268, "grad_norm": 1.023780179462189, "learning_rate": 1.1602069847904685e-09, "loss": 0.1013, "step": 34050 }, { "epoch": 0.9933776766439115, "grad_norm": 0.6617346585707174, "learning_rate": 1.1500524383861734e-09, "loss": 0.1192, "step": 34051 }, { "epoch": 0.9934068498745551, "grad_norm": 0.9134584862081634, "learning_rate": 1.1399425205210758e-09, "loss": 0.1371, "step": 34052 }, { "epoch": 0.9934360231051986, "grad_norm": 0.7653441795668655, "learning_rate": 1.1298772312851036e-09, "loss": 0.1011, "step": 34053 }, { "epoch": 0.9934651963358422, "grad_norm": 0.717620650852394, "learning_rate": 1.1198565707681852e-09, "loss": 0.1062, "step": 34054 }, { "epoch": 0.9934943695664858, "grad_norm": 1.0198982643740935, "learning_rate": 1.1098805390602486e-09, "loss": 0.1016, "step": 34055 }, { "epoch": 0.9935235427971294, "grad_norm": 0.8518773308694678, "learning_rate": 1.0999491362495563e-09, "loss": 0.1226, "step": 34056 }, { "epoch": 0.993552716027773, "grad_norm": 0.7623908621128579, "learning_rate": 1.0900623624254814e-09, "loss": 0.0971, "step": 34057 }, { "epoch": 0.9935818892584165, "grad_norm": 0.7211693048706922, "learning_rate": 1.0802202176757314e-09, "loss": 0.1168, "step": 34058 }, { "epoch": 0.9936110624890601, "grad_norm": 0.8603851488590624, "learning_rate": 1.0704227020885694e-09, "loss": 0.1154, "step": 34059 }, { "epoch": 0.9936402357197036, "grad_norm": 1.0251546069850566, "learning_rate": 1.0606698157511475e-09, "loss": 0.1058, "step": 34060 }, { "epoch": 0.9936694089503472, "grad_norm": 0.7998863832192737, "learning_rate": 1.0509615587506183e-09, "loss": 0.1007, "step": 34061 }, { "epoch": 0.9936985821809907, "grad_norm": 0.8498521076196066, "learning_rate": 1.0412979311741345e-09, "loss": 0.127, "step": 34062 }, { "epoch": 0.9937277554116343, "grad_norm": 0.8093687737428069, "learning_rate": 1.031678933107183e-09, "loss": 0.1018, "step": 34063 }, { "epoch": 0.9937569286422778, "grad_norm": 0.7804226128275563, "learning_rate": 1.0221045646363615e-09, "loss": 0.1114, "step": 34064 }, { "epoch": 0.9937861018729214, "grad_norm": 0.9194000153692374, "learning_rate": 1.0125748258471569e-09, "loss": 0.1374, "step": 34065 }, { "epoch": 0.9938152751035649, "grad_norm": 0.7797226722297699, "learning_rate": 1.0030897168239462e-09, "loss": 0.1355, "step": 34066 }, { "epoch": 0.9938444483342085, "grad_norm": 1.0513752125003564, "learning_rate": 9.936492376516616e-10, "loss": 0.1329, "step": 34067 }, { "epoch": 0.993873621564852, "grad_norm": 0.6836143583423224, "learning_rate": 9.842533884146798e-10, "loss": 0.0866, "step": 34068 }, { "epoch": 0.9939027947954957, "grad_norm": 0.8211784816659222, "learning_rate": 9.749021691973781e-10, "loss": 0.1356, "step": 34069 }, { "epoch": 0.9939319680261393, "grad_norm": 1.1690471816891186, "learning_rate": 9.655955800824679e-10, "loss": 0.1149, "step": 34070 }, { "epoch": 0.9939611412567828, "grad_norm": 0.8040608420154217, "learning_rate": 9.563336211532159e-10, "loss": 0.1108, "step": 34071 }, { "epoch": 0.9939903144874264, "grad_norm": 0.9887116452727273, "learning_rate": 9.471162924928888e-10, "loss": 0.1148, "step": 34072 }, { "epoch": 0.9940194877180699, "grad_norm": 0.9079191114011983, "learning_rate": 9.379435941830884e-10, "loss": 0.1247, "step": 34073 }, { "epoch": 0.9940486609487135, "grad_norm": 0.8907520479518357, "learning_rate": 9.288155263059706e-10, "loss": 0.1216, "step": 34074 }, { "epoch": 0.994077834179357, "grad_norm": 0.9711221010827501, "learning_rate": 9.19732088942582e-10, "loss": 0.1117, "step": 34075 }, { "epoch": 0.9941070074100006, "grad_norm": 0.8999582269382925, "learning_rate": 9.106932821750791e-10, "loss": 0.1195, "step": 34076 }, { "epoch": 0.9941361806406441, "grad_norm": 0.6759357107902308, "learning_rate": 9.01699106083398e-10, "loss": 0.101, "step": 34077 }, { "epoch": 0.9941653538712877, "grad_norm": 0.743747522764742, "learning_rate": 8.927495607480296e-10, "loss": 0.0999, "step": 34078 }, { "epoch": 0.9941945271019312, "grad_norm": 0.8807387907516453, "learning_rate": 8.838446462483552e-10, "loss": 0.1026, "step": 34079 }, { "epoch": 0.9942237003325748, "grad_norm": 0.7764928226275828, "learning_rate": 8.749843626648657e-10, "loss": 0.1084, "step": 34080 }, { "epoch": 0.9942528735632183, "grad_norm": 0.9501748476200179, "learning_rate": 8.661687100758321e-10, "loss": 0.1233, "step": 34081 }, { "epoch": 0.994282046793862, "grad_norm": 0.8364287403975764, "learning_rate": 8.573976885600799e-10, "loss": 0.1073, "step": 34082 }, { "epoch": 0.9943112200245056, "grad_norm": 0.9066436897953483, "learning_rate": 8.486712981964352e-10, "loss": 0.0953, "step": 34083 }, { "epoch": 0.9943403932551491, "grad_norm": 1.050949421943745, "learning_rate": 8.399895390626134e-10, "loss": 0.119, "step": 34084 }, { "epoch": 0.9943695664857927, "grad_norm": 0.9541080393983301, "learning_rate": 8.3135241123522e-10, "loss": 0.1053, "step": 34085 }, { "epoch": 0.9943987397164362, "grad_norm": 0.9015100872206822, "learning_rate": 8.22759914793081e-10, "loss": 0.1043, "step": 34086 }, { "epoch": 0.9944279129470798, "grad_norm": 0.8138820065716954, "learning_rate": 8.142120498111361e-10, "loss": 0.1118, "step": 34087 }, { "epoch": 0.9944570861777233, "grad_norm": 0.9281246681070685, "learning_rate": 8.057088163671011e-10, "loss": 0.1044, "step": 34088 }, { "epoch": 0.9944862594083669, "grad_norm": 0.7806980332141217, "learning_rate": 7.972502145359163e-10, "loss": 0.0911, "step": 34089 }, { "epoch": 0.9945154326390104, "grad_norm": 0.7832774255404693, "learning_rate": 7.888362443936315e-10, "loss": 0.1124, "step": 34090 }, { "epoch": 0.994544605869654, "grad_norm": 0.7879711075617483, "learning_rate": 7.80466906015187e-10, "loss": 0.116, "step": 34091 }, { "epoch": 0.9945737791002975, "grad_norm": 0.7983604595183628, "learning_rate": 7.721421994749678e-10, "loss": 0.1008, "step": 34092 }, { "epoch": 0.9946029523309411, "grad_norm": 0.8004284092173197, "learning_rate": 7.638621248479139e-10, "loss": 0.1329, "step": 34093 }, { "epoch": 0.9946321255615846, "grad_norm": 0.6984064590860364, "learning_rate": 7.556266822078551e-10, "loss": 0.1055, "step": 34094 }, { "epoch": 0.9946612987922282, "grad_norm": 0.8736392918962326, "learning_rate": 7.47435871628066e-10, "loss": 0.1161, "step": 34095 }, { "epoch": 0.9946904720228719, "grad_norm": 1.0975838347093683, "learning_rate": 7.392896931818217e-10, "loss": 0.0972, "step": 34096 }, { "epoch": 0.9947196452535154, "grad_norm": 0.7537191566906429, "learning_rate": 7.311881469418414e-10, "loss": 0.1029, "step": 34097 }, { "epoch": 0.994748818484159, "grad_norm": 0.7944820515650836, "learning_rate": 7.231312329802897e-10, "loss": 0.1108, "step": 34098 }, { "epoch": 0.9947779917148025, "grad_norm": 0.7906896917838068, "learning_rate": 7.151189513687762e-10, "loss": 0.1271, "step": 34099 }, { "epoch": 0.9948071649454461, "grad_norm": 0.9187474403789473, "learning_rate": 7.071513021800202e-10, "loss": 0.105, "step": 34100 }, { "epoch": 0.9948363381760896, "grad_norm": 0.6981101245865255, "learning_rate": 6.99228285483966e-10, "loss": 0.1082, "step": 34101 }, { "epoch": 0.9948655114067332, "grad_norm": 0.8331510702176763, "learning_rate": 6.913499013516678e-10, "loss": 0.1208, "step": 34102 }, { "epoch": 0.9948946846373767, "grad_norm": 0.7078335438354388, "learning_rate": 6.835161498536246e-10, "loss": 0.1047, "step": 34103 }, { "epoch": 0.9949238578680203, "grad_norm": 0.6408704157548614, "learning_rate": 6.757270310597808e-10, "loss": 0.1158, "step": 34104 }, { "epoch": 0.9949530310986638, "grad_norm": 0.7426096241429958, "learning_rate": 6.679825450395249e-10, "loss": 0.1349, "step": 34105 }, { "epoch": 0.9949822043293074, "grad_norm": 1.0062563407269012, "learning_rate": 6.602826918622463e-10, "loss": 0.103, "step": 34106 }, { "epoch": 0.995011377559951, "grad_norm": 0.7157635929457596, "learning_rate": 6.526274715967784e-10, "loss": 0.0945, "step": 34107 }, { "epoch": 0.9950405507905945, "grad_norm": 0.8810473775563002, "learning_rate": 6.450168843108451e-10, "loss": 0.1082, "step": 34108 }, { "epoch": 0.9950697240212382, "grad_norm": 0.9297848423141039, "learning_rate": 6.37450930072725e-10, "loss": 0.1081, "step": 34109 }, { "epoch": 0.9950988972518817, "grad_norm": 1.0768216423786734, "learning_rate": 6.299296089501417e-10, "loss": 0.1393, "step": 34110 }, { "epoch": 0.9951280704825253, "grad_norm": 0.9025208740283891, "learning_rate": 6.224529210097086e-10, "loss": 0.1133, "step": 34111 }, { "epoch": 0.9951572437131688, "grad_norm": 0.7784503035521988, "learning_rate": 6.150208663191492e-10, "loss": 0.1065, "step": 34112 }, { "epoch": 0.9951864169438124, "grad_norm": 0.885480820943917, "learning_rate": 6.076334449439669e-10, "loss": 0.1164, "step": 34113 }, { "epoch": 0.9952155901744559, "grad_norm": 0.7198937868159827, "learning_rate": 6.002906569502199e-10, "loss": 0.0943, "step": 34114 }, { "epoch": 0.9952447634050995, "grad_norm": 0.7557393367661378, "learning_rate": 5.929925024039663e-10, "loss": 0.1015, "step": 34115 }, { "epoch": 0.995273936635743, "grad_norm": 0.8592429764339267, "learning_rate": 5.85738981369599e-10, "loss": 0.1027, "step": 34116 }, { "epoch": 0.9953031098663866, "grad_norm": 1.0326153929875528, "learning_rate": 5.785300939126215e-10, "loss": 0.1145, "step": 34117 }, { "epoch": 0.9953322830970301, "grad_norm": 1.131867533188281, "learning_rate": 5.713658400968714e-10, "loss": 0.1282, "step": 34118 }, { "epoch": 0.9953614563276737, "grad_norm": 0.8826275390906089, "learning_rate": 5.642462199867415e-10, "loss": 0.1201, "step": 34119 }, { "epoch": 0.9953906295583173, "grad_norm": 0.7075084976064901, "learning_rate": 5.571712336455149e-10, "loss": 0.0991, "step": 34120 }, { "epoch": 0.9954198027889608, "grad_norm": 1.005407846859457, "learning_rate": 5.501408811364739e-10, "loss": 0.09, "step": 34121 }, { "epoch": 0.9954489760196044, "grad_norm": 1.0504259355479635, "learning_rate": 5.431551625223463e-10, "loss": 0.1022, "step": 34122 }, { "epoch": 0.995478149250248, "grad_norm": 0.8581664648419247, "learning_rate": 5.362140778647495e-10, "loss": 0.0963, "step": 34123 }, { "epoch": 0.9955073224808916, "grad_norm": 0.7794675314709526, "learning_rate": 5.293176272269662e-10, "loss": 0.1077, "step": 34124 }, { "epoch": 0.9955364957115351, "grad_norm": 0.8426456159811151, "learning_rate": 5.224658106700586e-10, "loss": 0.1015, "step": 34125 }, { "epoch": 0.9955656689421787, "grad_norm": 0.6585376206474072, "learning_rate": 5.15658628255089e-10, "loss": 0.1106, "step": 34126 }, { "epoch": 0.9955948421728222, "grad_norm": 0.7160620826893653, "learning_rate": 5.088960800425646e-10, "loss": 0.1218, "step": 34127 }, { "epoch": 0.9956240154034658, "grad_norm": 0.9661865435285912, "learning_rate": 5.021781660935477e-10, "loss": 0.1331, "step": 34128 }, { "epoch": 0.9956531886341093, "grad_norm": 0.7367700762748168, "learning_rate": 4.95504886467435e-10, "loss": 0.1049, "step": 34129 }, { "epoch": 0.9956823618647529, "grad_norm": 0.8153560725549933, "learning_rate": 4.888762412236236e-10, "loss": 0.1138, "step": 34130 }, { "epoch": 0.9957115350953964, "grad_norm": 0.810661963401519, "learning_rate": 4.822922304220656e-10, "loss": 0.1305, "step": 34131 }, { "epoch": 0.99574070832604, "grad_norm": 0.8503076546543817, "learning_rate": 4.757528541210476e-10, "loss": 0.1035, "step": 34132 }, { "epoch": 0.9957698815566836, "grad_norm": 0.898054712318861, "learning_rate": 4.692581123788564e-10, "loss": 0.1124, "step": 34133 }, { "epoch": 0.9957990547873271, "grad_norm": 0.8063388173715647, "learning_rate": 4.628080052537787e-10, "loss": 0.1094, "step": 34134 }, { "epoch": 0.9958282280179707, "grad_norm": 0.928304486485139, "learning_rate": 4.5640253280299084e-10, "loss": 0.1102, "step": 34135 }, { "epoch": 0.9958574012486143, "grad_norm": 0.7833583767426001, "learning_rate": 4.500416950842246e-10, "loss": 0.1039, "step": 34136 }, { "epoch": 0.9958865744792579, "grad_norm": 0.7515909937925331, "learning_rate": 4.437254921541012e-10, "loss": 0.1062, "step": 34137 }, { "epoch": 0.9959157477099014, "grad_norm": 0.7038696971438514, "learning_rate": 4.3745392406868705e-10, "loss": 0.0882, "step": 34138 }, { "epoch": 0.995944920940545, "grad_norm": 0.8055358729293154, "learning_rate": 4.312269908840483e-10, "loss": 0.0885, "step": 34139 }, { "epoch": 0.9959740941711885, "grad_norm": 0.6796352115610131, "learning_rate": 4.2504469265625124e-10, "loss": 0.0983, "step": 34140 }, { "epoch": 0.9960032674018321, "grad_norm": 0.7239870930231954, "learning_rate": 4.1890702944025195e-10, "loss": 0.1019, "step": 34141 }, { "epoch": 0.9960324406324756, "grad_norm": 0.9938002737382183, "learning_rate": 4.1281400129045136e-10, "loss": 0.1002, "step": 34142 }, { "epoch": 0.9960616138631192, "grad_norm": 0.7459366688459611, "learning_rate": 4.0676560826180544e-10, "loss": 0.0918, "step": 34143 }, { "epoch": 0.9960907870937628, "grad_norm": 0.7463550455355336, "learning_rate": 4.0076185040760497e-10, "loss": 0.1104, "step": 34144 }, { "epoch": 0.9961199603244063, "grad_norm": 0.7699426460408898, "learning_rate": 3.948027277822508e-10, "loss": 0.1001, "step": 34145 }, { "epoch": 0.9961491335550499, "grad_norm": 0.7900420373373584, "learning_rate": 3.888882404384786e-10, "loss": 0.1078, "step": 34146 }, { "epoch": 0.9961783067856934, "grad_norm": 1.1382369025979637, "learning_rate": 3.8301838842957905e-10, "loss": 0.1013, "step": 34147 }, { "epoch": 0.996207480016337, "grad_norm": 1.3269699975868081, "learning_rate": 3.771931718071775e-10, "loss": 0.1096, "step": 34148 }, { "epoch": 0.9962366532469805, "grad_norm": 0.7587070204404274, "learning_rate": 3.714125906234545e-10, "loss": 0.1235, "step": 34149 }, { "epoch": 0.9962658264776242, "grad_norm": 0.8446558768476956, "learning_rate": 3.656766449305904e-10, "loss": 0.1095, "step": 34150 }, { "epoch": 0.9962949997082677, "grad_norm": 0.8949934761445224, "learning_rate": 3.599853347796556e-10, "loss": 0.1127, "step": 34151 }, { "epoch": 0.9963241729389113, "grad_norm": 1.0200940014131596, "learning_rate": 3.5433866022116516e-10, "loss": 0.1294, "step": 34152 }, { "epoch": 0.9963533461695548, "grad_norm": 0.90758597346904, "learning_rate": 3.4873662130563425e-10, "loss": 0.1054, "step": 34153 }, { "epoch": 0.9963825194001984, "grad_norm": 0.7143673488871289, "learning_rate": 3.4317921808302293e-10, "loss": 0.1097, "step": 34154 }, { "epoch": 0.996411692630842, "grad_norm": 0.6712702705515735, "learning_rate": 3.3766645060273605e-10, "loss": 0.1004, "step": 34155 }, { "epoch": 0.9964408658614855, "grad_norm": 0.9627409957114645, "learning_rate": 3.3219831891417863e-10, "loss": 0.1132, "step": 34156 }, { "epoch": 0.9964700390921291, "grad_norm": 0.8457140099250648, "learning_rate": 3.2677482306675554e-10, "loss": 0.1004, "step": 34157 }, { "epoch": 0.9964992123227726, "grad_norm": 0.6630682088470647, "learning_rate": 3.213959631082064e-10, "loss": 0.1104, "step": 34158 }, { "epoch": 0.9965283855534162, "grad_norm": 1.7996736791987658, "learning_rate": 3.160617390862708e-10, "loss": 0.1126, "step": 34159 }, { "epoch": 0.9965575587840597, "grad_norm": 0.9877985269285867, "learning_rate": 3.107721510497985e-10, "loss": 0.092, "step": 34160 }, { "epoch": 0.9965867320147033, "grad_norm": 0.6839949995421452, "learning_rate": 3.055271990448638e-10, "loss": 0.0887, "step": 34161 }, { "epoch": 0.9966159052453468, "grad_norm": 0.7442010323329299, "learning_rate": 3.003268831180961e-10, "loss": 0.1097, "step": 34162 }, { "epoch": 0.9966450784759904, "grad_norm": 0.8848179072753259, "learning_rate": 2.951712033172349e-10, "loss": 0.1165, "step": 34163 }, { "epoch": 0.996674251706634, "grad_norm": 0.7790254380643167, "learning_rate": 2.900601596872443e-10, "loss": 0.0826, "step": 34164 }, { "epoch": 0.9967034249372776, "grad_norm": 0.7864299898033266, "learning_rate": 2.8499375227419854e-10, "loss": 0.1104, "step": 34165 }, { "epoch": 0.9967325981679211, "grad_norm": 0.8027462154254484, "learning_rate": 2.7997198112306167e-10, "loss": 0.1231, "step": 34166 }, { "epoch": 0.9967617713985647, "grad_norm": 1.0899454076932567, "learning_rate": 2.749948462787977e-10, "loss": 0.1233, "step": 34167 }, { "epoch": 0.9967909446292083, "grad_norm": 0.8119848674925277, "learning_rate": 2.700623477858155e-10, "loss": 0.1195, "step": 34168 }, { "epoch": 0.9968201178598518, "grad_norm": 0.8075655322922792, "learning_rate": 2.651744856885241e-10, "loss": 0.1332, "step": 34169 }, { "epoch": 0.9968492910904954, "grad_norm": 0.6774971443211152, "learning_rate": 2.6033126003022213e-10, "loss": 0.1109, "step": 34170 }, { "epoch": 0.9968784643211389, "grad_norm": 0.8483895412969615, "learning_rate": 2.555326708536532e-10, "loss": 0.1066, "step": 34171 }, { "epoch": 0.9969076375517825, "grad_norm": 0.8093775376395698, "learning_rate": 2.5077871820267107e-10, "loss": 0.1264, "step": 34172 }, { "epoch": 0.996936810782426, "grad_norm": 0.8841579981638696, "learning_rate": 2.460694021189092e-10, "loss": 0.1116, "step": 34173 }, { "epoch": 0.9969659840130696, "grad_norm": 0.8846754073052753, "learning_rate": 2.414047226445559e-10, "loss": 0.1044, "step": 34174 }, { "epoch": 0.9969951572437131, "grad_norm": 0.9412410205833899, "learning_rate": 2.3678467982179986e-10, "loss": 0.1066, "step": 34175 }, { "epoch": 0.9970243304743567, "grad_norm": 0.8271434007851702, "learning_rate": 2.3220927369116408e-10, "loss": 0.1208, "step": 34176 }, { "epoch": 0.9970535037050003, "grad_norm": 0.8512201912170679, "learning_rate": 2.2767850429372684e-10, "loss": 0.1079, "step": 34177 }, { "epoch": 0.9970826769356439, "grad_norm": 0.7671519287134442, "learning_rate": 2.231923716705664e-10, "loss": 0.1049, "step": 34178 }, { "epoch": 0.9971118501662875, "grad_norm": 0.8886894100532023, "learning_rate": 2.1875087586054056e-10, "loss": 0.1123, "step": 34179 }, { "epoch": 0.997141023396931, "grad_norm": 0.8438997153914551, "learning_rate": 2.1435401690472756e-10, "loss": 0.1188, "step": 34180 }, { "epoch": 0.9971701966275746, "grad_norm": 0.8500831871218869, "learning_rate": 2.1000179484087501e-10, "loss": 0.1054, "step": 34181 }, { "epoch": 0.9971993698582181, "grad_norm": 0.7365222705634256, "learning_rate": 2.0569420970895092e-10, "loss": 0.0879, "step": 34182 }, { "epoch": 0.9972285430888617, "grad_norm": 0.9923526276086613, "learning_rate": 2.01431261547258e-10, "loss": 0.1208, "step": 34183 }, { "epoch": 0.9972577163195052, "grad_norm": 0.8316420117386609, "learning_rate": 1.9721295039298872e-10, "loss": 0.1179, "step": 34184 }, { "epoch": 0.9972868895501488, "grad_norm": 1.4848637927713826, "learning_rate": 1.9303927628500085e-10, "loss": 0.1157, "step": 34185 }, { "epoch": 0.9973160627807923, "grad_norm": 0.7938500757021467, "learning_rate": 1.889102392599318e-10, "loss": 0.109, "step": 34186 }, { "epoch": 0.9973452360114359, "grad_norm": 0.8120227142644452, "learning_rate": 1.848258393544189e-10, "loss": 0.1175, "step": 34187 }, { "epoch": 0.9973744092420794, "grad_norm": 0.8087073132513186, "learning_rate": 1.8078607660565463e-10, "loss": 0.1075, "step": 34188 }, { "epoch": 0.997403582472723, "grad_norm": 0.7444642727653622, "learning_rate": 1.767909510491661e-10, "loss": 0.1328, "step": 34189 }, { "epoch": 0.9974327557033665, "grad_norm": 0.8060351901233608, "learning_rate": 1.728404627204805e-10, "loss": 0.1091, "step": 34190 }, { "epoch": 0.9974619289340102, "grad_norm": 0.7364525489511471, "learning_rate": 1.6893461165512494e-10, "loss": 0.1106, "step": 34191 }, { "epoch": 0.9974911021646538, "grad_norm": 0.9649021370790292, "learning_rate": 1.6507339788807141e-10, "loss": 0.1192, "step": 34192 }, { "epoch": 0.9975202753952973, "grad_norm": 0.8226085288924465, "learning_rate": 1.6125682145373688e-10, "loss": 0.1243, "step": 34193 }, { "epoch": 0.9975494486259409, "grad_norm": 0.7496164783485849, "learning_rate": 1.5748488238653824e-10, "loss": 0.0993, "step": 34194 }, { "epoch": 0.9975786218565844, "grad_norm": 0.9370912822704288, "learning_rate": 1.5375758071922707e-10, "loss": 0.1268, "step": 34195 }, { "epoch": 0.997607795087228, "grad_norm": 0.7862567085951591, "learning_rate": 1.500749164856652e-10, "loss": 0.1092, "step": 34196 }, { "epoch": 0.9976369683178715, "grad_norm": 0.7339864850693282, "learning_rate": 1.4643688971860416e-10, "loss": 0.0988, "step": 34197 }, { "epoch": 0.9976661415485151, "grad_norm": 0.750307678578148, "learning_rate": 1.4284350045079555e-10, "loss": 0.104, "step": 34198 }, { "epoch": 0.9976953147791586, "grad_norm": 0.7161423311215511, "learning_rate": 1.3929474871388072e-10, "loss": 0.0992, "step": 34199 }, { "epoch": 0.9977244880098022, "grad_norm": 0.9210874526488155, "learning_rate": 1.3579063454005614e-10, "loss": 0.1004, "step": 34200 }, { "epoch": 0.9977536612404457, "grad_norm": 0.7399212394152471, "learning_rate": 1.3233115796040807e-10, "loss": 0.116, "step": 34201 }, { "epoch": 0.9977828344710893, "grad_norm": 0.8879265071603267, "learning_rate": 1.2891631900546764e-10, "loss": 0.1083, "step": 34202 }, { "epoch": 0.9978120077017328, "grad_norm": 0.8688691488447081, "learning_rate": 1.2554611770632107e-10, "loss": 0.0913, "step": 34203 }, { "epoch": 0.9978411809323765, "grad_norm": 0.8755796021913493, "learning_rate": 1.2222055409238932e-10, "loss": 0.1039, "step": 34204 }, { "epoch": 0.9978703541630201, "grad_norm": 0.7932573367705784, "learning_rate": 1.1893962819364836e-10, "loss": 0.1133, "step": 34205 }, { "epoch": 0.9978995273936636, "grad_norm": 0.8643948103149971, "learning_rate": 1.1570334003951911e-10, "loss": 0.132, "step": 34206 }, { "epoch": 0.9979287006243072, "grad_norm": 0.8684557441525104, "learning_rate": 1.1251168965886738e-10, "loss": 0.1012, "step": 34207 }, { "epoch": 0.9979578738549507, "grad_norm": 0.7384427021177166, "learning_rate": 1.0936467708055898e-10, "loss": 0.1112, "step": 34208 }, { "epoch": 0.9979870470855943, "grad_norm": 0.7491017461603411, "learning_rate": 1.0626230233179436e-10, "loss": 0.1113, "step": 34209 }, { "epoch": 0.9980162203162378, "grad_norm": 0.8501798348215374, "learning_rate": 1.032045654408842e-10, "loss": 0.112, "step": 34210 }, { "epoch": 0.9980453935468814, "grad_norm": 0.708422280305766, "learning_rate": 1.0019146643502898e-10, "loss": 0.1288, "step": 34211 }, { "epoch": 0.9980745667775249, "grad_norm": 0.892736371624153, "learning_rate": 9.722300534087403e-11, "loss": 0.11, "step": 34212 }, { "epoch": 0.9981037400081685, "grad_norm": 0.7949983304967112, "learning_rate": 9.429918218561984e-11, "loss": 0.1268, "step": 34213 }, { "epoch": 0.998132913238812, "grad_norm": 0.9191063415013071, "learning_rate": 9.141999699424641e-11, "loss": 0.1047, "step": 34214 }, { "epoch": 0.9981620864694556, "grad_norm": 0.7401892187233303, "learning_rate": 8.858544979339912e-11, "loss": 0.116, "step": 34215 }, { "epoch": 0.9981912597000991, "grad_norm": 0.7011227202244126, "learning_rate": 8.579554060805795e-11, "loss": 0.1206, "step": 34216 }, { "epoch": 0.9982204329307427, "grad_norm": 0.8806883928171629, "learning_rate": 8.305026946320294e-11, "loss": 0.1183, "step": 34217 }, { "epoch": 0.9982496061613864, "grad_norm": 0.9655641910202492, "learning_rate": 8.034963638325898e-11, "loss": 0.1123, "step": 34218 }, { "epoch": 0.9982787793920299, "grad_norm": 0.5808085748149094, "learning_rate": 7.769364139265101e-11, "loss": 0.096, "step": 34219 }, { "epoch": 0.9983079526226735, "grad_norm": 0.8818674122350973, "learning_rate": 7.508228451469369e-11, "loss": 0.1306, "step": 34220 }, { "epoch": 0.998337125853317, "grad_norm": 1.0098508528559604, "learning_rate": 7.251556577270169e-11, "loss": 0.1023, "step": 34221 }, { "epoch": 0.9983662990839606, "grad_norm": 0.8681199510423564, "learning_rate": 6.999348518943461e-11, "loss": 0.117, "step": 34222 }, { "epoch": 0.9983954723146041, "grad_norm": 0.7221477738461264, "learning_rate": 6.751604278820711e-11, "loss": 0.1011, "step": 34223 }, { "epoch": 0.9984246455452477, "grad_norm": 0.7846265418677336, "learning_rate": 6.508323859011345e-11, "loss": 0.1131, "step": 34224 }, { "epoch": 0.9984538187758912, "grad_norm": 0.8736913819740687, "learning_rate": 6.269507261791318e-11, "loss": 0.0924, "step": 34225 }, { "epoch": 0.9984829920065348, "grad_norm": 0.7938951168846574, "learning_rate": 6.035154489214546e-11, "loss": 0.1098, "step": 34226 }, { "epoch": 0.9985121652371783, "grad_norm": 0.7860922547822048, "learning_rate": 5.805265543390448e-11, "loss": 0.1054, "step": 34227 }, { "epoch": 0.9985413384678219, "grad_norm": 0.9894812049323295, "learning_rate": 5.57984042637294e-11, "loss": 0.1172, "step": 34228 }, { "epoch": 0.9985705116984654, "grad_norm": 0.9294474570565269, "learning_rate": 5.3588791402159335e-11, "loss": 0.1061, "step": 34229 }, { "epoch": 0.998599684929109, "grad_norm": 0.9494862180189568, "learning_rate": 5.142381686806808e-11, "loss": 0.1169, "step": 34230 }, { "epoch": 0.9986288581597527, "grad_norm": 0.7637484304705885, "learning_rate": 4.930348068143964e-11, "loss": 0.1089, "step": 34231 }, { "epoch": 0.9986580313903962, "grad_norm": 0.8862950308683506, "learning_rate": 4.722778286114782e-11, "loss": 0.1041, "step": 34232 }, { "epoch": 0.9986872046210398, "grad_norm": 0.7745062735066747, "learning_rate": 4.519672342551129e-11, "loss": 0.099, "step": 34233 }, { "epoch": 0.9987163778516833, "grad_norm": 0.7984060139393339, "learning_rate": 4.321030239340385e-11, "loss": 0.111, "step": 34234 }, { "epoch": 0.9987455510823269, "grad_norm": 0.85411201766429, "learning_rate": 4.1268519780923724e-11, "loss": 0.1055, "step": 34235 }, { "epoch": 0.9987747243129704, "grad_norm": 0.7978049144042352, "learning_rate": 3.9371375606944706e-11, "loss": 0.0955, "step": 34236 }, { "epoch": 0.998803897543614, "grad_norm": 0.854740245108905, "learning_rate": 3.751886988812015e-11, "loss": 0.1158, "step": 34237 }, { "epoch": 0.9988330707742575, "grad_norm": 0.6876753949975288, "learning_rate": 3.571100264054827e-11, "loss": 0.0801, "step": 34238 }, { "epoch": 0.9988622440049011, "grad_norm": 0.8745558354823828, "learning_rate": 3.3947773880327326e-11, "loss": 0.109, "step": 34239 }, { "epoch": 0.9988914172355446, "grad_norm": 0.8988813743641949, "learning_rate": 3.222918362355554e-11, "loss": 0.1088, "step": 34240 }, { "epoch": 0.9989205904661882, "grad_norm": 0.7468545358649729, "learning_rate": 3.055523188522091e-11, "loss": 0.1209, "step": 34241 }, { "epoch": 0.9989497636968317, "grad_norm": 0.9124898635304158, "learning_rate": 2.8925918680866582e-11, "loss": 0.1367, "step": 34242 }, { "epoch": 0.9989789369274753, "grad_norm": 0.8657235373221649, "learning_rate": 2.7341244024370328e-11, "loss": 0.1143, "step": 34243 }, { "epoch": 0.9990081101581189, "grad_norm": 0.6457685587430032, "learning_rate": 2.5801207930720163e-11, "loss": 0.0945, "step": 34244 }, { "epoch": 0.9990372833887625, "grad_norm": 0.9287976316252068, "learning_rate": 2.430581041268365e-11, "loss": 0.0939, "step": 34245 }, { "epoch": 0.9990664566194061, "grad_norm": 0.7213050276263969, "learning_rate": 2.2855051484138578e-11, "loss": 0.0892, "step": 34246 }, { "epoch": 0.9990956298500496, "grad_norm": 0.8124233881580213, "learning_rate": 2.1448931157852515e-11, "loss": 0.1103, "step": 34247 }, { "epoch": 0.9991248030806932, "grad_norm": 0.7611849642952453, "learning_rate": 2.0087449446593022e-11, "loss": 0.1106, "step": 34248 }, { "epoch": 0.9991539763113367, "grad_norm": 0.8055971271842521, "learning_rate": 1.877060636201744e-11, "loss": 0.0997, "step": 34249 }, { "epoch": 0.9991831495419803, "grad_norm": 0.8440487769792574, "learning_rate": 1.7498401916893338e-11, "loss": 0.1072, "step": 34250 }, { "epoch": 0.9992123227726238, "grad_norm": 0.8052304979021084, "learning_rate": 1.627083612176783e-11, "loss": 0.1079, "step": 34251 }, { "epoch": 0.9992414960032674, "grad_norm": 0.8352534142633674, "learning_rate": 1.508790898774315e-11, "loss": 0.1297, "step": 34252 }, { "epoch": 0.999270669233911, "grad_norm": 0.9629333553281815, "learning_rate": 1.3949620525366414e-11, "loss": 0.1061, "step": 34253 }, { "epoch": 0.9992998424645545, "grad_norm": 0.794338304710376, "learning_rate": 1.2855970744629632e-11, "loss": 0.1075, "step": 34254 }, { "epoch": 0.999329015695198, "grad_norm": 0.6942865165536916, "learning_rate": 1.1806959655524807e-11, "loss": 0.1117, "step": 34255 }, { "epoch": 0.9993581889258416, "grad_norm": 0.7866379169387789, "learning_rate": 1.0802587268043951e-11, "loss": 0.1234, "step": 34256 }, { "epoch": 0.9993873621564852, "grad_norm": 1.0778192537085203, "learning_rate": 9.842853589958623e-12, "loss": 0.1085, "step": 34257 }, { "epoch": 0.9994165353871288, "grad_norm": 0.8202622444032077, "learning_rate": 8.927758630705719e-12, "loss": 0.113, "step": 34258 }, { "epoch": 0.9994457086177724, "grad_norm": 0.8612818136871452, "learning_rate": 8.057302398056799e-12, "loss": 0.138, "step": 34259 }, { "epoch": 0.9994748818484159, "grad_norm": 0.7891601589283768, "learning_rate": 7.2314848997834255e-12, "loss": 0.1096, "step": 34260 }, { "epoch": 0.9995040550790595, "grad_norm": 0.9345282474650779, "learning_rate": 6.450306143102047e-12, "loss": 0.1197, "step": 34261 }, { "epoch": 0.999533228309703, "grad_norm": 0.7278489069436884, "learning_rate": 5.713766135784227e-12, "loss": 0.117, "step": 34262 }, { "epoch": 0.9995624015403466, "grad_norm": 0.8756420881385565, "learning_rate": 5.021864883381078e-12, "loss": 0.1264, "step": 34263 }, { "epoch": 0.9995915747709901, "grad_norm": 0.8340731296432814, "learning_rate": 4.374602393109051e-12, "loss": 0.0869, "step": 34264 }, { "epoch": 0.9996207480016337, "grad_norm": 0.8608276081109758, "learning_rate": 3.771978669409038e-12, "loss": 0.1258, "step": 34265 }, { "epoch": 0.9996499212322772, "grad_norm": 0.8681531460238366, "learning_rate": 3.2139937189423765e-12, "loss": 0.1148, "step": 34266 }, { "epoch": 0.9996790944629208, "grad_norm": 0.8631633579570969, "learning_rate": 2.7006475461499593e-12, "loss": 0.1019, "step": 34267 }, { "epoch": 0.9997082676935644, "grad_norm": 0.9156021269009654, "learning_rate": 2.23194015602779e-12, "loss": 0.0848, "step": 34268 }, { "epoch": 0.9997374409242079, "grad_norm": 0.7409696551956314, "learning_rate": 1.8078715519065371e-12, "loss": 0.1001, "step": 34269 }, { "epoch": 0.9997666141548515, "grad_norm": 0.9556775396839313, "learning_rate": 1.4284417382270933e-12, "loss": 0.1064, "step": 34270 }, { "epoch": 0.999795787385495, "grad_norm": 0.9079732754228492, "learning_rate": 1.0936507177650158e-12, "loss": 0.1223, "step": 34271 }, { "epoch": 0.9998249606161387, "grad_norm": 0.8537987482525036, "learning_rate": 8.034984944060853e-13, "loss": 0.1044, "step": 34272 }, { "epoch": 0.9998541338467822, "grad_norm": 0.8281308361387377, "learning_rate": 5.579850698156363e-13, "loss": 0.1137, "step": 34273 }, { "epoch": 0.9998833070774258, "grad_norm": 0.7654669263751744, "learning_rate": 3.571104473243381e-13, "loss": 0.1015, "step": 34274 }, { "epoch": 0.9999124803080693, "grad_norm": 0.8681306608926576, "learning_rate": 2.008746274873019e-13, "loss": 0.0917, "step": 34275 }, { "epoch": 0.9999416535387129, "grad_norm": 0.7804431762812664, "learning_rate": 8.927761252497391e-14, "loss": 0.1167, "step": 34276 }, { "epoch": 0.9999708267693564, "grad_norm": 1.0700611561542164, "learning_rate": 2.23194029924656e-14, "loss": 0.1194, "step": 34277 }, { "epoch": 1.0, "grad_norm": 0.7014689983147184, "learning_rate": 0.0, "loss": 0.1066, "step": 34278 }, { "epoch": 1.0, "step": 34278, "total_flos": 3.423170722804531e+16, "train_loss": 0.15108584606978395, "train_runtime": 135140.0077, "train_samples_per_second": 32.466, "train_steps_per_second": 0.254 } ], "logging_steps": 1.0, "max_steps": 34278, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.423170722804531e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }