{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.345255297111364, "eval_steps": 500, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.67233993580809e-05, "grad_norm": 1.601123332977295, "learning_rate": 2.777777777777778e-08, "loss": 2.0435, "step": 1 }, { "epoch": 0.0001534467987161618, "grad_norm": 1.6826177835464478, "learning_rate": 5.555555555555556e-08, "loss": 2.1183, "step": 2 }, { "epoch": 0.00023017019807424266, "grad_norm": 1.6449086666107178, "learning_rate": 8.333333333333334e-08, "loss": 2.0816, "step": 3 }, { "epoch": 0.0003068935974323236, "grad_norm": 1.6576600074768066, "learning_rate": 1.1111111111111112e-07, "loss": 2.0943, "step": 4 }, { "epoch": 0.0003836169967904045, "grad_norm": 1.606613278388977, "learning_rate": 1.3888888888888888e-07, "loss": 2.0324, "step": 5 }, { "epoch": 0.0004603403961484853, "grad_norm": 1.5725020170211792, "learning_rate": 1.6666666666666668e-07, "loss": 2.1093, "step": 6 }, { "epoch": 0.0005370637955065663, "grad_norm": 1.5602861642837524, "learning_rate": 1.9444444444444447e-07, "loss": 2.1319, "step": 7 }, { "epoch": 0.0006137871948646472, "grad_norm": 1.8084050416946411, "learning_rate": 2.2222222222222224e-07, "loss": 2.0788, "step": 8 }, { "epoch": 0.0006905105942227281, "grad_norm": 1.6459357738494873, "learning_rate": 2.5000000000000004e-07, "loss": 1.9793, "step": 9 }, { "epoch": 0.000767233993580809, "grad_norm": 1.4469834566116333, "learning_rate": 2.7777777777777776e-07, "loss": 2.0455, "step": 10 }, { "epoch": 0.0008439573929388899, "grad_norm": 1.4501526355743408, "learning_rate": 3.055555555555556e-07, "loss": 2.0601, "step": 11 }, { "epoch": 0.0009206807922969707, "grad_norm": 1.4698467254638672, "learning_rate": 3.3333333333333335e-07, "loss": 2.0168, "step": 12 }, { "epoch": 0.0009974041916550516, "grad_norm": 1.5266127586364746, "learning_rate": 3.611111111111111e-07, "loss": 2.1244, "step": 13 }, { "epoch": 0.0010741275910131326, "grad_norm": 1.3383086919784546, "learning_rate": 3.8888888888888895e-07, "loss": 1.8144, "step": 14 }, { "epoch": 0.0011508509903712133, "grad_norm": 1.4317928552627563, "learning_rate": 4.1666666666666667e-07, "loss": 1.9769, "step": 15 }, { "epoch": 0.0012275743897292943, "grad_norm": 1.4458036422729492, "learning_rate": 4.444444444444445e-07, "loss": 2.0641, "step": 16 }, { "epoch": 0.0013042977890873751, "grad_norm": 1.6645574569702148, "learning_rate": 4.7222222222222226e-07, "loss": 2.0645, "step": 17 }, { "epoch": 0.0013810211884454561, "grad_norm": 1.6048424243927002, "learning_rate": 5.000000000000001e-07, "loss": 2.1092, "step": 18 }, { "epoch": 0.001457744587803537, "grad_norm": 1.4078236818313599, "learning_rate": 5.277777777777779e-07, "loss": 1.9657, "step": 19 }, { "epoch": 0.001534467987161618, "grad_norm": 1.590699553489685, "learning_rate": 5.555555555555555e-07, "loss": 2.0471, "step": 20 }, { "epoch": 0.0016111913865196987, "grad_norm": 1.451219081878662, "learning_rate": 5.833333333333334e-07, "loss": 2.0169, "step": 21 }, { "epoch": 0.0016879147858777797, "grad_norm": 1.5344305038452148, "learning_rate": 6.111111111111112e-07, "loss": 2.0445, "step": 22 }, { "epoch": 0.0017646381852358605, "grad_norm": 1.438352346420288, "learning_rate": 6.388888888888889e-07, "loss": 2.0149, "step": 23 }, { "epoch": 0.0018413615845939413, "grad_norm": 1.2835633754730225, "learning_rate": 6.666666666666667e-07, "loss": 1.8625, "step": 24 }, { "epoch": 0.0019180849839520223, "grad_norm": 1.4749506711959839, "learning_rate": 6.944444444444446e-07, "loss": 1.9002, "step": 25 }, { "epoch": 0.001994808383310103, "grad_norm": 1.5343165397644043, "learning_rate": 7.222222222222222e-07, "loss": 1.9717, "step": 26 }, { "epoch": 0.002071531782668184, "grad_norm": 1.2220141887664795, "learning_rate": 7.5e-07, "loss": 1.9051, "step": 27 }, { "epoch": 0.002148255182026265, "grad_norm": 1.3438694477081299, "learning_rate": 7.777777777777779e-07, "loss": 1.9939, "step": 28 }, { "epoch": 0.002224978581384346, "grad_norm": 1.065828561782837, "learning_rate": 8.055555555555557e-07, "loss": 1.7784, "step": 29 }, { "epoch": 0.0023017019807424267, "grad_norm": 1.0827152729034424, "learning_rate": 8.333333333333333e-07, "loss": 1.8801, "step": 30 }, { "epoch": 0.0023784253801005075, "grad_norm": 1.1655535697937012, "learning_rate": 8.611111111111112e-07, "loss": 1.8413, "step": 31 }, { "epoch": 0.0024551487794585887, "grad_norm": 1.2185875177383423, "learning_rate": 8.88888888888889e-07, "loss": 1.9999, "step": 32 }, { "epoch": 0.0025318721788166695, "grad_norm": 2.3703560829162598, "learning_rate": 9.166666666666666e-07, "loss": 1.9176, "step": 33 }, { "epoch": 0.0026085955781747503, "grad_norm": 1.171060562133789, "learning_rate": 9.444444444444445e-07, "loss": 1.8191, "step": 34 }, { "epoch": 0.002685318977532831, "grad_norm": 1.014253854751587, "learning_rate": 9.722222222222224e-07, "loss": 1.8281, "step": 35 }, { "epoch": 0.0027620423768909123, "grad_norm": 0.9868394136428833, "learning_rate": 1.0000000000000002e-06, "loss": 1.7483, "step": 36 }, { "epoch": 0.002838765776248993, "grad_norm": 0.8541631698608398, "learning_rate": 1.0277777777777777e-06, "loss": 1.7323, "step": 37 }, { "epoch": 0.002915489175607074, "grad_norm": 0.6318525671958923, "learning_rate": 1.0555555555555557e-06, "loss": 1.6639, "step": 38 }, { "epoch": 0.0029922125749651547, "grad_norm": 0.7088087201118469, "learning_rate": 1.0833333333333335e-06, "loss": 1.6489, "step": 39 }, { "epoch": 0.003068935974323236, "grad_norm": 0.729012668132782, "learning_rate": 1.111111111111111e-06, "loss": 1.6712, "step": 40 }, { "epoch": 0.0031456593736813167, "grad_norm": 0.6080560088157654, "learning_rate": 1.138888888888889e-06, "loss": 1.6651, "step": 41 }, { "epoch": 0.0032223827730393974, "grad_norm": 0.7005060911178589, "learning_rate": 1.1666666666666668e-06, "loss": 1.6931, "step": 42 }, { "epoch": 0.0032991061723974782, "grad_norm": 0.7512519955635071, "learning_rate": 1.1944444444444446e-06, "loss": 1.7396, "step": 43 }, { "epoch": 0.0033758295717555595, "grad_norm": 0.6921234726905823, "learning_rate": 1.2222222222222223e-06, "loss": 1.7036, "step": 44 }, { "epoch": 0.0034525529711136402, "grad_norm": 0.5955190658569336, "learning_rate": 1.25e-06, "loss": 1.744, "step": 45 }, { "epoch": 0.003529276370471721, "grad_norm": 0.6165654063224792, "learning_rate": 1.2777777777777779e-06, "loss": 1.7232, "step": 46 }, { "epoch": 0.003605999769829802, "grad_norm": 0.2851021885871887, "learning_rate": 1.3055555555555556e-06, "loss": 1.5291, "step": 47 }, { "epoch": 0.0036827231691878826, "grad_norm": 0.6635045409202576, "learning_rate": 1.3333333333333334e-06, "loss": 1.5549, "step": 48 }, { "epoch": 0.003759446568545964, "grad_norm": 0.5668996572494507, "learning_rate": 1.3611111111111112e-06, "loss": 1.6054, "step": 49 }, { "epoch": 0.0038361699679040446, "grad_norm": 0.5261828303337097, "learning_rate": 1.3888888888888892e-06, "loss": 1.6654, "step": 50 }, { "epoch": 0.003912893367262126, "grad_norm": 0.28205057978630066, "learning_rate": 1.4166666666666667e-06, "loss": 1.5187, "step": 51 }, { "epoch": 0.003989616766620206, "grad_norm": 0.47881031036376953, "learning_rate": 1.4444444444444445e-06, "loss": 1.5802, "step": 52 }, { "epoch": 0.004066340165978287, "grad_norm": 0.3563588857650757, "learning_rate": 1.4722222222222225e-06, "loss": 1.6397, "step": 53 }, { "epoch": 0.004143063565336368, "grad_norm": 0.33441710472106934, "learning_rate": 1.5e-06, "loss": 1.6233, "step": 54 }, { "epoch": 0.004219786964694449, "grad_norm": 0.29444172978401184, "learning_rate": 1.527777777777778e-06, "loss": 1.5814, "step": 55 }, { "epoch": 0.00429651036405253, "grad_norm": 0.3189222812652588, "learning_rate": 1.5555555555555558e-06, "loss": 1.5046, "step": 56 }, { "epoch": 0.004373233763410611, "grad_norm": 0.6189427971839905, "learning_rate": 1.5833333333333333e-06, "loss": 1.6128, "step": 57 }, { "epoch": 0.004449957162768692, "grad_norm": 0.6564701795578003, "learning_rate": 1.6111111111111113e-06, "loss": 1.653, "step": 58 }, { "epoch": 0.004526680562126773, "grad_norm": 0.36154410243034363, "learning_rate": 1.638888888888889e-06, "loss": 1.6543, "step": 59 }, { "epoch": 0.004603403961484853, "grad_norm": 0.2730221450328827, "learning_rate": 1.6666666666666667e-06, "loss": 1.5718, "step": 60 }, { "epoch": 0.004680127360842935, "grad_norm": 0.31105998158454895, "learning_rate": 1.6944444444444446e-06, "loss": 1.5049, "step": 61 }, { "epoch": 0.004756850760201015, "grad_norm": 0.47651663422584534, "learning_rate": 1.7222222222222224e-06, "loss": 1.4807, "step": 62 }, { "epoch": 0.004833574159559096, "grad_norm": 0.26097097992897034, "learning_rate": 1.75e-06, "loss": 1.5921, "step": 63 }, { "epoch": 0.004910297558917177, "grad_norm": 0.2834774851799011, "learning_rate": 1.777777777777778e-06, "loss": 1.4628, "step": 64 }, { "epoch": 0.004987020958275258, "grad_norm": 0.3245694041252136, "learning_rate": 1.8055555555555557e-06, "loss": 1.5664, "step": 65 }, { "epoch": 0.005063744357633339, "grad_norm": 0.3074248433113098, "learning_rate": 1.8333333333333333e-06, "loss": 1.5628, "step": 66 }, { "epoch": 0.00514046775699142, "grad_norm": 0.302223265171051, "learning_rate": 1.8611111111111113e-06, "loss": 1.4289, "step": 67 }, { "epoch": 0.0052171911563495005, "grad_norm": 0.42279836535453796, "learning_rate": 1.888888888888889e-06, "loss": 1.4996, "step": 68 }, { "epoch": 0.005293914555707582, "grad_norm": 0.3414996564388275, "learning_rate": 1.916666666666667e-06, "loss": 1.4741, "step": 69 }, { "epoch": 0.005370637955065662, "grad_norm": 0.23484711349010468, "learning_rate": 1.944444444444445e-06, "loss": 1.5153, "step": 70 }, { "epoch": 0.005447361354423743, "grad_norm": 0.25329485535621643, "learning_rate": 1.9722222222222224e-06, "loss": 1.5515, "step": 71 }, { "epoch": 0.005524084753781825, "grad_norm": 0.2807338535785675, "learning_rate": 2.0000000000000003e-06, "loss": 1.4569, "step": 72 }, { "epoch": 0.005600808153139905, "grad_norm": 0.26787811517715454, "learning_rate": 2.027777777777778e-06, "loss": 1.497, "step": 73 }, { "epoch": 0.005677531552497986, "grad_norm": 0.24634388089179993, "learning_rate": 2.0555555555555555e-06, "loss": 1.4878, "step": 74 }, { "epoch": 0.0057542549518560665, "grad_norm": 0.31139981746673584, "learning_rate": 2.0833333333333334e-06, "loss": 1.4782, "step": 75 }, { "epoch": 0.005830978351214148, "grad_norm": 0.24583308398723602, "learning_rate": 2.1111111111111114e-06, "loss": 1.5123, "step": 76 }, { "epoch": 0.005907701750572229, "grad_norm": 0.27182942628860474, "learning_rate": 2.138888888888889e-06, "loss": 1.4896, "step": 77 }, { "epoch": 0.005984425149930309, "grad_norm": 0.2385183721780777, "learning_rate": 2.166666666666667e-06, "loss": 1.4495, "step": 78 }, { "epoch": 0.0060611485492883905, "grad_norm": 0.3554174304008484, "learning_rate": 2.1944444444444445e-06, "loss": 1.5536, "step": 79 }, { "epoch": 0.006137871948646472, "grad_norm": 0.3415203094482422, "learning_rate": 2.222222222222222e-06, "loss": 1.4761, "step": 80 }, { "epoch": 0.006214595348004552, "grad_norm": 0.25041118264198303, "learning_rate": 2.25e-06, "loss": 1.3805, "step": 81 }, { "epoch": 0.006291318747362633, "grad_norm": 0.30435630679130554, "learning_rate": 2.277777777777778e-06, "loss": 1.5065, "step": 82 }, { "epoch": 0.006368042146720714, "grad_norm": 0.22301357984542847, "learning_rate": 2.305555555555556e-06, "loss": 1.3803, "step": 83 }, { "epoch": 0.006444765546078795, "grad_norm": 0.2405404895544052, "learning_rate": 2.3333333333333336e-06, "loss": 1.5865, "step": 84 }, { "epoch": 0.006521488945436876, "grad_norm": 0.28169798851013184, "learning_rate": 2.361111111111111e-06, "loss": 1.5068, "step": 85 }, { "epoch": 0.0065982123447949565, "grad_norm": 0.2549459636211395, "learning_rate": 2.388888888888889e-06, "loss": 1.3974, "step": 86 }, { "epoch": 0.006674935744153038, "grad_norm": 0.3967515826225281, "learning_rate": 2.4166666666666667e-06, "loss": 1.495, "step": 87 }, { "epoch": 0.006751659143511119, "grad_norm": 0.27188628911972046, "learning_rate": 2.4444444444444447e-06, "loss": 1.426, "step": 88 }, { "epoch": 0.006828382542869199, "grad_norm": 0.25832194089889526, "learning_rate": 2.4722222222222226e-06, "loss": 1.5673, "step": 89 }, { "epoch": 0.0069051059422272805, "grad_norm": 0.250408411026001, "learning_rate": 2.5e-06, "loss": 1.5039, "step": 90 }, { "epoch": 0.006981829341585361, "grad_norm": 0.21664480865001678, "learning_rate": 2.5277777777777778e-06, "loss": 1.5301, "step": 91 }, { "epoch": 0.007058552740943442, "grad_norm": 0.23095263540744781, "learning_rate": 2.5555555555555557e-06, "loss": 1.4474, "step": 92 }, { "epoch": 0.007135276140301523, "grad_norm": 0.31329649686813354, "learning_rate": 2.5833333333333337e-06, "loss": 1.474, "step": 93 }, { "epoch": 0.007211999539659604, "grad_norm": 0.22136734426021576, "learning_rate": 2.6111111111111113e-06, "loss": 1.4716, "step": 94 }, { "epoch": 0.007288722939017685, "grad_norm": 0.4410797953605652, "learning_rate": 2.6388888888888893e-06, "loss": 1.4412, "step": 95 }, { "epoch": 0.007365446338375765, "grad_norm": 0.24003879725933075, "learning_rate": 2.666666666666667e-06, "loss": 1.4457, "step": 96 }, { "epoch": 0.0074421697377338464, "grad_norm": 0.3368971645832062, "learning_rate": 2.6944444444444444e-06, "loss": 1.4194, "step": 97 }, { "epoch": 0.007518893137091928, "grad_norm": 0.2774094343185425, "learning_rate": 2.7222222222222224e-06, "loss": 1.493, "step": 98 }, { "epoch": 0.007595616536450008, "grad_norm": 0.254801481962204, "learning_rate": 2.7500000000000004e-06, "loss": 1.4524, "step": 99 }, { "epoch": 0.007672339935808089, "grad_norm": 0.3394177556037903, "learning_rate": 2.7777777777777783e-06, "loss": 1.4278, "step": 100 }, { "epoch": 0.0077490633351661705, "grad_norm": 0.30386850237846375, "learning_rate": 2.805555555555556e-06, "loss": 1.4871, "step": 101 }, { "epoch": 0.007825786734524252, "grad_norm": 0.37782520055770874, "learning_rate": 2.8333333333333335e-06, "loss": 1.3822, "step": 102 }, { "epoch": 0.007902510133882332, "grad_norm": 0.26984918117523193, "learning_rate": 2.861111111111111e-06, "loss": 1.4612, "step": 103 }, { "epoch": 0.007979233533240412, "grad_norm": 0.3169573247432709, "learning_rate": 2.888888888888889e-06, "loss": 1.4348, "step": 104 }, { "epoch": 0.008055956932598494, "grad_norm": 0.20867016911506653, "learning_rate": 2.916666666666667e-06, "loss": 1.3401, "step": 105 }, { "epoch": 0.008132680331956575, "grad_norm": 0.24315032362937927, "learning_rate": 2.944444444444445e-06, "loss": 1.3981, "step": 106 }, { "epoch": 0.008209403731314655, "grad_norm": 0.2825799286365509, "learning_rate": 2.9722222222222225e-06, "loss": 1.4631, "step": 107 }, { "epoch": 0.008286127130672736, "grad_norm": 0.18994228541851044, "learning_rate": 3e-06, "loss": 1.5134, "step": 108 }, { "epoch": 0.008362850530030818, "grad_norm": 0.22739864885807037, "learning_rate": 3.0277777777777776e-06, "loss": 1.4215, "step": 109 }, { "epoch": 0.008439573929388898, "grad_norm": 0.2855815291404724, "learning_rate": 3.055555555555556e-06, "loss": 1.4403, "step": 110 }, { "epoch": 0.008516297328746978, "grad_norm": 0.9607840180397034, "learning_rate": 3.0833333333333336e-06, "loss": 1.4915, "step": 111 }, { "epoch": 0.00859302072810506, "grad_norm": 0.256582111120224, "learning_rate": 3.1111111111111116e-06, "loss": 1.5033, "step": 112 }, { "epoch": 0.00866974412746314, "grad_norm": 0.24900999665260315, "learning_rate": 3.138888888888889e-06, "loss": 1.2716, "step": 113 }, { "epoch": 0.008746467526821221, "grad_norm": 0.20090310275554657, "learning_rate": 3.1666666666666667e-06, "loss": 1.2997, "step": 114 }, { "epoch": 0.008823190926179303, "grad_norm": 0.2070528268814087, "learning_rate": 3.1944444444444443e-06, "loss": 1.4196, "step": 115 }, { "epoch": 0.008899914325537384, "grad_norm": 0.22434066236019135, "learning_rate": 3.2222222222222227e-06, "loss": 1.4708, "step": 116 }, { "epoch": 0.008976637724895464, "grad_norm": 0.20813767611980438, "learning_rate": 3.2500000000000002e-06, "loss": 1.3697, "step": 117 }, { "epoch": 0.009053361124253546, "grad_norm": 0.195035919547081, "learning_rate": 3.277777777777778e-06, "loss": 1.4602, "step": 118 }, { "epoch": 0.009130084523611626, "grad_norm": 0.20193152129650116, "learning_rate": 3.3055555555555558e-06, "loss": 1.3551, "step": 119 }, { "epoch": 0.009206807922969707, "grad_norm": 0.2239111065864563, "learning_rate": 3.3333333333333333e-06, "loss": 1.4341, "step": 120 }, { "epoch": 0.009283531322327787, "grad_norm": 0.24879895150661469, "learning_rate": 3.3611111111111117e-06, "loss": 1.4932, "step": 121 }, { "epoch": 0.00936025472168587, "grad_norm": 0.27931633591651917, "learning_rate": 3.3888888888888893e-06, "loss": 1.3843, "step": 122 }, { "epoch": 0.00943697812104395, "grad_norm": 0.24310681223869324, "learning_rate": 3.416666666666667e-06, "loss": 1.3926, "step": 123 }, { "epoch": 0.00951370152040203, "grad_norm": 0.26717981696128845, "learning_rate": 3.444444444444445e-06, "loss": 1.4611, "step": 124 }, { "epoch": 0.009590424919760112, "grad_norm": 0.4878996014595032, "learning_rate": 3.4722222222222224e-06, "loss": 1.4317, "step": 125 }, { "epoch": 0.009667148319118192, "grad_norm": 0.2071317434310913, "learning_rate": 3.5e-06, "loss": 1.3612, "step": 126 }, { "epoch": 0.009743871718476273, "grad_norm": 0.2265220582485199, "learning_rate": 3.5277777777777784e-06, "loss": 1.3564, "step": 127 }, { "epoch": 0.009820595117834355, "grad_norm": 0.3926137387752533, "learning_rate": 3.555555555555556e-06, "loss": 1.3432, "step": 128 }, { "epoch": 0.009897318517192435, "grad_norm": 0.33210667967796326, "learning_rate": 3.5833333333333335e-06, "loss": 1.4468, "step": 129 }, { "epoch": 0.009974041916550516, "grad_norm": 0.20587556064128876, "learning_rate": 3.6111111111111115e-06, "loss": 1.4039, "step": 130 }, { "epoch": 0.010050765315908598, "grad_norm": 0.21681547164916992, "learning_rate": 3.638888888888889e-06, "loss": 1.4499, "step": 131 }, { "epoch": 0.010127488715266678, "grad_norm": 0.2419356256723404, "learning_rate": 3.6666666666666666e-06, "loss": 1.3849, "step": 132 }, { "epoch": 0.010204212114624758, "grad_norm": 0.2453588992357254, "learning_rate": 3.694444444444445e-06, "loss": 1.3786, "step": 133 }, { "epoch": 0.01028093551398284, "grad_norm": 0.2242642641067505, "learning_rate": 3.7222222222222225e-06, "loss": 1.4102, "step": 134 }, { "epoch": 0.01035765891334092, "grad_norm": 0.2468804121017456, "learning_rate": 3.7500000000000005e-06, "loss": 1.3677, "step": 135 }, { "epoch": 0.010434382312699001, "grad_norm": 0.24158863723278046, "learning_rate": 3.777777777777778e-06, "loss": 1.3637, "step": 136 }, { "epoch": 0.010511105712057081, "grad_norm": 0.23158524930477142, "learning_rate": 3.8055555555555556e-06, "loss": 1.3275, "step": 137 }, { "epoch": 0.010587829111415164, "grad_norm": 0.2196633666753769, "learning_rate": 3.833333333333334e-06, "loss": 1.4208, "step": 138 }, { "epoch": 0.010664552510773244, "grad_norm": 0.22514431178569794, "learning_rate": 3.861111111111112e-06, "loss": 1.4186, "step": 139 }, { "epoch": 0.010741275910131324, "grad_norm": 0.24443094432353973, "learning_rate": 3.88888888888889e-06, "loss": 1.4091, "step": 140 }, { "epoch": 0.010817999309489406, "grad_norm": 0.23507577180862427, "learning_rate": 3.916666666666667e-06, "loss": 1.3729, "step": 141 }, { "epoch": 0.010894722708847487, "grad_norm": 0.2978883683681488, "learning_rate": 3.944444444444445e-06, "loss": 1.3818, "step": 142 }, { "epoch": 0.010971446108205567, "grad_norm": 0.2465181201696396, "learning_rate": 3.972222222222223e-06, "loss": 1.3499, "step": 143 }, { "epoch": 0.01104816950756365, "grad_norm": 0.2557956576347351, "learning_rate": 4.000000000000001e-06, "loss": 1.4141, "step": 144 }, { "epoch": 0.01112489290692173, "grad_norm": 0.2945006489753723, "learning_rate": 4.027777777777779e-06, "loss": 1.3852, "step": 145 }, { "epoch": 0.01120161630627981, "grad_norm": 0.35389965772628784, "learning_rate": 4.055555555555556e-06, "loss": 1.3379, "step": 146 }, { "epoch": 0.011278339705637892, "grad_norm": 0.21652956306934357, "learning_rate": 4.083333333333334e-06, "loss": 1.3345, "step": 147 }, { "epoch": 0.011355063104995972, "grad_norm": 0.34916144609451294, "learning_rate": 4.111111111111111e-06, "loss": 1.4633, "step": 148 }, { "epoch": 0.011431786504354053, "grad_norm": 0.22217579185962677, "learning_rate": 4.138888888888889e-06, "loss": 1.3583, "step": 149 }, { "epoch": 0.011508509903712133, "grad_norm": 0.29187625646591187, "learning_rate": 4.166666666666667e-06, "loss": 1.3181, "step": 150 }, { "epoch": 0.011585233303070215, "grad_norm": 0.20457340776920319, "learning_rate": 4.194444444444445e-06, "loss": 1.424, "step": 151 }, { "epoch": 0.011661956702428295, "grad_norm": 0.2303774207830429, "learning_rate": 4.222222222222223e-06, "loss": 1.3658, "step": 152 }, { "epoch": 0.011738680101786376, "grad_norm": 0.2813664972782135, "learning_rate": 4.25e-06, "loss": 1.3046, "step": 153 }, { "epoch": 0.011815403501144458, "grad_norm": 0.22461804747581482, "learning_rate": 4.277777777777778e-06, "loss": 1.396, "step": 154 }, { "epoch": 0.011892126900502538, "grad_norm": 0.31367745995521545, "learning_rate": 4.305555555555556e-06, "loss": 1.4089, "step": 155 }, { "epoch": 0.011968850299860619, "grad_norm": 0.3986515998840332, "learning_rate": 4.333333333333334e-06, "loss": 1.4207, "step": 156 }, { "epoch": 0.0120455736992187, "grad_norm": 0.4938881993293762, "learning_rate": 4.361111111111112e-06, "loss": 1.2853, "step": 157 }, { "epoch": 0.012122297098576781, "grad_norm": 0.2377733588218689, "learning_rate": 4.388888888888889e-06, "loss": 1.3298, "step": 158 }, { "epoch": 0.012199020497934861, "grad_norm": 0.29111260175704956, "learning_rate": 4.416666666666667e-06, "loss": 1.43, "step": 159 }, { "epoch": 0.012275743897292943, "grad_norm": 0.26671504974365234, "learning_rate": 4.444444444444444e-06, "loss": 1.3315, "step": 160 }, { "epoch": 0.012352467296651024, "grad_norm": 0.1913481503725052, "learning_rate": 4.472222222222223e-06, "loss": 1.3872, "step": 161 }, { "epoch": 0.012429190696009104, "grad_norm": 0.19374895095825195, "learning_rate": 4.5e-06, "loss": 1.4337, "step": 162 }, { "epoch": 0.012505914095367185, "grad_norm": 0.19327592849731445, "learning_rate": 4.527777777777778e-06, "loss": 1.3214, "step": 163 }, { "epoch": 0.012582637494725267, "grad_norm": 0.20345023274421692, "learning_rate": 4.555555555555556e-06, "loss": 1.3478, "step": 164 }, { "epoch": 0.012659360894083347, "grad_norm": 0.2033499926328659, "learning_rate": 4.583333333333333e-06, "loss": 1.3227, "step": 165 }, { "epoch": 0.012736084293441427, "grad_norm": 0.26677823066711426, "learning_rate": 4.611111111111112e-06, "loss": 1.3375, "step": 166 }, { "epoch": 0.01281280769279951, "grad_norm": 0.2533177137374878, "learning_rate": 4.638888888888889e-06, "loss": 1.3401, "step": 167 }, { "epoch": 0.01288953109215759, "grad_norm": 0.20695236325263977, "learning_rate": 4.666666666666667e-06, "loss": 1.3966, "step": 168 }, { "epoch": 0.01296625449151567, "grad_norm": 0.21036547422409058, "learning_rate": 4.694444444444445e-06, "loss": 1.3753, "step": 169 }, { "epoch": 0.013042977890873752, "grad_norm": 0.21267098188400269, "learning_rate": 4.722222222222222e-06, "loss": 1.4447, "step": 170 }, { "epoch": 0.013119701290231833, "grad_norm": 0.3361635208129883, "learning_rate": 4.75e-06, "loss": 1.357, "step": 171 }, { "epoch": 0.013196424689589913, "grad_norm": 0.3548314869403839, "learning_rate": 4.777777777777778e-06, "loss": 1.3918, "step": 172 }, { "epoch": 0.013273148088947995, "grad_norm": 0.27654603123664856, "learning_rate": 4.805555555555556e-06, "loss": 1.3441, "step": 173 }, { "epoch": 0.013349871488306075, "grad_norm": 0.23871082067489624, "learning_rate": 4.833333333333333e-06, "loss": 1.4145, "step": 174 }, { "epoch": 0.013426594887664156, "grad_norm": 0.23770327866077423, "learning_rate": 4.861111111111111e-06, "loss": 1.4462, "step": 175 }, { "epoch": 0.013503318287022238, "grad_norm": 0.28003600239753723, "learning_rate": 4.888888888888889e-06, "loss": 1.3851, "step": 176 }, { "epoch": 0.013580041686380318, "grad_norm": 0.28843802213668823, "learning_rate": 4.9166666666666665e-06, "loss": 1.3984, "step": 177 }, { "epoch": 0.013656765085738399, "grad_norm": 0.3120441436767578, "learning_rate": 4.944444444444445e-06, "loss": 1.3223, "step": 178 }, { "epoch": 0.013733488485096479, "grad_norm": 0.26455622911453247, "learning_rate": 4.9722222222222224e-06, "loss": 1.3487, "step": 179 }, { "epoch": 0.013810211884454561, "grad_norm": 0.28754720091819763, "learning_rate": 5e-06, "loss": 1.3621, "step": 180 }, { "epoch": 0.013886935283812641, "grad_norm": 0.19645000994205475, "learning_rate": 5.027777777777778e-06, "loss": 1.3512, "step": 181 }, { "epoch": 0.013963658683170722, "grad_norm": 0.27140361070632935, "learning_rate": 5.0555555555555555e-06, "loss": 1.3958, "step": 182 }, { "epoch": 0.014040382082528804, "grad_norm": 0.18881404399871826, "learning_rate": 5.0833333333333335e-06, "loss": 1.4206, "step": 183 }, { "epoch": 0.014117105481886884, "grad_norm": 0.20885105431079865, "learning_rate": 5.1111111111111115e-06, "loss": 1.3632, "step": 184 }, { "epoch": 0.014193828881244965, "grad_norm": 0.4787726104259491, "learning_rate": 5.138888888888889e-06, "loss": 1.3772, "step": 185 }, { "epoch": 0.014270552280603047, "grad_norm": 0.45614442229270935, "learning_rate": 5.1666666666666675e-06, "loss": 1.3442, "step": 186 }, { "epoch": 0.014347275679961127, "grad_norm": 0.6313778162002563, "learning_rate": 5.1944444444444454e-06, "loss": 1.4008, "step": 187 }, { "epoch": 0.014423999079319207, "grad_norm": 0.22854045033454895, "learning_rate": 5.2222222222222226e-06, "loss": 1.4419, "step": 188 }, { "epoch": 0.01450072247867729, "grad_norm": 0.266183465719223, "learning_rate": 5.2500000000000006e-06, "loss": 1.3744, "step": 189 }, { "epoch": 0.01457744587803537, "grad_norm": 0.46180471777915955, "learning_rate": 5.2777777777777785e-06, "loss": 1.4406, "step": 190 }, { "epoch": 0.01465416927739345, "grad_norm": 0.29175621271133423, "learning_rate": 5.305555555555556e-06, "loss": 1.3688, "step": 191 }, { "epoch": 0.01473089267675153, "grad_norm": 0.23730742931365967, "learning_rate": 5.333333333333334e-06, "loss": 1.3041, "step": 192 }, { "epoch": 0.014807616076109613, "grad_norm": 0.3502316474914551, "learning_rate": 5.361111111111112e-06, "loss": 1.3426, "step": 193 }, { "epoch": 0.014884339475467693, "grad_norm": 0.31522420048713684, "learning_rate": 5.388888888888889e-06, "loss": 1.3447, "step": 194 }, { "epoch": 0.014961062874825773, "grad_norm": 0.24487262964248657, "learning_rate": 5.416666666666667e-06, "loss": 1.4398, "step": 195 }, { "epoch": 0.015037786274183855, "grad_norm": 0.24528391659259796, "learning_rate": 5.444444444444445e-06, "loss": 1.3744, "step": 196 }, { "epoch": 0.015114509673541936, "grad_norm": 0.26381585001945496, "learning_rate": 5.4722222222222236e-06, "loss": 1.3189, "step": 197 }, { "epoch": 0.015191233072900016, "grad_norm": 0.204240083694458, "learning_rate": 5.500000000000001e-06, "loss": 1.2918, "step": 198 }, { "epoch": 0.015267956472258098, "grad_norm": 0.20685020089149475, "learning_rate": 5.527777777777779e-06, "loss": 1.3421, "step": 199 }, { "epoch": 0.015344679871616178, "grad_norm": 0.21413590013980865, "learning_rate": 5.555555555555557e-06, "loss": 1.4063, "step": 200 }, { "epoch": 0.015421403270974259, "grad_norm": 0.21831047534942627, "learning_rate": 5.583333333333334e-06, "loss": 1.314, "step": 201 }, { "epoch": 0.015498126670332341, "grad_norm": 0.23345640301704407, "learning_rate": 5.611111111111112e-06, "loss": 1.4544, "step": 202 }, { "epoch": 0.015574850069690421, "grad_norm": 0.22305883467197418, "learning_rate": 5.638888888888889e-06, "loss": 1.3377, "step": 203 }, { "epoch": 0.015651573469048503, "grad_norm": 0.18714657425880432, "learning_rate": 5.666666666666667e-06, "loss": 1.2726, "step": 204 }, { "epoch": 0.015728296868406582, "grad_norm": 1.3198148012161255, "learning_rate": 5.694444444444445e-06, "loss": 1.2978, "step": 205 }, { "epoch": 0.015805020267764664, "grad_norm": 1.0213266611099243, "learning_rate": 5.722222222222222e-06, "loss": 1.3569, "step": 206 }, { "epoch": 0.015881743667122746, "grad_norm": 0.27178916335105896, "learning_rate": 5.75e-06, "loss": 1.3715, "step": 207 }, { "epoch": 0.015958467066480825, "grad_norm": 0.207829087972641, "learning_rate": 5.777777777777778e-06, "loss": 1.3932, "step": 208 }, { "epoch": 0.016035190465838907, "grad_norm": 0.24165739119052887, "learning_rate": 5.805555555555557e-06, "loss": 1.3945, "step": 209 }, { "epoch": 0.01611191386519699, "grad_norm": 0.3072274923324585, "learning_rate": 5.833333333333334e-06, "loss": 1.3698, "step": 210 }, { "epoch": 0.016188637264555068, "grad_norm": 0.25251662731170654, "learning_rate": 5.861111111111112e-06, "loss": 1.3186, "step": 211 }, { "epoch": 0.01626536066391315, "grad_norm": 0.23354296386241913, "learning_rate": 5.88888888888889e-06, "loss": 1.3299, "step": 212 }, { "epoch": 0.01634208406327123, "grad_norm": 0.26305750012397766, "learning_rate": 5.916666666666667e-06, "loss": 1.3041, "step": 213 }, { "epoch": 0.01641880746262931, "grad_norm": 0.27292490005493164, "learning_rate": 5.944444444444445e-06, "loss": 1.2966, "step": 214 }, { "epoch": 0.016495530861987392, "grad_norm": 0.24226795136928558, "learning_rate": 5.972222222222222e-06, "loss": 1.3719, "step": 215 }, { "epoch": 0.01657225426134547, "grad_norm": 0.3032761514186859, "learning_rate": 6e-06, "loss": 1.3929, "step": 216 }, { "epoch": 0.016648977660703553, "grad_norm": 0.23508524894714355, "learning_rate": 6.027777777777778e-06, "loss": 1.365, "step": 217 }, { "epoch": 0.016725701060061635, "grad_norm": 0.19655096530914307, "learning_rate": 6.055555555555555e-06, "loss": 1.438, "step": 218 }, { "epoch": 0.016802424459419714, "grad_norm": 0.20995531976222992, "learning_rate": 6.083333333333333e-06, "loss": 1.3216, "step": 219 }, { "epoch": 0.016879147858777796, "grad_norm": 0.23676876723766327, "learning_rate": 6.111111111111112e-06, "loss": 1.3552, "step": 220 }, { "epoch": 0.016955871258135878, "grad_norm": 0.27489981055259705, "learning_rate": 6.13888888888889e-06, "loss": 1.3368, "step": 221 }, { "epoch": 0.017032594657493957, "grad_norm": 0.24763287603855133, "learning_rate": 6.166666666666667e-06, "loss": 1.3236, "step": 222 }, { "epoch": 0.01710931805685204, "grad_norm": 0.1859695464372635, "learning_rate": 6.194444444444445e-06, "loss": 1.3984, "step": 223 }, { "epoch": 0.01718604145621012, "grad_norm": 0.19630742073059082, "learning_rate": 6.222222222222223e-06, "loss": 1.346, "step": 224 }, { "epoch": 0.0172627648555682, "grad_norm": 0.2590942680835724, "learning_rate": 6.25e-06, "loss": 1.2887, "step": 225 }, { "epoch": 0.01733948825492628, "grad_norm": 0.22341588139533997, "learning_rate": 6.277777777777778e-06, "loss": 1.3433, "step": 226 }, { "epoch": 0.017416211654284364, "grad_norm": 0.3323090076446533, "learning_rate": 6.305555555555556e-06, "loss": 1.3441, "step": 227 }, { "epoch": 0.017492935053642442, "grad_norm": 0.5306505560874939, "learning_rate": 6.333333333333333e-06, "loss": 1.2716, "step": 228 }, { "epoch": 0.017569658453000524, "grad_norm": 0.45183423161506653, "learning_rate": 6.361111111111111e-06, "loss": 1.3572, "step": 229 }, { "epoch": 0.017646381852358606, "grad_norm": 0.28237447142601013, "learning_rate": 6.3888888888888885e-06, "loss": 1.3334, "step": 230 }, { "epoch": 0.017723105251716685, "grad_norm": 0.19198645651340485, "learning_rate": 6.416666666666667e-06, "loss": 1.3011, "step": 231 }, { "epoch": 0.017799828651074767, "grad_norm": 0.23006920516490936, "learning_rate": 6.444444444444445e-06, "loss": 1.3828, "step": 232 }, { "epoch": 0.01787655205043285, "grad_norm": 0.24800439178943634, "learning_rate": 6.472222222222223e-06, "loss": 1.3598, "step": 233 }, { "epoch": 0.017953275449790928, "grad_norm": 0.37140586972236633, "learning_rate": 6.5000000000000004e-06, "loss": 1.3097, "step": 234 }, { "epoch": 0.01802999884914901, "grad_norm": 0.40723714232444763, "learning_rate": 6.5277777777777784e-06, "loss": 1.3901, "step": 235 }, { "epoch": 0.018106722248507092, "grad_norm": 0.15035581588745117, "learning_rate": 6.555555555555556e-06, "loss": 1.3969, "step": 236 }, { "epoch": 0.01818344564786517, "grad_norm": 0.2044820487499237, "learning_rate": 6.5833333333333335e-06, "loss": 1.3962, "step": 237 }, { "epoch": 0.018260169047223253, "grad_norm": 0.22510306537151337, "learning_rate": 6.6111111111111115e-06, "loss": 1.3337, "step": 238 }, { "epoch": 0.018336892446581335, "grad_norm": 0.14209140837192535, "learning_rate": 6.6388888888888895e-06, "loss": 1.4459, "step": 239 }, { "epoch": 0.018413615845939414, "grad_norm": 0.2282388061285019, "learning_rate": 6.666666666666667e-06, "loss": 1.3118, "step": 240 }, { "epoch": 0.018490339245297496, "grad_norm": 0.6250060200691223, "learning_rate": 6.694444444444445e-06, "loss": 1.3482, "step": 241 }, { "epoch": 0.018567062644655574, "grad_norm": 0.28111544251441956, "learning_rate": 6.7222222222222235e-06, "loss": 1.3485, "step": 242 }, { "epoch": 0.018643786044013656, "grad_norm": 0.32857516407966614, "learning_rate": 6.750000000000001e-06, "loss": 1.3015, "step": 243 }, { "epoch": 0.01872050944337174, "grad_norm": 0.23387357592582703, "learning_rate": 6.777777777777779e-06, "loss": 1.3923, "step": 244 }, { "epoch": 0.018797232842729817, "grad_norm": 0.317812442779541, "learning_rate": 6.8055555555555566e-06, "loss": 1.3975, "step": 245 }, { "epoch": 0.0188739562420879, "grad_norm": 0.7639262080192566, "learning_rate": 6.833333333333334e-06, "loss": 1.322, "step": 246 }, { "epoch": 0.01895067964144598, "grad_norm": 0.2424580603837967, "learning_rate": 6.861111111111112e-06, "loss": 1.3238, "step": 247 }, { "epoch": 0.01902740304080406, "grad_norm": 0.1337641179561615, "learning_rate": 6.88888888888889e-06, "loss": 1.3636, "step": 248 }, { "epoch": 0.019104126440162142, "grad_norm": 0.4444699287414551, "learning_rate": 6.916666666666667e-06, "loss": 1.3327, "step": 249 }, { "epoch": 0.019180849839520224, "grad_norm": 0.20817242562770844, "learning_rate": 6.944444444444445e-06, "loss": 1.2909, "step": 250 }, { "epoch": 0.019257573238878303, "grad_norm": 0.20990446209907532, "learning_rate": 6.972222222222223e-06, "loss": 1.3074, "step": 251 }, { "epoch": 0.019334296638236385, "grad_norm": 0.203199103474617, "learning_rate": 7e-06, "loss": 1.3015, "step": 252 }, { "epoch": 0.019411020037594467, "grad_norm": 0.28530558943748474, "learning_rate": 7.027777777777778e-06, "loss": 1.2653, "step": 253 }, { "epoch": 0.019487743436952545, "grad_norm": 0.22985193133354187, "learning_rate": 7.055555555555557e-06, "loss": 1.3002, "step": 254 }, { "epoch": 0.019564466836310627, "grad_norm": 0.1223839670419693, "learning_rate": 7.083333333333335e-06, "loss": 1.3382, "step": 255 }, { "epoch": 0.01964119023566871, "grad_norm": 0.21660493314266205, "learning_rate": 7.111111111111112e-06, "loss": 1.4378, "step": 256 }, { "epoch": 0.019717913635026788, "grad_norm": 0.20559892058372498, "learning_rate": 7.13888888888889e-06, "loss": 1.365, "step": 257 }, { "epoch": 0.01979463703438487, "grad_norm": 0.2327881008386612, "learning_rate": 7.166666666666667e-06, "loss": 1.279, "step": 258 }, { "epoch": 0.019871360433742952, "grad_norm": 0.2036377340555191, "learning_rate": 7.194444444444445e-06, "loss": 1.3754, "step": 259 }, { "epoch": 0.01994808383310103, "grad_norm": 0.3526148498058319, "learning_rate": 7.222222222222223e-06, "loss": 1.3315, "step": 260 }, { "epoch": 0.020024807232459113, "grad_norm": 0.2549625337123871, "learning_rate": 7.25e-06, "loss": 1.3729, "step": 261 }, { "epoch": 0.020101530631817195, "grad_norm": 0.2245316505432129, "learning_rate": 7.277777777777778e-06, "loss": 1.3637, "step": 262 }, { "epoch": 0.020178254031175274, "grad_norm": 0.2640211284160614, "learning_rate": 7.305555555555556e-06, "loss": 1.388, "step": 263 }, { "epoch": 0.020254977430533356, "grad_norm": 0.23793992400169373, "learning_rate": 7.333333333333333e-06, "loss": 1.3186, "step": 264 }, { "epoch": 0.020331700829891438, "grad_norm": 0.22204631567001343, "learning_rate": 7.361111111111112e-06, "loss": 1.3117, "step": 265 }, { "epoch": 0.020408424229249517, "grad_norm": 0.38714224100112915, "learning_rate": 7.38888888888889e-06, "loss": 1.3495, "step": 266 }, { "epoch": 0.0204851476286076, "grad_norm": 0.26643088459968567, "learning_rate": 7.416666666666668e-06, "loss": 1.3319, "step": 267 }, { "epoch": 0.02056187102796568, "grad_norm": 0.21205386519432068, "learning_rate": 7.444444444444445e-06, "loss": 1.2503, "step": 268 }, { "epoch": 0.02063859442732376, "grad_norm": 0.38216808438301086, "learning_rate": 7.472222222222223e-06, "loss": 1.2962, "step": 269 }, { "epoch": 0.02071531782668184, "grad_norm": 0.2440405637025833, "learning_rate": 7.500000000000001e-06, "loss": 1.3401, "step": 270 }, { "epoch": 0.02079204122603992, "grad_norm": 0.2033012956380844, "learning_rate": 7.527777777777778e-06, "loss": 1.3129, "step": 271 }, { "epoch": 0.020868764625398002, "grad_norm": 0.2614838182926178, "learning_rate": 7.555555555555556e-06, "loss": 1.2271, "step": 272 }, { "epoch": 0.020945488024756084, "grad_norm": 0.20672713220119476, "learning_rate": 7.583333333333333e-06, "loss": 1.3567, "step": 273 }, { "epoch": 0.021022211424114163, "grad_norm": 0.184756800532341, "learning_rate": 7.611111111111111e-06, "loss": 1.3705, "step": 274 }, { "epoch": 0.021098934823472245, "grad_norm": 0.18846789002418518, "learning_rate": 7.638888888888888e-06, "loss": 1.2516, "step": 275 }, { "epoch": 0.021175658222830327, "grad_norm": 0.2402200549840927, "learning_rate": 7.666666666666667e-06, "loss": 1.3418, "step": 276 }, { "epoch": 0.021252381622188406, "grad_norm": 0.32494693994522095, "learning_rate": 7.694444444444446e-06, "loss": 1.238, "step": 277 }, { "epoch": 0.021329105021546488, "grad_norm": 0.2063516527414322, "learning_rate": 7.722222222222223e-06, "loss": 1.2813, "step": 278 }, { "epoch": 0.02140582842090457, "grad_norm": 0.21830257773399353, "learning_rate": 7.75e-06, "loss": 1.4131, "step": 279 }, { "epoch": 0.02148255182026265, "grad_norm": 0.20986048877239227, "learning_rate": 7.77777777777778e-06, "loss": 1.2652, "step": 280 }, { "epoch": 0.02155927521962073, "grad_norm": 0.5255032777786255, "learning_rate": 7.805555555555556e-06, "loss": 1.2868, "step": 281 }, { "epoch": 0.021635998618978813, "grad_norm": 0.19530753791332245, "learning_rate": 7.833333333333333e-06, "loss": 1.2981, "step": 282 }, { "epoch": 0.02171272201833689, "grad_norm": 0.26318228244781494, "learning_rate": 7.861111111111112e-06, "loss": 1.2931, "step": 283 }, { "epoch": 0.021789445417694973, "grad_norm": 0.3491661250591278, "learning_rate": 7.88888888888889e-06, "loss": 1.2722, "step": 284 }, { "epoch": 0.021866168817053055, "grad_norm": 0.2236832231283188, "learning_rate": 7.916666666666667e-06, "loss": 1.2673, "step": 285 }, { "epoch": 0.021942892216411134, "grad_norm": 0.32265084981918335, "learning_rate": 7.944444444444445e-06, "loss": 1.1972, "step": 286 }, { "epoch": 0.022019615615769216, "grad_norm": 0.315105676651001, "learning_rate": 7.972222222222224e-06, "loss": 1.2713, "step": 287 }, { "epoch": 0.0220963390151273, "grad_norm": 0.2091902792453766, "learning_rate": 8.000000000000001e-06, "loss": 1.272, "step": 288 }, { "epoch": 0.022173062414485377, "grad_norm": 0.20905162394046783, "learning_rate": 8.027777777777778e-06, "loss": 1.2868, "step": 289 }, { "epoch": 0.02224978581384346, "grad_norm": 0.21716968715190887, "learning_rate": 8.055555555555557e-06, "loss": 1.4037, "step": 290 }, { "epoch": 0.02232650921320154, "grad_norm": 0.35912537574768066, "learning_rate": 8.083333333333334e-06, "loss": 1.3784, "step": 291 }, { "epoch": 0.02240323261255962, "grad_norm": 0.1983366310596466, "learning_rate": 8.111111111111112e-06, "loss": 1.2662, "step": 292 }, { "epoch": 0.022479956011917702, "grad_norm": 0.5038043260574341, "learning_rate": 8.138888888888889e-06, "loss": 1.2399, "step": 293 }, { "epoch": 0.022556679411275784, "grad_norm": 0.21409650146961212, "learning_rate": 8.166666666666668e-06, "loss": 1.2968, "step": 294 }, { "epoch": 0.022633402810633863, "grad_norm": 0.23050349950790405, "learning_rate": 8.194444444444445e-06, "loss": 1.2638, "step": 295 }, { "epoch": 0.022710126209991945, "grad_norm": 0.22917930781841278, "learning_rate": 8.222222222222222e-06, "loss": 1.3074, "step": 296 }, { "epoch": 0.022786849609350023, "grad_norm": 0.21382929384708405, "learning_rate": 8.25e-06, "loss": 1.3038, "step": 297 }, { "epoch": 0.022863573008708105, "grad_norm": 0.2923835813999176, "learning_rate": 8.277777777777778e-06, "loss": 1.2989, "step": 298 }, { "epoch": 0.022940296408066187, "grad_norm": 0.19050240516662598, "learning_rate": 8.305555555555557e-06, "loss": 1.3198, "step": 299 }, { "epoch": 0.023017019807424266, "grad_norm": 0.21598075330257416, "learning_rate": 8.333333333333334e-06, "loss": 1.308, "step": 300 }, { "epoch": 0.023093743206782348, "grad_norm": 0.23614083230495453, "learning_rate": 8.361111111111113e-06, "loss": 1.306, "step": 301 }, { "epoch": 0.02317046660614043, "grad_norm": 0.5501736402511597, "learning_rate": 8.38888888888889e-06, "loss": 1.4418, "step": 302 }, { "epoch": 0.02324719000549851, "grad_norm": 0.2041560858488083, "learning_rate": 8.416666666666667e-06, "loss": 1.3405, "step": 303 }, { "epoch": 0.02332391340485659, "grad_norm": 0.4183835983276367, "learning_rate": 8.444444444444446e-06, "loss": 1.4142, "step": 304 }, { "epoch": 0.023400636804214673, "grad_norm": 0.23726940155029297, "learning_rate": 8.472222222222223e-06, "loss": 1.2591, "step": 305 }, { "epoch": 0.02347736020357275, "grad_norm": 0.249480202794075, "learning_rate": 8.5e-06, "loss": 1.2778, "step": 306 }, { "epoch": 0.023554083602930834, "grad_norm": 0.22034887969493866, "learning_rate": 8.527777777777779e-06, "loss": 1.2137, "step": 307 }, { "epoch": 0.023630807002288916, "grad_norm": 0.207785502076149, "learning_rate": 8.555555555555556e-06, "loss": 1.2729, "step": 308 }, { "epoch": 0.023707530401646994, "grad_norm": 0.2113504409790039, "learning_rate": 8.583333333333333e-06, "loss": 1.2671, "step": 309 }, { "epoch": 0.023784253801005076, "grad_norm": 0.19178412854671478, "learning_rate": 8.611111111111112e-06, "loss": 1.3612, "step": 310 }, { "epoch": 0.02386097720036316, "grad_norm": 0.26081740856170654, "learning_rate": 8.63888888888889e-06, "loss": 1.2338, "step": 311 }, { "epoch": 0.023937700599721237, "grad_norm": 0.44088754057884216, "learning_rate": 8.666666666666668e-06, "loss": 1.2589, "step": 312 }, { "epoch": 0.02401442399907932, "grad_norm": 0.21403051912784576, "learning_rate": 8.694444444444445e-06, "loss": 1.3018, "step": 313 }, { "epoch": 0.0240911473984374, "grad_norm": 0.196856290102005, "learning_rate": 8.722222222222224e-06, "loss": 1.3263, "step": 314 }, { "epoch": 0.02416787079779548, "grad_norm": 0.6188136339187622, "learning_rate": 8.750000000000001e-06, "loss": 1.2462, "step": 315 }, { "epoch": 0.024244594197153562, "grad_norm": 0.33793559670448303, "learning_rate": 8.777777777777778e-06, "loss": 1.2318, "step": 316 }, { "epoch": 0.024321317596511644, "grad_norm": 0.235843226313591, "learning_rate": 8.805555555555557e-06, "loss": 1.2328, "step": 317 }, { "epoch": 0.024398040995869723, "grad_norm": 0.28222358226776123, "learning_rate": 8.833333333333334e-06, "loss": 1.2865, "step": 318 }, { "epoch": 0.024474764395227805, "grad_norm": 0.2569884955883026, "learning_rate": 8.861111111111111e-06, "loss": 1.2696, "step": 319 }, { "epoch": 0.024551487794585887, "grad_norm": 0.20360565185546875, "learning_rate": 8.888888888888888e-06, "loss": 1.3357, "step": 320 }, { "epoch": 0.024628211193943966, "grad_norm": 0.2935555875301361, "learning_rate": 8.916666666666667e-06, "loss": 1.2254, "step": 321 }, { "epoch": 0.024704934593302048, "grad_norm": 0.28029829263687134, "learning_rate": 8.944444444444446e-06, "loss": 1.3147, "step": 322 }, { "epoch": 0.02478165799266013, "grad_norm": 0.23762787878513336, "learning_rate": 8.972222222222223e-06, "loss": 1.3209, "step": 323 }, { "epoch": 0.02485838139201821, "grad_norm": 0.2058754712343216, "learning_rate": 9e-06, "loss": 1.3722, "step": 324 }, { "epoch": 0.02493510479137629, "grad_norm": 0.21872583031654358, "learning_rate": 9.027777777777779e-06, "loss": 1.3103, "step": 325 }, { "epoch": 0.02501182819073437, "grad_norm": 0.20575585961341858, "learning_rate": 9.055555555555556e-06, "loss": 1.2227, "step": 326 }, { "epoch": 0.02508855159009245, "grad_norm": 0.21074314415454865, "learning_rate": 9.083333333333333e-06, "loss": 1.3138, "step": 327 }, { "epoch": 0.025165274989450533, "grad_norm": 0.26552915573120117, "learning_rate": 9.111111111111112e-06, "loss": 1.3215, "step": 328 }, { "epoch": 0.025241998388808612, "grad_norm": 0.3163210451602936, "learning_rate": 9.13888888888889e-06, "loss": 1.3031, "step": 329 }, { "epoch": 0.025318721788166694, "grad_norm": 0.2978346049785614, "learning_rate": 9.166666666666666e-06, "loss": 1.2799, "step": 330 }, { "epoch": 0.025395445187524776, "grad_norm": 0.2957921326160431, "learning_rate": 9.194444444444445e-06, "loss": 1.3147, "step": 331 }, { "epoch": 0.025472168586882855, "grad_norm": 0.2176271229982376, "learning_rate": 9.222222222222224e-06, "loss": 1.281, "step": 332 }, { "epoch": 0.025548891986240937, "grad_norm": 0.22850987315177917, "learning_rate": 9.250000000000001e-06, "loss": 1.3732, "step": 333 }, { "epoch": 0.02562561538559902, "grad_norm": 0.46819421648979187, "learning_rate": 9.277777777777778e-06, "loss": 1.2186, "step": 334 }, { "epoch": 0.025702338784957098, "grad_norm": 0.2496044784784317, "learning_rate": 9.305555555555557e-06, "loss": 1.3183, "step": 335 }, { "epoch": 0.02577906218431518, "grad_norm": 0.3168233633041382, "learning_rate": 9.333333333333334e-06, "loss": 1.2918, "step": 336 }, { "epoch": 0.02585578558367326, "grad_norm": 0.18996654450893402, "learning_rate": 9.361111111111111e-06, "loss": 1.2854, "step": 337 }, { "epoch": 0.02593250898303134, "grad_norm": 0.19044160842895508, "learning_rate": 9.38888888888889e-06, "loss": 1.3196, "step": 338 }, { "epoch": 0.026009232382389422, "grad_norm": 0.23125137388706207, "learning_rate": 9.416666666666667e-06, "loss": 1.2689, "step": 339 }, { "epoch": 0.026085955781747504, "grad_norm": 0.22291024029254913, "learning_rate": 9.444444444444445e-06, "loss": 1.3128, "step": 340 }, { "epoch": 0.026162679181105583, "grad_norm": 0.2564409375190735, "learning_rate": 9.472222222222223e-06, "loss": 1.3116, "step": 341 }, { "epoch": 0.026239402580463665, "grad_norm": 0.22809293866157532, "learning_rate": 9.5e-06, "loss": 1.2573, "step": 342 }, { "epoch": 0.026316125979821747, "grad_norm": 0.22864463925361633, "learning_rate": 9.527777777777778e-06, "loss": 1.3013, "step": 343 }, { "epoch": 0.026392849379179826, "grad_norm": 0.22596347332000732, "learning_rate": 9.555555555555556e-06, "loss": 1.3411, "step": 344 }, { "epoch": 0.026469572778537908, "grad_norm": 0.26324763894081116, "learning_rate": 9.583333333333335e-06, "loss": 1.2194, "step": 345 }, { "epoch": 0.02654629617789599, "grad_norm": 0.16247566044330597, "learning_rate": 9.611111111111112e-06, "loss": 1.3824, "step": 346 }, { "epoch": 0.02662301957725407, "grad_norm": 0.5034821033477783, "learning_rate": 9.63888888888889e-06, "loss": 1.2453, "step": 347 }, { "epoch": 0.02669974297661215, "grad_norm": 0.3118758499622345, "learning_rate": 9.666666666666667e-06, "loss": 1.2901, "step": 348 }, { "epoch": 0.026776466375970233, "grad_norm": 0.3394371569156647, "learning_rate": 9.694444444444446e-06, "loss": 1.3126, "step": 349 }, { "epoch": 0.02685318977532831, "grad_norm": 0.4983554482460022, "learning_rate": 9.722222222222223e-06, "loss": 1.2907, "step": 350 }, { "epoch": 0.026929913174686394, "grad_norm": 0.26865246891975403, "learning_rate": 9.75e-06, "loss": 1.3172, "step": 351 }, { "epoch": 0.027006636574044476, "grad_norm": 0.3784191608428955, "learning_rate": 9.777777777777779e-06, "loss": 1.2549, "step": 352 }, { "epoch": 0.027083359973402554, "grad_norm": 0.3131134510040283, "learning_rate": 9.805555555555556e-06, "loss": 1.3187, "step": 353 }, { "epoch": 0.027160083372760636, "grad_norm": 0.27086225152015686, "learning_rate": 9.833333333333333e-06, "loss": 1.2317, "step": 354 }, { "epoch": 0.027236806772118715, "grad_norm": 0.24681605398654938, "learning_rate": 9.861111111111112e-06, "loss": 1.2971, "step": 355 }, { "epoch": 0.027313530171476797, "grad_norm": 0.24066777527332306, "learning_rate": 9.88888888888889e-06, "loss": 1.3493, "step": 356 }, { "epoch": 0.02739025357083488, "grad_norm": 0.25297632813453674, "learning_rate": 9.916666666666668e-06, "loss": 1.2903, "step": 357 }, { "epoch": 0.027466976970192958, "grad_norm": 0.29002055525779724, "learning_rate": 9.944444444444445e-06, "loss": 1.324, "step": 358 }, { "epoch": 0.02754370036955104, "grad_norm": 0.33497852087020874, "learning_rate": 9.972222222222224e-06, "loss": 1.3046, "step": 359 }, { "epoch": 0.027620423768909122, "grad_norm": 0.24065718054771423, "learning_rate": 1e-05, "loss": 1.2409, "step": 360 }, { "epoch": 0.0276971471682672, "grad_norm": 0.20961250364780426, "learning_rate": 9.999999846368285e-06, "loss": 1.335, "step": 361 }, { "epoch": 0.027773870567625283, "grad_norm": 0.23415729403495789, "learning_rate": 9.999999385473146e-06, "loss": 1.2294, "step": 362 }, { "epoch": 0.027850593966983365, "grad_norm": 0.6271972060203552, "learning_rate": 9.999998617314612e-06, "loss": 1.3394, "step": 363 }, { "epoch": 0.027927317366341443, "grad_norm": 0.7471867203712463, "learning_rate": 9.999997541892729e-06, "loss": 1.2455, "step": 364 }, { "epoch": 0.028004040765699525, "grad_norm": 0.2137805074453354, "learning_rate": 9.999996159207567e-06, "loss": 1.2905, "step": 365 }, { "epoch": 0.028080764165057608, "grad_norm": 0.2280188947916031, "learning_rate": 9.999994469259208e-06, "loss": 1.2938, "step": 366 }, { "epoch": 0.028157487564415686, "grad_norm": 0.1263938546180725, "learning_rate": 9.999992472047757e-06, "loss": 1.3917, "step": 367 }, { "epoch": 0.02823421096377377, "grad_norm": 0.3649177551269531, "learning_rate": 9.999990167573334e-06, "loss": 1.2162, "step": 368 }, { "epoch": 0.02831093436313185, "grad_norm": 0.2026376724243164, "learning_rate": 9.999987555836085e-06, "loss": 1.3745, "step": 369 }, { "epoch": 0.02838765776248993, "grad_norm": 0.2716897428035736, "learning_rate": 9.999984636836167e-06, "loss": 1.2946, "step": 370 }, { "epoch": 0.02846438116184801, "grad_norm": 0.12254034727811813, "learning_rate": 9.999981410573762e-06, "loss": 1.3959, "step": 371 }, { "epoch": 0.028541104561206093, "grad_norm": 0.2425498217344284, "learning_rate": 9.999977877049065e-06, "loss": 1.3025, "step": 372 }, { "epoch": 0.028617827960564172, "grad_norm": 0.20647771656513214, "learning_rate": 9.999974036262297e-06, "loss": 1.2364, "step": 373 }, { "epoch": 0.028694551359922254, "grad_norm": 0.28653720021247864, "learning_rate": 9.999969888213691e-06, "loss": 1.2965, "step": 374 }, { "epoch": 0.028771274759280336, "grad_norm": 0.2446184754371643, "learning_rate": 9.999965432903503e-06, "loss": 1.3326, "step": 375 }, { "epoch": 0.028847998158638415, "grad_norm": 0.25100669264793396, "learning_rate": 9.999960670332008e-06, "loss": 1.2751, "step": 376 }, { "epoch": 0.028924721557996497, "grad_norm": 0.26377129554748535, "learning_rate": 9.999955600499496e-06, "loss": 1.2407, "step": 377 }, { "epoch": 0.02900144495735458, "grad_norm": 0.20929333567619324, "learning_rate": 9.999950223406279e-06, "loss": 1.2279, "step": 378 }, { "epoch": 0.029078168356712657, "grad_norm": 0.21801289916038513, "learning_rate": 9.99994453905269e-06, "loss": 1.2502, "step": 379 }, { "epoch": 0.02915489175607074, "grad_norm": 0.24854576587677002, "learning_rate": 9.999938547439077e-06, "loss": 1.2424, "step": 380 }, { "epoch": 0.02923161515542882, "grad_norm": 0.21967458724975586, "learning_rate": 9.999932248565808e-06, "loss": 1.3067, "step": 381 }, { "epoch": 0.0293083385547869, "grad_norm": 0.20092909038066864, "learning_rate": 9.99992564243327e-06, "loss": 1.2907, "step": 382 }, { "epoch": 0.029385061954144982, "grad_norm": 0.1181725487112999, "learning_rate": 9.999918729041869e-06, "loss": 1.3259, "step": 383 }, { "epoch": 0.02946178535350306, "grad_norm": 0.20130378007888794, "learning_rate": 9.999911508392028e-06, "loss": 1.2517, "step": 384 }, { "epoch": 0.029538508752861143, "grad_norm": 0.22152341902256012, "learning_rate": 9.999903980484196e-06, "loss": 1.2106, "step": 385 }, { "epoch": 0.029615232152219225, "grad_norm": 0.3716928958892822, "learning_rate": 9.99989614531883e-06, "loss": 1.2626, "step": 386 }, { "epoch": 0.029691955551577304, "grad_norm": 0.23800987005233765, "learning_rate": 9.999888002896413e-06, "loss": 1.2604, "step": 387 }, { "epoch": 0.029768678950935386, "grad_norm": 0.245666041970253, "learning_rate": 9.999879553217448e-06, "loss": 1.2361, "step": 388 }, { "epoch": 0.029845402350293468, "grad_norm": 0.20150867104530334, "learning_rate": 9.999870796282452e-06, "loss": 1.2774, "step": 389 }, { "epoch": 0.029922125749651547, "grad_norm": 0.32943621277809143, "learning_rate": 9.999861732091963e-06, "loss": 1.1973, "step": 390 }, { "epoch": 0.02999884914900963, "grad_norm": 0.2224591076374054, "learning_rate": 9.999852360646538e-06, "loss": 1.2539, "step": 391 }, { "epoch": 0.03007557254836771, "grad_norm": 0.31324848532676697, "learning_rate": 9.999842681946755e-06, "loss": 1.3626, "step": 392 }, { "epoch": 0.03015229594772579, "grad_norm": 0.254006952047348, "learning_rate": 9.999832695993206e-06, "loss": 1.2471, "step": 393 }, { "epoch": 0.03022901934708387, "grad_norm": 0.11512749642133713, "learning_rate": 9.999822402786505e-06, "loss": 1.3918, "step": 394 }, { "epoch": 0.030305742746441953, "grad_norm": 0.3207680284976959, "learning_rate": 9.999811802327286e-06, "loss": 1.2632, "step": 395 }, { "epoch": 0.030382466145800032, "grad_norm": 0.27405670285224915, "learning_rate": 9.9998008946162e-06, "loss": 1.2368, "step": 396 }, { "epoch": 0.030459189545158114, "grad_norm": 0.41203466057777405, "learning_rate": 9.999789679653918e-06, "loss": 1.3116, "step": 397 }, { "epoch": 0.030535912944516196, "grad_norm": 0.23092012107372284, "learning_rate": 9.999778157441126e-06, "loss": 1.2068, "step": 398 }, { "epoch": 0.030612636343874275, "grad_norm": 0.26642632484436035, "learning_rate": 9.999766327978537e-06, "loss": 1.2306, "step": 399 }, { "epoch": 0.030689359743232357, "grad_norm": 0.2380271852016449, "learning_rate": 9.999754191266875e-06, "loss": 1.2584, "step": 400 }, { "epoch": 0.03076608314259044, "grad_norm": 0.2307986170053482, "learning_rate": 9.999741747306884e-06, "loss": 1.2788, "step": 401 }, { "epoch": 0.030842806541948518, "grad_norm": 0.18989695608615875, "learning_rate": 9.999728996099331e-06, "loss": 1.2913, "step": 402 }, { "epoch": 0.0309195299413066, "grad_norm": 0.25443583726882935, "learning_rate": 9.999715937645003e-06, "loss": 1.2633, "step": 403 }, { "epoch": 0.030996253340664682, "grad_norm": 0.21544253826141357, "learning_rate": 9.999702571944695e-06, "loss": 1.2413, "step": 404 }, { "epoch": 0.03107297674002276, "grad_norm": 0.26106247305870056, "learning_rate": 9.999688898999234e-06, "loss": 1.3327, "step": 405 }, { "epoch": 0.031149700139380843, "grad_norm": 0.4407820403575897, "learning_rate": 9.99967491880946e-06, "loss": 1.3246, "step": 406 }, { "epoch": 0.031226423538738925, "grad_norm": 0.2630557417869568, "learning_rate": 9.999660631376228e-06, "loss": 1.3365, "step": 407 }, { "epoch": 0.03130314693809701, "grad_norm": 0.23466984927654266, "learning_rate": 9.99964603670042e-06, "loss": 1.3286, "step": 408 }, { "epoch": 0.03137987033745508, "grad_norm": 0.5117154717445374, "learning_rate": 9.999631134782932e-06, "loss": 1.2453, "step": 409 }, { "epoch": 0.031456593736813164, "grad_norm": 0.2808983027935028, "learning_rate": 9.999615925624677e-06, "loss": 1.237, "step": 410 }, { "epoch": 0.031533317136171246, "grad_norm": 0.20327483117580414, "learning_rate": 9.999600409226596e-06, "loss": 1.3252, "step": 411 }, { "epoch": 0.03161004053552933, "grad_norm": 0.20786860585212708, "learning_rate": 9.999584585589634e-06, "loss": 1.2668, "step": 412 }, { "epoch": 0.03168676393488741, "grad_norm": 0.2153361588716507, "learning_rate": 9.99956845471477e-06, "loss": 1.2768, "step": 413 }, { "epoch": 0.03176348733424549, "grad_norm": 0.20121413469314575, "learning_rate": 9.999552016602993e-06, "loss": 1.2783, "step": 414 }, { "epoch": 0.03184021073360357, "grad_norm": 0.20692448318004608, "learning_rate": 9.999535271255314e-06, "loss": 1.2913, "step": 415 }, { "epoch": 0.03191693413296165, "grad_norm": 0.22863489389419556, "learning_rate": 9.99951821867276e-06, "loss": 1.3499, "step": 416 }, { "epoch": 0.03199365753231973, "grad_norm": 0.21207138895988464, "learning_rate": 9.999500858856382e-06, "loss": 1.2595, "step": 417 }, { "epoch": 0.032070380931677814, "grad_norm": 0.42240145802497864, "learning_rate": 9.999483191807245e-06, "loss": 1.2302, "step": 418 }, { "epoch": 0.032147104331035896, "grad_norm": 0.2779189944267273, "learning_rate": 9.999465217526434e-06, "loss": 1.3028, "step": 419 }, { "epoch": 0.03222382773039398, "grad_norm": 0.35462456941604614, "learning_rate": 9.999446936015053e-06, "loss": 1.1931, "step": 420 }, { "epoch": 0.03230055112975205, "grad_norm": 0.20221197605133057, "learning_rate": 9.99942834727423e-06, "loss": 1.2576, "step": 421 }, { "epoch": 0.032377274529110135, "grad_norm": 0.20989973843097687, "learning_rate": 9.999409451305101e-06, "loss": 1.1942, "step": 422 }, { "epoch": 0.03245399792846822, "grad_norm": 0.32011669874191284, "learning_rate": 9.999390248108832e-06, "loss": 1.2932, "step": 423 }, { "epoch": 0.0325307213278263, "grad_norm": 0.2704211175441742, "learning_rate": 9.999370737686602e-06, "loss": 1.3173, "step": 424 }, { "epoch": 0.03260744472718438, "grad_norm": 0.311178594827652, "learning_rate": 9.999350920039607e-06, "loss": 1.2535, "step": 425 }, { "epoch": 0.03268416812654246, "grad_norm": 0.24470765888690948, "learning_rate": 9.99933079516907e-06, "loss": 1.3002, "step": 426 }, { "epoch": 0.03276089152590054, "grad_norm": 0.3648536801338196, "learning_rate": 9.999310363076224e-06, "loss": 1.2716, "step": 427 }, { "epoch": 0.03283761492525862, "grad_norm": 0.2561253607273102, "learning_rate": 9.999289623762326e-06, "loss": 1.2931, "step": 428 }, { "epoch": 0.0329143383246167, "grad_norm": 0.21034353971481323, "learning_rate": 9.999268577228649e-06, "loss": 1.2442, "step": 429 }, { "epoch": 0.032991061723974785, "grad_norm": 0.2822010815143585, "learning_rate": 9.999247223476486e-06, "loss": 1.2448, "step": 430 }, { "epoch": 0.03306778512333287, "grad_norm": 0.2780669033527374, "learning_rate": 9.999225562507154e-06, "loss": 1.1708, "step": 431 }, { "epoch": 0.03314450852269094, "grad_norm": 0.24322453141212463, "learning_rate": 9.99920359432198e-06, "loss": 1.3698, "step": 432 }, { "epoch": 0.033221231922049024, "grad_norm": 0.24373644590377808, "learning_rate": 9.999181318922315e-06, "loss": 1.227, "step": 433 }, { "epoch": 0.033297955321407106, "grad_norm": 0.2248879075050354, "learning_rate": 9.999158736309526e-06, "loss": 1.2583, "step": 434 }, { "epoch": 0.03337467872076519, "grad_norm": 0.21658751368522644, "learning_rate": 9.999135846485005e-06, "loss": 1.2687, "step": 435 }, { "epoch": 0.03345140212012327, "grad_norm": 0.2823522090911865, "learning_rate": 9.999112649450154e-06, "loss": 1.2359, "step": 436 }, { "epoch": 0.03352812551948135, "grad_norm": 0.23070208728313446, "learning_rate": 9.9990891452064e-06, "loss": 1.2127, "step": 437 }, { "epoch": 0.03360484891883943, "grad_norm": 0.19224904477596283, "learning_rate": 9.99906533375519e-06, "loss": 1.2444, "step": 438 }, { "epoch": 0.03368157231819751, "grad_norm": 0.20204207301139832, "learning_rate": 9.999041215097984e-06, "loss": 1.3372, "step": 439 }, { "epoch": 0.03375829571755559, "grad_norm": 0.2248278558254242, "learning_rate": 9.999016789236266e-06, "loss": 1.187, "step": 440 }, { "epoch": 0.033835019116913674, "grad_norm": 0.20719681680202484, "learning_rate": 9.998992056171537e-06, "loss": 1.3173, "step": 441 }, { "epoch": 0.033911742516271756, "grad_norm": 0.24998779594898224, "learning_rate": 9.998967015905315e-06, "loss": 1.1821, "step": 442 }, { "epoch": 0.03398846591562984, "grad_norm": 0.3454965651035309, "learning_rate": 9.998941668439141e-06, "loss": 1.2733, "step": 443 }, { "epoch": 0.03406518931498791, "grad_norm": 0.2085293084383011, "learning_rate": 9.998916013774574e-06, "loss": 1.3521, "step": 444 }, { "epoch": 0.034141912714345996, "grad_norm": 0.26291969418525696, "learning_rate": 9.998890051913185e-06, "loss": 1.3719, "step": 445 }, { "epoch": 0.03421863611370408, "grad_norm": 0.2824214994907379, "learning_rate": 9.998863782856573e-06, "loss": 1.2333, "step": 446 }, { "epoch": 0.03429535951306216, "grad_norm": 0.19462254643440247, "learning_rate": 9.998837206606355e-06, "loss": 1.2493, "step": 447 }, { "epoch": 0.03437208291242024, "grad_norm": 0.19144897162914276, "learning_rate": 9.99881032316416e-06, "loss": 1.287, "step": 448 }, { "epoch": 0.034448806311778324, "grad_norm": 0.3518667221069336, "learning_rate": 9.99878313253164e-06, "loss": 1.2451, "step": 449 }, { "epoch": 0.0345255297111364, "grad_norm": 0.5003178715705872, "learning_rate": 9.998755634710468e-06, "loss": 1.2445, "step": 450 }, { "epoch": 0.03460225311049448, "grad_norm": 0.2247992902994156, "learning_rate": 9.998727829702335e-06, "loss": 1.2245, "step": 451 }, { "epoch": 0.03467897650985256, "grad_norm": 0.27939313650131226, "learning_rate": 9.998699717508947e-06, "loss": 1.2652, "step": 452 }, { "epoch": 0.034755699909210645, "grad_norm": 0.20998750627040863, "learning_rate": 9.99867129813203e-06, "loss": 1.2667, "step": 453 }, { "epoch": 0.03483242330856873, "grad_norm": 0.24637573957443237, "learning_rate": 9.998642571573334e-06, "loss": 1.2743, "step": 454 }, { "epoch": 0.0349091467079268, "grad_norm": 0.2874943017959595, "learning_rate": 9.998613537834625e-06, "loss": 1.2329, "step": 455 }, { "epoch": 0.034985870107284885, "grad_norm": 0.18814606964588165, "learning_rate": 9.998584196917685e-06, "loss": 1.3175, "step": 456 }, { "epoch": 0.03506259350664297, "grad_norm": 0.25453877449035645, "learning_rate": 9.998554548824316e-06, "loss": 1.3114, "step": 457 }, { "epoch": 0.03513931690600105, "grad_norm": 0.2647716701030731, "learning_rate": 9.998524593556342e-06, "loss": 1.3003, "step": 458 }, { "epoch": 0.03521604030535913, "grad_norm": 0.2636350393295288, "learning_rate": 9.998494331115602e-06, "loss": 1.1437, "step": 459 }, { "epoch": 0.03529276370471721, "grad_norm": 0.4004635810852051, "learning_rate": 9.99846376150396e-06, "loss": 1.2473, "step": 460 }, { "epoch": 0.03536948710407529, "grad_norm": 0.31685516238212585, "learning_rate": 9.99843288472329e-06, "loss": 1.2391, "step": 461 }, { "epoch": 0.03544621050343337, "grad_norm": 0.23081371188163757, "learning_rate": 9.998401700775489e-06, "loss": 1.2729, "step": 462 }, { "epoch": 0.03552293390279145, "grad_norm": 0.21839474141597748, "learning_rate": 9.998370209662478e-06, "loss": 1.2558, "step": 463 }, { "epoch": 0.035599657302149534, "grad_norm": 0.18313482403755188, "learning_rate": 9.998338411386189e-06, "loss": 1.2378, "step": 464 }, { "epoch": 0.035676380701507616, "grad_norm": 0.35606321692466736, "learning_rate": 9.998306305948575e-06, "loss": 1.2155, "step": 465 }, { "epoch": 0.0357531041008657, "grad_norm": 0.21764087677001953, "learning_rate": 9.998273893351614e-06, "loss": 1.1631, "step": 466 }, { "epoch": 0.035829827500223774, "grad_norm": 0.13963039219379425, "learning_rate": 9.998241173597292e-06, "loss": 1.4362, "step": 467 }, { "epoch": 0.035906550899581856, "grad_norm": 0.24763673543930054, "learning_rate": 9.998208146687623e-06, "loss": 1.3513, "step": 468 }, { "epoch": 0.03598327429893994, "grad_norm": 0.210787832736969, "learning_rate": 9.998174812624633e-06, "loss": 1.23, "step": 469 }, { "epoch": 0.03605999769829802, "grad_norm": 0.19599029421806335, "learning_rate": 9.998141171410375e-06, "loss": 1.2057, "step": 470 }, { "epoch": 0.0361367210976561, "grad_norm": 0.22438791394233704, "learning_rate": 9.998107223046915e-06, "loss": 1.2299, "step": 471 }, { "epoch": 0.036213444497014184, "grad_norm": 0.2688242793083191, "learning_rate": 9.99807296753634e-06, "loss": 1.2297, "step": 472 }, { "epoch": 0.03629016789637226, "grad_norm": 0.20212848484516144, "learning_rate": 9.998038404880751e-06, "loss": 1.3036, "step": 473 }, { "epoch": 0.03636689129573034, "grad_norm": 0.4924832880496979, "learning_rate": 9.99800353508228e-06, "loss": 1.2978, "step": 474 }, { "epoch": 0.036443614695088423, "grad_norm": 0.21449308097362518, "learning_rate": 9.99796835814306e-06, "loss": 1.1512, "step": 475 }, { "epoch": 0.036520338094446506, "grad_norm": 0.24397321045398712, "learning_rate": 9.997932874065259e-06, "loss": 1.2722, "step": 476 }, { "epoch": 0.03659706149380459, "grad_norm": 0.210028737783432, "learning_rate": 9.997897082851055e-06, "loss": 1.2471, "step": 477 }, { "epoch": 0.03667378489316267, "grad_norm": 0.26072102785110474, "learning_rate": 9.99786098450265e-06, "loss": 1.2042, "step": 478 }, { "epoch": 0.036750508292520745, "grad_norm": 0.3092025816440582, "learning_rate": 9.997824579022258e-06, "loss": 1.3405, "step": 479 }, { "epoch": 0.03682723169187883, "grad_norm": 0.34074264764785767, "learning_rate": 9.997787866412121e-06, "loss": 1.2592, "step": 480 }, { "epoch": 0.03690395509123691, "grad_norm": 0.19806985557079315, "learning_rate": 9.997750846674494e-06, "loss": 1.2014, "step": 481 }, { "epoch": 0.03698067849059499, "grad_norm": 0.23458391427993774, "learning_rate": 9.99771351981165e-06, "loss": 1.2992, "step": 482 }, { "epoch": 0.03705740188995307, "grad_norm": 0.2674688994884491, "learning_rate": 9.997675885825883e-06, "loss": 1.339, "step": 483 }, { "epoch": 0.03713412528931115, "grad_norm": 0.37647053599357605, "learning_rate": 9.997637944719507e-06, "loss": 1.1946, "step": 484 }, { "epoch": 0.03721084868866923, "grad_norm": 0.21641066670417786, "learning_rate": 9.997599696494855e-06, "loss": 1.2384, "step": 485 }, { "epoch": 0.03728757208802731, "grad_norm": 0.24923792481422424, "learning_rate": 9.997561141154274e-06, "loss": 1.2442, "step": 486 }, { "epoch": 0.037364295487385395, "grad_norm": 0.2703666090965271, "learning_rate": 9.997522278700136e-06, "loss": 1.2487, "step": 487 }, { "epoch": 0.03744101888674348, "grad_norm": 0.2123616635799408, "learning_rate": 9.997483109134827e-06, "loss": 1.2519, "step": 488 }, { "epoch": 0.03751774228610156, "grad_norm": 0.29915329813957214, "learning_rate": 9.997443632460756e-06, "loss": 1.2524, "step": 489 }, { "epoch": 0.037594465685459634, "grad_norm": 0.4299515187740326, "learning_rate": 9.997403848680347e-06, "loss": 1.2773, "step": 490 }, { "epoch": 0.037671189084817716, "grad_norm": 0.281760573387146, "learning_rate": 9.997363757796048e-06, "loss": 1.2095, "step": 491 }, { "epoch": 0.0377479124841758, "grad_norm": 0.2850954234600067, "learning_rate": 9.99732335981032e-06, "loss": 1.2723, "step": 492 }, { "epoch": 0.03782463588353388, "grad_norm": 0.11482904851436615, "learning_rate": 9.997282654725645e-06, "loss": 1.3108, "step": 493 }, { "epoch": 0.03790135928289196, "grad_norm": 0.2294633984565735, "learning_rate": 9.997241642544527e-06, "loss": 1.2767, "step": 494 }, { "epoch": 0.037978082682250044, "grad_norm": 0.2733232080936432, "learning_rate": 9.997200323269485e-06, "loss": 1.2661, "step": 495 }, { "epoch": 0.03805480608160812, "grad_norm": 0.2584412395954132, "learning_rate": 9.997158696903057e-06, "loss": 1.2453, "step": 496 }, { "epoch": 0.0381315294809662, "grad_norm": 0.21919406950473785, "learning_rate": 9.997116763447805e-06, "loss": 1.1807, "step": 497 }, { "epoch": 0.038208252880324284, "grad_norm": 0.2779228687286377, "learning_rate": 9.9970745229063e-06, "loss": 1.2329, "step": 498 }, { "epoch": 0.038284976279682366, "grad_norm": 0.11442968249320984, "learning_rate": 9.997031975281144e-06, "loss": 1.3958, "step": 499 }, { "epoch": 0.03836169967904045, "grad_norm": 0.3286196291446686, "learning_rate": 9.996989120574946e-06, "loss": 1.2805, "step": 500 }, { "epoch": 0.03843842307839853, "grad_norm": 0.24684564769268036, "learning_rate": 9.996945958790343e-06, "loss": 1.2744, "step": 501 }, { "epoch": 0.038515146477756605, "grad_norm": 0.2318197637796402, "learning_rate": 9.996902489929987e-06, "loss": 1.2629, "step": 502 }, { "epoch": 0.03859186987711469, "grad_norm": 0.25205299258232117, "learning_rate": 9.996858713996549e-06, "loss": 1.2436, "step": 503 }, { "epoch": 0.03866859327647277, "grad_norm": 0.2643290162086487, "learning_rate": 9.996814630992719e-06, "loss": 1.2474, "step": 504 }, { "epoch": 0.03874531667583085, "grad_norm": 0.3599962890148163, "learning_rate": 9.996770240921205e-06, "loss": 1.285, "step": 505 }, { "epoch": 0.038822040075188934, "grad_norm": 0.24787691235542297, "learning_rate": 9.996725543784737e-06, "loss": 1.287, "step": 506 }, { "epoch": 0.038898763474547016, "grad_norm": 0.2712421715259552, "learning_rate": 9.996680539586061e-06, "loss": 1.2967, "step": 507 }, { "epoch": 0.03897548687390509, "grad_norm": 0.22311891615390778, "learning_rate": 9.99663522832794e-06, "loss": 1.3178, "step": 508 }, { "epoch": 0.03905221027326317, "grad_norm": 0.19409561157226562, "learning_rate": 9.996589610013162e-06, "loss": 1.2938, "step": 509 }, { "epoch": 0.039128933672621255, "grad_norm": 0.3466380834579468, "learning_rate": 9.996543684644531e-06, "loss": 1.2699, "step": 510 }, { "epoch": 0.03920565707197934, "grad_norm": 0.26541173458099365, "learning_rate": 9.996497452224866e-06, "loss": 1.1803, "step": 511 }, { "epoch": 0.03928238047133742, "grad_norm": 0.2754061818122864, "learning_rate": 9.99645091275701e-06, "loss": 1.2529, "step": 512 }, { "epoch": 0.039359103870695494, "grad_norm": 0.6809425354003906, "learning_rate": 9.996404066243822e-06, "loss": 1.2351, "step": 513 }, { "epoch": 0.039435827270053576, "grad_norm": 0.36456820368766785, "learning_rate": 9.996356912688181e-06, "loss": 1.2501, "step": 514 }, { "epoch": 0.03951255066941166, "grad_norm": 0.21652455627918243, "learning_rate": 9.996309452092986e-06, "loss": 1.2617, "step": 515 }, { "epoch": 0.03958927406876974, "grad_norm": 0.18310308456420898, "learning_rate": 9.996261684461153e-06, "loss": 1.3137, "step": 516 }, { "epoch": 0.03966599746812782, "grad_norm": 0.2332114428281784, "learning_rate": 9.996213609795615e-06, "loss": 1.1665, "step": 517 }, { "epoch": 0.039742720867485905, "grad_norm": 0.2283095419406891, "learning_rate": 9.99616522809933e-06, "loss": 1.2868, "step": 518 }, { "epoch": 0.03981944426684398, "grad_norm": 0.2711581885814667, "learning_rate": 9.99611653937527e-06, "loss": 1.2376, "step": 519 }, { "epoch": 0.03989616766620206, "grad_norm": 0.22139790654182434, "learning_rate": 9.996067543626424e-06, "loss": 1.2697, "step": 520 }, { "epoch": 0.039972891065560144, "grad_norm": 0.3903944492340088, "learning_rate": 9.996018240855809e-06, "loss": 1.2883, "step": 521 }, { "epoch": 0.040049614464918226, "grad_norm": 0.2954972982406616, "learning_rate": 9.995968631066449e-06, "loss": 1.1468, "step": 522 }, { "epoch": 0.04012633786427631, "grad_norm": 0.2505805790424347, "learning_rate": 9.995918714261396e-06, "loss": 1.2891, "step": 523 }, { "epoch": 0.04020306126363439, "grad_norm": 0.24833396077156067, "learning_rate": 9.995868490443716e-06, "loss": 1.2345, "step": 524 }, { "epoch": 0.040279784662992466, "grad_norm": 0.2623893916606903, "learning_rate": 9.995817959616496e-06, "loss": 1.2425, "step": 525 }, { "epoch": 0.04035650806235055, "grad_norm": 0.26694464683532715, "learning_rate": 9.995767121782841e-06, "loss": 1.2649, "step": 526 }, { "epoch": 0.04043323146170863, "grad_norm": 0.2950114607810974, "learning_rate": 9.995715976945876e-06, "loss": 1.3109, "step": 527 }, { "epoch": 0.04050995486106671, "grad_norm": 0.20784491300582886, "learning_rate": 9.995664525108743e-06, "loss": 1.2532, "step": 528 }, { "epoch": 0.040586678260424794, "grad_norm": 0.19237695634365082, "learning_rate": 9.995612766274604e-06, "loss": 1.2416, "step": 529 }, { "epoch": 0.040663401659782876, "grad_norm": 0.2260851263999939, "learning_rate": 9.99556070044664e-06, "loss": 1.2132, "step": 530 }, { "epoch": 0.04074012505914095, "grad_norm": 0.23201480507850647, "learning_rate": 9.99550832762805e-06, "loss": 1.2477, "step": 531 }, { "epoch": 0.04081684845849903, "grad_norm": 0.20699240267276764, "learning_rate": 9.995455647822053e-06, "loss": 1.3271, "step": 532 }, { "epoch": 0.040893571857857115, "grad_norm": 0.2764204144477844, "learning_rate": 9.995402661031886e-06, "loss": 1.3171, "step": 533 }, { "epoch": 0.0409702952572152, "grad_norm": 0.3914581537246704, "learning_rate": 9.995349367260807e-06, "loss": 1.2006, "step": 534 }, { "epoch": 0.04104701865657328, "grad_norm": 0.41268664598464966, "learning_rate": 9.995295766512088e-06, "loss": 1.2523, "step": 535 }, { "epoch": 0.04112374205593136, "grad_norm": 0.24759916961193085, "learning_rate": 9.995241858789024e-06, "loss": 1.2371, "step": 536 }, { "epoch": 0.04120046545528944, "grad_norm": 0.279669851064682, "learning_rate": 9.995187644094929e-06, "loss": 1.2254, "step": 537 }, { "epoch": 0.04127718885464752, "grad_norm": 0.34428122639656067, "learning_rate": 9.995133122433134e-06, "loss": 1.2312, "step": 538 }, { "epoch": 0.0413539122540056, "grad_norm": 0.2280312180519104, "learning_rate": 9.995078293806988e-06, "loss": 1.2452, "step": 539 }, { "epoch": 0.04143063565336368, "grad_norm": 0.24581754207611084, "learning_rate": 9.995023158219862e-06, "loss": 1.2117, "step": 540 }, { "epoch": 0.041507359052721765, "grad_norm": 0.22451943159103394, "learning_rate": 9.994967715675144e-06, "loss": 1.2924, "step": 541 }, { "epoch": 0.04158408245207984, "grad_norm": 0.24037761986255646, "learning_rate": 9.994911966176242e-06, "loss": 1.2144, "step": 542 }, { "epoch": 0.04166080585143792, "grad_norm": 0.2972753643989563, "learning_rate": 9.994855909726579e-06, "loss": 1.2001, "step": 543 }, { "epoch": 0.041737529250796004, "grad_norm": 0.2558774948120117, "learning_rate": 9.994799546329604e-06, "loss": 1.347, "step": 544 }, { "epoch": 0.041814252650154086, "grad_norm": 0.30288153886795044, "learning_rate": 9.994742875988776e-06, "loss": 1.2233, "step": 545 }, { "epoch": 0.04189097604951217, "grad_norm": 0.25475507974624634, "learning_rate": 9.994685898707581e-06, "loss": 1.2194, "step": 546 }, { "epoch": 0.04196769944887025, "grad_norm": 0.1237250491976738, "learning_rate": 9.994628614489519e-06, "loss": 1.3889, "step": 547 }, { "epoch": 0.042044422848228326, "grad_norm": 0.20429961383342743, "learning_rate": 9.99457102333811e-06, "loss": 1.2724, "step": 548 }, { "epoch": 0.04212114624758641, "grad_norm": 0.21725128591060638, "learning_rate": 9.994513125256896e-06, "loss": 1.3035, "step": 549 }, { "epoch": 0.04219786964694449, "grad_norm": 0.23822589218616486, "learning_rate": 9.994454920249433e-06, "loss": 1.1949, "step": 550 }, { "epoch": 0.04227459304630257, "grad_norm": 0.18021629750728607, "learning_rate": 9.994396408319294e-06, "loss": 1.2053, "step": 551 }, { "epoch": 0.042351316445660654, "grad_norm": 0.11439035087823868, "learning_rate": 9.99433758947008e-06, "loss": 1.3519, "step": 552 }, { "epoch": 0.042428039845018736, "grad_norm": 0.20459936559200287, "learning_rate": 9.994278463705405e-06, "loss": 1.2214, "step": 553 }, { "epoch": 0.04250476324437681, "grad_norm": 0.3479028642177582, "learning_rate": 9.9942190310289e-06, "loss": 1.2226, "step": 554 }, { "epoch": 0.042581486643734894, "grad_norm": 0.20189169049263, "learning_rate": 9.994159291444221e-06, "loss": 1.3161, "step": 555 }, { "epoch": 0.042658210043092976, "grad_norm": 0.1949400007724762, "learning_rate": 9.994099244955034e-06, "loss": 1.2156, "step": 556 }, { "epoch": 0.04273493344245106, "grad_norm": 0.24813023209571838, "learning_rate": 9.994038891565034e-06, "loss": 1.2009, "step": 557 }, { "epoch": 0.04281165684180914, "grad_norm": 0.1944672167301178, "learning_rate": 9.993978231277926e-06, "loss": 1.3152, "step": 558 }, { "epoch": 0.04288838024116722, "grad_norm": 0.18616439402103424, "learning_rate": 9.993917264097439e-06, "loss": 1.2804, "step": 559 }, { "epoch": 0.0429651036405253, "grad_norm": 0.5330526828765869, "learning_rate": 9.993855990027321e-06, "loss": 1.2284, "step": 560 }, { "epoch": 0.04304182703988338, "grad_norm": 0.29492369294166565, "learning_rate": 9.993794409071337e-06, "loss": 1.2548, "step": 561 }, { "epoch": 0.04311855043924146, "grad_norm": 0.20122799277305603, "learning_rate": 9.99373252123327e-06, "loss": 1.2783, "step": 562 }, { "epoch": 0.04319527383859954, "grad_norm": 0.21647034585475922, "learning_rate": 9.993670326516924e-06, "loss": 1.325, "step": 563 }, { "epoch": 0.043271997237957625, "grad_norm": 0.6621251106262207, "learning_rate": 9.993607824926122e-06, "loss": 1.2458, "step": 564 }, { "epoch": 0.04334872063731571, "grad_norm": 0.20965684950351715, "learning_rate": 9.993545016464703e-06, "loss": 1.2524, "step": 565 }, { "epoch": 0.04342544403667378, "grad_norm": 0.2666740417480469, "learning_rate": 9.993481901136528e-06, "loss": 1.1964, "step": 566 }, { "epoch": 0.043502167436031865, "grad_norm": 0.25023695826530457, "learning_rate": 9.993418478945474e-06, "loss": 1.2028, "step": 567 }, { "epoch": 0.04357889083538995, "grad_norm": 0.48844221234321594, "learning_rate": 9.993354749895442e-06, "loss": 1.2052, "step": 568 }, { "epoch": 0.04365561423474803, "grad_norm": 0.2547411322593689, "learning_rate": 9.993290713990343e-06, "loss": 1.1387, "step": 569 }, { "epoch": 0.04373233763410611, "grad_norm": 0.10419487953186035, "learning_rate": 9.993226371234118e-06, "loss": 1.2923, "step": 570 }, { "epoch": 0.043809061033464186, "grad_norm": 0.2313835620880127, "learning_rate": 9.993161721630717e-06, "loss": 1.2243, "step": 571 }, { "epoch": 0.04388578443282227, "grad_norm": 0.2284388542175293, "learning_rate": 9.993096765184115e-06, "loss": 1.1966, "step": 572 }, { "epoch": 0.04396250783218035, "grad_norm": 1.2066099643707275, "learning_rate": 9.993031501898301e-06, "loss": 1.2931, "step": 573 }, { "epoch": 0.04403923123153843, "grad_norm": 0.1880829781293869, "learning_rate": 9.992965931777287e-06, "loss": 1.2919, "step": 574 }, { "epoch": 0.044115954630896514, "grad_norm": 0.21972961723804474, "learning_rate": 9.992900054825106e-06, "loss": 1.2825, "step": 575 }, { "epoch": 0.0441926780302546, "grad_norm": 0.21770690381526947, "learning_rate": 9.992833871045802e-06, "loss": 1.2158, "step": 576 }, { "epoch": 0.04426940142961267, "grad_norm": 0.22363857924938202, "learning_rate": 9.992767380443442e-06, "loss": 1.22, "step": 577 }, { "epoch": 0.044346124828970754, "grad_norm": 0.20427748560905457, "learning_rate": 9.992700583022115e-06, "loss": 1.1658, "step": 578 }, { "epoch": 0.044422848228328836, "grad_norm": 0.4425443410873413, "learning_rate": 9.992633478785924e-06, "loss": 1.2646, "step": 579 }, { "epoch": 0.04449957162768692, "grad_norm": 0.2521590292453766, "learning_rate": 9.992566067738994e-06, "loss": 1.2532, "step": 580 }, { "epoch": 0.044576295027045, "grad_norm": 0.19182202219963074, "learning_rate": 9.992498349885464e-06, "loss": 1.2754, "step": 581 }, { "epoch": 0.04465301842640308, "grad_norm": 0.23042963445186615, "learning_rate": 9.992430325229501e-06, "loss": 1.2703, "step": 582 }, { "epoch": 0.04472974182576116, "grad_norm": 0.230568528175354, "learning_rate": 9.99236199377528e-06, "loss": 1.2174, "step": 583 }, { "epoch": 0.04480646522511924, "grad_norm": 0.28546300530433655, "learning_rate": 9.992293355527006e-06, "loss": 1.2698, "step": 584 }, { "epoch": 0.04488318862447732, "grad_norm": 0.21866431832313538, "learning_rate": 9.99222441048889e-06, "loss": 1.2241, "step": 585 }, { "epoch": 0.044959912023835404, "grad_norm": 0.23840831220149994, "learning_rate": 9.992155158665174e-06, "loss": 1.1737, "step": 586 }, { "epoch": 0.045036635423193486, "grad_norm": 0.24156378209590912, "learning_rate": 9.992085600060111e-06, "loss": 1.2289, "step": 587 }, { "epoch": 0.04511335882255157, "grad_norm": 0.27334803342819214, "learning_rate": 9.992015734677979e-06, "loss": 1.1606, "step": 588 }, { "epoch": 0.04519008222190964, "grad_norm": 0.18279331922531128, "learning_rate": 9.991945562523067e-06, "loss": 1.297, "step": 589 }, { "epoch": 0.045266805621267725, "grad_norm": 0.24264231324195862, "learning_rate": 9.991875083599689e-06, "loss": 1.2631, "step": 590 }, { "epoch": 0.04534352902062581, "grad_norm": 0.2491876631975174, "learning_rate": 9.991804297912178e-06, "loss": 1.2269, "step": 591 }, { "epoch": 0.04542025241998389, "grad_norm": 0.21858304738998413, "learning_rate": 9.991733205464882e-06, "loss": 1.2289, "step": 592 }, { "epoch": 0.04549697581934197, "grad_norm": 0.21209487318992615, "learning_rate": 9.99166180626217e-06, "loss": 1.175, "step": 593 }, { "epoch": 0.045573699218700046, "grad_norm": 0.259203165769577, "learning_rate": 9.99159010030843e-06, "loss": 1.2625, "step": 594 }, { "epoch": 0.04565042261805813, "grad_norm": 0.2300913780927658, "learning_rate": 9.991518087608069e-06, "loss": 1.2578, "step": 595 }, { "epoch": 0.04572714601741621, "grad_norm": 0.20810957252979279, "learning_rate": 9.99144576816551e-06, "loss": 1.1296, "step": 596 }, { "epoch": 0.04580386941677429, "grad_norm": 0.20679593086242676, "learning_rate": 9.991373141985202e-06, "loss": 1.1446, "step": 597 }, { "epoch": 0.045880592816132375, "grad_norm": 0.24771319329738617, "learning_rate": 9.991300209071605e-06, "loss": 1.2808, "step": 598 }, { "epoch": 0.04595731621549046, "grad_norm": 0.4097304046154022, "learning_rate": 9.9912269694292e-06, "loss": 1.2246, "step": 599 }, { "epoch": 0.04603403961484853, "grad_norm": 0.2275397926568985, "learning_rate": 9.991153423062488e-06, "loss": 1.2375, "step": 600 }, { "epoch": 0.046110763014206614, "grad_norm": 0.24694626033306122, "learning_rate": 9.99107956997599e-06, "loss": 1.2019, "step": 601 }, { "epoch": 0.046187486413564696, "grad_norm": 0.2502727806568146, "learning_rate": 9.991005410174244e-06, "loss": 1.2533, "step": 602 }, { "epoch": 0.04626420981292278, "grad_norm": 0.11889129877090454, "learning_rate": 9.990930943661807e-06, "loss": 1.2968, "step": 603 }, { "epoch": 0.04634093321228086, "grad_norm": 0.23334583640098572, "learning_rate": 9.990856170443255e-06, "loss": 1.1989, "step": 604 }, { "epoch": 0.04641765661163894, "grad_norm": 0.19696934521198273, "learning_rate": 9.990781090523184e-06, "loss": 1.2012, "step": 605 }, { "epoch": 0.04649438001099702, "grad_norm": 0.2737552225589752, "learning_rate": 9.99070570390621e-06, "loss": 1.2925, "step": 606 }, { "epoch": 0.0465711034103551, "grad_norm": 0.1098664253950119, "learning_rate": 9.99063001059696e-06, "loss": 1.3077, "step": 607 }, { "epoch": 0.04664782680971318, "grad_norm": 0.21293967962265015, "learning_rate": 9.990554010600088e-06, "loss": 1.1907, "step": 608 }, { "epoch": 0.046724550209071264, "grad_norm": 0.20213910937309265, "learning_rate": 9.990477703920267e-06, "loss": 1.2327, "step": 609 }, { "epoch": 0.046801273608429346, "grad_norm": 0.2048557847738266, "learning_rate": 9.990401090562182e-06, "loss": 1.2041, "step": 610 }, { "epoch": 0.04687799700778743, "grad_norm": 0.26774269342422485, "learning_rate": 9.990324170530545e-06, "loss": 1.2539, "step": 611 }, { "epoch": 0.0469547204071455, "grad_norm": 0.1084364727139473, "learning_rate": 9.990246943830082e-06, "loss": 1.3227, "step": 612 }, { "epoch": 0.047031443806503585, "grad_norm": 0.25765904784202576, "learning_rate": 9.990169410465537e-06, "loss": 1.2299, "step": 613 }, { "epoch": 0.04710816720586167, "grad_norm": 0.31429535150527954, "learning_rate": 9.990091570441675e-06, "loss": 1.2085, "step": 614 }, { "epoch": 0.04718489060521975, "grad_norm": 0.2105252742767334, "learning_rate": 9.99001342376328e-06, "loss": 1.2195, "step": 615 }, { "epoch": 0.04726161400457783, "grad_norm": 0.19080349802970886, "learning_rate": 9.989934970435155e-06, "loss": 1.2357, "step": 616 }, { "epoch": 0.047338337403935914, "grad_norm": 0.2577090561389923, "learning_rate": 9.989856210462123e-06, "loss": 1.2658, "step": 617 }, { "epoch": 0.04741506080329399, "grad_norm": 0.3184415102005005, "learning_rate": 9.98977714384902e-06, "loss": 1.2773, "step": 618 }, { "epoch": 0.04749178420265207, "grad_norm": 0.21229054033756256, "learning_rate": 9.989697770600706e-06, "loss": 1.3071, "step": 619 }, { "epoch": 0.04756850760201015, "grad_norm": 0.2445012927055359, "learning_rate": 9.98961809072206e-06, "loss": 1.1697, "step": 620 }, { "epoch": 0.047645231001368235, "grad_norm": 0.34534141421318054, "learning_rate": 9.989538104217975e-06, "loss": 1.19, "step": 621 }, { "epoch": 0.04772195440072632, "grad_norm": 0.23724716901779175, "learning_rate": 9.989457811093372e-06, "loss": 1.32, "step": 622 }, { "epoch": 0.04779867780008439, "grad_norm": 0.2452801764011383, "learning_rate": 9.989377211353182e-06, "loss": 1.1865, "step": 623 }, { "epoch": 0.047875401199442474, "grad_norm": 0.2237192988395691, "learning_rate": 9.989296305002358e-06, "loss": 1.1901, "step": 624 }, { "epoch": 0.047952124598800556, "grad_norm": 0.5894109010696411, "learning_rate": 9.989215092045871e-06, "loss": 1.3194, "step": 625 }, { "epoch": 0.04802884799815864, "grad_norm": 0.18264083564281464, "learning_rate": 9.989133572488716e-06, "loss": 1.2688, "step": 626 }, { "epoch": 0.04810557139751672, "grad_norm": 0.24105511605739594, "learning_rate": 9.989051746335898e-06, "loss": 1.2129, "step": 627 }, { "epoch": 0.0481822947968748, "grad_norm": 0.19629959762096405, "learning_rate": 9.988969613592448e-06, "loss": 1.1675, "step": 628 }, { "epoch": 0.04825901819623288, "grad_norm": 0.3396308124065399, "learning_rate": 9.988887174263412e-06, "loss": 1.2439, "step": 629 }, { "epoch": 0.04833574159559096, "grad_norm": 0.10687503218650818, "learning_rate": 9.988804428353856e-06, "loss": 1.3996, "step": 630 }, { "epoch": 0.04841246499494904, "grad_norm": 0.21053101122379303, "learning_rate": 9.988721375868867e-06, "loss": 1.2476, "step": 631 }, { "epoch": 0.048489188394307124, "grad_norm": 0.29876211285591125, "learning_rate": 9.988638016813545e-06, "loss": 1.2264, "step": 632 }, { "epoch": 0.048565911793665206, "grad_norm": 0.21271085739135742, "learning_rate": 9.988554351193016e-06, "loss": 1.2234, "step": 633 }, { "epoch": 0.04864263519302329, "grad_norm": 0.601025402545929, "learning_rate": 9.988470379012421e-06, "loss": 1.2309, "step": 634 }, { "epoch": 0.048719358592381364, "grad_norm": 0.24204622209072113, "learning_rate": 9.98838610027692e-06, "loss": 1.2037, "step": 635 }, { "epoch": 0.048796081991739446, "grad_norm": 0.35635578632354736, "learning_rate": 9.988301514991692e-06, "loss": 1.216, "step": 636 }, { "epoch": 0.04887280539109753, "grad_norm": 0.2113218754529953, "learning_rate": 9.988216623161933e-06, "loss": 1.1802, "step": 637 }, { "epoch": 0.04894952879045561, "grad_norm": 0.18918994069099426, "learning_rate": 9.988131424792862e-06, "loss": 1.17, "step": 638 }, { "epoch": 0.04902625218981369, "grad_norm": 0.28360670804977417, "learning_rate": 9.988045919889717e-06, "loss": 1.2274, "step": 639 }, { "epoch": 0.049102975589171774, "grad_norm": 0.21496595442295074, "learning_rate": 9.987960108457748e-06, "loss": 1.2169, "step": 640 }, { "epoch": 0.04917969898852985, "grad_norm": 0.22111119329929352, "learning_rate": 9.98787399050223e-06, "loss": 1.2018, "step": 641 }, { "epoch": 0.04925642238788793, "grad_norm": 0.11109046638011932, "learning_rate": 9.987787566028455e-06, "loss": 1.3005, "step": 642 }, { "epoch": 0.04933314578724601, "grad_norm": 0.2547661066055298, "learning_rate": 9.987700835041737e-06, "loss": 1.1949, "step": 643 }, { "epoch": 0.049409869186604095, "grad_norm": 0.30830472707748413, "learning_rate": 9.9876137975474e-06, "loss": 1.2112, "step": 644 }, { "epoch": 0.04948659258596218, "grad_norm": 0.24565006792545319, "learning_rate": 9.987526453550798e-06, "loss": 1.2956, "step": 645 }, { "epoch": 0.04956331598532026, "grad_norm": 0.2418341040611267, "learning_rate": 9.987438803057295e-06, "loss": 1.2011, "step": 646 }, { "epoch": 0.049640039384678335, "grad_norm": 0.2360900491476059, "learning_rate": 9.987350846072282e-06, "loss": 1.1913, "step": 647 }, { "epoch": 0.04971676278403642, "grad_norm": 0.21127799153327942, "learning_rate": 9.987262582601156e-06, "loss": 1.2267, "step": 648 }, { "epoch": 0.0497934861833945, "grad_norm": 0.2809925079345703, "learning_rate": 9.98717401264935e-06, "loss": 1.2198, "step": 649 }, { "epoch": 0.04987020958275258, "grad_norm": 0.3074859380722046, "learning_rate": 9.987085136222302e-06, "loss": 1.2299, "step": 650 }, { "epoch": 0.04994693298211066, "grad_norm": 0.2540885806083679, "learning_rate": 9.986995953325474e-06, "loss": 1.2182, "step": 651 }, { "epoch": 0.05002365638146874, "grad_norm": 0.23530882596969604, "learning_rate": 9.986906463964347e-06, "loss": 1.2065, "step": 652 }, { "epoch": 0.05010037978082682, "grad_norm": 0.17502422630786896, "learning_rate": 9.986816668144423e-06, "loss": 1.2493, "step": 653 }, { "epoch": 0.0501771031801849, "grad_norm": 0.2436494529247284, "learning_rate": 9.986726565871214e-06, "loss": 1.2451, "step": 654 }, { "epoch": 0.050253826579542984, "grad_norm": 0.28221800923347473, "learning_rate": 9.986636157150264e-06, "loss": 1.1875, "step": 655 }, { "epoch": 0.05033054997890107, "grad_norm": 0.18366551399230957, "learning_rate": 9.986545441987123e-06, "loss": 1.2136, "step": 656 }, { "epoch": 0.05040727337825915, "grad_norm": 0.23397698998451233, "learning_rate": 9.98645442038737e-06, "loss": 1.2724, "step": 657 }, { "epoch": 0.050483996777617224, "grad_norm": 0.236169695854187, "learning_rate": 9.986363092356597e-06, "loss": 1.2727, "step": 658 }, { "epoch": 0.050560720176975306, "grad_norm": 0.2387424260377884, "learning_rate": 9.986271457900414e-06, "loss": 1.2412, "step": 659 }, { "epoch": 0.05063744357633339, "grad_norm": 0.10751493275165558, "learning_rate": 9.986179517024458e-06, "loss": 1.309, "step": 660 }, { "epoch": 0.05071416697569147, "grad_norm": 0.23048174381256104, "learning_rate": 9.986087269734373e-06, "loss": 1.2204, "step": 661 }, { "epoch": 0.05079089037504955, "grad_norm": 0.20765632390975952, "learning_rate": 9.98599471603583e-06, "loss": 1.2993, "step": 662 }, { "epoch": 0.050867613774407634, "grad_norm": 0.2412852793931961, "learning_rate": 9.985901855934516e-06, "loss": 1.1228, "step": 663 }, { "epoch": 0.05094433717376571, "grad_norm": 0.1999836266040802, "learning_rate": 9.98580868943614e-06, "loss": 1.2727, "step": 664 }, { "epoch": 0.05102106057312379, "grad_norm": 0.4222097396850586, "learning_rate": 9.985715216546425e-06, "loss": 1.2307, "step": 665 }, { "epoch": 0.051097783972481874, "grad_norm": 0.20755556225776672, "learning_rate": 9.985621437271115e-06, "loss": 1.1099, "step": 666 }, { "epoch": 0.051174507371839956, "grad_norm": 0.24093271791934967, "learning_rate": 9.985527351615977e-06, "loss": 1.1105, "step": 667 }, { "epoch": 0.05125123077119804, "grad_norm": 0.2956949472427368, "learning_rate": 9.985432959586787e-06, "loss": 1.3228, "step": 668 }, { "epoch": 0.05132795417055612, "grad_norm": 0.25842249393463135, "learning_rate": 9.98533826118935e-06, "loss": 1.1829, "step": 669 }, { "epoch": 0.051404677569914195, "grad_norm": 0.22988830506801605, "learning_rate": 9.985243256429482e-06, "loss": 1.2004, "step": 670 }, { "epoch": 0.05148140096927228, "grad_norm": 0.19600611925125122, "learning_rate": 9.985147945313024e-06, "loss": 1.2025, "step": 671 }, { "epoch": 0.05155812436863036, "grad_norm": 0.23289014399051666, "learning_rate": 9.985052327845833e-06, "loss": 1.1856, "step": 672 }, { "epoch": 0.05163484776798844, "grad_norm": 0.21189795434474945, "learning_rate": 9.984956404033784e-06, "loss": 1.1917, "step": 673 }, { "epoch": 0.05171157116734652, "grad_norm": 1.0300487279891968, "learning_rate": 9.98486017388277e-06, "loss": 1.2122, "step": 674 }, { "epoch": 0.051788294566704605, "grad_norm": 0.18170499801635742, "learning_rate": 9.984763637398707e-06, "loss": 1.1709, "step": 675 }, { "epoch": 0.05186501796606268, "grad_norm": 0.20418716967105865, "learning_rate": 9.984666794587528e-06, "loss": 1.3126, "step": 676 }, { "epoch": 0.05194174136542076, "grad_norm": 0.19500063359737396, "learning_rate": 9.984569645455184e-06, "loss": 1.1214, "step": 677 }, { "epoch": 0.052018464764778845, "grad_norm": 0.2688665986061096, "learning_rate": 9.984472190007643e-06, "loss": 1.2177, "step": 678 }, { "epoch": 0.05209518816413693, "grad_norm": 0.2771223187446594, "learning_rate": 9.984374428250894e-06, "loss": 1.2532, "step": 679 }, { "epoch": 0.05217191156349501, "grad_norm": 0.21454617381095886, "learning_rate": 9.984276360190948e-06, "loss": 1.2136, "step": 680 }, { "epoch": 0.052248634962853084, "grad_norm": 0.2970345616340637, "learning_rate": 9.98417798583383e-06, "loss": 1.2554, "step": 681 }, { "epoch": 0.052325358362211166, "grad_norm": 1.2983810901641846, "learning_rate": 9.984079305185582e-06, "loss": 1.1706, "step": 682 }, { "epoch": 0.05240208176156925, "grad_norm": 0.20650850236415863, "learning_rate": 9.983980318252274e-06, "loss": 1.231, "step": 683 }, { "epoch": 0.05247880516092733, "grad_norm": 0.2142941951751709, "learning_rate": 9.983881025039984e-06, "loss": 1.1759, "step": 684 }, { "epoch": 0.05255552856028541, "grad_norm": 0.20206665992736816, "learning_rate": 9.983781425554817e-06, "loss": 1.2011, "step": 685 }, { "epoch": 0.052632251959643495, "grad_norm": 0.19401752948760986, "learning_rate": 9.983681519802892e-06, "loss": 1.3036, "step": 686 }, { "epoch": 0.05270897535900157, "grad_norm": 0.24168650805950165, "learning_rate": 9.983581307790348e-06, "loss": 1.2482, "step": 687 }, { "epoch": 0.05278569875835965, "grad_norm": 0.22906172275543213, "learning_rate": 9.983480789523346e-06, "loss": 1.2397, "step": 688 }, { "epoch": 0.052862422157717734, "grad_norm": 0.23516534268856049, "learning_rate": 9.98337996500806e-06, "loss": 1.2291, "step": 689 }, { "epoch": 0.052939145557075816, "grad_norm": 0.20652970671653748, "learning_rate": 9.98327883425069e-06, "loss": 1.1826, "step": 690 }, { "epoch": 0.0530158689564339, "grad_norm": 0.2302001565694809, "learning_rate": 9.983177397257444e-06, "loss": 1.2164, "step": 691 }, { "epoch": 0.05309259235579198, "grad_norm": 0.2381875365972519, "learning_rate": 9.983075654034563e-06, "loss": 1.2806, "step": 692 }, { "epoch": 0.053169315755150055, "grad_norm": 0.24792851507663727, "learning_rate": 9.982973604588296e-06, "loss": 1.1999, "step": 693 }, { "epoch": 0.05324603915450814, "grad_norm": 0.21285973489284515, "learning_rate": 9.982871248924912e-06, "loss": 1.2289, "step": 694 }, { "epoch": 0.05332276255386622, "grad_norm": 0.2033403217792511, "learning_rate": 9.982768587050703e-06, "loss": 1.191, "step": 695 }, { "epoch": 0.0533994859532243, "grad_norm": 0.24607358872890472, "learning_rate": 9.98266561897198e-06, "loss": 1.2345, "step": 696 }, { "epoch": 0.053476209352582384, "grad_norm": 0.47056663036346436, "learning_rate": 9.98256234469507e-06, "loss": 1.204, "step": 697 }, { "epoch": 0.053552932751940466, "grad_norm": 0.24326743185520172, "learning_rate": 9.982458764226317e-06, "loss": 1.1695, "step": 698 }, { "epoch": 0.05362965615129854, "grad_norm": 0.17625798285007477, "learning_rate": 9.982354877572087e-06, "loss": 1.3563, "step": 699 }, { "epoch": 0.05370637955065662, "grad_norm": 0.19182004034519196, "learning_rate": 9.982250684738766e-06, "loss": 1.2702, "step": 700 }, { "epoch": 0.053783102950014705, "grad_norm": 0.18429864943027496, "learning_rate": 9.982146185732754e-06, "loss": 1.2606, "step": 701 }, { "epoch": 0.05385982634937279, "grad_norm": 0.23147521913051605, "learning_rate": 9.982041380560476e-06, "loss": 1.2587, "step": 702 }, { "epoch": 0.05393654974873087, "grad_norm": 0.20978869497776031, "learning_rate": 9.981936269228369e-06, "loss": 1.2711, "step": 703 }, { "epoch": 0.05401327314808895, "grad_norm": 0.2553621232509613, "learning_rate": 9.981830851742898e-06, "loss": 1.2639, "step": 704 }, { "epoch": 0.054089996547447027, "grad_norm": 0.371830016374588, "learning_rate": 9.981725128110533e-06, "loss": 1.1792, "step": 705 }, { "epoch": 0.05416671994680511, "grad_norm": 0.3280158042907715, "learning_rate": 9.981619098337779e-06, "loss": 1.2667, "step": 706 }, { "epoch": 0.05424344334616319, "grad_norm": 0.20234818756580353, "learning_rate": 9.981512762431148e-06, "loss": 1.1756, "step": 707 }, { "epoch": 0.05432016674552127, "grad_norm": 0.21105781197547913, "learning_rate": 9.981406120397172e-06, "loss": 1.2969, "step": 708 }, { "epoch": 0.054396890144879355, "grad_norm": 0.24239419400691986, "learning_rate": 9.98129917224241e-06, "loss": 1.2055, "step": 709 }, { "epoch": 0.05447361354423743, "grad_norm": 0.2665744721889496, "learning_rate": 9.981191917973431e-06, "loss": 1.1915, "step": 710 }, { "epoch": 0.05455033694359551, "grad_norm": 0.2387894093990326, "learning_rate": 9.981084357596827e-06, "loss": 1.1751, "step": 711 }, { "epoch": 0.054627060342953594, "grad_norm": 0.2770284414291382, "learning_rate": 9.980976491119207e-06, "loss": 1.1627, "step": 712 }, { "epoch": 0.054703783742311676, "grad_norm": 0.3014388382434845, "learning_rate": 9.980868318547201e-06, "loss": 1.1706, "step": 713 }, { "epoch": 0.05478050714166976, "grad_norm": 0.26382187008857727, "learning_rate": 9.980759839887456e-06, "loss": 1.1947, "step": 714 }, { "epoch": 0.05485723054102784, "grad_norm": 0.2248222976922989, "learning_rate": 9.980651055146638e-06, "loss": 1.1691, "step": 715 }, { "epoch": 0.054933953940385916, "grad_norm": 0.20848211646080017, "learning_rate": 9.98054196433143e-06, "loss": 1.3176, "step": 716 }, { "epoch": 0.055010677339744, "grad_norm": 0.30045029520988464, "learning_rate": 9.98043256744854e-06, "loss": 1.1813, "step": 717 }, { "epoch": 0.05508740073910208, "grad_norm": 0.24183505773544312, "learning_rate": 9.980322864504688e-06, "loss": 1.212, "step": 718 }, { "epoch": 0.05516412413846016, "grad_norm": 0.22522902488708496, "learning_rate": 9.980212855506617e-06, "loss": 1.2411, "step": 719 }, { "epoch": 0.055240847537818244, "grad_norm": 0.295135498046875, "learning_rate": 9.980102540461088e-06, "loss": 1.2167, "step": 720 }, { "epoch": 0.055317570937176326, "grad_norm": 0.1863960474729538, "learning_rate": 9.979991919374877e-06, "loss": 1.2651, "step": 721 }, { "epoch": 0.0553942943365344, "grad_norm": 0.18597117066383362, "learning_rate": 9.979880992254785e-06, "loss": 1.2713, "step": 722 }, { "epoch": 0.05547101773589248, "grad_norm": 0.28389275074005127, "learning_rate": 9.979769759107626e-06, "loss": 1.2612, "step": 723 }, { "epoch": 0.055547741135250565, "grad_norm": 0.36194804310798645, "learning_rate": 9.979658219940238e-06, "loss": 1.1901, "step": 724 }, { "epoch": 0.05562446453460865, "grad_norm": 0.24286049604415894, "learning_rate": 9.979546374759477e-06, "loss": 1.2063, "step": 725 }, { "epoch": 0.05570118793396673, "grad_norm": 0.1098378375172615, "learning_rate": 9.979434223572211e-06, "loss": 1.2905, "step": 726 }, { "epoch": 0.05577791133332481, "grad_norm": 4.029506683349609, "learning_rate": 9.979321766385335e-06, "loss": 1.2798, "step": 727 }, { "epoch": 0.05585463473268289, "grad_norm": 0.22922979295253754, "learning_rate": 9.979209003205761e-06, "loss": 1.1627, "step": 728 }, { "epoch": 0.05593135813204097, "grad_norm": 0.2888300120830536, "learning_rate": 9.979095934040414e-06, "loss": 1.3003, "step": 729 }, { "epoch": 0.05600808153139905, "grad_norm": 0.3216095268726349, "learning_rate": 9.978982558896248e-06, "loss": 1.1498, "step": 730 }, { "epoch": 0.05608480493075713, "grad_norm": 0.44607722759246826, "learning_rate": 9.978868877780227e-06, "loss": 1.1468, "step": 731 }, { "epoch": 0.056161528330115215, "grad_norm": 0.5633273124694824, "learning_rate": 9.97875489069934e-06, "loss": 1.1783, "step": 732 }, { "epoch": 0.0562382517294733, "grad_norm": 0.34170329570770264, "learning_rate": 9.978640597660587e-06, "loss": 1.2264, "step": 733 }, { "epoch": 0.05631497512883137, "grad_norm": 0.23327936232089996, "learning_rate": 9.978525998670996e-06, "loss": 1.2662, "step": 734 }, { "epoch": 0.056391698528189454, "grad_norm": 0.2734239995479584, "learning_rate": 9.978411093737607e-06, "loss": 1.1264, "step": 735 }, { "epoch": 0.05646842192754754, "grad_norm": 0.2888774275779724, "learning_rate": 9.978295882867481e-06, "loss": 1.2767, "step": 736 }, { "epoch": 0.05654514532690562, "grad_norm": 0.24896174669265747, "learning_rate": 9.9781803660677e-06, "loss": 1.1892, "step": 737 }, { "epoch": 0.0566218687262637, "grad_norm": 0.2967846393585205, "learning_rate": 9.97806454334536e-06, "loss": 1.2613, "step": 738 }, { "epoch": 0.056698592125621776, "grad_norm": 0.22093869745731354, "learning_rate": 9.977948414707581e-06, "loss": 1.2091, "step": 739 }, { "epoch": 0.05677531552497986, "grad_norm": 0.24744769930839539, "learning_rate": 9.9778319801615e-06, "loss": 1.2626, "step": 740 }, { "epoch": 0.05685203892433794, "grad_norm": 0.25268763303756714, "learning_rate": 9.97771523971427e-06, "loss": 1.2144, "step": 741 }, { "epoch": 0.05692876232369602, "grad_norm": 0.3008286654949188, "learning_rate": 9.977598193373065e-06, "loss": 1.248, "step": 742 }, { "epoch": 0.057005485723054104, "grad_norm": 0.30116909742355347, "learning_rate": 9.977480841145082e-06, "loss": 1.2321, "step": 743 }, { "epoch": 0.057082209122412186, "grad_norm": 0.25970542430877686, "learning_rate": 9.977363183037526e-06, "loss": 1.2174, "step": 744 }, { "epoch": 0.05715893252177026, "grad_norm": 0.22120636701583862, "learning_rate": 9.977245219057631e-06, "loss": 1.2354, "step": 745 }, { "epoch": 0.057235655921128344, "grad_norm": 0.23240453004837036, "learning_rate": 9.977126949212648e-06, "loss": 1.166, "step": 746 }, { "epoch": 0.057312379320486426, "grad_norm": 0.20269761979579926, "learning_rate": 9.97700837350984e-06, "loss": 1.2304, "step": 747 }, { "epoch": 0.05738910271984451, "grad_norm": 0.2470996081829071, "learning_rate": 9.9768894919565e-06, "loss": 1.2341, "step": 748 }, { "epoch": 0.05746582611920259, "grad_norm": 0.23030386865139008, "learning_rate": 9.97677030455993e-06, "loss": 1.2759, "step": 749 }, { "epoch": 0.05754254951856067, "grad_norm": 0.4368867874145508, "learning_rate": 9.976650811327451e-06, "loss": 1.2113, "step": 750 }, { "epoch": 0.05761927291791875, "grad_norm": 0.22525988519191742, "learning_rate": 9.976531012266414e-06, "loss": 1.1443, "step": 751 }, { "epoch": 0.05769599631727683, "grad_norm": 0.3235156536102295, "learning_rate": 9.976410907384176e-06, "loss": 1.2028, "step": 752 }, { "epoch": 0.05777271971663491, "grad_norm": 0.21749155223369598, "learning_rate": 9.976290496688117e-06, "loss": 1.2321, "step": 753 }, { "epoch": 0.05784944311599299, "grad_norm": 0.27873778343200684, "learning_rate": 9.976169780185639e-06, "loss": 1.2925, "step": 754 }, { "epoch": 0.057926166515351075, "grad_norm": 0.25601449608802795, "learning_rate": 9.97604875788416e-06, "loss": 1.2424, "step": 755 }, { "epoch": 0.05800288991470916, "grad_norm": 0.30982038378715515, "learning_rate": 9.975927429791117e-06, "loss": 1.2225, "step": 756 }, { "epoch": 0.05807961331406723, "grad_norm": 0.24239327013492584, "learning_rate": 9.975805795913966e-06, "loss": 1.2036, "step": 757 }, { "epoch": 0.058156336713425315, "grad_norm": 0.2422780692577362, "learning_rate": 9.97568385626018e-06, "loss": 1.2297, "step": 758 }, { "epoch": 0.0582330601127834, "grad_norm": 0.2091873586177826, "learning_rate": 9.975561610837254e-06, "loss": 1.1991, "step": 759 }, { "epoch": 0.05830978351214148, "grad_norm": 0.19156131148338318, "learning_rate": 9.975439059652701e-06, "loss": 1.2503, "step": 760 }, { "epoch": 0.05838650691149956, "grad_norm": 0.1803409904241562, "learning_rate": 9.975316202714052e-06, "loss": 1.2142, "step": 761 }, { "epoch": 0.05846323031085764, "grad_norm": 0.2847938537597656, "learning_rate": 9.975193040028854e-06, "loss": 1.2003, "step": 762 }, { "epoch": 0.05853995371021572, "grad_norm": 0.2668178379535675, "learning_rate": 9.975069571604681e-06, "loss": 1.2084, "step": 763 }, { "epoch": 0.0586166771095738, "grad_norm": 0.21032565832138062, "learning_rate": 9.974945797449115e-06, "loss": 1.1771, "step": 764 }, { "epoch": 0.05869340050893188, "grad_norm": 0.1755046248435974, "learning_rate": 9.974821717569764e-06, "loss": 1.1827, "step": 765 }, { "epoch": 0.058770123908289965, "grad_norm": 0.32251060009002686, "learning_rate": 9.974697331974255e-06, "loss": 1.2471, "step": 766 }, { "epoch": 0.05884684730764805, "grad_norm": 0.18384161591529846, "learning_rate": 9.97457264067023e-06, "loss": 1.26, "step": 767 }, { "epoch": 0.05892357070700612, "grad_norm": 0.20257076621055603, "learning_rate": 9.974447643665354e-06, "loss": 1.2258, "step": 768 }, { "epoch": 0.059000294106364204, "grad_norm": 0.2636127769947052, "learning_rate": 9.974322340967305e-06, "loss": 1.1936, "step": 769 }, { "epoch": 0.059077017505722286, "grad_norm": 0.23883545398712158, "learning_rate": 9.974196732583784e-06, "loss": 1.2069, "step": 770 }, { "epoch": 0.05915374090508037, "grad_norm": 0.24259132146835327, "learning_rate": 9.97407081852251e-06, "loss": 1.2695, "step": 771 }, { "epoch": 0.05923046430443845, "grad_norm": 0.26173973083496094, "learning_rate": 9.973944598791222e-06, "loss": 1.1625, "step": 772 }, { "epoch": 0.05930718770379653, "grad_norm": 0.19459156692028046, "learning_rate": 9.973818073397679e-06, "loss": 1.1837, "step": 773 }, { "epoch": 0.05938391110315461, "grad_norm": 0.23620089888572693, "learning_rate": 9.97369124234965e-06, "loss": 1.1812, "step": 774 }, { "epoch": 0.05946063450251269, "grad_norm": 0.9791275858879089, "learning_rate": 9.973564105654933e-06, "loss": 1.1582, "step": 775 }, { "epoch": 0.05953735790187077, "grad_norm": 0.2896304130554199, "learning_rate": 9.97343666332134e-06, "loss": 1.189, "step": 776 }, { "epoch": 0.059614081301228854, "grad_norm": 0.248335599899292, "learning_rate": 9.973308915356705e-06, "loss": 1.2823, "step": 777 }, { "epoch": 0.059690804700586936, "grad_norm": 0.20257015526294708, "learning_rate": 9.973180861768874e-06, "loss": 1.2036, "step": 778 }, { "epoch": 0.05976752809994502, "grad_norm": 0.18539679050445557, "learning_rate": 9.97305250256572e-06, "loss": 1.2502, "step": 779 }, { "epoch": 0.05984425149930309, "grad_norm": 0.1814163774251938, "learning_rate": 9.972923837755128e-06, "loss": 1.2548, "step": 780 }, { "epoch": 0.059920974898661175, "grad_norm": 0.23471570014953613, "learning_rate": 9.972794867345008e-06, "loss": 1.1372, "step": 781 }, { "epoch": 0.05999769829801926, "grad_norm": 0.21239715814590454, "learning_rate": 9.972665591343285e-06, "loss": 1.2836, "step": 782 }, { "epoch": 0.06007442169737734, "grad_norm": 0.2500782012939453, "learning_rate": 9.9725360097579e-06, "loss": 1.2094, "step": 783 }, { "epoch": 0.06015114509673542, "grad_norm": 0.21370382606983185, "learning_rate": 9.97240612259682e-06, "loss": 1.2299, "step": 784 }, { "epoch": 0.0602278684960935, "grad_norm": 0.1897403448820114, "learning_rate": 9.972275929868025e-06, "loss": 1.2243, "step": 785 }, { "epoch": 0.06030459189545158, "grad_norm": 0.2652016282081604, "learning_rate": 9.972145431579516e-06, "loss": 1.2258, "step": 786 }, { "epoch": 0.06038131529480966, "grad_norm": 0.2365858256816864, "learning_rate": 9.972014627739314e-06, "loss": 1.2457, "step": 787 }, { "epoch": 0.06045803869416774, "grad_norm": 0.3744635581970215, "learning_rate": 9.971883518355455e-06, "loss": 1.0999, "step": 788 }, { "epoch": 0.060534762093525825, "grad_norm": 0.3725389838218689, "learning_rate": 9.971752103435996e-06, "loss": 1.2563, "step": 789 }, { "epoch": 0.06061148549288391, "grad_norm": 0.18986164033412933, "learning_rate": 9.971620382989014e-06, "loss": 1.2385, "step": 790 }, { "epoch": 0.06068820889224199, "grad_norm": 0.1872301697731018, "learning_rate": 9.971488357022603e-06, "loss": 1.2337, "step": 791 }, { "epoch": 0.060764932291600064, "grad_norm": 0.26436084508895874, "learning_rate": 9.971356025544878e-06, "loss": 1.2157, "step": 792 }, { "epoch": 0.060841655690958146, "grad_norm": 0.2116760015487671, "learning_rate": 9.971223388563969e-06, "loss": 1.2503, "step": 793 }, { "epoch": 0.06091837909031623, "grad_norm": 0.20102429389953613, "learning_rate": 9.971090446088027e-06, "loss": 1.155, "step": 794 }, { "epoch": 0.06099510248967431, "grad_norm": 0.29029595851898193, "learning_rate": 9.970957198125224e-06, "loss": 1.3098, "step": 795 }, { "epoch": 0.06107182588903239, "grad_norm": 0.22340722382068634, "learning_rate": 9.970823644683745e-06, "loss": 1.2067, "step": 796 }, { "epoch": 0.06114854928839047, "grad_norm": 0.2604519724845886, "learning_rate": 9.970689785771798e-06, "loss": 1.1901, "step": 797 }, { "epoch": 0.06122527268774855, "grad_norm": 0.4126574993133545, "learning_rate": 9.970555621397613e-06, "loss": 1.2681, "step": 798 }, { "epoch": 0.06130199608710663, "grad_norm": 0.2243219017982483, "learning_rate": 9.970421151569429e-06, "loss": 1.2489, "step": 799 }, { "epoch": 0.061378719486464714, "grad_norm": 0.2938466966152191, "learning_rate": 9.970286376295513e-06, "loss": 1.1676, "step": 800 }, { "epoch": 0.061455442885822796, "grad_norm": 0.19453920423984528, "learning_rate": 9.970151295584147e-06, "loss": 1.1992, "step": 801 }, { "epoch": 0.06153216628518088, "grad_norm": 0.19231103360652924, "learning_rate": 9.970015909443633e-06, "loss": 1.1695, "step": 802 }, { "epoch": 0.06160888968453895, "grad_norm": 0.18520250916481018, "learning_rate": 9.969880217882288e-06, "loss": 1.2307, "step": 803 }, { "epoch": 0.061685613083897035, "grad_norm": 0.3316732943058014, "learning_rate": 9.969744220908452e-06, "loss": 1.2599, "step": 804 }, { "epoch": 0.06176233648325512, "grad_norm": 0.20263895392417908, "learning_rate": 9.969607918530481e-06, "loss": 1.1764, "step": 805 }, { "epoch": 0.0618390598826132, "grad_norm": 0.28803712129592896, "learning_rate": 9.969471310756755e-06, "loss": 1.2562, "step": 806 }, { "epoch": 0.06191578328197128, "grad_norm": 0.2411261796951294, "learning_rate": 9.969334397595667e-06, "loss": 1.2375, "step": 807 }, { "epoch": 0.061992506681329364, "grad_norm": 0.240923672914505, "learning_rate": 9.969197179055629e-06, "loss": 1.1111, "step": 808 }, { "epoch": 0.06206923008068744, "grad_norm": 0.20830363035202026, "learning_rate": 9.969059655145075e-06, "loss": 1.2431, "step": 809 }, { "epoch": 0.06214595348004552, "grad_norm": 0.299892783164978, "learning_rate": 9.968921825872454e-06, "loss": 1.1657, "step": 810 }, { "epoch": 0.0622226768794036, "grad_norm": 0.5830362439155579, "learning_rate": 9.96878369124624e-06, "loss": 1.1909, "step": 811 }, { "epoch": 0.062299400278761685, "grad_norm": 0.18161526322364807, "learning_rate": 9.968645251274919e-06, "loss": 1.1714, "step": 812 }, { "epoch": 0.06237612367811977, "grad_norm": 0.2263403832912445, "learning_rate": 9.968506505966999e-06, "loss": 1.2635, "step": 813 }, { "epoch": 0.06245284707747785, "grad_norm": 0.22934651374816895, "learning_rate": 9.968367455331006e-06, "loss": 1.2494, "step": 814 }, { "epoch": 0.06252957047683592, "grad_norm": 0.21693474054336548, "learning_rate": 9.968228099375485e-06, "loss": 1.1366, "step": 815 }, { "epoch": 0.06260629387619401, "grad_norm": 0.28187698125839233, "learning_rate": 9.968088438109002e-06, "loss": 1.2682, "step": 816 }, { "epoch": 0.06268301727555209, "grad_norm": 0.19727379083633423, "learning_rate": 9.967948471540136e-06, "loss": 1.1904, "step": 817 }, { "epoch": 0.06275974067491016, "grad_norm": 0.20494480431079865, "learning_rate": 9.967808199677492e-06, "loss": 1.2772, "step": 818 }, { "epoch": 0.06283646407426825, "grad_norm": 0.18013429641723633, "learning_rate": 9.967667622529686e-06, "loss": 1.228, "step": 819 }, { "epoch": 0.06291318747362633, "grad_norm": 0.26480966806411743, "learning_rate": 9.96752674010536e-06, "loss": 1.2007, "step": 820 }, { "epoch": 0.06298991087298442, "grad_norm": 0.18801580369472504, "learning_rate": 9.967385552413171e-06, "loss": 1.2999, "step": 821 }, { "epoch": 0.06306663427234249, "grad_norm": 0.23330138623714447, "learning_rate": 9.967244059461793e-06, "loss": 1.169, "step": 822 }, { "epoch": 0.06314335767170057, "grad_norm": 0.2028713971376419, "learning_rate": 9.967102261259927e-06, "loss": 1.2287, "step": 823 }, { "epoch": 0.06322008107105866, "grad_norm": 0.18306513130664825, "learning_rate": 9.966960157816279e-06, "loss": 1.2271, "step": 824 }, { "epoch": 0.06329680447041673, "grad_norm": 0.2580893635749817, "learning_rate": 9.966817749139588e-06, "loss": 1.2021, "step": 825 }, { "epoch": 0.06337352786977482, "grad_norm": 0.13242530822753906, "learning_rate": 9.966675035238602e-06, "loss": 1.3304, "step": 826 }, { "epoch": 0.0634502512691329, "grad_norm": 0.2339506298303604, "learning_rate": 9.966532016122092e-06, "loss": 1.264, "step": 827 }, { "epoch": 0.06352697466849098, "grad_norm": 1.0004160404205322, "learning_rate": 9.966388691798848e-06, "loss": 1.2056, "step": 828 }, { "epoch": 0.06360369806784906, "grad_norm": 0.17491956055164337, "learning_rate": 9.966245062277677e-06, "loss": 1.2211, "step": 829 }, { "epoch": 0.06368042146720714, "grad_norm": 0.42992937564849854, "learning_rate": 9.966101127567404e-06, "loss": 1.2413, "step": 830 }, { "epoch": 0.06375714486656522, "grad_norm": 0.3092092275619507, "learning_rate": 9.965956887676875e-06, "loss": 1.2237, "step": 831 }, { "epoch": 0.0638338682659233, "grad_norm": 0.24057431519031525, "learning_rate": 9.965812342614956e-06, "loss": 1.2135, "step": 832 }, { "epoch": 0.06391059166528139, "grad_norm": 0.33607152104377747, "learning_rate": 9.965667492390527e-06, "loss": 1.1687, "step": 833 }, { "epoch": 0.06398731506463946, "grad_norm": 0.3337099552154541, "learning_rate": 9.96552233701249e-06, "loss": 1.2005, "step": 834 }, { "epoch": 0.06406403846399754, "grad_norm": 0.24076706171035767, "learning_rate": 9.965376876489765e-06, "loss": 1.123, "step": 835 }, { "epoch": 0.06414076186335563, "grad_norm": 0.259301096200943, "learning_rate": 9.965231110831292e-06, "loss": 1.1522, "step": 836 }, { "epoch": 0.0642174852627137, "grad_norm": 0.2536930441856384, "learning_rate": 9.96508504004603e-06, "loss": 1.2259, "step": 837 }, { "epoch": 0.06429420866207179, "grad_norm": 0.10489003360271454, "learning_rate": 9.964938664142951e-06, "loss": 1.35, "step": 838 }, { "epoch": 0.06437093206142987, "grad_norm": 0.2377825528383255, "learning_rate": 9.964791983131054e-06, "loss": 1.2133, "step": 839 }, { "epoch": 0.06444765546078796, "grad_norm": 0.1813834309577942, "learning_rate": 9.964644997019353e-06, "loss": 1.2025, "step": 840 }, { "epoch": 0.06452437886014603, "grad_norm": 0.2707487642765045, "learning_rate": 9.964497705816877e-06, "loss": 1.1167, "step": 841 }, { "epoch": 0.0646011022595041, "grad_norm": 0.26350802183151245, "learning_rate": 9.96435010953268e-06, "loss": 1.2402, "step": 842 }, { "epoch": 0.0646778256588622, "grad_norm": 0.1747569888830185, "learning_rate": 9.964202208175835e-06, "loss": 1.198, "step": 843 }, { "epoch": 0.06475454905822027, "grad_norm": 0.30819904804229736, "learning_rate": 9.964054001755426e-06, "loss": 1.1155, "step": 844 }, { "epoch": 0.06483127245757836, "grad_norm": 0.10081088542938232, "learning_rate": 9.963905490280563e-06, "loss": 1.2657, "step": 845 }, { "epoch": 0.06490799585693643, "grad_norm": 0.20478694140911102, "learning_rate": 9.963756673760371e-06, "loss": 1.2253, "step": 846 }, { "epoch": 0.06498471925629451, "grad_norm": 0.21954280138015747, "learning_rate": 9.963607552203999e-06, "loss": 1.1436, "step": 847 }, { "epoch": 0.0650614426556526, "grad_norm": 0.1998278945684433, "learning_rate": 9.963458125620606e-06, "loss": 1.2081, "step": 848 }, { "epoch": 0.06513816605501067, "grad_norm": 0.19562950730323792, "learning_rate": 9.963308394019378e-06, "loss": 1.1545, "step": 849 }, { "epoch": 0.06521488945436876, "grad_norm": 0.7192337512969971, "learning_rate": 9.963158357409514e-06, "loss": 1.1713, "step": 850 }, { "epoch": 0.06529161285372684, "grad_norm": 0.4776272475719452, "learning_rate": 9.963008015800237e-06, "loss": 1.2393, "step": 851 }, { "epoch": 0.06536833625308491, "grad_norm": 0.17468340694904327, "learning_rate": 9.962857369200784e-06, "loss": 1.2079, "step": 852 }, { "epoch": 0.065445059652443, "grad_norm": 0.1934666931629181, "learning_rate": 9.962706417620413e-06, "loss": 1.2576, "step": 853 }, { "epoch": 0.06552178305180108, "grad_norm": 0.21164865791797638, "learning_rate": 9.962555161068401e-06, "loss": 1.2178, "step": 854 }, { "epoch": 0.06559850645115917, "grad_norm": 0.2207188606262207, "learning_rate": 9.96240359955404e-06, "loss": 1.2236, "step": 855 }, { "epoch": 0.06567522985051724, "grad_norm": 0.24304938316345215, "learning_rate": 9.962251733086649e-06, "loss": 1.1548, "step": 856 }, { "epoch": 0.06575195324987533, "grad_norm": 0.25389817357063293, "learning_rate": 9.962099561675557e-06, "loss": 1.1545, "step": 857 }, { "epoch": 0.0658286766492334, "grad_norm": 0.19835898280143738, "learning_rate": 9.961947085330117e-06, "loss": 1.2027, "step": 858 }, { "epoch": 0.06590540004859148, "grad_norm": 0.2336633950471878, "learning_rate": 9.961794304059697e-06, "loss": 1.1873, "step": 859 }, { "epoch": 0.06598212344794957, "grad_norm": 0.2200196087360382, "learning_rate": 9.961641217873688e-06, "loss": 1.1202, "step": 860 }, { "epoch": 0.06605884684730765, "grad_norm": 0.19298043847084045, "learning_rate": 9.961487826781495e-06, "loss": 1.2504, "step": 861 }, { "epoch": 0.06613557024666573, "grad_norm": 0.20086762309074402, "learning_rate": 9.961334130792548e-06, "loss": 1.1721, "step": 862 }, { "epoch": 0.06621229364602381, "grad_norm": 0.22524422407150269, "learning_rate": 9.96118012991629e-06, "loss": 1.2228, "step": 863 }, { "epoch": 0.06628901704538188, "grad_norm": 0.20571956038475037, "learning_rate": 9.961025824162185e-06, "loss": 1.2347, "step": 864 }, { "epoch": 0.06636574044473997, "grad_norm": 0.2065979242324829, "learning_rate": 9.960871213539715e-06, "loss": 1.2246, "step": 865 }, { "epoch": 0.06644246384409805, "grad_norm": 0.19373077154159546, "learning_rate": 9.960716298058382e-06, "loss": 1.2459, "step": 866 }, { "epoch": 0.06651918724345614, "grad_norm": 0.23490791022777557, "learning_rate": 9.960561077727705e-06, "loss": 1.1896, "step": 867 }, { "epoch": 0.06659591064281421, "grad_norm": 0.19971098005771637, "learning_rate": 9.960405552557223e-06, "loss": 1.1843, "step": 868 }, { "epoch": 0.0666726340421723, "grad_norm": 0.22759564220905304, "learning_rate": 9.960249722556494e-06, "loss": 1.1328, "step": 869 }, { "epoch": 0.06674935744153038, "grad_norm": 0.11242936551570892, "learning_rate": 9.960093587735094e-06, "loss": 1.2894, "step": 870 }, { "epoch": 0.06682608084088845, "grad_norm": 0.193562850356102, "learning_rate": 9.959937148102619e-06, "loss": 1.155, "step": 871 }, { "epoch": 0.06690280424024654, "grad_norm": 0.2101021707057953, "learning_rate": 9.95978040366868e-06, "loss": 1.1902, "step": 872 }, { "epoch": 0.06697952763960462, "grad_norm": 0.2457417994737625, "learning_rate": 9.95962335444291e-06, "loss": 1.2501, "step": 873 }, { "epoch": 0.0670562510389627, "grad_norm": 0.20427970588207245, "learning_rate": 9.959466000434962e-06, "loss": 1.2694, "step": 874 }, { "epoch": 0.06713297443832078, "grad_norm": 0.18166649341583252, "learning_rate": 9.959308341654505e-06, "loss": 1.1749, "step": 875 }, { "epoch": 0.06720969783767886, "grad_norm": 0.22622977197170258, "learning_rate": 9.959150378111226e-06, "loss": 1.1869, "step": 876 }, { "epoch": 0.06728642123703694, "grad_norm": 0.3743363916873932, "learning_rate": 9.958992109814835e-06, "loss": 1.2084, "step": 877 }, { "epoch": 0.06736314463639502, "grad_norm": 0.27311259508132935, "learning_rate": 9.958833536775057e-06, "loss": 1.211, "step": 878 }, { "epoch": 0.06743986803575311, "grad_norm": 0.22005458176136017, "learning_rate": 9.958674659001635e-06, "loss": 1.1701, "step": 879 }, { "epoch": 0.06751659143511118, "grad_norm": 0.2607896327972412, "learning_rate": 9.958515476504333e-06, "loss": 1.2446, "step": 880 }, { "epoch": 0.06759331483446926, "grad_norm": 0.22317105531692505, "learning_rate": 9.958355989292937e-06, "loss": 1.2353, "step": 881 }, { "epoch": 0.06767003823382735, "grad_norm": 0.184448704123497, "learning_rate": 9.958196197377242e-06, "loss": 1.144, "step": 882 }, { "epoch": 0.06774676163318542, "grad_norm": 0.28899452090263367, "learning_rate": 9.958036100767072e-06, "loss": 1.2098, "step": 883 }, { "epoch": 0.06782348503254351, "grad_norm": 0.23450854420661926, "learning_rate": 9.957875699472262e-06, "loss": 1.235, "step": 884 }, { "epoch": 0.06790020843190159, "grad_norm": 0.8005877137184143, "learning_rate": 9.957714993502673e-06, "loss": 1.1801, "step": 885 }, { "epoch": 0.06797693183125968, "grad_norm": 0.2514505386352539, "learning_rate": 9.957553982868178e-06, "loss": 1.2167, "step": 886 }, { "epoch": 0.06805365523061775, "grad_norm": 0.28007182478904724, "learning_rate": 9.957392667578672e-06, "loss": 1.1908, "step": 887 }, { "epoch": 0.06813037862997583, "grad_norm": 0.23354186117649078, "learning_rate": 9.957231047644068e-06, "loss": 1.2428, "step": 888 }, { "epoch": 0.06820710202933392, "grad_norm": 0.24119693040847778, "learning_rate": 9.9570691230743e-06, "loss": 1.2145, "step": 889 }, { "epoch": 0.06828382542869199, "grad_norm": 0.1872604787349701, "learning_rate": 9.956906893879318e-06, "loss": 1.2123, "step": 890 }, { "epoch": 0.06836054882805008, "grad_norm": 0.28033873438835144, "learning_rate": 9.95674436006909e-06, "loss": 1.1594, "step": 891 }, { "epoch": 0.06843727222740816, "grad_norm": 0.10264953970909119, "learning_rate": 9.956581521653604e-06, "loss": 1.3217, "step": 892 }, { "epoch": 0.06851399562676623, "grad_norm": 0.2502174973487854, "learning_rate": 9.95641837864287e-06, "loss": 1.1384, "step": 893 }, { "epoch": 0.06859071902612432, "grad_norm": 0.2591671943664551, "learning_rate": 9.956254931046909e-06, "loss": 1.2165, "step": 894 }, { "epoch": 0.0686674424254824, "grad_norm": 0.2500416040420532, "learning_rate": 9.956091178875767e-06, "loss": 1.1563, "step": 895 }, { "epoch": 0.06874416582484048, "grad_norm": 0.1878778636455536, "learning_rate": 9.955927122139508e-06, "loss": 1.2307, "step": 896 }, { "epoch": 0.06882088922419856, "grad_norm": 0.2288202941417694, "learning_rate": 9.955762760848214e-06, "loss": 1.2124, "step": 897 }, { "epoch": 0.06889761262355665, "grad_norm": 0.20093581080436707, "learning_rate": 9.955598095011985e-06, "loss": 1.1994, "step": 898 }, { "epoch": 0.06897433602291472, "grad_norm": 0.21465827524662018, "learning_rate": 9.95543312464094e-06, "loss": 1.2417, "step": 899 }, { "epoch": 0.0690510594222728, "grad_norm": 0.2044578641653061, "learning_rate": 9.955267849745215e-06, "loss": 1.1825, "step": 900 }, { "epoch": 0.06912778282163089, "grad_norm": 0.2323216199874878, "learning_rate": 9.95510227033497e-06, "loss": 1.1592, "step": 901 }, { "epoch": 0.06920450622098896, "grad_norm": 0.21269214153289795, "learning_rate": 9.95493638642038e-06, "loss": 1.2443, "step": 902 }, { "epoch": 0.06928122962034705, "grad_norm": 0.19654321670532227, "learning_rate": 9.954770198011634e-06, "loss": 1.2104, "step": 903 }, { "epoch": 0.06935795301970513, "grad_norm": 0.2075645625591278, "learning_rate": 9.954603705118951e-06, "loss": 1.1909, "step": 904 }, { "epoch": 0.0694346764190632, "grad_norm": 0.2103104442358017, "learning_rate": 9.95443690775256e-06, "loss": 1.252, "step": 905 }, { "epoch": 0.06951139981842129, "grad_norm": 1.1093826293945312, "learning_rate": 9.954269805922709e-06, "loss": 1.2173, "step": 906 }, { "epoch": 0.06958812321777937, "grad_norm": 0.25885844230651855, "learning_rate": 9.954102399639671e-06, "loss": 1.1873, "step": 907 }, { "epoch": 0.06966484661713745, "grad_norm": 0.19779928028583527, "learning_rate": 9.95393468891373e-06, "loss": 1.2081, "step": 908 }, { "epoch": 0.06974157001649553, "grad_norm": 0.19713525474071503, "learning_rate": 9.953766673755193e-06, "loss": 1.2762, "step": 909 }, { "epoch": 0.0698182934158536, "grad_norm": 0.19986748695373535, "learning_rate": 9.953598354174388e-06, "loss": 1.1913, "step": 910 }, { "epoch": 0.0698950168152117, "grad_norm": 0.2084265500307083, "learning_rate": 9.953429730181653e-06, "loss": 1.2673, "step": 911 }, { "epoch": 0.06997174021456977, "grad_norm": 0.2857493758201599, "learning_rate": 9.953260801787357e-06, "loss": 1.2054, "step": 912 }, { "epoch": 0.07004846361392786, "grad_norm": 0.1982901692390442, "learning_rate": 9.953091569001877e-06, "loss": 1.2394, "step": 913 }, { "epoch": 0.07012518701328593, "grad_norm": 0.2565310299396515, "learning_rate": 9.952922031835614e-06, "loss": 1.1774, "step": 914 }, { "epoch": 0.07020191041264402, "grad_norm": 0.28109726309776306, "learning_rate": 9.952752190298985e-06, "loss": 1.2547, "step": 915 }, { "epoch": 0.0702786338120021, "grad_norm": 0.18135510385036469, "learning_rate": 9.952582044402429e-06, "loss": 1.1529, "step": 916 }, { "epoch": 0.07035535721136017, "grad_norm": 0.3738580048084259, "learning_rate": 9.952411594156402e-06, "loss": 1.2525, "step": 917 }, { "epoch": 0.07043208061071826, "grad_norm": 0.2058338224887848, "learning_rate": 9.952240839571377e-06, "loss": 1.2001, "step": 918 }, { "epoch": 0.07050880401007634, "grad_norm": 0.22666239738464355, "learning_rate": 9.952069780657847e-06, "loss": 1.196, "step": 919 }, { "epoch": 0.07058552740943443, "grad_norm": 0.19755500555038452, "learning_rate": 9.951898417426327e-06, "loss": 1.2166, "step": 920 }, { "epoch": 0.0706622508087925, "grad_norm": 0.2136542946100235, "learning_rate": 9.951726749887344e-06, "loss": 1.2329, "step": 921 }, { "epoch": 0.07073897420815058, "grad_norm": 0.29253020882606506, "learning_rate": 9.951554778051454e-06, "loss": 1.2109, "step": 922 }, { "epoch": 0.07081569760750867, "grad_norm": 0.208912193775177, "learning_rate": 9.951382501929217e-06, "loss": 1.1217, "step": 923 }, { "epoch": 0.07089242100686674, "grad_norm": 0.20787504315376282, "learning_rate": 9.951209921531225e-06, "loss": 1.32, "step": 924 }, { "epoch": 0.07096914440622483, "grad_norm": 0.23924504220485687, "learning_rate": 9.95103703686808e-06, "loss": 1.2856, "step": 925 }, { "epoch": 0.0710458678055829, "grad_norm": 0.23158548772335052, "learning_rate": 9.95086384795041e-06, "loss": 1.1225, "step": 926 }, { "epoch": 0.071122591204941, "grad_norm": 0.19399146735668182, "learning_rate": 9.950690354788854e-06, "loss": 1.1685, "step": 927 }, { "epoch": 0.07119931460429907, "grad_norm": 0.2364669144153595, "learning_rate": 9.950516557394076e-06, "loss": 1.1483, "step": 928 }, { "epoch": 0.07127603800365714, "grad_norm": 0.21599891781806946, "learning_rate": 9.950342455776759e-06, "loss": 1.1213, "step": 929 }, { "epoch": 0.07135276140301523, "grad_norm": 0.2718731462955475, "learning_rate": 9.950168049947597e-06, "loss": 1.1656, "step": 930 }, { "epoch": 0.07142948480237331, "grad_norm": 0.1931348741054535, "learning_rate": 9.94999333991731e-06, "loss": 1.2183, "step": 931 }, { "epoch": 0.0715062082017314, "grad_norm": 0.2271847128868103, "learning_rate": 9.949818325696633e-06, "loss": 1.2044, "step": 932 }, { "epoch": 0.07158293160108947, "grad_norm": 0.2261444479227066, "learning_rate": 9.949643007296323e-06, "loss": 1.2219, "step": 933 }, { "epoch": 0.07165965500044755, "grad_norm": 0.22262246906757355, "learning_rate": 9.949467384727154e-06, "loss": 1.258, "step": 934 }, { "epoch": 0.07173637839980564, "grad_norm": 0.5637944340705872, "learning_rate": 9.949291457999917e-06, "loss": 1.2019, "step": 935 }, { "epoch": 0.07181310179916371, "grad_norm": 0.21768438816070557, "learning_rate": 9.949115227125424e-06, "loss": 1.1427, "step": 936 }, { "epoch": 0.0718898251985218, "grad_norm": 0.32564738392829895, "learning_rate": 9.948938692114504e-06, "loss": 1.2448, "step": 937 }, { "epoch": 0.07196654859787988, "grad_norm": 0.2626262903213501, "learning_rate": 9.948761852978006e-06, "loss": 1.2501, "step": 938 }, { "epoch": 0.07204327199723795, "grad_norm": 0.1879408359527588, "learning_rate": 9.948584709726797e-06, "loss": 1.0623, "step": 939 }, { "epoch": 0.07211999539659604, "grad_norm": 0.20391350984573364, "learning_rate": 9.948407262371764e-06, "loss": 1.1533, "step": 940 }, { "epoch": 0.07219671879595412, "grad_norm": 0.23372158408164978, "learning_rate": 9.948229510923811e-06, "loss": 1.2005, "step": 941 }, { "epoch": 0.0722734421953122, "grad_norm": 0.2727314531803131, "learning_rate": 9.94805145539386e-06, "loss": 1.2317, "step": 942 }, { "epoch": 0.07235016559467028, "grad_norm": 0.21817904710769653, "learning_rate": 9.947873095792856e-06, "loss": 1.2331, "step": 943 }, { "epoch": 0.07242688899402837, "grad_norm": 0.25909242033958435, "learning_rate": 9.947694432131757e-06, "loss": 1.0916, "step": 944 }, { "epoch": 0.07250361239338644, "grad_norm": 0.2372003048658371, "learning_rate": 9.947515464421541e-06, "loss": 1.1004, "step": 945 }, { "epoch": 0.07258033579274452, "grad_norm": 0.22176916897296906, "learning_rate": 9.947336192673211e-06, "loss": 1.1944, "step": 946 }, { "epoch": 0.07265705919210261, "grad_norm": 0.23065438866615295, "learning_rate": 9.947156616897782e-06, "loss": 1.1248, "step": 947 }, { "epoch": 0.07273378259146068, "grad_norm": 0.1824401617050171, "learning_rate": 9.946976737106286e-06, "loss": 1.2461, "step": 948 }, { "epoch": 0.07281050599081877, "grad_norm": 0.195927694439888, "learning_rate": 9.94679655330978e-06, "loss": 1.2266, "step": 949 }, { "epoch": 0.07288722939017685, "grad_norm": 0.2477184534072876, "learning_rate": 9.946616065519336e-06, "loss": 1.1891, "step": 950 }, { "epoch": 0.07296395278953492, "grad_norm": 0.2682974934577942, "learning_rate": 9.946435273746047e-06, "loss": 1.1716, "step": 951 }, { "epoch": 0.07304067618889301, "grad_norm": 0.21578280627727509, "learning_rate": 9.946254178001022e-06, "loss": 1.1982, "step": 952 }, { "epoch": 0.07311739958825109, "grad_norm": 0.2744922637939453, "learning_rate": 9.946072778295388e-06, "loss": 1.1721, "step": 953 }, { "epoch": 0.07319412298760918, "grad_norm": 0.23179516196250916, "learning_rate": 9.945891074640295e-06, "loss": 1.1921, "step": 954 }, { "epoch": 0.07327084638696725, "grad_norm": 0.2669156789779663, "learning_rate": 9.94570906704691e-06, "loss": 1.1774, "step": 955 }, { "epoch": 0.07334756978632534, "grad_norm": 0.2808479070663452, "learning_rate": 9.945526755526414e-06, "loss": 1.2043, "step": 956 }, { "epoch": 0.07342429318568341, "grad_norm": 0.3050091564655304, "learning_rate": 9.945344140090013e-06, "loss": 1.2371, "step": 957 }, { "epoch": 0.07350101658504149, "grad_norm": 0.22936396300792694, "learning_rate": 9.945161220748928e-06, "loss": 1.2679, "step": 958 }, { "epoch": 0.07357773998439958, "grad_norm": 0.19443553686141968, "learning_rate": 9.944977997514404e-06, "loss": 1.2094, "step": 959 }, { "epoch": 0.07365446338375765, "grad_norm": 0.2266218513250351, "learning_rate": 9.944794470397696e-06, "loss": 1.3058, "step": 960 }, { "epoch": 0.07373118678311574, "grad_norm": 0.2122851461172104, "learning_rate": 9.944610639410083e-06, "loss": 1.1956, "step": 961 }, { "epoch": 0.07380791018247382, "grad_norm": 0.27322113513946533, "learning_rate": 9.944426504562863e-06, "loss": 1.2385, "step": 962 }, { "epoch": 0.0738846335818319, "grad_norm": 0.24090619385242462, "learning_rate": 9.94424206586735e-06, "loss": 1.2395, "step": 963 }, { "epoch": 0.07396135698118998, "grad_norm": 0.30140766501426697, "learning_rate": 9.944057323334881e-06, "loss": 1.2326, "step": 964 }, { "epoch": 0.07403808038054806, "grad_norm": 0.23198257386684418, "learning_rate": 9.943872276976807e-06, "loss": 1.252, "step": 965 }, { "epoch": 0.07411480377990615, "grad_norm": 0.2282196581363678, "learning_rate": 9.9436869268045e-06, "loss": 1.1204, "step": 966 }, { "epoch": 0.07419152717926422, "grad_norm": 0.2204085737466812, "learning_rate": 9.94350127282935e-06, "loss": 1.1896, "step": 967 }, { "epoch": 0.0742682505786223, "grad_norm": 0.17493033409118652, "learning_rate": 9.943315315062766e-06, "loss": 1.1813, "step": 968 }, { "epoch": 0.07434497397798039, "grad_norm": 0.2612670361995697, "learning_rate": 9.943129053516176e-06, "loss": 1.2091, "step": 969 }, { "epoch": 0.07442169737733846, "grad_norm": 0.24403053522109985, "learning_rate": 9.942942488201026e-06, "loss": 1.2268, "step": 970 }, { "epoch": 0.07449842077669655, "grad_norm": 0.42355653643608093, "learning_rate": 9.94275561912878e-06, "loss": 1.1912, "step": 971 }, { "epoch": 0.07457514417605463, "grad_norm": 0.2431958168745041, "learning_rate": 9.942568446310922e-06, "loss": 1.1599, "step": 972 }, { "epoch": 0.07465186757541271, "grad_norm": 0.2723410725593567, "learning_rate": 9.942380969758958e-06, "loss": 1.2136, "step": 973 }, { "epoch": 0.07472859097477079, "grad_norm": 0.284738153219223, "learning_rate": 9.942193189484403e-06, "loss": 1.2625, "step": 974 }, { "epoch": 0.07480531437412886, "grad_norm": 0.27162235975265503, "learning_rate": 9.9420051054988e-06, "loss": 1.1714, "step": 975 }, { "epoch": 0.07488203777348695, "grad_norm": 0.260276734828949, "learning_rate": 9.941816717813706e-06, "loss": 1.2271, "step": 976 }, { "epoch": 0.07495876117284503, "grad_norm": 0.31117287278175354, "learning_rate": 9.941628026440699e-06, "loss": 1.1901, "step": 977 }, { "epoch": 0.07503548457220312, "grad_norm": 0.23543961346149445, "learning_rate": 9.941439031391376e-06, "loss": 1.2101, "step": 978 }, { "epoch": 0.07511220797156119, "grad_norm": 0.17705385386943817, "learning_rate": 9.941249732677348e-06, "loss": 1.2427, "step": 979 }, { "epoch": 0.07518893137091927, "grad_norm": 0.2289450764656067, "learning_rate": 9.941060130310248e-06, "loss": 1.2409, "step": 980 }, { "epoch": 0.07526565477027736, "grad_norm": 0.26448771357536316, "learning_rate": 9.94087022430173e-06, "loss": 1.2082, "step": 981 }, { "epoch": 0.07534237816963543, "grad_norm": 1.3910754919052124, "learning_rate": 9.940680014663465e-06, "loss": 1.1831, "step": 982 }, { "epoch": 0.07541910156899352, "grad_norm": 0.2071550041437149, "learning_rate": 9.940489501407137e-06, "loss": 1.1745, "step": 983 }, { "epoch": 0.0754958249683516, "grad_norm": 0.2538459300994873, "learning_rate": 9.940298684544459e-06, "loss": 1.1716, "step": 984 }, { "epoch": 0.07557254836770969, "grad_norm": 0.23980402946472168, "learning_rate": 9.940107564087152e-06, "loss": 1.1823, "step": 985 }, { "epoch": 0.07564927176706776, "grad_norm": 0.23047474026679993, "learning_rate": 9.939916140046965e-06, "loss": 1.2355, "step": 986 }, { "epoch": 0.07572599516642584, "grad_norm": 0.26280704140663147, "learning_rate": 9.939724412435661e-06, "loss": 1.2448, "step": 987 }, { "epoch": 0.07580271856578392, "grad_norm": 0.240580216050148, "learning_rate": 9.93953238126502e-06, "loss": 1.2257, "step": 988 }, { "epoch": 0.075879441965142, "grad_norm": 0.21047954261302948, "learning_rate": 9.939340046546845e-06, "loss": 1.2186, "step": 989 }, { "epoch": 0.07595616536450009, "grad_norm": 0.22713221609592438, "learning_rate": 9.939147408292955e-06, "loss": 1.1311, "step": 990 }, { "epoch": 0.07603288876385816, "grad_norm": 0.32703810930252075, "learning_rate": 9.938954466515186e-06, "loss": 1.1793, "step": 991 }, { "epoch": 0.07610961216321624, "grad_norm": 0.2245197296142578, "learning_rate": 9.938761221225399e-06, "loss": 1.1571, "step": 992 }, { "epoch": 0.07618633556257433, "grad_norm": 0.23684343695640564, "learning_rate": 9.938567672435465e-06, "loss": 1.2149, "step": 993 }, { "epoch": 0.0762630589619324, "grad_norm": 0.3925381898880005, "learning_rate": 9.93837382015728e-06, "loss": 1.2367, "step": 994 }, { "epoch": 0.07633978236129049, "grad_norm": 0.18008674681186676, "learning_rate": 9.938179664402759e-06, "loss": 1.1644, "step": 995 }, { "epoch": 0.07641650576064857, "grad_norm": 0.39098578691482544, "learning_rate": 9.93798520518383e-06, "loss": 1.243, "step": 996 }, { "epoch": 0.07649322916000664, "grad_norm": 0.2117733359336853, "learning_rate": 9.937790442512445e-06, "loss": 1.177, "step": 997 }, { "epoch": 0.07656995255936473, "grad_norm": 0.2002440094947815, "learning_rate": 9.93759537640057e-06, "loss": 1.211, "step": 998 }, { "epoch": 0.0766466759587228, "grad_norm": 0.23014864325523376, "learning_rate": 9.937400006860197e-06, "loss": 1.2156, "step": 999 }, { "epoch": 0.0767233993580809, "grad_norm": 0.2738056480884552, "learning_rate": 9.937204333903328e-06, "loss": 1.1729, "step": 1000 }, { "epoch": 0.07680012275743897, "grad_norm": 0.37499964237213135, "learning_rate": 9.937008357541989e-06, "loss": 1.0943, "step": 1001 }, { "epoch": 0.07687684615679706, "grad_norm": 0.21195897459983826, "learning_rate": 9.936812077788223e-06, "loss": 1.1664, "step": 1002 }, { "epoch": 0.07695356955615514, "grad_norm": 0.20922423899173737, "learning_rate": 9.936615494654092e-06, "loss": 1.2514, "step": 1003 }, { "epoch": 0.07703029295551321, "grad_norm": 0.26550111174583435, "learning_rate": 9.936418608151677e-06, "loss": 1.2674, "step": 1004 }, { "epoch": 0.0771070163548713, "grad_norm": 0.2708261013031006, "learning_rate": 9.936221418293078e-06, "loss": 1.2513, "step": 1005 }, { "epoch": 0.07718373975422937, "grad_norm": 0.2613411843776703, "learning_rate": 9.93602392509041e-06, "loss": 1.2778, "step": 1006 }, { "epoch": 0.07726046315358746, "grad_norm": 0.1866612583398819, "learning_rate": 9.935826128555812e-06, "loss": 1.1807, "step": 1007 }, { "epoch": 0.07733718655294554, "grad_norm": 0.22555550932884216, "learning_rate": 9.935628028701437e-06, "loss": 1.1762, "step": 1008 }, { "epoch": 0.07741390995230361, "grad_norm": 0.18564485013484955, "learning_rate": 9.935429625539462e-06, "loss": 1.2202, "step": 1009 }, { "epoch": 0.0774906333516617, "grad_norm": 0.22444866597652435, "learning_rate": 9.935230919082076e-06, "loss": 1.2145, "step": 1010 }, { "epoch": 0.07756735675101978, "grad_norm": 0.2182379961013794, "learning_rate": 9.935031909341493e-06, "loss": 1.1352, "step": 1011 }, { "epoch": 0.07764408015037787, "grad_norm": 0.11901833862066269, "learning_rate": 9.934832596329939e-06, "loss": 1.2578, "step": 1012 }, { "epoch": 0.07772080354973594, "grad_norm": 0.2744264304637909, "learning_rate": 9.934632980059667e-06, "loss": 1.1555, "step": 1013 }, { "epoch": 0.07779752694909403, "grad_norm": 0.1928497850894928, "learning_rate": 9.93443306054294e-06, "loss": 1.2889, "step": 1014 }, { "epoch": 0.0778742503484521, "grad_norm": 0.1042381078004837, "learning_rate": 9.934232837792048e-06, "loss": 1.245, "step": 1015 }, { "epoch": 0.07795097374781018, "grad_norm": 0.22043073177337646, "learning_rate": 9.93403231181929e-06, "loss": 1.1356, "step": 1016 }, { "epoch": 0.07802769714716827, "grad_norm": 0.4169433116912842, "learning_rate": 9.933831482636991e-06, "loss": 1.1857, "step": 1017 }, { "epoch": 0.07810442054652635, "grad_norm": 0.27084964513778687, "learning_rate": 9.933630350257493e-06, "loss": 1.1413, "step": 1018 }, { "epoch": 0.07818114394588443, "grad_norm": 0.2312810719013214, "learning_rate": 9.933428914693157e-06, "loss": 1.1991, "step": 1019 }, { "epoch": 0.07825786734524251, "grad_norm": 0.21805909276008606, "learning_rate": 9.933227175956359e-06, "loss": 1.2498, "step": 1020 }, { "epoch": 0.07833459074460059, "grad_norm": 0.3265678286552429, "learning_rate": 9.933025134059498e-06, "loss": 1.2031, "step": 1021 }, { "epoch": 0.07841131414395867, "grad_norm": 0.18305543065071106, "learning_rate": 9.932822789014991e-06, "loss": 1.1608, "step": 1022 }, { "epoch": 0.07848803754331675, "grad_norm": 0.10527224093675613, "learning_rate": 9.93262014083527e-06, "loss": 1.2618, "step": 1023 }, { "epoch": 0.07856476094267484, "grad_norm": 0.21942579746246338, "learning_rate": 9.932417189532792e-06, "loss": 1.1626, "step": 1024 }, { "epoch": 0.07864148434203291, "grad_norm": 0.3033125102519989, "learning_rate": 9.932213935120025e-06, "loss": 1.1855, "step": 1025 }, { "epoch": 0.07871820774139099, "grad_norm": 0.18471337854862213, "learning_rate": 9.932010377609463e-06, "loss": 1.1151, "step": 1026 }, { "epoch": 0.07879493114074908, "grad_norm": 0.18265429139137268, "learning_rate": 9.931806517013612e-06, "loss": 1.3305, "step": 1027 }, { "epoch": 0.07887165454010715, "grad_norm": 0.44751328229904175, "learning_rate": 9.931602353345002e-06, "loss": 1.0976, "step": 1028 }, { "epoch": 0.07894837793946524, "grad_norm": 0.2045108824968338, "learning_rate": 9.931397886616179e-06, "loss": 1.2632, "step": 1029 }, { "epoch": 0.07902510133882332, "grad_norm": 0.23103629052639008, "learning_rate": 9.931193116839708e-06, "loss": 1.1642, "step": 1030 }, { "epoch": 0.0791018247381814, "grad_norm": 0.20675817131996155, "learning_rate": 9.930988044028172e-06, "loss": 1.1792, "step": 1031 }, { "epoch": 0.07917854813753948, "grad_norm": 0.21651430428028107, "learning_rate": 9.930782668194174e-06, "loss": 1.2104, "step": 1032 }, { "epoch": 0.07925527153689756, "grad_norm": 0.21198566257953644, "learning_rate": 9.930576989350334e-06, "loss": 1.1894, "step": 1033 }, { "epoch": 0.07933199493625565, "grad_norm": 0.18273554742336273, "learning_rate": 9.930371007509292e-06, "loss": 1.1668, "step": 1034 }, { "epoch": 0.07940871833561372, "grad_norm": 0.20427937805652618, "learning_rate": 9.930164722683705e-06, "loss": 1.2269, "step": 1035 }, { "epoch": 0.07948544173497181, "grad_norm": 0.47950276732444763, "learning_rate": 9.929958134886253e-06, "loss": 1.1834, "step": 1036 }, { "epoch": 0.07956216513432988, "grad_norm": 0.2095332145690918, "learning_rate": 9.929751244129629e-06, "loss": 1.1969, "step": 1037 }, { "epoch": 0.07963888853368796, "grad_norm": 0.21393795311450958, "learning_rate": 9.929544050426545e-06, "loss": 1.2399, "step": 1038 }, { "epoch": 0.07971561193304605, "grad_norm": 0.19390805065631866, "learning_rate": 9.92933655378974e-06, "loss": 1.1984, "step": 1039 }, { "epoch": 0.07979233533240412, "grad_norm": 0.19458727538585663, "learning_rate": 9.929128754231958e-06, "loss": 1.1729, "step": 1040 }, { "epoch": 0.07986905873176221, "grad_norm": 0.22701646387577057, "learning_rate": 9.928920651765971e-06, "loss": 1.1349, "step": 1041 }, { "epoch": 0.07994578213112029, "grad_norm": 0.29918497800827026, "learning_rate": 9.928712246404571e-06, "loss": 1.1775, "step": 1042 }, { "epoch": 0.08002250553047838, "grad_norm": 0.2728596031665802, "learning_rate": 9.928503538160563e-06, "loss": 1.1258, "step": 1043 }, { "epoch": 0.08009922892983645, "grad_norm": 0.20609959959983826, "learning_rate": 9.928294527046771e-06, "loss": 1.2083, "step": 1044 }, { "epoch": 0.08017595232919453, "grad_norm": 0.18329907953739166, "learning_rate": 9.92808521307604e-06, "loss": 1.2023, "step": 1045 }, { "epoch": 0.08025267572855262, "grad_norm": 0.19179847836494446, "learning_rate": 9.927875596261234e-06, "loss": 1.2779, "step": 1046 }, { "epoch": 0.08032939912791069, "grad_norm": 0.3307632803916931, "learning_rate": 9.927665676615235e-06, "loss": 1.1484, "step": 1047 }, { "epoch": 0.08040612252726878, "grad_norm": 0.2910520136356354, "learning_rate": 9.927455454150941e-06, "loss": 1.2484, "step": 1048 }, { "epoch": 0.08048284592662686, "grad_norm": 0.21712945401668549, "learning_rate": 9.92724492888127e-06, "loss": 1.1949, "step": 1049 }, { "epoch": 0.08055956932598493, "grad_norm": 0.22905634343624115, "learning_rate": 9.927034100819163e-06, "loss": 1.1868, "step": 1050 }, { "epoch": 0.08063629272534302, "grad_norm": 0.34831443428993225, "learning_rate": 9.926822969977574e-06, "loss": 1.1668, "step": 1051 }, { "epoch": 0.0807130161247011, "grad_norm": 0.22864112257957458, "learning_rate": 9.92661153636948e-06, "loss": 1.1434, "step": 1052 }, { "epoch": 0.08078973952405918, "grad_norm": 0.2275581657886505, "learning_rate": 9.92639980000787e-06, "loss": 1.1455, "step": 1053 }, { "epoch": 0.08086646292341726, "grad_norm": 0.22903022170066833, "learning_rate": 9.926187760905754e-06, "loss": 1.2503, "step": 1054 }, { "epoch": 0.08094318632277533, "grad_norm": 0.22391654551029205, "learning_rate": 9.92597541907617e-06, "loss": 1.2439, "step": 1055 }, { "epoch": 0.08101990972213342, "grad_norm": 0.2733956575393677, "learning_rate": 9.925762774532162e-06, "loss": 1.2346, "step": 1056 }, { "epoch": 0.0810966331214915, "grad_norm": 0.2225377857685089, "learning_rate": 9.925549827286797e-06, "loss": 1.2136, "step": 1057 }, { "epoch": 0.08117335652084959, "grad_norm": 0.19402088224887848, "learning_rate": 9.925336577353166e-06, "loss": 1.1946, "step": 1058 }, { "epoch": 0.08125007992020766, "grad_norm": 0.24931733310222626, "learning_rate": 9.925123024744367e-06, "loss": 1.186, "step": 1059 }, { "epoch": 0.08132680331956575, "grad_norm": 0.20290003716945648, "learning_rate": 9.924909169473529e-06, "loss": 1.1699, "step": 1060 }, { "epoch": 0.08140352671892383, "grad_norm": 0.0980476513504982, "learning_rate": 9.924695011553792e-06, "loss": 1.1891, "step": 1061 }, { "epoch": 0.0814802501182819, "grad_norm": 0.24492090940475464, "learning_rate": 9.924480550998315e-06, "loss": 1.2373, "step": 1062 }, { "epoch": 0.08155697351763999, "grad_norm": 0.2069745510816574, "learning_rate": 9.924265787820279e-06, "loss": 1.1593, "step": 1063 }, { "epoch": 0.08163369691699807, "grad_norm": 0.19388580322265625, "learning_rate": 9.92405072203288e-06, "loss": 1.2098, "step": 1064 }, { "epoch": 0.08171042031635616, "grad_norm": 0.2029048651456833, "learning_rate": 9.923835353649338e-06, "loss": 1.1661, "step": 1065 }, { "epoch": 0.08178714371571423, "grad_norm": 0.3669566810131073, "learning_rate": 9.923619682682885e-06, "loss": 1.2063, "step": 1066 }, { "epoch": 0.0818638671150723, "grad_norm": 0.2882246971130371, "learning_rate": 9.923403709146775e-06, "loss": 1.2697, "step": 1067 }, { "epoch": 0.0819405905144304, "grad_norm": 0.29929637908935547, "learning_rate": 9.92318743305428e-06, "loss": 1.0967, "step": 1068 }, { "epoch": 0.08201731391378847, "grad_norm": 0.1843663901090622, "learning_rate": 9.922970854418693e-06, "loss": 1.07, "step": 1069 }, { "epoch": 0.08209403731314656, "grad_norm": 0.32970407605171204, "learning_rate": 9.922753973253319e-06, "loss": 1.2236, "step": 1070 }, { "epoch": 0.08217076071250463, "grad_norm": 0.2636527121067047, "learning_rate": 9.92253678957149e-06, "loss": 1.1897, "step": 1071 }, { "epoch": 0.08224748411186272, "grad_norm": 0.2085145115852356, "learning_rate": 9.92231930338655e-06, "loss": 1.2337, "step": 1072 }, { "epoch": 0.0823242075112208, "grad_norm": 0.22725094854831696, "learning_rate": 9.922101514711866e-06, "loss": 1.2265, "step": 1073 }, { "epoch": 0.08240093091057887, "grad_norm": 0.24350287020206451, "learning_rate": 9.92188342356082e-06, "loss": 1.1214, "step": 1074 }, { "epoch": 0.08247765430993696, "grad_norm": 0.21560700237751007, "learning_rate": 9.921665029946815e-06, "loss": 1.2316, "step": 1075 }, { "epoch": 0.08255437770929504, "grad_norm": 0.23670199513435364, "learning_rate": 9.921446333883273e-06, "loss": 1.1418, "step": 1076 }, { "epoch": 0.08263110110865313, "grad_norm": 0.20685508847236633, "learning_rate": 9.92122733538363e-06, "loss": 1.21, "step": 1077 }, { "epoch": 0.0827078245080112, "grad_norm": 0.19488787651062012, "learning_rate": 9.92100803446135e-06, "loss": 1.1079, "step": 1078 }, { "epoch": 0.08278454790736928, "grad_norm": 0.20500725507736206, "learning_rate": 9.920788431129903e-06, "loss": 1.0766, "step": 1079 }, { "epoch": 0.08286127130672737, "grad_norm": 0.25221821665763855, "learning_rate": 9.92056852540279e-06, "loss": 1.1839, "step": 1080 }, { "epoch": 0.08293799470608544, "grad_norm": 0.1902034729719162, "learning_rate": 9.920348317293518e-06, "loss": 1.1083, "step": 1081 }, { "epoch": 0.08301471810544353, "grad_norm": 0.22840996086597443, "learning_rate": 9.920127806815627e-06, "loss": 1.2196, "step": 1082 }, { "epoch": 0.0830914415048016, "grad_norm": 0.2219136655330658, "learning_rate": 9.919906993982662e-06, "loss": 1.18, "step": 1083 }, { "epoch": 0.08316816490415968, "grad_norm": 0.19898070394992828, "learning_rate": 9.919685878808196e-06, "loss": 1.2051, "step": 1084 }, { "epoch": 0.08324488830351777, "grad_norm": 0.19063054025173187, "learning_rate": 9.919464461305817e-06, "loss": 1.1861, "step": 1085 }, { "epoch": 0.08332161170287584, "grad_norm": 0.2410380244255066, "learning_rate": 9.91924274148913e-06, "loss": 1.3212, "step": 1086 }, { "epoch": 0.08339833510223393, "grad_norm": 0.21804897487163544, "learning_rate": 9.91902071937176e-06, "loss": 1.1925, "step": 1087 }, { "epoch": 0.08347505850159201, "grad_norm": 0.22179453074932098, "learning_rate": 9.918798394967352e-06, "loss": 1.2531, "step": 1088 }, { "epoch": 0.0835517819009501, "grad_norm": 0.22840720415115356, "learning_rate": 9.918575768289571e-06, "loss": 1.2865, "step": 1089 }, { "epoch": 0.08362850530030817, "grad_norm": 0.2514110505580902, "learning_rate": 9.918352839352093e-06, "loss": 1.2001, "step": 1090 }, { "epoch": 0.08370522869966625, "grad_norm": 0.1782693862915039, "learning_rate": 9.918129608168621e-06, "loss": 1.1863, "step": 1091 }, { "epoch": 0.08378195209902434, "grad_norm": 0.11117269098758698, "learning_rate": 9.917906074752872e-06, "loss": 1.2442, "step": 1092 }, { "epoch": 0.08385867549838241, "grad_norm": 0.21115633845329285, "learning_rate": 9.917682239118582e-06, "loss": 1.0932, "step": 1093 }, { "epoch": 0.0839353988977405, "grad_norm": 0.17632122337818146, "learning_rate": 9.917458101279507e-06, "loss": 1.1888, "step": 1094 }, { "epoch": 0.08401212229709858, "grad_norm": 0.2541700601577759, "learning_rate": 9.917233661249422e-06, "loss": 1.2023, "step": 1095 }, { "epoch": 0.08408884569645665, "grad_norm": 0.2972080707550049, "learning_rate": 9.917008919042117e-06, "loss": 1.2248, "step": 1096 }, { "epoch": 0.08416556909581474, "grad_norm": 0.2312524914741516, "learning_rate": 9.916783874671405e-06, "loss": 1.2239, "step": 1097 }, { "epoch": 0.08424229249517282, "grad_norm": 0.23905955255031586, "learning_rate": 9.916558528151115e-06, "loss": 1.1537, "step": 1098 }, { "epoch": 0.0843190158945309, "grad_norm": 0.25427156686782837, "learning_rate": 9.916332879495096e-06, "loss": 1.2237, "step": 1099 }, { "epoch": 0.08439573929388898, "grad_norm": 0.21291756629943848, "learning_rate": 9.916106928717213e-06, "loss": 1.1822, "step": 1100 }, { "epoch": 0.08447246269324707, "grad_norm": 0.2046860158443451, "learning_rate": 9.915880675831352e-06, "loss": 1.255, "step": 1101 }, { "epoch": 0.08454918609260514, "grad_norm": 0.2362949252128601, "learning_rate": 9.915654120851418e-06, "loss": 1.1758, "step": 1102 }, { "epoch": 0.08462590949196322, "grad_norm": 0.2037370651960373, "learning_rate": 9.915427263791332e-06, "loss": 1.1332, "step": 1103 }, { "epoch": 0.08470263289132131, "grad_norm": 0.1971253752708435, "learning_rate": 9.915200104665035e-06, "loss": 1.175, "step": 1104 }, { "epoch": 0.08477935629067938, "grad_norm": 0.25117233395576477, "learning_rate": 9.914972643486487e-06, "loss": 1.163, "step": 1105 }, { "epoch": 0.08485607969003747, "grad_norm": 0.2798774242401123, "learning_rate": 9.914744880269667e-06, "loss": 1.2387, "step": 1106 }, { "epoch": 0.08493280308939555, "grad_norm": 0.22516782581806183, "learning_rate": 9.91451681502857e-06, "loss": 1.2338, "step": 1107 }, { "epoch": 0.08500952648875362, "grad_norm": 0.2237289547920227, "learning_rate": 9.914288447777211e-06, "loss": 1.1462, "step": 1108 }, { "epoch": 0.08508624988811171, "grad_norm": 0.22809267044067383, "learning_rate": 9.914059778529627e-06, "loss": 1.1227, "step": 1109 }, { "epoch": 0.08516297328746979, "grad_norm": 0.23178233206272125, "learning_rate": 9.913830807299867e-06, "loss": 1.2181, "step": 1110 }, { "epoch": 0.08523969668682788, "grad_norm": 0.28267979621887207, "learning_rate": 9.913601534102002e-06, "loss": 1.1955, "step": 1111 }, { "epoch": 0.08531642008618595, "grad_norm": 0.22907012701034546, "learning_rate": 9.913371958950123e-06, "loss": 1.2428, "step": 1112 }, { "epoch": 0.08539314348554403, "grad_norm": 0.21698683500289917, "learning_rate": 9.913142081858336e-06, "loss": 1.2401, "step": 1113 }, { "epoch": 0.08546986688490212, "grad_norm": 0.22840194404125214, "learning_rate": 9.912911902840771e-06, "loss": 1.2188, "step": 1114 }, { "epoch": 0.08554659028426019, "grad_norm": 0.2911187708377838, "learning_rate": 9.91268142191157e-06, "loss": 1.1539, "step": 1115 }, { "epoch": 0.08562331368361828, "grad_norm": 0.24343249201774597, "learning_rate": 9.912450639084899e-06, "loss": 1.1676, "step": 1116 }, { "epoch": 0.08570003708297635, "grad_norm": 0.24252061545848846, "learning_rate": 9.912219554374938e-06, "loss": 1.1215, "step": 1117 }, { "epoch": 0.08577676048233444, "grad_norm": 0.24329107999801636, "learning_rate": 9.911988167795888e-06, "loss": 1.1654, "step": 1118 }, { "epoch": 0.08585348388169252, "grad_norm": 0.22499340772628784, "learning_rate": 9.911756479361971e-06, "loss": 1.1498, "step": 1119 }, { "epoch": 0.0859302072810506, "grad_norm": 0.2288931906223297, "learning_rate": 9.91152448908742e-06, "loss": 1.1496, "step": 1120 }, { "epoch": 0.08600693068040868, "grad_norm": 0.26295775175094604, "learning_rate": 9.911292196986496e-06, "loss": 1.1998, "step": 1121 }, { "epoch": 0.08608365407976676, "grad_norm": 0.19847194850444794, "learning_rate": 9.911059603073474e-06, "loss": 1.1658, "step": 1122 }, { "epoch": 0.08616037747912485, "grad_norm": 0.23997770249843597, "learning_rate": 9.910826707362643e-06, "loss": 1.1772, "step": 1123 }, { "epoch": 0.08623710087848292, "grad_norm": 0.21857357025146484, "learning_rate": 9.910593509868322e-06, "loss": 1.279, "step": 1124 }, { "epoch": 0.086313824277841, "grad_norm": 0.10210113227367401, "learning_rate": 9.910360010604834e-06, "loss": 1.3843, "step": 1125 }, { "epoch": 0.08639054767719909, "grad_norm": 0.29676562547683716, "learning_rate": 9.910126209586532e-06, "loss": 1.2598, "step": 1126 }, { "epoch": 0.08646727107655716, "grad_norm": 0.22210891544818878, "learning_rate": 9.909892106827785e-06, "loss": 1.2608, "step": 1127 }, { "epoch": 0.08654399447591525, "grad_norm": 0.4108749032020569, "learning_rate": 9.909657702342977e-06, "loss": 1.1701, "step": 1128 }, { "epoch": 0.08662071787527333, "grad_norm": 0.21538597345352173, "learning_rate": 9.909422996146513e-06, "loss": 1.2077, "step": 1129 }, { "epoch": 0.08669744127463141, "grad_norm": 0.2943647801876068, "learning_rate": 9.909187988252817e-06, "loss": 1.1634, "step": 1130 }, { "epoch": 0.08677416467398949, "grad_norm": 0.5461112260818481, "learning_rate": 9.908952678676331e-06, "loss": 1.2108, "step": 1131 }, { "epoch": 0.08685088807334757, "grad_norm": 1.3371391296386719, "learning_rate": 9.908717067431514e-06, "loss": 1.1994, "step": 1132 }, { "epoch": 0.08692761147270565, "grad_norm": 0.2003183662891388, "learning_rate": 9.908481154532848e-06, "loss": 1.2713, "step": 1133 }, { "epoch": 0.08700433487206373, "grad_norm": 0.21089458465576172, "learning_rate": 9.908244939994827e-06, "loss": 1.215, "step": 1134 }, { "epoch": 0.08708105827142182, "grad_norm": 0.21967028081417084, "learning_rate": 9.908008423831969e-06, "loss": 1.2044, "step": 1135 }, { "epoch": 0.0871577816707799, "grad_norm": 0.19250385463237762, "learning_rate": 9.907771606058807e-06, "loss": 1.1768, "step": 1136 }, { "epoch": 0.08723450507013797, "grad_norm": 0.18707700073719025, "learning_rate": 9.907534486689896e-06, "loss": 1.141, "step": 1137 }, { "epoch": 0.08731122846949606, "grad_norm": 0.25958144664764404, "learning_rate": 9.907297065739807e-06, "loss": 1.222, "step": 1138 }, { "epoch": 0.08738795186885413, "grad_norm": 0.20612826943397522, "learning_rate": 9.907059343223129e-06, "loss": 1.1553, "step": 1139 }, { "epoch": 0.08746467526821222, "grad_norm": 0.24172557890415192, "learning_rate": 9.906821319154471e-06, "loss": 1.1769, "step": 1140 }, { "epoch": 0.0875413986675703, "grad_norm": 0.24539723992347717, "learning_rate": 9.906582993548463e-06, "loss": 1.2122, "step": 1141 }, { "epoch": 0.08761812206692837, "grad_norm": 0.3694198429584503, "learning_rate": 9.906344366419749e-06, "loss": 1.1422, "step": 1142 }, { "epoch": 0.08769484546628646, "grad_norm": 0.214204341173172, "learning_rate": 9.90610543778299e-06, "loss": 1.1802, "step": 1143 }, { "epoch": 0.08777156886564454, "grad_norm": 0.27632880210876465, "learning_rate": 9.905866207652873e-06, "loss": 1.155, "step": 1144 }, { "epoch": 0.08784829226500263, "grad_norm": 0.29100772738456726, "learning_rate": 9.905626676044099e-06, "loss": 1.1995, "step": 1145 }, { "epoch": 0.0879250156643607, "grad_norm": 0.2650126516819, "learning_rate": 9.905386842971385e-06, "loss": 1.21, "step": 1146 }, { "epoch": 0.08800173906371879, "grad_norm": 0.2824767827987671, "learning_rate": 9.905146708449472e-06, "loss": 1.0971, "step": 1147 }, { "epoch": 0.08807846246307686, "grad_norm": 0.19631223380565643, "learning_rate": 9.904906272493115e-06, "loss": 1.1635, "step": 1148 }, { "epoch": 0.08815518586243494, "grad_norm": 0.33972373604774475, "learning_rate": 9.904665535117092e-06, "loss": 1.0711, "step": 1149 }, { "epoch": 0.08823190926179303, "grad_norm": 0.22260870039463043, "learning_rate": 9.904424496336196e-06, "loss": 1.1778, "step": 1150 }, { "epoch": 0.0883086326611511, "grad_norm": 0.23882749676704407, "learning_rate": 9.904183156165238e-06, "loss": 1.1917, "step": 1151 }, { "epoch": 0.0883853560605092, "grad_norm": 0.21509703993797302, "learning_rate": 9.90394151461905e-06, "loss": 1.2621, "step": 1152 }, { "epoch": 0.08846207945986727, "grad_norm": 0.20884299278259277, "learning_rate": 9.903699571712483e-06, "loss": 1.0981, "step": 1153 }, { "epoch": 0.08853880285922534, "grad_norm": 0.25895583629608154, "learning_rate": 9.903457327460401e-06, "loss": 1.1697, "step": 1154 }, { "epoch": 0.08861552625858343, "grad_norm": 0.23745277523994446, "learning_rate": 9.903214781877694e-06, "loss": 1.1609, "step": 1155 }, { "epoch": 0.08869224965794151, "grad_norm": 0.18557418882846832, "learning_rate": 9.902971934979269e-06, "loss": 1.2467, "step": 1156 }, { "epoch": 0.0887689730572996, "grad_norm": 0.21829375624656677, "learning_rate": 9.902728786780044e-06, "loss": 1.2356, "step": 1157 }, { "epoch": 0.08884569645665767, "grad_norm": 0.26170745491981506, "learning_rate": 9.902485337294965e-06, "loss": 1.2884, "step": 1158 }, { "epoch": 0.08892241985601575, "grad_norm": 0.11356359720230103, "learning_rate": 9.90224158653899e-06, "loss": 1.3005, "step": 1159 }, { "epoch": 0.08899914325537384, "grad_norm": 0.2445794641971588, "learning_rate": 9.901997534527102e-06, "loss": 1.2186, "step": 1160 }, { "epoch": 0.08907586665473191, "grad_norm": 0.2400323450565338, "learning_rate": 9.901753181274294e-06, "loss": 1.2141, "step": 1161 }, { "epoch": 0.08915259005409, "grad_norm": 0.2271866798400879, "learning_rate": 9.901508526795584e-06, "loss": 1.2499, "step": 1162 }, { "epoch": 0.08922931345344808, "grad_norm": 0.19472499191761017, "learning_rate": 9.90126357110601e-06, "loss": 1.2201, "step": 1163 }, { "epoch": 0.08930603685280616, "grad_norm": 0.19221778213977814, "learning_rate": 9.90101831422062e-06, "loss": 1.1688, "step": 1164 }, { "epoch": 0.08938276025216424, "grad_norm": 0.2548292577266693, "learning_rate": 9.900772756154491e-06, "loss": 1.2798, "step": 1165 }, { "epoch": 0.08945948365152231, "grad_norm": 0.2095993608236313, "learning_rate": 9.900526896922707e-06, "loss": 1.1519, "step": 1166 }, { "epoch": 0.0895362070508804, "grad_norm": 0.2584666609764099, "learning_rate": 9.900280736540382e-06, "loss": 1.2147, "step": 1167 }, { "epoch": 0.08961293045023848, "grad_norm": 0.24128499627113342, "learning_rate": 9.900034275022641e-06, "loss": 1.1511, "step": 1168 }, { "epoch": 0.08968965384959657, "grad_norm": 0.22915497422218323, "learning_rate": 9.899787512384628e-06, "loss": 1.1821, "step": 1169 }, { "epoch": 0.08976637724895464, "grad_norm": 0.24606487154960632, "learning_rate": 9.89954044864151e-06, "loss": 1.1865, "step": 1170 }, { "epoch": 0.08984310064831272, "grad_norm": 0.10407765209674835, "learning_rate": 9.899293083808472e-06, "loss": 1.2934, "step": 1171 }, { "epoch": 0.08991982404767081, "grad_norm": 0.1051381379365921, "learning_rate": 9.899045417900709e-06, "loss": 1.3817, "step": 1172 }, { "epoch": 0.08999654744702888, "grad_norm": 0.21162840723991394, "learning_rate": 9.898797450933444e-06, "loss": 1.1598, "step": 1173 }, { "epoch": 0.09007327084638697, "grad_norm": 0.3697911202907562, "learning_rate": 9.898549182921916e-06, "loss": 1.135, "step": 1174 }, { "epoch": 0.09014999424574505, "grad_norm": 0.25342440605163574, "learning_rate": 9.89830061388138e-06, "loss": 1.1585, "step": 1175 }, { "epoch": 0.09022671764510314, "grad_norm": 0.2855245769023895, "learning_rate": 9.898051743827113e-06, "loss": 1.2206, "step": 1176 }, { "epoch": 0.09030344104446121, "grad_norm": 0.21398691833019257, "learning_rate": 9.897802572774407e-06, "loss": 1.2021, "step": 1177 }, { "epoch": 0.09038016444381929, "grad_norm": 0.22843897342681885, "learning_rate": 9.897553100738575e-06, "loss": 1.2335, "step": 1178 }, { "epoch": 0.09045688784317737, "grad_norm": 0.21859325468540192, "learning_rate": 9.897303327734948e-06, "loss": 1.2074, "step": 1179 }, { "epoch": 0.09053361124253545, "grad_norm": 0.2151924967765808, "learning_rate": 9.897053253778874e-06, "loss": 1.172, "step": 1180 }, { "epoch": 0.09061033464189354, "grad_norm": 0.2739099860191345, "learning_rate": 9.896802878885723e-06, "loss": 1.1709, "step": 1181 }, { "epoch": 0.09068705804125161, "grad_norm": 0.19913484156131744, "learning_rate": 9.896552203070879e-06, "loss": 1.1582, "step": 1182 }, { "epoch": 0.09076378144060969, "grad_norm": 0.2386896163225174, "learning_rate": 9.896301226349748e-06, "loss": 1.2344, "step": 1183 }, { "epoch": 0.09084050483996778, "grad_norm": 0.20783303678035736, "learning_rate": 9.896049948737752e-06, "loss": 1.2263, "step": 1184 }, { "epoch": 0.09091722823932585, "grad_norm": 0.2883125841617584, "learning_rate": 9.895798370250336e-06, "loss": 1.1429, "step": 1185 }, { "epoch": 0.09099395163868394, "grad_norm": 0.18884465098381042, "learning_rate": 9.895546490902953e-06, "loss": 1.2326, "step": 1186 }, { "epoch": 0.09107067503804202, "grad_norm": 0.20839020609855652, "learning_rate": 9.89529431071109e-06, "loss": 1.1623, "step": 1187 }, { "epoch": 0.09114739843740009, "grad_norm": 0.7052544355392456, "learning_rate": 9.89504182969024e-06, "loss": 1.227, "step": 1188 }, { "epoch": 0.09122412183675818, "grad_norm": 0.27602261304855347, "learning_rate": 9.894789047855919e-06, "loss": 1.244, "step": 1189 }, { "epoch": 0.09130084523611626, "grad_norm": 0.2792525291442871, "learning_rate": 9.894535965223661e-06, "loss": 1.1494, "step": 1190 }, { "epoch": 0.09137756863547435, "grad_norm": 0.20717427134513855, "learning_rate": 9.894282581809019e-06, "loss": 1.2431, "step": 1191 }, { "epoch": 0.09145429203483242, "grad_norm": 0.21562515199184418, "learning_rate": 9.894028897627564e-06, "loss": 1.234, "step": 1192 }, { "epoch": 0.09153101543419051, "grad_norm": 0.2036726325750351, "learning_rate": 9.893774912694884e-06, "loss": 1.1845, "step": 1193 }, { "epoch": 0.09160773883354859, "grad_norm": 0.24171477556228638, "learning_rate": 9.893520627026589e-06, "loss": 1.1798, "step": 1194 }, { "epoch": 0.09168446223290666, "grad_norm": 0.22982214391231537, "learning_rate": 9.893266040638307e-06, "loss": 1.2355, "step": 1195 }, { "epoch": 0.09176118563226475, "grad_norm": 0.21414770185947418, "learning_rate": 9.893011153545679e-06, "loss": 1.2077, "step": 1196 }, { "epoch": 0.09183790903162282, "grad_norm": 0.35010719299316406, "learning_rate": 9.89275596576437e-06, "loss": 1.2029, "step": 1197 }, { "epoch": 0.09191463243098091, "grad_norm": 0.20767323672771454, "learning_rate": 9.892500477310065e-06, "loss": 1.2529, "step": 1198 }, { "epoch": 0.09199135583033899, "grad_norm": 0.2884334921836853, "learning_rate": 9.892244688198463e-06, "loss": 1.1662, "step": 1199 }, { "epoch": 0.09206807922969706, "grad_norm": 0.20124533772468567, "learning_rate": 9.891988598445279e-06, "loss": 1.2431, "step": 1200 }, { "epoch": 0.09214480262905515, "grad_norm": 0.2734549641609192, "learning_rate": 9.891732208066254e-06, "loss": 1.2173, "step": 1201 }, { "epoch": 0.09222152602841323, "grad_norm": 0.20965880155563354, "learning_rate": 9.891475517077143e-06, "loss": 1.1709, "step": 1202 }, { "epoch": 0.09229824942777132, "grad_norm": 0.24063228070735931, "learning_rate": 9.891218525493722e-06, "loss": 1.2167, "step": 1203 }, { "epoch": 0.09237497282712939, "grad_norm": 0.17963552474975586, "learning_rate": 9.89096123333178e-06, "loss": 1.2162, "step": 1204 }, { "epoch": 0.09245169622648748, "grad_norm": 0.27913349866867065, "learning_rate": 9.890703640607133e-06, "loss": 1.1984, "step": 1205 }, { "epoch": 0.09252841962584556, "grad_norm": 0.3879263997077942, "learning_rate": 9.890445747335608e-06, "loss": 1.221, "step": 1206 }, { "epoch": 0.09260514302520363, "grad_norm": 0.23932205140590668, "learning_rate": 9.890187553533053e-06, "loss": 1.228, "step": 1207 }, { "epoch": 0.09268186642456172, "grad_norm": 0.20852452516555786, "learning_rate": 9.889929059215336e-06, "loss": 1.2083, "step": 1208 }, { "epoch": 0.0927585898239198, "grad_norm": 0.7609672546386719, "learning_rate": 9.889670264398342e-06, "loss": 1.1513, "step": 1209 }, { "epoch": 0.09283531322327788, "grad_norm": 0.1812545359134674, "learning_rate": 9.889411169097972e-06, "loss": 1.1888, "step": 1210 }, { "epoch": 0.09291203662263596, "grad_norm": 0.20543549954891205, "learning_rate": 9.889151773330152e-06, "loss": 1.1524, "step": 1211 }, { "epoch": 0.09298876002199404, "grad_norm": 0.20377448201179504, "learning_rate": 9.888892077110821e-06, "loss": 1.2041, "step": 1212 }, { "epoch": 0.09306548342135212, "grad_norm": 0.374438613653183, "learning_rate": 9.888632080455937e-06, "loss": 1.1366, "step": 1213 }, { "epoch": 0.0931422068207102, "grad_norm": 0.33089062571525574, "learning_rate": 9.888371783381478e-06, "loss": 1.2337, "step": 1214 }, { "epoch": 0.09321893022006829, "grad_norm": 0.3653604984283447, "learning_rate": 9.888111185903442e-06, "loss": 1.2077, "step": 1215 }, { "epoch": 0.09329565361942636, "grad_norm": 0.28085142374038696, "learning_rate": 9.887850288037842e-06, "loss": 1.1933, "step": 1216 }, { "epoch": 0.09337237701878444, "grad_norm": 0.335904985666275, "learning_rate": 9.887589089800708e-06, "loss": 1.154, "step": 1217 }, { "epoch": 0.09344910041814253, "grad_norm": 0.20574958622455597, "learning_rate": 9.887327591208097e-06, "loss": 1.1894, "step": 1218 }, { "epoch": 0.0935258238175006, "grad_norm": 0.21220481395721436, "learning_rate": 9.887065792276074e-06, "loss": 1.1711, "step": 1219 }, { "epoch": 0.09360254721685869, "grad_norm": 0.43097424507141113, "learning_rate": 9.88680369302073e-06, "loss": 1.2373, "step": 1220 }, { "epoch": 0.09367927061621677, "grad_norm": 0.18840840458869934, "learning_rate": 9.88654129345817e-06, "loss": 1.1629, "step": 1221 }, { "epoch": 0.09375599401557486, "grad_norm": 0.2248634546995163, "learning_rate": 9.886278593604518e-06, "loss": 1.113, "step": 1222 }, { "epoch": 0.09383271741493293, "grad_norm": 0.20923346281051636, "learning_rate": 9.886015593475924e-06, "loss": 1.1188, "step": 1223 }, { "epoch": 0.093909440814291, "grad_norm": 0.5083162784576416, "learning_rate": 9.885752293088541e-06, "loss": 1.1336, "step": 1224 }, { "epoch": 0.0939861642136491, "grad_norm": 0.2521149516105652, "learning_rate": 9.885488692458558e-06, "loss": 1.15, "step": 1225 }, { "epoch": 0.09406288761300717, "grad_norm": 0.33281412720680237, "learning_rate": 9.885224791602168e-06, "loss": 1.1576, "step": 1226 }, { "epoch": 0.09413961101236526, "grad_norm": 0.24217279255390167, "learning_rate": 9.88496059053559e-06, "loss": 1.1732, "step": 1227 }, { "epoch": 0.09421633441172333, "grad_norm": 0.25073838233947754, "learning_rate": 9.884696089275061e-06, "loss": 1.1391, "step": 1228 }, { "epoch": 0.09429305781108141, "grad_norm": 0.5862687230110168, "learning_rate": 9.884431287836835e-06, "loss": 1.1707, "step": 1229 }, { "epoch": 0.0943697812104395, "grad_norm": 0.21776409447193146, "learning_rate": 9.884166186237185e-06, "loss": 1.1911, "step": 1230 }, { "epoch": 0.09444650460979757, "grad_norm": 0.22671452164649963, "learning_rate": 9.883900784492399e-06, "loss": 1.1533, "step": 1231 }, { "epoch": 0.09452322800915566, "grad_norm": 0.19130398333072662, "learning_rate": 9.883635082618791e-06, "loss": 1.2079, "step": 1232 }, { "epoch": 0.09459995140851374, "grad_norm": 0.23282498121261597, "learning_rate": 9.883369080632688e-06, "loss": 1.1611, "step": 1233 }, { "epoch": 0.09467667480787183, "grad_norm": 0.258710116147995, "learning_rate": 9.883102778550434e-06, "loss": 1.225, "step": 1234 }, { "epoch": 0.0947533982072299, "grad_norm": 0.2618682086467743, "learning_rate": 9.882836176388397e-06, "loss": 1.1477, "step": 1235 }, { "epoch": 0.09483012160658798, "grad_norm": 0.23071947693824768, "learning_rate": 9.882569274162958e-06, "loss": 1.1319, "step": 1236 }, { "epoch": 0.09490684500594607, "grad_norm": 0.16900105774402618, "learning_rate": 9.882302071890523e-06, "loss": 1.1277, "step": 1237 }, { "epoch": 0.09498356840530414, "grad_norm": 0.217560276389122, "learning_rate": 9.882034569587506e-06, "loss": 1.2094, "step": 1238 }, { "epoch": 0.09506029180466223, "grad_norm": 0.6322441697120667, "learning_rate": 9.881766767270353e-06, "loss": 1.2013, "step": 1239 }, { "epoch": 0.0951370152040203, "grad_norm": 0.19787870347499847, "learning_rate": 9.881498664955514e-06, "loss": 1.2266, "step": 1240 }, { "epoch": 0.09521373860337838, "grad_norm": 0.22343632578849792, "learning_rate": 9.881230262659469e-06, "loss": 1.1418, "step": 1241 }, { "epoch": 0.09529046200273647, "grad_norm": 0.2087598592042923, "learning_rate": 9.880961560398711e-06, "loss": 1.1818, "step": 1242 }, { "epoch": 0.09536718540209455, "grad_norm": 0.19296588003635406, "learning_rate": 9.880692558189754e-06, "loss": 1.2415, "step": 1243 }, { "epoch": 0.09544390880145263, "grad_norm": 0.259941041469574, "learning_rate": 9.880423256049126e-06, "loss": 1.1328, "step": 1244 }, { "epoch": 0.09552063220081071, "grad_norm": 0.4053671956062317, "learning_rate": 9.880153653993377e-06, "loss": 1.2024, "step": 1245 }, { "epoch": 0.09559735560016878, "grad_norm": 0.10537394136190414, "learning_rate": 9.879883752039075e-06, "loss": 1.3119, "step": 1246 }, { "epoch": 0.09567407899952687, "grad_norm": 0.19970236718654633, "learning_rate": 9.879613550202809e-06, "loss": 1.1987, "step": 1247 }, { "epoch": 0.09575080239888495, "grad_norm": 0.24968558549880981, "learning_rate": 9.879343048501179e-06, "loss": 1.2101, "step": 1248 }, { "epoch": 0.09582752579824304, "grad_norm": 0.5132637619972229, "learning_rate": 9.879072246950811e-06, "loss": 1.1885, "step": 1249 }, { "epoch": 0.09590424919760111, "grad_norm": 0.2564467489719391, "learning_rate": 9.878801145568347e-06, "loss": 1.1895, "step": 1250 }, { "epoch": 0.0959809725969592, "grad_norm": 0.18315231800079346, "learning_rate": 9.878529744370445e-06, "loss": 1.1605, "step": 1251 }, { "epoch": 0.09605769599631728, "grad_norm": 0.26233288645744324, "learning_rate": 9.878258043373786e-06, "loss": 1.1627, "step": 1252 }, { "epoch": 0.09613441939567535, "grad_norm": 0.2999817132949829, "learning_rate": 9.877986042595062e-06, "loss": 1.1869, "step": 1253 }, { "epoch": 0.09621114279503344, "grad_norm": 0.2583628296852112, "learning_rate": 9.877713742050992e-06, "loss": 1.2215, "step": 1254 }, { "epoch": 0.09628786619439152, "grad_norm": 0.24916929006576538, "learning_rate": 9.877441141758308e-06, "loss": 1.1804, "step": 1255 }, { "epoch": 0.0963645895937496, "grad_norm": 0.21033665537834167, "learning_rate": 9.877168241733764e-06, "loss": 1.1004, "step": 1256 }, { "epoch": 0.09644131299310768, "grad_norm": 0.5341978073120117, "learning_rate": 9.876895041994128e-06, "loss": 1.1243, "step": 1257 }, { "epoch": 0.09651803639246576, "grad_norm": 0.21791920065879822, "learning_rate": 9.87662154255619e-06, "loss": 1.1764, "step": 1258 }, { "epoch": 0.09659475979182384, "grad_norm": 0.19181619584560394, "learning_rate": 9.876347743436758e-06, "loss": 1.2097, "step": 1259 }, { "epoch": 0.09667148319118192, "grad_norm": 0.20692627131938934, "learning_rate": 9.876073644652656e-06, "loss": 1.1801, "step": 1260 }, { "epoch": 0.09674820659054001, "grad_norm": 0.20797397196292877, "learning_rate": 9.87579924622073e-06, "loss": 1.2347, "step": 1261 }, { "epoch": 0.09682492998989808, "grad_norm": 0.22746902704238892, "learning_rate": 9.87552454815784e-06, "loss": 1.1452, "step": 1262 }, { "epoch": 0.09690165338925617, "grad_norm": 0.3241468667984009, "learning_rate": 9.87524955048087e-06, "loss": 1.1925, "step": 1263 }, { "epoch": 0.09697837678861425, "grad_norm": 0.2240346372127533, "learning_rate": 9.874974253206717e-06, "loss": 1.2009, "step": 1264 }, { "epoch": 0.09705510018797232, "grad_norm": 0.19317425787448883, "learning_rate": 9.8746986563523e-06, "loss": 1.1921, "step": 1265 }, { "epoch": 0.09713182358733041, "grad_norm": 0.2098277062177658, "learning_rate": 9.874422759934555e-06, "loss": 1.2334, "step": 1266 }, { "epoch": 0.09720854698668849, "grad_norm": 0.21686658263206482, "learning_rate": 9.874146563970435e-06, "loss": 1.2226, "step": 1267 }, { "epoch": 0.09728527038604658, "grad_norm": 0.18537050485610962, "learning_rate": 9.873870068476917e-06, "loss": 1.1407, "step": 1268 }, { "epoch": 0.09736199378540465, "grad_norm": 0.23662838339805603, "learning_rate": 9.873593273470988e-06, "loss": 1.1668, "step": 1269 }, { "epoch": 0.09743871718476273, "grad_norm": 0.24941377341747284, "learning_rate": 9.873316178969659e-06, "loss": 1.1382, "step": 1270 }, { "epoch": 0.09751544058412082, "grad_norm": 0.28640034794807434, "learning_rate": 9.873038784989958e-06, "loss": 1.1952, "step": 1271 }, { "epoch": 0.09759216398347889, "grad_norm": 0.19279330968856812, "learning_rate": 9.872761091548933e-06, "loss": 1.2232, "step": 1272 }, { "epoch": 0.09766888738283698, "grad_norm": 0.2925608158111572, "learning_rate": 9.87248309866365e-06, "loss": 1.1673, "step": 1273 }, { "epoch": 0.09774561078219506, "grad_norm": 0.17717015743255615, "learning_rate": 9.872204806351189e-06, "loss": 1.2059, "step": 1274 }, { "epoch": 0.09782233418155313, "grad_norm": 0.24136076867580414, "learning_rate": 9.871926214628652e-06, "loss": 1.1565, "step": 1275 }, { "epoch": 0.09789905758091122, "grad_norm": 0.3183631896972656, "learning_rate": 9.871647323513164e-06, "loss": 1.1036, "step": 1276 }, { "epoch": 0.0979757809802693, "grad_norm": 0.3381653130054474, "learning_rate": 9.87136813302186e-06, "loss": 1.2496, "step": 1277 }, { "epoch": 0.09805250437962738, "grad_norm": 0.2211785763502121, "learning_rate": 9.871088643171895e-06, "loss": 1.1883, "step": 1278 }, { "epoch": 0.09812922777898546, "grad_norm": 0.21128098666667938, "learning_rate": 9.87080885398045e-06, "loss": 1.237, "step": 1279 }, { "epoch": 0.09820595117834355, "grad_norm": 0.21866708993911743, "learning_rate": 9.870528765464713e-06, "loss": 1.1605, "step": 1280 }, { "epoch": 0.09828267457770162, "grad_norm": 0.23885636031627655, "learning_rate": 9.870248377641898e-06, "loss": 1.2199, "step": 1281 }, { "epoch": 0.0983593979770597, "grad_norm": 0.22478963434696198, "learning_rate": 9.869967690529238e-06, "loss": 1.2715, "step": 1282 }, { "epoch": 0.09843612137641779, "grad_norm": 0.20853225886821747, "learning_rate": 9.869686704143981e-06, "loss": 1.1491, "step": 1283 }, { "epoch": 0.09851284477577586, "grad_norm": 0.2782912254333496, "learning_rate": 9.869405418503392e-06, "loss": 1.16, "step": 1284 }, { "epoch": 0.09858956817513395, "grad_norm": 0.2357097864151001, "learning_rate": 9.86912383362476e-06, "loss": 1.1587, "step": 1285 }, { "epoch": 0.09866629157449203, "grad_norm": 0.5347362160682678, "learning_rate": 9.868841949525386e-06, "loss": 1.179, "step": 1286 }, { "epoch": 0.0987430149738501, "grad_norm": 0.1976490318775177, "learning_rate": 9.868559766222594e-06, "loss": 1.226, "step": 1287 }, { "epoch": 0.09881973837320819, "grad_norm": 0.2504449784755707, "learning_rate": 9.868277283733725e-06, "loss": 1.1768, "step": 1288 }, { "epoch": 0.09889646177256627, "grad_norm": 0.25080353021621704, "learning_rate": 9.86799450207614e-06, "loss": 1.167, "step": 1289 }, { "epoch": 0.09897318517192435, "grad_norm": 0.19646993279457092, "learning_rate": 9.867711421267214e-06, "loss": 1.1616, "step": 1290 }, { "epoch": 0.09904990857128243, "grad_norm": 0.24993734061717987, "learning_rate": 9.867428041324345e-06, "loss": 1.1916, "step": 1291 }, { "epoch": 0.09912663197064052, "grad_norm": 0.22261302173137665, "learning_rate": 9.867144362264946e-06, "loss": 1.1263, "step": 1292 }, { "epoch": 0.0992033553699986, "grad_norm": 0.26242730021476746, "learning_rate": 9.866860384106449e-06, "loss": 1.1343, "step": 1293 }, { "epoch": 0.09928007876935667, "grad_norm": 0.2749252915382385, "learning_rate": 9.866576106866307e-06, "loss": 1.2609, "step": 1294 }, { "epoch": 0.09935680216871476, "grad_norm": 0.21984818577766418, "learning_rate": 9.866291530561991e-06, "loss": 1.1382, "step": 1295 }, { "epoch": 0.09943352556807283, "grad_norm": 0.31837552785873413, "learning_rate": 9.866006655210988e-06, "loss": 1.225, "step": 1296 }, { "epoch": 0.09951024896743092, "grad_norm": 0.20655032992362976, "learning_rate": 9.8657214808308e-06, "loss": 1.1916, "step": 1297 }, { "epoch": 0.099586972366789, "grad_norm": 0.21408076584339142, "learning_rate": 9.865436007438957e-06, "loss": 1.1413, "step": 1298 }, { "epoch": 0.09966369576614707, "grad_norm": 0.18155929446220398, "learning_rate": 9.865150235053e-06, "loss": 1.1853, "step": 1299 }, { "epoch": 0.09974041916550516, "grad_norm": 0.2167743444442749, "learning_rate": 9.864864163690494e-06, "loss": 1.2062, "step": 1300 }, { "epoch": 0.09981714256486324, "grad_norm": 0.32002606987953186, "learning_rate": 9.864577793369013e-06, "loss": 1.1737, "step": 1301 }, { "epoch": 0.09989386596422133, "grad_norm": 0.18187780678272247, "learning_rate": 9.864291124106156e-06, "loss": 1.1303, "step": 1302 }, { "epoch": 0.0999705893635794, "grad_norm": 0.2641666531562805, "learning_rate": 9.864004155919545e-06, "loss": 1.1676, "step": 1303 }, { "epoch": 0.10004731276293748, "grad_norm": 0.19882243871688843, "learning_rate": 9.86371688882681e-06, "loss": 1.2602, "step": 1304 }, { "epoch": 0.10012403616229557, "grad_norm": 0.20336119830608368, "learning_rate": 9.863429322845605e-06, "loss": 1.2323, "step": 1305 }, { "epoch": 0.10020075956165364, "grad_norm": 0.24298761785030365, "learning_rate": 9.863141457993604e-06, "loss": 1.1439, "step": 1306 }, { "epoch": 0.10027748296101173, "grad_norm": 0.22013293206691742, "learning_rate": 9.862853294288495e-06, "loss": 1.1967, "step": 1307 }, { "epoch": 0.1003542063603698, "grad_norm": 0.2204931527376175, "learning_rate": 9.862564831747988e-06, "loss": 1.1431, "step": 1308 }, { "epoch": 0.1004309297597279, "grad_norm": 0.3074149787425995, "learning_rate": 9.862276070389808e-06, "loss": 1.1262, "step": 1309 }, { "epoch": 0.10050765315908597, "grad_norm": 0.3456571698188782, "learning_rate": 9.861987010231701e-06, "loss": 1.1656, "step": 1310 }, { "epoch": 0.10058437655844404, "grad_norm": 0.23883762955665588, "learning_rate": 9.86169765129143e-06, "loss": 1.169, "step": 1311 }, { "epoch": 0.10066109995780213, "grad_norm": 0.19919613003730774, "learning_rate": 9.861407993586778e-06, "loss": 1.2009, "step": 1312 }, { "epoch": 0.10073782335716021, "grad_norm": 0.2269894778728485, "learning_rate": 9.861118037135546e-06, "loss": 1.1883, "step": 1313 }, { "epoch": 0.1008145467565183, "grad_norm": 0.2506515681743622, "learning_rate": 9.86082778195555e-06, "loss": 1.221, "step": 1314 }, { "epoch": 0.10089127015587637, "grad_norm": 0.21088090538978577, "learning_rate": 9.860537228064628e-06, "loss": 1.2595, "step": 1315 }, { "epoch": 0.10096799355523445, "grad_norm": 0.3091139793395996, "learning_rate": 9.860246375480636e-06, "loss": 1.1788, "step": 1316 }, { "epoch": 0.10104471695459254, "grad_norm": 0.25775185227394104, "learning_rate": 9.859955224221446e-06, "loss": 1.1564, "step": 1317 }, { "epoch": 0.10112144035395061, "grad_norm": 0.22751857340335846, "learning_rate": 9.859663774304952e-06, "loss": 1.2146, "step": 1318 }, { "epoch": 0.1011981637533087, "grad_norm": 0.23068535327911377, "learning_rate": 9.859372025749066e-06, "loss": 1.1371, "step": 1319 }, { "epoch": 0.10127488715266678, "grad_norm": 0.23937028646469116, "learning_rate": 9.85907997857171e-06, "loss": 1.1879, "step": 1320 }, { "epoch": 0.10135161055202487, "grad_norm": 0.2414516657590866, "learning_rate": 9.85878763279084e-06, "loss": 1.1649, "step": 1321 }, { "epoch": 0.10142833395138294, "grad_norm": 0.5318148136138916, "learning_rate": 9.858494988424414e-06, "loss": 1.109, "step": 1322 }, { "epoch": 0.10150505735074102, "grad_norm": 0.4511772394180298, "learning_rate": 9.85820204549042e-06, "loss": 1.1685, "step": 1323 }, { "epoch": 0.1015817807500991, "grad_norm": 0.22507862746715546, "learning_rate": 9.857908804006858e-06, "loss": 1.1877, "step": 1324 }, { "epoch": 0.10165850414945718, "grad_norm": 0.1961466670036316, "learning_rate": 9.85761526399175e-06, "loss": 1.217, "step": 1325 }, { "epoch": 0.10173522754881527, "grad_norm": 0.20746639370918274, "learning_rate": 9.857321425463132e-06, "loss": 1.2256, "step": 1326 }, { "epoch": 0.10181195094817334, "grad_norm": 0.41059523820877075, "learning_rate": 9.857027288439065e-06, "loss": 1.191, "step": 1327 }, { "epoch": 0.10188867434753142, "grad_norm": 0.2105163186788559, "learning_rate": 9.856732852937623e-06, "loss": 1.2053, "step": 1328 }, { "epoch": 0.10196539774688951, "grad_norm": 0.11274492740631104, "learning_rate": 9.856438118976899e-06, "loss": 1.2922, "step": 1329 }, { "epoch": 0.10204212114624758, "grad_norm": 0.19715072214603424, "learning_rate": 9.856143086575005e-06, "loss": 1.2037, "step": 1330 }, { "epoch": 0.10211884454560567, "grad_norm": 0.3278452754020691, "learning_rate": 9.855847755750075e-06, "loss": 1.1227, "step": 1331 }, { "epoch": 0.10219556794496375, "grad_norm": 0.2157287448644638, "learning_rate": 9.855552126520252e-06, "loss": 1.2205, "step": 1332 }, { "epoch": 0.10227229134432182, "grad_norm": 0.18778663873672485, "learning_rate": 9.855256198903709e-06, "loss": 1.1306, "step": 1333 }, { "epoch": 0.10234901474367991, "grad_norm": 0.24262838065624237, "learning_rate": 9.854959972918627e-06, "loss": 1.1081, "step": 1334 }, { "epoch": 0.10242573814303799, "grad_norm": 0.23195475339889526, "learning_rate": 9.854663448583212e-06, "loss": 1.2023, "step": 1335 }, { "epoch": 0.10250246154239608, "grad_norm": 0.19067823886871338, "learning_rate": 9.854366625915688e-06, "loss": 1.1813, "step": 1336 }, { "epoch": 0.10257918494175415, "grad_norm": 0.3773871064186096, "learning_rate": 9.854069504934291e-06, "loss": 1.1864, "step": 1337 }, { "epoch": 0.10265590834111224, "grad_norm": 0.36148348450660706, "learning_rate": 9.853772085657285e-06, "loss": 1.208, "step": 1338 }, { "epoch": 0.10273263174047031, "grad_norm": 0.17553456127643585, "learning_rate": 9.853474368102945e-06, "loss": 1.1472, "step": 1339 }, { "epoch": 0.10280935513982839, "grad_norm": 0.10686103254556656, "learning_rate": 9.853176352289565e-06, "loss": 1.3193, "step": 1340 }, { "epoch": 0.10288607853918648, "grad_norm": 0.1881365031003952, "learning_rate": 9.85287803823546e-06, "loss": 1.2388, "step": 1341 }, { "epoch": 0.10296280193854455, "grad_norm": 0.25108441710472107, "learning_rate": 9.852579425958961e-06, "loss": 1.1389, "step": 1342 }, { "epoch": 0.10303952533790264, "grad_norm": 0.23751147091388702, "learning_rate": 9.852280515478423e-06, "loss": 1.2121, "step": 1343 }, { "epoch": 0.10311624873726072, "grad_norm": 0.21593181788921356, "learning_rate": 9.85198130681221e-06, "loss": 1.1503, "step": 1344 }, { "epoch": 0.1031929721366188, "grad_norm": 0.20754800736904144, "learning_rate": 9.851681799978709e-06, "loss": 1.1923, "step": 1345 }, { "epoch": 0.10326969553597688, "grad_norm": 0.18345163762569427, "learning_rate": 9.85138199499633e-06, "loss": 1.2096, "step": 1346 }, { "epoch": 0.10334641893533496, "grad_norm": 0.2018374800682068, "learning_rate": 9.851081891883494e-06, "loss": 1.1037, "step": 1347 }, { "epoch": 0.10342314233469305, "grad_norm": 0.200768381357193, "learning_rate": 9.850781490658643e-06, "loss": 1.2006, "step": 1348 }, { "epoch": 0.10349986573405112, "grad_norm": 0.22094403207302094, "learning_rate": 9.850480791340238e-06, "loss": 1.1481, "step": 1349 }, { "epoch": 0.10357658913340921, "grad_norm": 0.28276848793029785, "learning_rate": 9.850179793946757e-06, "loss": 1.1311, "step": 1350 }, { "epoch": 0.10365331253276729, "grad_norm": 0.21603339910507202, "learning_rate": 9.8498784984967e-06, "loss": 1.1895, "step": 1351 }, { "epoch": 0.10373003593212536, "grad_norm": 0.29669877886772156, "learning_rate": 9.849576905008577e-06, "loss": 1.1243, "step": 1352 }, { "epoch": 0.10380675933148345, "grad_norm": 0.28908365964889526, "learning_rate": 9.849275013500925e-06, "loss": 1.1892, "step": 1353 }, { "epoch": 0.10388348273084153, "grad_norm": 0.760038435459137, "learning_rate": 9.848972823992298e-06, "loss": 1.1061, "step": 1354 }, { "epoch": 0.10396020613019961, "grad_norm": 0.2574855387210846, "learning_rate": 9.848670336501262e-06, "loss": 1.1502, "step": 1355 }, { "epoch": 0.10403692952955769, "grad_norm": 0.25966137647628784, "learning_rate": 9.84836755104641e-06, "loss": 1.1929, "step": 1356 }, { "epoch": 0.10411365292891576, "grad_norm": 0.18459303677082062, "learning_rate": 9.848064467646346e-06, "loss": 1.227, "step": 1357 }, { "epoch": 0.10419037632827385, "grad_norm": 0.18242812156677246, "learning_rate": 9.847761086319695e-06, "loss": 1.1194, "step": 1358 }, { "epoch": 0.10426709972763193, "grad_norm": 0.18473269045352936, "learning_rate": 9.847457407085104e-06, "loss": 1.1879, "step": 1359 }, { "epoch": 0.10434382312699002, "grad_norm": 0.1995459347963333, "learning_rate": 9.847153429961231e-06, "loss": 1.1416, "step": 1360 }, { "epoch": 0.1044205465263481, "grad_norm": 0.47759488224983215, "learning_rate": 9.84684915496676e-06, "loss": 1.146, "step": 1361 }, { "epoch": 0.10449726992570617, "grad_norm": 0.20156118273735046, "learning_rate": 9.846544582120386e-06, "loss": 1.1467, "step": 1362 }, { "epoch": 0.10457399332506426, "grad_norm": 0.23615305125713348, "learning_rate": 9.846239711440827e-06, "loss": 1.1724, "step": 1363 }, { "epoch": 0.10465071672442233, "grad_norm": 0.19246281683444977, "learning_rate": 9.84593454294682e-06, "loss": 1.1763, "step": 1364 }, { "epoch": 0.10472744012378042, "grad_norm": 0.20962269604206085, "learning_rate": 9.845629076657116e-06, "loss": 1.1489, "step": 1365 }, { "epoch": 0.1048041635231385, "grad_norm": 0.21236635744571686, "learning_rate": 9.845323312590484e-06, "loss": 1.1646, "step": 1366 }, { "epoch": 0.10488088692249659, "grad_norm": 0.30708077549934387, "learning_rate": 9.845017250765721e-06, "loss": 1.2268, "step": 1367 }, { "epoch": 0.10495761032185466, "grad_norm": 0.5431815981864929, "learning_rate": 9.844710891201633e-06, "loss": 1.1241, "step": 1368 }, { "epoch": 0.10503433372121274, "grad_norm": 0.21631789207458496, "learning_rate": 9.844404233917042e-06, "loss": 1.1312, "step": 1369 }, { "epoch": 0.10511105712057082, "grad_norm": 0.21850985288619995, "learning_rate": 9.844097278930799e-06, "loss": 1.1514, "step": 1370 }, { "epoch": 0.1051877805199289, "grad_norm": 0.2584953010082245, "learning_rate": 9.843790026261763e-06, "loss": 1.104, "step": 1371 }, { "epoch": 0.10526450391928699, "grad_norm": 0.1040101945400238, "learning_rate": 9.843482475928818e-06, "loss": 1.3634, "step": 1372 }, { "epoch": 0.10534122731864506, "grad_norm": 0.2575283646583557, "learning_rate": 9.843174627950862e-06, "loss": 1.143, "step": 1373 }, { "epoch": 0.10541795071800314, "grad_norm": 0.260415643453598, "learning_rate": 9.842866482346816e-06, "loss": 1.1699, "step": 1374 }, { "epoch": 0.10549467411736123, "grad_norm": 0.5155551433563232, "learning_rate": 9.842558039135612e-06, "loss": 1.2254, "step": 1375 }, { "epoch": 0.1055713975167193, "grad_norm": 0.30658209323883057, "learning_rate": 9.842249298336208e-06, "loss": 1.1829, "step": 1376 }, { "epoch": 0.10564812091607739, "grad_norm": 0.18660302460193634, "learning_rate": 9.841940259967577e-06, "loss": 1.1024, "step": 1377 }, { "epoch": 0.10572484431543547, "grad_norm": 0.28132325410842896, "learning_rate": 9.841630924048708e-06, "loss": 1.167, "step": 1378 }, { "epoch": 0.10580156771479356, "grad_norm": 0.19309525191783905, "learning_rate": 9.841321290598613e-06, "loss": 1.1601, "step": 1379 }, { "epoch": 0.10587829111415163, "grad_norm": 0.413732647895813, "learning_rate": 9.841011359636316e-06, "loss": 1.1522, "step": 1380 }, { "epoch": 0.10595501451350971, "grad_norm": 0.29362720251083374, "learning_rate": 9.840701131180868e-06, "loss": 1.178, "step": 1381 }, { "epoch": 0.1060317379128678, "grad_norm": 0.24520131945610046, "learning_rate": 9.84039060525133e-06, "loss": 1.1791, "step": 1382 }, { "epoch": 0.10610846131222587, "grad_norm": 0.2604202330112457, "learning_rate": 9.840079781866784e-06, "loss": 1.2139, "step": 1383 }, { "epoch": 0.10618518471158396, "grad_norm": 0.23701319098472595, "learning_rate": 9.839768661046336e-06, "loss": 1.1345, "step": 1384 }, { "epoch": 0.10626190811094204, "grad_norm": 0.2161092311143875, "learning_rate": 9.8394572428091e-06, "loss": 1.1595, "step": 1385 }, { "epoch": 0.10633863151030011, "grad_norm": 0.22699862718582153, "learning_rate": 9.839145527174216e-06, "loss": 1.1137, "step": 1386 }, { "epoch": 0.1064153549096582, "grad_norm": 0.1926688849925995, "learning_rate": 9.838833514160837e-06, "loss": 1.1364, "step": 1387 }, { "epoch": 0.10649207830901627, "grad_norm": 0.17909327149391174, "learning_rate": 9.838521203788141e-06, "loss": 1.2072, "step": 1388 }, { "epoch": 0.10656880170837436, "grad_norm": 0.2480635941028595, "learning_rate": 9.838208596075319e-06, "loss": 1.246, "step": 1389 }, { "epoch": 0.10664552510773244, "grad_norm": 0.47838664054870605, "learning_rate": 9.837895691041578e-06, "loss": 1.2624, "step": 1390 }, { "epoch": 0.10672224850709051, "grad_norm": 0.1989351212978363, "learning_rate": 9.837582488706152e-06, "loss": 1.1826, "step": 1391 }, { "epoch": 0.1067989719064486, "grad_norm": 0.2450477033853531, "learning_rate": 9.837268989088286e-06, "loss": 1.1785, "step": 1392 }, { "epoch": 0.10687569530580668, "grad_norm": 0.2520241141319275, "learning_rate": 9.836955192207245e-06, "loss": 1.2718, "step": 1393 }, { "epoch": 0.10695241870516477, "grad_norm": 0.26549333333969116, "learning_rate": 9.836641098082313e-06, "loss": 1.1028, "step": 1394 }, { "epoch": 0.10702914210452284, "grad_norm": 0.6218889355659485, "learning_rate": 9.836326706732792e-06, "loss": 1.1713, "step": 1395 }, { "epoch": 0.10710586550388093, "grad_norm": 0.25856754183769226, "learning_rate": 9.836012018178003e-06, "loss": 1.1187, "step": 1396 }, { "epoch": 0.107182588903239, "grad_norm": 0.19300296902656555, "learning_rate": 9.83569703243728e-06, "loss": 1.2168, "step": 1397 }, { "epoch": 0.10725931230259708, "grad_norm": 0.18776890635490417, "learning_rate": 9.835381749529989e-06, "loss": 1.1725, "step": 1398 }, { "epoch": 0.10733603570195517, "grad_norm": 0.24155493080615997, "learning_rate": 9.835066169475496e-06, "loss": 1.1366, "step": 1399 }, { "epoch": 0.10741275910131325, "grad_norm": 0.2176680564880371, "learning_rate": 9.834750292293197e-06, "loss": 1.2261, "step": 1400 }, { "epoch": 0.10748948250067134, "grad_norm": 0.21854375302791595, "learning_rate": 9.834434118002504e-06, "loss": 1.1604, "step": 1401 }, { "epoch": 0.10756620590002941, "grad_norm": 0.20740880072116852, "learning_rate": 9.834117646622849e-06, "loss": 1.095, "step": 1402 }, { "epoch": 0.10764292929938749, "grad_norm": 0.24608807265758514, "learning_rate": 9.833800878173675e-06, "loss": 1.147, "step": 1403 }, { "epoch": 0.10771965269874557, "grad_norm": 0.21770209074020386, "learning_rate": 9.833483812674453e-06, "loss": 1.0669, "step": 1404 }, { "epoch": 0.10779637609810365, "grad_norm": 0.31088897585868835, "learning_rate": 9.833166450144665e-06, "loss": 1.2085, "step": 1405 }, { "epoch": 0.10787309949746174, "grad_norm": 0.18898774683475494, "learning_rate": 9.832848790603815e-06, "loss": 1.2328, "step": 1406 }, { "epoch": 0.10794982289681981, "grad_norm": 0.20343169569969177, "learning_rate": 9.832530834071424e-06, "loss": 1.1714, "step": 1407 }, { "epoch": 0.1080265462961779, "grad_norm": 0.22438085079193115, "learning_rate": 9.83221258056703e-06, "loss": 1.1301, "step": 1408 }, { "epoch": 0.10810326969553598, "grad_norm": 0.1789776235818863, "learning_rate": 9.83189403011019e-06, "loss": 1.1211, "step": 1409 }, { "epoch": 0.10817999309489405, "grad_norm": 0.6548212766647339, "learning_rate": 9.831575182720481e-06, "loss": 1.2241, "step": 1410 }, { "epoch": 0.10825671649425214, "grad_norm": 0.36644116044044495, "learning_rate": 9.831256038417498e-06, "loss": 1.0852, "step": 1411 }, { "epoch": 0.10833343989361022, "grad_norm": 0.34827667474746704, "learning_rate": 9.830936597220852e-06, "loss": 1.1991, "step": 1412 }, { "epoch": 0.1084101632929683, "grad_norm": 0.23170290887355804, "learning_rate": 9.830616859150175e-06, "loss": 1.1811, "step": 1413 }, { "epoch": 0.10848688669232638, "grad_norm": 0.2540014386177063, "learning_rate": 9.830296824225113e-06, "loss": 1.1603, "step": 1414 }, { "epoch": 0.10856361009168446, "grad_norm": 0.1949627846479416, "learning_rate": 9.829976492465335e-06, "loss": 1.0804, "step": 1415 }, { "epoch": 0.10864033349104255, "grad_norm": 0.331794410943985, "learning_rate": 9.829655863890526e-06, "loss": 1.1528, "step": 1416 }, { "epoch": 0.10871705689040062, "grad_norm": 0.2019718438386917, "learning_rate": 9.82933493852039e-06, "loss": 1.2028, "step": 1417 }, { "epoch": 0.10879378028975871, "grad_norm": 0.10585971921682358, "learning_rate": 9.829013716374647e-06, "loss": 1.2144, "step": 1418 }, { "epoch": 0.10887050368911678, "grad_norm": 0.236016184091568, "learning_rate": 9.828692197473039e-06, "loss": 1.1969, "step": 1419 }, { "epoch": 0.10894722708847486, "grad_norm": 0.20557627081871033, "learning_rate": 9.828370381835323e-06, "loss": 1.2714, "step": 1420 }, { "epoch": 0.10902395048783295, "grad_norm": 0.2588704228401184, "learning_rate": 9.828048269481275e-06, "loss": 1.1717, "step": 1421 }, { "epoch": 0.10910067388719102, "grad_norm": 0.19467657804489136, "learning_rate": 9.82772586043069e-06, "loss": 1.1573, "step": 1422 }, { "epoch": 0.10917739728654911, "grad_norm": 0.28377655148506165, "learning_rate": 9.827403154703383e-06, "loss": 1.1161, "step": 1423 }, { "epoch": 0.10925412068590719, "grad_norm": 0.23465241491794586, "learning_rate": 9.827080152319182e-06, "loss": 1.1932, "step": 1424 }, { "epoch": 0.10933084408526528, "grad_norm": 0.21981193125247955, "learning_rate": 9.826756853297939e-06, "loss": 1.1709, "step": 1425 }, { "epoch": 0.10940756748462335, "grad_norm": 0.20237812399864197, "learning_rate": 9.82643325765952e-06, "loss": 1.1567, "step": 1426 }, { "epoch": 0.10948429088398143, "grad_norm": 0.24121607840061188, "learning_rate": 9.82610936542381e-06, "loss": 1.134, "step": 1427 }, { "epoch": 0.10956101428333952, "grad_norm": 0.20641088485717773, "learning_rate": 9.825785176610714e-06, "loss": 1.101, "step": 1428 }, { "epoch": 0.10963773768269759, "grad_norm": 0.19993087649345398, "learning_rate": 9.825460691240156e-06, "loss": 1.2694, "step": 1429 }, { "epoch": 0.10971446108205568, "grad_norm": 0.2146819531917572, "learning_rate": 9.825135909332074e-06, "loss": 1.1242, "step": 1430 }, { "epoch": 0.10979118448141376, "grad_norm": 0.2172749936580658, "learning_rate": 9.824810830906428e-06, "loss": 1.1791, "step": 1431 }, { "epoch": 0.10986790788077183, "grad_norm": 0.19926053285598755, "learning_rate": 9.824485455983195e-06, "loss": 1.1746, "step": 1432 }, { "epoch": 0.10994463128012992, "grad_norm": 0.2329225093126297, "learning_rate": 9.82415978458237e-06, "loss": 1.1918, "step": 1433 }, { "epoch": 0.110021354679488, "grad_norm": 0.20741716027259827, "learning_rate": 9.823833816723965e-06, "loss": 1.172, "step": 1434 }, { "epoch": 0.11009807807884608, "grad_norm": 0.17675785720348358, "learning_rate": 9.823507552428013e-06, "loss": 1.1138, "step": 1435 }, { "epoch": 0.11017480147820416, "grad_norm": 0.20610089600086212, "learning_rate": 9.823180991714565e-06, "loss": 1.1347, "step": 1436 }, { "epoch": 0.11025152487756225, "grad_norm": 0.2076302021741867, "learning_rate": 9.822854134603686e-06, "loss": 1.2155, "step": 1437 }, { "epoch": 0.11032824827692032, "grad_norm": 0.23849067091941833, "learning_rate": 9.822526981115465e-06, "loss": 1.1457, "step": 1438 }, { "epoch": 0.1104049716762784, "grad_norm": 0.5197685360908508, "learning_rate": 9.822199531270004e-06, "loss": 1.1986, "step": 1439 }, { "epoch": 0.11048169507563649, "grad_norm": 0.27082380652427673, "learning_rate": 9.821871785087428e-06, "loss": 1.2089, "step": 1440 }, { "epoch": 0.11055841847499456, "grad_norm": 0.19748516380786896, "learning_rate": 9.821543742587876e-06, "loss": 1.1163, "step": 1441 }, { "epoch": 0.11063514187435265, "grad_norm": 0.2398352324962616, "learning_rate": 9.821215403791508e-06, "loss": 1.2104, "step": 1442 }, { "epoch": 0.11071186527371073, "grad_norm": 0.22076866030693054, "learning_rate": 9.820886768718503e-06, "loss": 1.1592, "step": 1443 }, { "epoch": 0.1107885886730688, "grad_norm": 0.20402078330516815, "learning_rate": 9.820557837389055e-06, "loss": 1.0669, "step": 1444 }, { "epoch": 0.11086531207242689, "grad_norm": 0.17585520446300507, "learning_rate": 9.820228609823376e-06, "loss": 1.1531, "step": 1445 }, { "epoch": 0.11094203547178497, "grad_norm": 0.1901296079158783, "learning_rate": 9.8198990860417e-06, "loss": 1.1664, "step": 1446 }, { "epoch": 0.11101875887114306, "grad_norm": 0.3799322247505188, "learning_rate": 9.819569266064275e-06, "loss": 1.1955, "step": 1447 }, { "epoch": 0.11109548227050113, "grad_norm": 0.1847197413444519, "learning_rate": 9.819239149911373e-06, "loss": 1.2114, "step": 1448 }, { "epoch": 0.1111722056698592, "grad_norm": 0.09893456846475601, "learning_rate": 9.818908737603277e-06, "loss": 1.2528, "step": 1449 }, { "epoch": 0.1112489290692173, "grad_norm": 0.09792643040418625, "learning_rate": 9.818578029160294e-06, "loss": 1.2272, "step": 1450 }, { "epoch": 0.11132565246857537, "grad_norm": 0.26441869139671326, "learning_rate": 9.818247024602747e-06, "loss": 1.1692, "step": 1451 }, { "epoch": 0.11140237586793346, "grad_norm": 2.8445346355438232, "learning_rate": 9.817915723950975e-06, "loss": 1.1336, "step": 1452 }, { "epoch": 0.11147909926729153, "grad_norm": 0.3466053009033203, "learning_rate": 9.817584127225337e-06, "loss": 1.2036, "step": 1453 }, { "epoch": 0.11155582266664962, "grad_norm": 0.2142128199338913, "learning_rate": 9.817252234446215e-06, "loss": 1.2275, "step": 1454 }, { "epoch": 0.1116325460660077, "grad_norm": 0.1899275928735733, "learning_rate": 9.816920045634e-06, "loss": 1.2246, "step": 1455 }, { "epoch": 0.11170926946536577, "grad_norm": 0.1873079389333725, "learning_rate": 9.816587560809106e-06, "loss": 1.2649, "step": 1456 }, { "epoch": 0.11178599286472386, "grad_norm": 0.22468526661396027, "learning_rate": 9.816254779991969e-06, "loss": 1.1551, "step": 1457 }, { "epoch": 0.11186271626408194, "grad_norm": 0.2888057231903076, "learning_rate": 9.815921703203035e-06, "loss": 1.2683, "step": 1458 }, { "epoch": 0.11193943966344003, "grad_norm": 0.3286988139152527, "learning_rate": 9.815588330462778e-06, "loss": 1.1758, "step": 1459 }, { "epoch": 0.1120161630627981, "grad_norm": 0.20194561779499054, "learning_rate": 9.815254661791678e-06, "loss": 1.113, "step": 1460 }, { "epoch": 0.11209288646215618, "grad_norm": 0.10079697519540787, "learning_rate": 9.814920697210244e-06, "loss": 1.2355, "step": 1461 }, { "epoch": 0.11216960986151427, "grad_norm": 0.2000504732131958, "learning_rate": 9.814586436738998e-06, "loss": 1.2296, "step": 1462 }, { "epoch": 0.11224633326087234, "grad_norm": 0.18397323787212372, "learning_rate": 9.81425188039848e-06, "loss": 1.2072, "step": 1463 }, { "epoch": 0.11232305666023043, "grad_norm": 0.5623621940612793, "learning_rate": 9.81391702820925e-06, "loss": 1.1346, "step": 1464 }, { "epoch": 0.1123997800595885, "grad_norm": 0.19316647946834564, "learning_rate": 9.813581880191888e-06, "loss": 1.1841, "step": 1465 }, { "epoch": 0.1124765034589466, "grad_norm": 0.29956334829330444, "learning_rate": 9.813246436366987e-06, "loss": 1.0944, "step": 1466 }, { "epoch": 0.11255322685830467, "grad_norm": 0.2807074189186096, "learning_rate": 9.812910696755162e-06, "loss": 1.1438, "step": 1467 }, { "epoch": 0.11262995025766274, "grad_norm": 0.2456335872411728, "learning_rate": 9.812574661377046e-06, "loss": 1.1344, "step": 1468 }, { "epoch": 0.11270667365702083, "grad_norm": 0.10043669492006302, "learning_rate": 9.812238330253287e-06, "loss": 1.317, "step": 1469 }, { "epoch": 0.11278339705637891, "grad_norm": 0.20684416592121124, "learning_rate": 9.811901703404553e-06, "loss": 1.1206, "step": 1470 }, { "epoch": 0.112860120455737, "grad_norm": 0.26950767636299133, "learning_rate": 9.811564780851535e-06, "loss": 1.1243, "step": 1471 }, { "epoch": 0.11293684385509507, "grad_norm": 0.22106660902500153, "learning_rate": 9.811227562614934e-06, "loss": 1.2106, "step": 1472 }, { "epoch": 0.11301356725445315, "grad_norm": 0.09754426777362823, "learning_rate": 9.810890048715474e-06, "loss": 1.233, "step": 1473 }, { "epoch": 0.11309029065381124, "grad_norm": 0.23154056072235107, "learning_rate": 9.810552239173894e-06, "loss": 1.1623, "step": 1474 }, { "epoch": 0.11316701405316931, "grad_norm": 0.33407220244407654, "learning_rate": 9.810214134010958e-06, "loss": 1.129, "step": 1475 }, { "epoch": 0.1132437374525274, "grad_norm": 0.19276084005832672, "learning_rate": 9.809875733247441e-06, "loss": 1.1631, "step": 1476 }, { "epoch": 0.11332046085188548, "grad_norm": 0.37569838762283325, "learning_rate": 9.809537036904137e-06, "loss": 1.0881, "step": 1477 }, { "epoch": 0.11339718425124355, "grad_norm": 0.10277111828327179, "learning_rate": 9.809198045001861e-06, "loss": 1.2384, "step": 1478 }, { "epoch": 0.11347390765060164, "grad_norm": 0.20252402126789093, "learning_rate": 9.808858757561445e-06, "loss": 1.1483, "step": 1479 }, { "epoch": 0.11355063104995972, "grad_norm": 0.2443489283323288, "learning_rate": 9.808519174603741e-06, "loss": 1.2555, "step": 1480 }, { "epoch": 0.1136273544493178, "grad_norm": 0.4014085531234741, "learning_rate": 9.808179296149616e-06, "loss": 1.2031, "step": 1481 }, { "epoch": 0.11370407784867588, "grad_norm": 0.3141859173774719, "learning_rate": 9.807839122219956e-06, "loss": 1.232, "step": 1482 }, { "epoch": 0.11378080124803397, "grad_norm": 0.2817087769508362, "learning_rate": 9.807498652835664e-06, "loss": 1.2467, "step": 1483 }, { "epoch": 0.11385752464739204, "grad_norm": 0.2016172856092453, "learning_rate": 9.807157888017667e-06, "loss": 1.155, "step": 1484 }, { "epoch": 0.11393424804675012, "grad_norm": 0.2914222776889801, "learning_rate": 9.8068168277869e-06, "loss": 1.138, "step": 1485 }, { "epoch": 0.11401097144610821, "grad_norm": 0.28268396854400635, "learning_rate": 9.806475472164327e-06, "loss": 1.2107, "step": 1486 }, { "epoch": 0.11408769484546628, "grad_norm": 0.3054557144641876, "learning_rate": 9.806133821170925e-06, "loss": 1.2726, "step": 1487 }, { "epoch": 0.11416441824482437, "grad_norm": 0.24656766653060913, "learning_rate": 9.805791874827687e-06, "loss": 1.2177, "step": 1488 }, { "epoch": 0.11424114164418245, "grad_norm": 0.22384299337863922, "learning_rate": 9.805449633155629e-06, "loss": 1.1931, "step": 1489 }, { "epoch": 0.11431786504354052, "grad_norm": 0.20604035258293152, "learning_rate": 9.80510709617578e-06, "loss": 1.159, "step": 1490 }, { "epoch": 0.11439458844289861, "grad_norm": 0.19863593578338623, "learning_rate": 9.80476426390919e-06, "loss": 1.2392, "step": 1491 }, { "epoch": 0.11447131184225669, "grad_norm": 0.10290823876857758, "learning_rate": 9.804421136376929e-06, "loss": 1.2944, "step": 1492 }, { "epoch": 0.11454803524161478, "grad_norm": 0.21022428572177887, "learning_rate": 9.804077713600081e-06, "loss": 1.1869, "step": 1493 }, { "epoch": 0.11462475864097285, "grad_norm": 0.1920960396528244, "learning_rate": 9.803733995599754e-06, "loss": 1.1804, "step": 1494 }, { "epoch": 0.11470148204033094, "grad_norm": 0.28150472044944763, "learning_rate": 9.803389982397067e-06, "loss": 1.229, "step": 1495 }, { "epoch": 0.11477820543968902, "grad_norm": 0.22800739109516144, "learning_rate": 9.803045674013161e-06, "loss": 1.1312, "step": 1496 }, { "epoch": 0.11485492883904709, "grad_norm": 0.4668566882610321, "learning_rate": 9.802701070469194e-06, "loss": 1.1723, "step": 1497 }, { "epoch": 0.11493165223840518, "grad_norm": 0.0964796394109726, "learning_rate": 9.802356171786345e-06, "loss": 1.3067, "step": 1498 }, { "epoch": 0.11500837563776325, "grad_norm": 0.28407981991767883, "learning_rate": 9.802010977985807e-06, "loss": 1.1875, "step": 1499 }, { "epoch": 0.11508509903712134, "grad_norm": 0.28660401701927185, "learning_rate": 9.801665489088795e-06, "loss": 1.233, "step": 1500 }, { "epoch": 0.11516182243647942, "grad_norm": 0.19079165160655975, "learning_rate": 9.801319705116537e-06, "loss": 1.1662, "step": 1501 }, { "epoch": 0.1152385458358375, "grad_norm": 0.19691820442676544, "learning_rate": 9.800973626090286e-06, "loss": 1.2069, "step": 1502 }, { "epoch": 0.11531526923519558, "grad_norm": 0.21487516164779663, "learning_rate": 9.800627252031307e-06, "loss": 1.1255, "step": 1503 }, { "epoch": 0.11539199263455366, "grad_norm": 0.25070518255233765, "learning_rate": 9.800280582960887e-06, "loss": 1.1098, "step": 1504 }, { "epoch": 0.11546871603391175, "grad_norm": 0.22657877206802368, "learning_rate": 9.79993361890033e-06, "loss": 1.1747, "step": 1505 }, { "epoch": 0.11554543943326982, "grad_norm": 0.20152369141578674, "learning_rate": 9.799586359870957e-06, "loss": 1.2155, "step": 1506 }, { "epoch": 0.1156221628326279, "grad_norm": 0.2507525086402893, "learning_rate": 9.799238805894107e-06, "loss": 1.1842, "step": 1507 }, { "epoch": 0.11569888623198599, "grad_norm": 0.24676302075386047, "learning_rate": 9.798890956991139e-06, "loss": 1.2161, "step": 1508 }, { "epoch": 0.11577560963134406, "grad_norm": 0.196258082985878, "learning_rate": 9.798542813183431e-06, "loss": 1.1324, "step": 1509 }, { "epoch": 0.11585233303070215, "grad_norm": 0.22059020400047302, "learning_rate": 9.798194374492376e-06, "loss": 1.1876, "step": 1510 }, { "epoch": 0.11592905643006023, "grad_norm": 0.19657987356185913, "learning_rate": 9.797845640939385e-06, "loss": 1.1209, "step": 1511 }, { "epoch": 0.11600577982941832, "grad_norm": 0.2491808980703354, "learning_rate": 9.797496612545892e-06, "loss": 1.1053, "step": 1512 }, { "epoch": 0.11608250322877639, "grad_norm": 0.10120633989572525, "learning_rate": 9.797147289333343e-06, "loss": 1.2195, "step": 1513 }, { "epoch": 0.11615922662813447, "grad_norm": 0.23901133239269257, "learning_rate": 9.796797671323204e-06, "loss": 1.1812, "step": 1514 }, { "epoch": 0.11623595002749255, "grad_norm": 0.2066999226808548, "learning_rate": 9.796447758536963e-06, "loss": 1.1356, "step": 1515 }, { "epoch": 0.11631267342685063, "grad_norm": 0.0929831713438034, "learning_rate": 9.796097550996119e-06, "loss": 1.1997, "step": 1516 }, { "epoch": 0.11638939682620872, "grad_norm": 0.2671791911125183, "learning_rate": 9.7957470487222e-06, "loss": 1.1508, "step": 1517 }, { "epoch": 0.1164661202255668, "grad_norm": 0.2000580132007599, "learning_rate": 9.795396251736739e-06, "loss": 1.1408, "step": 1518 }, { "epoch": 0.11654284362492487, "grad_norm": 0.27781957387924194, "learning_rate": 9.795045160061295e-06, "loss": 1.2045, "step": 1519 }, { "epoch": 0.11661956702428296, "grad_norm": 0.19510141015052795, "learning_rate": 9.794693773717445e-06, "loss": 1.1944, "step": 1520 }, { "epoch": 0.11669629042364103, "grad_norm": 0.2308432161808014, "learning_rate": 9.79434209272678e-06, "loss": 1.1808, "step": 1521 }, { "epoch": 0.11677301382299912, "grad_norm": 0.09514348953962326, "learning_rate": 9.793990117110917e-06, "loss": 1.2722, "step": 1522 }, { "epoch": 0.1168497372223572, "grad_norm": 0.2330925017595291, "learning_rate": 9.79363784689148e-06, "loss": 1.2008, "step": 1523 }, { "epoch": 0.11692646062171529, "grad_norm": 0.3861830532550812, "learning_rate": 9.793285282090119e-06, "loss": 1.1593, "step": 1524 }, { "epoch": 0.11700318402107336, "grad_norm": 0.26540225744247437, "learning_rate": 9.7929324227285e-06, "loss": 1.2417, "step": 1525 }, { "epoch": 0.11707990742043144, "grad_norm": 0.24416673183441162, "learning_rate": 9.792579268828309e-06, "loss": 1.1592, "step": 1526 }, { "epoch": 0.11715663081978953, "grad_norm": 0.23007692396640778, "learning_rate": 9.792225820411246e-06, "loss": 1.0954, "step": 1527 }, { "epoch": 0.1172333542191476, "grad_norm": 0.23495519161224365, "learning_rate": 9.791872077499032e-06, "loss": 1.2435, "step": 1528 }, { "epoch": 0.11731007761850569, "grad_norm": 0.21454857289791107, "learning_rate": 9.791518040113405e-06, "loss": 1.1018, "step": 1529 }, { "epoch": 0.11738680101786376, "grad_norm": 0.1951836496591568, "learning_rate": 9.791163708276124e-06, "loss": 1.161, "step": 1530 }, { "epoch": 0.11746352441722184, "grad_norm": 0.26650097966194153, "learning_rate": 9.79080908200896e-06, "loss": 1.1348, "step": 1531 }, { "epoch": 0.11754024781657993, "grad_norm": 0.3482474088668823, "learning_rate": 9.790454161333708e-06, "loss": 1.1921, "step": 1532 }, { "epoch": 0.117616971215938, "grad_norm": 0.3334502577781677, "learning_rate": 9.790098946272177e-06, "loss": 1.1151, "step": 1533 }, { "epoch": 0.1176936946152961, "grad_norm": 0.24413324892520905, "learning_rate": 9.7897434368462e-06, "loss": 1.0393, "step": 1534 }, { "epoch": 0.11777041801465417, "grad_norm": 0.22181043028831482, "learning_rate": 9.789387633077618e-06, "loss": 1.1776, "step": 1535 }, { "epoch": 0.11784714141401224, "grad_norm": 0.2692579925060272, "learning_rate": 9.789031534988302e-06, "loss": 1.1699, "step": 1536 }, { "epoch": 0.11792386481337033, "grad_norm": 0.2143210768699646, "learning_rate": 9.788675142600131e-06, "loss": 1.1206, "step": 1537 }, { "epoch": 0.11800058821272841, "grad_norm": 0.2249426692724228, "learning_rate": 9.788318455935008e-06, "loss": 1.0998, "step": 1538 }, { "epoch": 0.1180773116120865, "grad_norm": 0.20653149485588074, "learning_rate": 9.787961475014852e-06, "loss": 1.2784, "step": 1539 }, { "epoch": 0.11815403501144457, "grad_norm": 0.22673116624355316, "learning_rate": 9.7876041998616e-06, "loss": 1.205, "step": 1540 }, { "epoch": 0.11823075841080266, "grad_norm": 0.22611646354198456, "learning_rate": 9.787246630497208e-06, "loss": 1.1745, "step": 1541 }, { "epoch": 0.11830748181016074, "grad_norm": 0.28459376096725464, "learning_rate": 9.786888766943653e-06, "loss": 1.2269, "step": 1542 }, { "epoch": 0.11838420520951881, "grad_norm": 0.18811635673046112, "learning_rate": 9.78653060922292e-06, "loss": 1.1752, "step": 1543 }, { "epoch": 0.1184609286088769, "grad_norm": 0.3791051208972931, "learning_rate": 9.786172157357024e-06, "loss": 1.1451, "step": 1544 }, { "epoch": 0.11853765200823498, "grad_norm": 0.778395414352417, "learning_rate": 9.785813411367988e-06, "loss": 1.1914, "step": 1545 }, { "epoch": 0.11861437540759306, "grad_norm": 0.21262358129024506, "learning_rate": 9.785454371277862e-06, "loss": 1.1243, "step": 1546 }, { "epoch": 0.11869109880695114, "grad_norm": 0.2878345549106598, "learning_rate": 9.78509503710871e-06, "loss": 1.0886, "step": 1547 }, { "epoch": 0.11876782220630921, "grad_norm": 0.3433103561401367, "learning_rate": 9.784735408882613e-06, "loss": 1.0841, "step": 1548 }, { "epoch": 0.1188445456056673, "grad_norm": 0.2690388262271881, "learning_rate": 9.78437548662167e-06, "loss": 1.1598, "step": 1549 }, { "epoch": 0.11892126900502538, "grad_norm": 0.18890143930912018, "learning_rate": 9.784015270348001e-06, "loss": 1.1465, "step": 1550 }, { "epoch": 0.11899799240438347, "grad_norm": 0.2623806297779083, "learning_rate": 9.783654760083741e-06, "loss": 1.2297, "step": 1551 }, { "epoch": 0.11907471580374154, "grad_norm": 0.2027038335800171, "learning_rate": 9.783293955851044e-06, "loss": 1.1192, "step": 1552 }, { "epoch": 0.11915143920309963, "grad_norm": 0.2198319137096405, "learning_rate": 9.782932857672084e-06, "loss": 1.1117, "step": 1553 }, { "epoch": 0.11922816260245771, "grad_norm": 0.23800648748874664, "learning_rate": 9.782571465569051e-06, "loss": 1.1475, "step": 1554 }, { "epoch": 0.11930488600181578, "grad_norm": 0.23029878735542297, "learning_rate": 9.782209779564151e-06, "loss": 1.2588, "step": 1555 }, { "epoch": 0.11938160940117387, "grad_norm": 0.2225593626499176, "learning_rate": 9.781847799679616e-06, "loss": 1.163, "step": 1556 }, { "epoch": 0.11945833280053195, "grad_norm": 0.22884106636047363, "learning_rate": 9.781485525937683e-06, "loss": 1.1204, "step": 1557 }, { "epoch": 0.11953505619989004, "grad_norm": 0.1950307935476303, "learning_rate": 9.781122958360622e-06, "loss": 1.1302, "step": 1558 }, { "epoch": 0.11961177959924811, "grad_norm": 0.2211586833000183, "learning_rate": 9.780760096970712e-06, "loss": 1.1721, "step": 1559 }, { "epoch": 0.11968850299860619, "grad_norm": 0.2746817469596863, "learning_rate": 9.780396941790248e-06, "loss": 1.14, "step": 1560 }, { "epoch": 0.11976522639796428, "grad_norm": 0.21846798062324524, "learning_rate": 9.780033492841551e-06, "loss": 1.1879, "step": 1561 }, { "epoch": 0.11984194979732235, "grad_norm": 0.21175043284893036, "learning_rate": 9.779669750146955e-06, "loss": 1.1372, "step": 1562 }, { "epoch": 0.11991867319668044, "grad_norm": 0.2033165991306305, "learning_rate": 9.77930571372881e-06, "loss": 1.1475, "step": 1563 }, { "epoch": 0.11999539659603851, "grad_norm": 0.19795428216457367, "learning_rate": 9.77894138360949e-06, "loss": 1.1271, "step": 1564 }, { "epoch": 0.12007211999539659, "grad_norm": 0.2094157189130783, "learning_rate": 9.778576759811384e-06, "loss": 1.1426, "step": 1565 }, { "epoch": 0.12014884339475468, "grad_norm": 0.30447638034820557, "learning_rate": 9.778211842356898e-06, "loss": 1.1067, "step": 1566 }, { "epoch": 0.12022556679411275, "grad_norm": 0.186275914311409, "learning_rate": 9.777846631268457e-06, "loss": 1.1943, "step": 1567 }, { "epoch": 0.12030229019347084, "grad_norm": 0.9650161266326904, "learning_rate": 9.777481126568507e-06, "loss": 1.1578, "step": 1568 }, { "epoch": 0.12037901359282892, "grad_norm": 0.20198878645896912, "learning_rate": 9.777115328279505e-06, "loss": 1.1943, "step": 1569 }, { "epoch": 0.120455736992187, "grad_norm": 0.20379012823104858, "learning_rate": 9.776749236423934e-06, "loss": 1.0964, "step": 1570 }, { "epoch": 0.12053246039154508, "grad_norm": 0.2085404396057129, "learning_rate": 9.77638285102429e-06, "loss": 1.1175, "step": 1571 }, { "epoch": 0.12060918379090316, "grad_norm": 0.26838478446006775, "learning_rate": 9.776016172103086e-06, "loss": 1.0997, "step": 1572 }, { "epoch": 0.12068590719026125, "grad_norm": 0.3784668445587158, "learning_rate": 9.77564919968286e-06, "loss": 1.0653, "step": 1573 }, { "epoch": 0.12076263058961932, "grad_norm": 0.2539951801300049, "learning_rate": 9.775281933786157e-06, "loss": 1.2125, "step": 1574 }, { "epoch": 0.12083935398897741, "grad_norm": 0.32308638095855713, "learning_rate": 9.774914374435555e-06, "loss": 1.2359, "step": 1575 }, { "epoch": 0.12091607738833549, "grad_norm": 0.281904935836792, "learning_rate": 9.774546521653633e-06, "loss": 1.1625, "step": 1576 }, { "epoch": 0.12099280078769356, "grad_norm": 0.2819288671016693, "learning_rate": 9.774178375463004e-06, "loss": 1.1869, "step": 1577 }, { "epoch": 0.12106952418705165, "grad_norm": 0.17949096858501434, "learning_rate": 9.773809935886287e-06, "loss": 1.1045, "step": 1578 }, { "epoch": 0.12114624758640972, "grad_norm": 0.21415382623672485, "learning_rate": 9.773441202946124e-06, "loss": 1.1345, "step": 1579 }, { "epoch": 0.12122297098576781, "grad_norm": 0.4376871883869171, "learning_rate": 9.773072176665175e-06, "loss": 1.1768, "step": 1580 }, { "epoch": 0.12129969438512589, "grad_norm": 0.21608005464076996, "learning_rate": 9.772702857066118e-06, "loss": 1.1374, "step": 1581 }, { "epoch": 0.12137641778448398, "grad_norm": 0.3081972599029541, "learning_rate": 9.772333244171648e-06, "loss": 1.2248, "step": 1582 }, { "epoch": 0.12145314118384205, "grad_norm": 0.27906253933906555, "learning_rate": 9.771963338004482e-06, "loss": 1.1425, "step": 1583 }, { "epoch": 0.12152986458320013, "grad_norm": 0.2363089919090271, "learning_rate": 9.771593138587347e-06, "loss": 1.1657, "step": 1584 }, { "epoch": 0.12160658798255822, "grad_norm": 0.18599139153957367, "learning_rate": 9.771222645942995e-06, "loss": 1.2168, "step": 1585 }, { "epoch": 0.12168331138191629, "grad_norm": 0.22294186055660248, "learning_rate": 9.770851860094194e-06, "loss": 1.257, "step": 1586 }, { "epoch": 0.12176003478127438, "grad_norm": 0.22479598224163055, "learning_rate": 9.770480781063729e-06, "loss": 1.1985, "step": 1587 }, { "epoch": 0.12183675818063246, "grad_norm": 0.18253093957901, "learning_rate": 9.770109408874404e-06, "loss": 1.1427, "step": 1588 }, { "epoch": 0.12191348157999053, "grad_norm": 0.22982743382453918, "learning_rate": 9.769737743549042e-06, "loss": 1.1413, "step": 1589 }, { "epoch": 0.12199020497934862, "grad_norm": 0.3225191533565521, "learning_rate": 9.769365785110481e-06, "loss": 1.1762, "step": 1590 }, { "epoch": 0.1220669283787067, "grad_norm": 0.2530233561992645, "learning_rate": 9.768993533581578e-06, "loss": 1.1711, "step": 1591 }, { "epoch": 0.12214365177806479, "grad_norm": 0.2980887293815613, "learning_rate": 9.768620988985213e-06, "loss": 1.188, "step": 1592 }, { "epoch": 0.12222037517742286, "grad_norm": 0.21671327948570251, "learning_rate": 9.768248151344278e-06, "loss": 1.1867, "step": 1593 }, { "epoch": 0.12229709857678094, "grad_norm": 0.23381304740905762, "learning_rate": 9.767875020681682e-06, "loss": 1.1184, "step": 1594 }, { "epoch": 0.12237382197613902, "grad_norm": 0.4140051603317261, "learning_rate": 9.767501597020357e-06, "loss": 1.2436, "step": 1595 }, { "epoch": 0.1224505453754971, "grad_norm": 0.19856323301792145, "learning_rate": 9.76712788038325e-06, "loss": 1.1953, "step": 1596 }, { "epoch": 0.12252726877485519, "grad_norm": 0.2157018780708313, "learning_rate": 9.766753870793332e-06, "loss": 1.2205, "step": 1597 }, { "epoch": 0.12260399217421326, "grad_norm": 0.10899586975574493, "learning_rate": 9.76637956827358e-06, "loss": 1.2709, "step": 1598 }, { "epoch": 0.12268071557357135, "grad_norm": 0.31520384550094604, "learning_rate": 9.766004972847e-06, "loss": 1.1421, "step": 1599 }, { "epoch": 0.12275743897292943, "grad_norm": 0.28436365723609924, "learning_rate": 9.765630084536611e-06, "loss": 1.1813, "step": 1600 }, { "epoch": 0.1228341623722875, "grad_norm": 0.21647638082504272, "learning_rate": 9.765254903365448e-06, "loss": 1.119, "step": 1601 }, { "epoch": 0.12291088577164559, "grad_norm": 0.25364595651626587, "learning_rate": 9.76487942935657e-06, "loss": 1.2308, "step": 1602 }, { "epoch": 0.12298760917100367, "grad_norm": 0.22880485653877258, "learning_rate": 9.764503662533052e-06, "loss": 1.0783, "step": 1603 }, { "epoch": 0.12306433257036176, "grad_norm": 0.20153911411762238, "learning_rate": 9.764127602917983e-06, "loss": 1.1441, "step": 1604 }, { "epoch": 0.12314105596971983, "grad_norm": 0.20823343098163605, "learning_rate": 9.763751250534474e-06, "loss": 1.1554, "step": 1605 }, { "epoch": 0.1232177793690779, "grad_norm": 0.22486282885074615, "learning_rate": 9.763374605405653e-06, "loss": 1.2221, "step": 1606 }, { "epoch": 0.123294502768436, "grad_norm": 0.2184714674949646, "learning_rate": 9.762997667554666e-06, "loss": 1.139, "step": 1607 }, { "epoch": 0.12337122616779407, "grad_norm": 0.21588638424873352, "learning_rate": 9.762620437004677e-06, "loss": 1.1805, "step": 1608 }, { "epoch": 0.12344794956715216, "grad_norm": 0.23503121733665466, "learning_rate": 9.762242913778868e-06, "loss": 1.1758, "step": 1609 }, { "epoch": 0.12352467296651023, "grad_norm": 0.2705335319042206, "learning_rate": 9.761865097900435e-06, "loss": 1.1928, "step": 1610 }, { "epoch": 0.12360139636586831, "grad_norm": 0.24086272716522217, "learning_rate": 9.761486989392602e-06, "loss": 1.2696, "step": 1611 }, { "epoch": 0.1236781197652264, "grad_norm": 0.3907132148742676, "learning_rate": 9.7611085882786e-06, "loss": 1.1372, "step": 1612 }, { "epoch": 0.12375484316458447, "grad_norm": 0.30571529269218445, "learning_rate": 9.760729894581686e-06, "loss": 1.1711, "step": 1613 }, { "epoch": 0.12383156656394256, "grad_norm": 0.2424890398979187, "learning_rate": 9.760350908325131e-06, "loss": 1.1665, "step": 1614 }, { "epoch": 0.12390828996330064, "grad_norm": 0.19031786918640137, "learning_rate": 9.759971629532222e-06, "loss": 1.1867, "step": 1615 }, { "epoch": 0.12398501336265873, "grad_norm": 0.20656892657279968, "learning_rate": 9.75959205822627e-06, "loss": 1.2238, "step": 1616 }, { "epoch": 0.1240617367620168, "grad_norm": 0.26220059394836426, "learning_rate": 9.759212194430598e-06, "loss": 1.2282, "step": 1617 }, { "epoch": 0.12413846016137488, "grad_norm": 0.20865848660469055, "learning_rate": 9.758832038168553e-06, "loss": 1.1558, "step": 1618 }, { "epoch": 0.12421518356073297, "grad_norm": 0.2582096457481384, "learning_rate": 9.758451589463493e-06, "loss": 1.1724, "step": 1619 }, { "epoch": 0.12429190696009104, "grad_norm": 0.186502605676651, "learning_rate": 9.758070848338801e-06, "loss": 1.1735, "step": 1620 }, { "epoch": 0.12436863035944913, "grad_norm": 0.1999397724866867, "learning_rate": 9.757689814817872e-06, "loss": 1.1685, "step": 1621 }, { "epoch": 0.1244453537588072, "grad_norm": 0.21610456705093384, "learning_rate": 9.757308488924121e-06, "loss": 1.1938, "step": 1622 }, { "epoch": 0.12452207715816528, "grad_norm": 0.26225998997688293, "learning_rate": 9.756926870680987e-06, "loss": 1.1606, "step": 1623 }, { "epoch": 0.12459880055752337, "grad_norm": 0.37332475185394287, "learning_rate": 9.756544960111913e-06, "loss": 1.1723, "step": 1624 }, { "epoch": 0.12467552395688145, "grad_norm": 0.19894498586654663, "learning_rate": 9.756162757240375e-06, "loss": 1.2274, "step": 1625 }, { "epoch": 0.12475224735623953, "grad_norm": 0.1953968107700348, "learning_rate": 9.755780262089857e-06, "loss": 1.1722, "step": 1626 }, { "epoch": 0.12482897075559761, "grad_norm": 0.24434392154216766, "learning_rate": 9.755397474683867e-06, "loss": 1.1628, "step": 1627 }, { "epoch": 0.1249056941549557, "grad_norm": 0.222855344414711, "learning_rate": 9.755014395045925e-06, "loss": 1.1893, "step": 1628 }, { "epoch": 0.12498241755431377, "grad_norm": 0.20889317989349365, "learning_rate": 9.754631023199576e-06, "loss": 1.2422, "step": 1629 }, { "epoch": 0.12505914095367185, "grad_norm": 0.18188638985157013, "learning_rate": 9.754247359168376e-06, "loss": 1.1277, "step": 1630 }, { "epoch": 0.12513586435302992, "grad_norm": 0.24008433520793915, "learning_rate": 9.753863402975905e-06, "loss": 1.188, "step": 1631 }, { "epoch": 0.12521258775238803, "grad_norm": 0.4408395290374756, "learning_rate": 9.753479154645754e-06, "loss": 1.0764, "step": 1632 }, { "epoch": 0.1252893111517461, "grad_norm": 0.2542000412940979, "learning_rate": 9.753094614201542e-06, "loss": 1.1298, "step": 1633 }, { "epoch": 0.12536603455110418, "grad_norm": 0.21660968661308289, "learning_rate": 9.752709781666897e-06, "loss": 1.1434, "step": 1634 }, { "epoch": 0.12544275795046225, "grad_norm": 0.2540670931339264, "learning_rate": 9.752324657065464e-06, "loss": 1.2306, "step": 1635 }, { "epoch": 0.12551948134982033, "grad_norm": 0.20594274997711182, "learning_rate": 9.751939240420916e-06, "loss": 1.1719, "step": 1636 }, { "epoch": 0.12559620474917843, "grad_norm": 0.10958658158779144, "learning_rate": 9.751553531756937e-06, "loss": 1.2454, "step": 1637 }, { "epoch": 0.1256729281485365, "grad_norm": 0.21080636978149414, "learning_rate": 9.751167531097226e-06, "loss": 1.1692, "step": 1638 }, { "epoch": 0.12574965154789458, "grad_norm": 0.20017197728157043, "learning_rate": 9.750781238465507e-06, "loss": 1.2483, "step": 1639 }, { "epoch": 0.12582637494725266, "grad_norm": 0.23532631993293762, "learning_rate": 9.750394653885518e-06, "loss": 1.1582, "step": 1640 }, { "epoch": 0.12590309834661073, "grad_norm": 0.1957232505083084, "learning_rate": 9.750007777381017e-06, "loss": 1.2383, "step": 1641 }, { "epoch": 0.12597982174596883, "grad_norm": 0.24088247120380402, "learning_rate": 9.749620608975776e-06, "loss": 1.1632, "step": 1642 }, { "epoch": 0.1260565451453269, "grad_norm": 0.20466196537017822, "learning_rate": 9.749233148693588e-06, "loss": 1.1357, "step": 1643 }, { "epoch": 0.12613326854468498, "grad_norm": 0.19260148704051971, "learning_rate": 9.748845396558265e-06, "loss": 1.2063, "step": 1644 }, { "epoch": 0.12620999194404306, "grad_norm": 0.2285013496875763, "learning_rate": 9.748457352593635e-06, "loss": 1.2009, "step": 1645 }, { "epoch": 0.12628671534340113, "grad_norm": 0.22287769615650177, "learning_rate": 9.748069016823544e-06, "loss": 1.1297, "step": 1646 }, { "epoch": 0.12636343874275924, "grad_norm": 0.21363738179206848, "learning_rate": 9.747680389271857e-06, "loss": 1.1923, "step": 1647 }, { "epoch": 0.1264401621421173, "grad_norm": 0.22011318802833557, "learning_rate": 9.747291469962454e-06, "loss": 1.1723, "step": 1648 }, { "epoch": 0.1265168855414754, "grad_norm": 0.1968793421983719, "learning_rate": 9.746902258919238e-06, "loss": 1.0905, "step": 1649 }, { "epoch": 0.12659360894083346, "grad_norm": 0.21988901495933533, "learning_rate": 9.746512756166123e-06, "loss": 1.0911, "step": 1650 }, { "epoch": 0.12667033234019157, "grad_norm": 0.1842363178730011, "learning_rate": 9.746122961727051e-06, "loss": 1.082, "step": 1651 }, { "epoch": 0.12674705573954964, "grad_norm": 0.10174164921045303, "learning_rate": 9.74573287562597e-06, "loss": 1.2863, "step": 1652 }, { "epoch": 0.12682377913890772, "grad_norm": 0.2208247184753418, "learning_rate": 9.745342497886855e-06, "loss": 1.2564, "step": 1653 }, { "epoch": 0.1269005025382658, "grad_norm": 0.2243749499320984, "learning_rate": 9.744951828533697e-06, "loss": 1.1084, "step": 1654 }, { "epoch": 0.12697722593762387, "grad_norm": 0.2221800982952118, "learning_rate": 9.7445608675905e-06, "loss": 1.2441, "step": 1655 }, { "epoch": 0.12705394933698197, "grad_norm": 0.4594520628452301, "learning_rate": 9.744169615081292e-06, "loss": 1.2199, "step": 1656 }, { "epoch": 0.12713067273634004, "grad_norm": 0.209394633769989, "learning_rate": 9.743778071030116e-06, "loss": 1.152, "step": 1657 }, { "epoch": 0.12720739613569812, "grad_norm": 0.19084471464157104, "learning_rate": 9.743386235461035e-06, "loss": 1.1844, "step": 1658 }, { "epoch": 0.1272841195350562, "grad_norm": 0.2374730408191681, "learning_rate": 9.742994108398123e-06, "loss": 1.1716, "step": 1659 }, { "epoch": 0.12736084293441427, "grad_norm": 0.3045714497566223, "learning_rate": 9.742601689865484e-06, "loss": 1.2151, "step": 1660 }, { "epoch": 0.12743756633377237, "grad_norm": 0.30975237488746643, "learning_rate": 9.74220897988723e-06, "loss": 1.2269, "step": 1661 }, { "epoch": 0.12751428973313045, "grad_norm": 0.3616715967655182, "learning_rate": 9.741815978487495e-06, "loss": 1.1826, "step": 1662 }, { "epoch": 0.12759101313248852, "grad_norm": 0.5651633739471436, "learning_rate": 9.741422685690426e-06, "loss": 1.2077, "step": 1663 }, { "epoch": 0.1276677365318466, "grad_norm": 0.22304192185401917, "learning_rate": 9.741029101520199e-06, "loss": 1.1471, "step": 1664 }, { "epoch": 0.12774445993120467, "grad_norm": 1.0061925649642944, "learning_rate": 9.740635226000994e-06, "loss": 1.1383, "step": 1665 }, { "epoch": 0.12782118333056278, "grad_norm": 0.262146919965744, "learning_rate": 9.74024105915702e-06, "loss": 1.2052, "step": 1666 }, { "epoch": 0.12789790672992085, "grad_norm": 0.22597436606884003, "learning_rate": 9.739846601012497e-06, "loss": 1.1566, "step": 1667 }, { "epoch": 0.12797463012927893, "grad_norm": 0.47011828422546387, "learning_rate": 9.73945185159167e-06, "loss": 1.1623, "step": 1668 }, { "epoch": 0.128051353528637, "grad_norm": 0.20009943842887878, "learning_rate": 9.739056810918792e-06, "loss": 1.1016, "step": 1669 }, { "epoch": 0.12812807692799508, "grad_norm": 0.33444711565971375, "learning_rate": 9.738661479018142e-06, "loss": 1.1601, "step": 1670 }, { "epoch": 0.12820480032735318, "grad_norm": 0.1975913643836975, "learning_rate": 9.738265855914014e-06, "loss": 1.2422, "step": 1671 }, { "epoch": 0.12828152372671126, "grad_norm": 0.21228602528572083, "learning_rate": 9.737869941630717e-06, "loss": 1.2318, "step": 1672 }, { "epoch": 0.12835824712606933, "grad_norm": 0.19141672551631927, "learning_rate": 9.737473736192588e-06, "loss": 1.1355, "step": 1673 }, { "epoch": 0.1284349705254274, "grad_norm": 0.20364812016487122, "learning_rate": 9.737077239623968e-06, "loss": 1.0909, "step": 1674 }, { "epoch": 0.12851169392478548, "grad_norm": 0.22535984218120575, "learning_rate": 9.736680451949228e-06, "loss": 1.224, "step": 1675 }, { "epoch": 0.12858841732414358, "grad_norm": 0.21962161362171173, "learning_rate": 9.736283373192747e-06, "loss": 1.19, "step": 1676 }, { "epoch": 0.12866514072350166, "grad_norm": 0.26199471950531006, "learning_rate": 9.735886003378929e-06, "loss": 1.1323, "step": 1677 }, { "epoch": 0.12874186412285973, "grad_norm": 0.21462535858154297, "learning_rate": 9.735488342532193e-06, "loss": 1.2247, "step": 1678 }, { "epoch": 0.1288185875222178, "grad_norm": 0.21084997057914734, "learning_rate": 9.735090390676978e-06, "loss": 1.1213, "step": 1679 }, { "epoch": 0.1288953109215759, "grad_norm": 0.23975960910320282, "learning_rate": 9.734692147837737e-06, "loss": 1.1344, "step": 1680 }, { "epoch": 0.128972034320934, "grad_norm": 0.3077406883239746, "learning_rate": 9.734293614038944e-06, "loss": 1.1598, "step": 1681 }, { "epoch": 0.12904875772029206, "grad_norm": 0.2587167024612427, "learning_rate": 9.733894789305089e-06, "loss": 1.2051, "step": 1682 }, { "epoch": 0.12912548111965014, "grad_norm": 0.2429068684577942, "learning_rate": 9.733495673660682e-06, "loss": 1.1611, "step": 1683 }, { "epoch": 0.1292022045190082, "grad_norm": 0.23222556710243225, "learning_rate": 9.73309626713025e-06, "loss": 1.1324, "step": 1684 }, { "epoch": 0.12927892791836632, "grad_norm": 0.2620430290699005, "learning_rate": 9.732696569738336e-06, "loss": 1.2273, "step": 1685 }, { "epoch": 0.1293556513177244, "grad_norm": 0.25507450103759766, "learning_rate": 9.732296581509503e-06, "loss": 1.2222, "step": 1686 }, { "epoch": 0.12943237471708247, "grad_norm": 0.2843271493911743, "learning_rate": 9.731896302468333e-06, "loss": 1.1069, "step": 1687 }, { "epoch": 0.12950909811644054, "grad_norm": 0.20535694062709808, "learning_rate": 9.731495732639424e-06, "loss": 1.1678, "step": 1688 }, { "epoch": 0.12958582151579862, "grad_norm": 0.21323567628860474, "learning_rate": 9.731094872047388e-06, "loss": 1.1548, "step": 1689 }, { "epoch": 0.12966254491515672, "grad_norm": 0.22416147589683533, "learning_rate": 9.730693720716866e-06, "loss": 1.2553, "step": 1690 }, { "epoch": 0.1297392683145148, "grad_norm": 0.2282259315252304, "learning_rate": 9.730292278672503e-06, "loss": 1.1225, "step": 1691 }, { "epoch": 0.12981599171387287, "grad_norm": 0.2870725095272064, "learning_rate": 9.729890545938973e-06, "loss": 1.228, "step": 1692 }, { "epoch": 0.12989271511323094, "grad_norm": 0.23029644787311554, "learning_rate": 9.729488522540962e-06, "loss": 1.2044, "step": 1693 }, { "epoch": 0.12996943851258902, "grad_norm": 0.2668450176715851, "learning_rate": 9.729086208503174e-06, "loss": 1.2079, "step": 1694 }, { "epoch": 0.13004616191194712, "grad_norm": 0.18437577784061432, "learning_rate": 9.728683603850336e-06, "loss": 1.1647, "step": 1695 }, { "epoch": 0.1301228853113052, "grad_norm": 0.19498716294765472, "learning_rate": 9.728280708607186e-06, "loss": 1.0975, "step": 1696 }, { "epoch": 0.13019960871066327, "grad_norm": 0.21889129281044006, "learning_rate": 9.727877522798484e-06, "loss": 1.1284, "step": 1697 }, { "epoch": 0.13027633211002135, "grad_norm": 0.20351018011569977, "learning_rate": 9.727474046449005e-06, "loss": 1.1181, "step": 1698 }, { "epoch": 0.13035305550937942, "grad_norm": 0.33257415890693665, "learning_rate": 9.727070279583548e-06, "loss": 1.1414, "step": 1699 }, { "epoch": 0.13042977890873753, "grad_norm": 0.2625391185283661, "learning_rate": 9.726666222226922e-06, "loss": 1.1783, "step": 1700 }, { "epoch": 0.1305065023080956, "grad_norm": 0.21569590270519257, "learning_rate": 9.72626187440396e-06, "loss": 1.203, "step": 1701 }, { "epoch": 0.13058322570745368, "grad_norm": 0.21857045590877533, "learning_rate": 9.725857236139506e-06, "loss": 1.1157, "step": 1702 }, { "epoch": 0.13065994910681175, "grad_norm": 0.30335861444473267, "learning_rate": 9.725452307458432e-06, "loss": 1.1108, "step": 1703 }, { "epoch": 0.13073667250616983, "grad_norm": 0.2705145478248596, "learning_rate": 9.725047088385617e-06, "loss": 1.2013, "step": 1704 }, { "epoch": 0.13081339590552793, "grad_norm": 0.21580855548381805, "learning_rate": 9.724641578945965e-06, "loss": 1.1279, "step": 1705 }, { "epoch": 0.130890119304886, "grad_norm": 0.24758745729923248, "learning_rate": 9.724235779164395e-06, "loss": 1.1718, "step": 1706 }, { "epoch": 0.13096684270424408, "grad_norm": 0.19660186767578125, "learning_rate": 9.723829689065845e-06, "loss": 1.1323, "step": 1707 }, { "epoch": 0.13104356610360215, "grad_norm": 0.2867240011692047, "learning_rate": 9.723423308675271e-06, "loss": 1.1822, "step": 1708 }, { "epoch": 0.13112028950296026, "grad_norm": 0.257741242647171, "learning_rate": 9.723016638017644e-06, "loss": 1.2104, "step": 1709 }, { "epoch": 0.13119701290231833, "grad_norm": 0.2763867676258087, "learning_rate": 9.722609677117955e-06, "loss": 1.1908, "step": 1710 }, { "epoch": 0.1312737363016764, "grad_norm": 0.2219056338071823, "learning_rate": 9.722202426001216e-06, "loss": 1.1832, "step": 1711 }, { "epoch": 0.13135045970103448, "grad_norm": 0.2070668637752533, "learning_rate": 9.721794884692453e-06, "loss": 1.1355, "step": 1712 }, { "epoch": 0.13142718310039256, "grad_norm": 0.23838363587856293, "learning_rate": 9.721387053216706e-06, "loss": 1.1097, "step": 1713 }, { "epoch": 0.13150390649975066, "grad_norm": 0.26056209206581116, "learning_rate": 9.720978931599043e-06, "loss": 1.2268, "step": 1714 }, { "epoch": 0.13158062989910874, "grad_norm": 0.21754513680934906, "learning_rate": 9.720570519864541e-06, "loss": 1.1575, "step": 1715 }, { "epoch": 0.1316573532984668, "grad_norm": 0.31087857484817505, "learning_rate": 9.7201618180383e-06, "loss": 1.2104, "step": 1716 }, { "epoch": 0.1317340766978249, "grad_norm": 0.2061856985092163, "learning_rate": 9.719752826145433e-06, "loss": 1.169, "step": 1717 }, { "epoch": 0.13181080009718296, "grad_norm": 0.22007812559604645, "learning_rate": 9.719343544211075e-06, "loss": 1.2351, "step": 1718 }, { "epoch": 0.13188752349654106, "grad_norm": 0.22226081788539886, "learning_rate": 9.71893397226038e-06, "loss": 1.0625, "step": 1719 }, { "epoch": 0.13196424689589914, "grad_norm": 0.18513411283493042, "learning_rate": 9.718524110318514e-06, "loss": 1.1951, "step": 1720 }, { "epoch": 0.13204097029525722, "grad_norm": 0.2010522484779358, "learning_rate": 9.718113958410665e-06, "loss": 1.1421, "step": 1721 }, { "epoch": 0.1321176936946153, "grad_norm": 0.18698188662528992, "learning_rate": 9.717703516562037e-06, "loss": 1.2135, "step": 1722 }, { "epoch": 0.13219441709397337, "grad_norm": 0.255890429019928, "learning_rate": 9.717292784797854e-06, "loss": 1.0761, "step": 1723 }, { "epoch": 0.13227114049333147, "grad_norm": 0.19113358855247498, "learning_rate": 9.716881763143358e-06, "loss": 1.1453, "step": 1724 }, { "epoch": 0.13234786389268954, "grad_norm": 0.183724507689476, "learning_rate": 9.716470451623806e-06, "loss": 1.1927, "step": 1725 }, { "epoch": 0.13242458729204762, "grad_norm": 0.1984855830669403, "learning_rate": 9.716058850264471e-06, "loss": 1.1525, "step": 1726 }, { "epoch": 0.1325013106914057, "grad_norm": 0.21371567249298096, "learning_rate": 9.715646959090652e-06, "loss": 1.1503, "step": 1727 }, { "epoch": 0.13257803409076377, "grad_norm": 0.0985681563615799, "learning_rate": 9.715234778127658e-06, "loss": 1.2838, "step": 1728 }, { "epoch": 0.13265475749012187, "grad_norm": 0.17890657484531403, "learning_rate": 9.714822307400822e-06, "loss": 1.1102, "step": 1729 }, { "epoch": 0.13273148088947995, "grad_norm": 0.18242980539798737, "learning_rate": 9.714409546935485e-06, "loss": 1.1601, "step": 1730 }, { "epoch": 0.13280820428883802, "grad_norm": 0.21384628117084503, "learning_rate": 9.71399649675702e-06, "loss": 1.1072, "step": 1731 }, { "epoch": 0.1328849276881961, "grad_norm": 0.2073659747838974, "learning_rate": 9.713583156890805e-06, "loss": 1.0888, "step": 1732 }, { "epoch": 0.13296165108755417, "grad_norm": 0.2598206102848053, "learning_rate": 9.713169527362241e-06, "loss": 1.2191, "step": 1733 }, { "epoch": 0.13303837448691228, "grad_norm": 0.2008918672800064, "learning_rate": 9.712755608196747e-06, "loss": 1.1982, "step": 1734 }, { "epoch": 0.13311509788627035, "grad_norm": 0.2087555080652237, "learning_rate": 9.712341399419763e-06, "loss": 1.1028, "step": 1735 }, { "epoch": 0.13319182128562843, "grad_norm": 0.23029078543186188, "learning_rate": 9.711926901056738e-06, "loss": 1.103, "step": 1736 }, { "epoch": 0.1332685446849865, "grad_norm": 0.2066081166267395, "learning_rate": 9.711512113133145e-06, "loss": 1.2114, "step": 1737 }, { "epoch": 0.1333452680843446, "grad_norm": 0.2058216780424118, "learning_rate": 9.71109703567448e-06, "loss": 1.1935, "step": 1738 }, { "epoch": 0.13342199148370268, "grad_norm": 0.25132450461387634, "learning_rate": 9.710681668706242e-06, "loss": 1.1423, "step": 1739 }, { "epoch": 0.13349871488306075, "grad_norm": 0.5219811797142029, "learning_rate": 9.71026601225396e-06, "loss": 1.15, "step": 1740 }, { "epoch": 0.13357543828241883, "grad_norm": 0.23406322300434113, "learning_rate": 9.70985006634318e-06, "loss": 1.1083, "step": 1741 }, { "epoch": 0.1336521616817769, "grad_norm": 0.2196330428123474, "learning_rate": 9.709433830999458e-06, "loss": 1.1094, "step": 1742 }, { "epoch": 0.133728885081135, "grad_norm": 0.20791968703269958, "learning_rate": 9.709017306248377e-06, "loss": 1.0917, "step": 1743 }, { "epoch": 0.13380560848049308, "grad_norm": 0.19653044641017914, "learning_rate": 9.708600492115531e-06, "loss": 1.1908, "step": 1744 }, { "epoch": 0.13388233187985116, "grad_norm": 0.21628619730472565, "learning_rate": 9.708183388626536e-06, "loss": 1.1284, "step": 1745 }, { "epoch": 0.13395905527920923, "grad_norm": 0.22790956497192383, "learning_rate": 9.707765995807022e-06, "loss": 1.1437, "step": 1746 }, { "epoch": 0.1340357786785673, "grad_norm": 0.09764190018177032, "learning_rate": 9.70734831368264e-06, "loss": 1.2137, "step": 1747 }, { "epoch": 0.1341125020779254, "grad_norm": 0.24718020856380463, "learning_rate": 9.706930342279059e-06, "loss": 1.0431, "step": 1748 }, { "epoch": 0.13418922547728349, "grad_norm": 0.21220602095127106, "learning_rate": 9.706512081621964e-06, "loss": 1.1431, "step": 1749 }, { "epoch": 0.13426594887664156, "grad_norm": 0.24088814854621887, "learning_rate": 9.706093531737058e-06, "loss": 1.1349, "step": 1750 }, { "epoch": 0.13434267227599964, "grad_norm": 0.2824132740497589, "learning_rate": 9.705674692650059e-06, "loss": 1.0957, "step": 1751 }, { "epoch": 0.1344193956753577, "grad_norm": 0.2144954800605774, "learning_rate": 9.70525556438671e-06, "loss": 1.1414, "step": 1752 }, { "epoch": 0.13449611907471581, "grad_norm": 0.3491595983505249, "learning_rate": 9.704836146972765e-06, "loss": 1.1323, "step": 1753 }, { "epoch": 0.1345728424740739, "grad_norm": 0.24289192259311676, "learning_rate": 9.704416440434e-06, "loss": 1.1032, "step": 1754 }, { "epoch": 0.13464956587343196, "grad_norm": 0.09591364860534668, "learning_rate": 9.703996444796206e-06, "loss": 1.2702, "step": 1755 }, { "epoch": 0.13472628927279004, "grad_norm": 0.2109205424785614, "learning_rate": 9.703576160085193e-06, "loss": 1.1977, "step": 1756 }, { "epoch": 0.13480301267214811, "grad_norm": 0.22430098056793213, "learning_rate": 9.70315558632679e-06, "loss": 1.1551, "step": 1757 }, { "epoch": 0.13487973607150622, "grad_norm": 0.22878076136112213, "learning_rate": 9.70273472354684e-06, "loss": 1.091, "step": 1758 }, { "epoch": 0.1349564594708643, "grad_norm": 0.24676156044006348, "learning_rate": 9.70231357177121e-06, "loss": 1.1158, "step": 1759 }, { "epoch": 0.13503318287022237, "grad_norm": 0.19630898535251617, "learning_rate": 9.701892131025774e-06, "loss": 1.201, "step": 1760 }, { "epoch": 0.13510990626958044, "grad_norm": 0.30158260464668274, "learning_rate": 9.701470401336438e-06, "loss": 1.1792, "step": 1761 }, { "epoch": 0.13518662966893852, "grad_norm": 0.21051841974258423, "learning_rate": 9.701048382729115e-06, "loss": 1.1464, "step": 1762 }, { "epoch": 0.13526335306829662, "grad_norm": 0.2413579374551773, "learning_rate": 9.700626075229739e-06, "loss": 1.1004, "step": 1763 }, { "epoch": 0.1353400764676547, "grad_norm": 0.21744197607040405, "learning_rate": 9.700203478864263e-06, "loss": 1.066, "step": 1764 }, { "epoch": 0.13541679986701277, "grad_norm": 0.21597285568714142, "learning_rate": 9.699780593658655e-06, "loss": 1.1983, "step": 1765 }, { "epoch": 0.13549352326637085, "grad_norm": 0.305082768201828, "learning_rate": 9.699357419638904e-06, "loss": 1.2396, "step": 1766 }, { "epoch": 0.13557024666572895, "grad_norm": 0.18323616683483124, "learning_rate": 9.698933956831016e-06, "loss": 1.1521, "step": 1767 }, { "epoch": 0.13564697006508702, "grad_norm": 0.18895672261714935, "learning_rate": 9.69851020526101e-06, "loss": 1.1981, "step": 1768 }, { "epoch": 0.1357236934644451, "grad_norm": 0.239903062582016, "learning_rate": 9.698086164954935e-06, "loss": 1.1629, "step": 1769 }, { "epoch": 0.13580041686380318, "grad_norm": 0.22386610507965088, "learning_rate": 9.697661835938839e-06, "loss": 1.2048, "step": 1770 }, { "epoch": 0.13587714026316125, "grad_norm": 0.2093152552843094, "learning_rate": 9.697237218238803e-06, "loss": 1.1428, "step": 1771 }, { "epoch": 0.13595386366251935, "grad_norm": 0.3118757903575897, "learning_rate": 9.696812311880924e-06, "loss": 1.1163, "step": 1772 }, { "epoch": 0.13603058706187743, "grad_norm": 0.1929921954870224, "learning_rate": 9.696387116891308e-06, "loss": 1.1362, "step": 1773 }, { "epoch": 0.1361073104612355, "grad_norm": 0.23022747039794922, "learning_rate": 9.695961633296086e-06, "loss": 1.188, "step": 1774 }, { "epoch": 0.13618403386059358, "grad_norm": 0.2352014183998108, "learning_rate": 9.695535861121407e-06, "loss": 1.1422, "step": 1775 }, { "epoch": 0.13626075725995165, "grad_norm": 0.2381790578365326, "learning_rate": 9.695109800393435e-06, "loss": 1.1182, "step": 1776 }, { "epoch": 0.13633748065930976, "grad_norm": 0.24712634086608887, "learning_rate": 9.694683451138354e-06, "loss": 1.1643, "step": 1777 }, { "epoch": 0.13641420405866783, "grad_norm": 0.18201707303524017, "learning_rate": 9.69425681338236e-06, "loss": 1.1888, "step": 1778 }, { "epoch": 0.1364909274580259, "grad_norm": 0.17659880220890045, "learning_rate": 9.693829887151676e-06, "loss": 1.2438, "step": 1779 }, { "epoch": 0.13656765085738398, "grad_norm": 0.2028670310974121, "learning_rate": 9.693402672472533e-06, "loss": 1.2345, "step": 1780 }, { "epoch": 0.13664437425674206, "grad_norm": 0.23374147713184357, "learning_rate": 9.692975169371189e-06, "loss": 1.1382, "step": 1781 }, { "epoch": 0.13672109765610016, "grad_norm": 0.22715246677398682, "learning_rate": 9.692547377873913e-06, "loss": 1.1548, "step": 1782 }, { "epoch": 0.13679782105545824, "grad_norm": 0.20345553755760193, "learning_rate": 9.692119298006994e-06, "loss": 1.1141, "step": 1783 }, { "epoch": 0.1368745444548163, "grad_norm": 0.18973268568515778, "learning_rate": 9.691690929796738e-06, "loss": 1.1049, "step": 1784 }, { "epoch": 0.13695126785417439, "grad_norm": 0.3368973731994629, "learning_rate": 9.691262273269472e-06, "loss": 1.0606, "step": 1785 }, { "epoch": 0.13702799125353246, "grad_norm": 0.22120468318462372, "learning_rate": 9.690833328451534e-06, "loss": 1.1896, "step": 1786 }, { "epoch": 0.13710471465289056, "grad_norm": 0.2198830395936966, "learning_rate": 9.690404095369288e-06, "loss": 1.1244, "step": 1787 }, { "epoch": 0.13718143805224864, "grad_norm": 0.2786906659603119, "learning_rate": 9.689974574049108e-06, "loss": 1.1483, "step": 1788 }, { "epoch": 0.13725816145160671, "grad_norm": 0.37258657813072205, "learning_rate": 9.689544764517393e-06, "loss": 1.1129, "step": 1789 }, { "epoch": 0.1373348848509648, "grad_norm": 0.5143101811408997, "learning_rate": 9.689114666800552e-06, "loss": 1.1052, "step": 1790 }, { "epoch": 0.13741160825032286, "grad_norm": 0.20538941025733948, "learning_rate": 9.688684280925018e-06, "loss": 1.1789, "step": 1791 }, { "epoch": 0.13748833164968097, "grad_norm": 0.1890144795179367, "learning_rate": 9.688253606917238e-06, "loss": 1.1428, "step": 1792 }, { "epoch": 0.13756505504903904, "grad_norm": 0.19180768728256226, "learning_rate": 9.68782264480368e-06, "loss": 1.1621, "step": 1793 }, { "epoch": 0.13764177844839712, "grad_norm": 0.2976333796977997, "learning_rate": 9.687391394610827e-06, "loss": 1.1772, "step": 1794 }, { "epoch": 0.1377185018477552, "grad_norm": 0.21940745413303375, "learning_rate": 9.686959856365179e-06, "loss": 1.1477, "step": 1795 }, { "epoch": 0.1377952252471133, "grad_norm": 0.23357276618480682, "learning_rate": 9.686528030093256e-06, "loss": 1.2006, "step": 1796 }, { "epoch": 0.13787194864647137, "grad_norm": 0.24580006301403046, "learning_rate": 9.686095915821597e-06, "loss": 1.1311, "step": 1797 }, { "epoch": 0.13794867204582945, "grad_norm": 0.22895440459251404, "learning_rate": 9.685663513576754e-06, "loss": 1.1915, "step": 1798 }, { "epoch": 0.13802539544518752, "grad_norm": 0.21312648057937622, "learning_rate": 9.6852308233853e-06, "loss": 1.1143, "step": 1799 }, { "epoch": 0.1381021188445456, "grad_norm": 0.2138633131980896, "learning_rate": 9.684797845273825e-06, "loss": 1.1687, "step": 1800 }, { "epoch": 0.1381788422439037, "grad_norm": 0.233515664935112, "learning_rate": 9.684364579268938e-06, "loss": 1.1476, "step": 1801 }, { "epoch": 0.13825556564326177, "grad_norm": 0.25105127692222595, "learning_rate": 9.683931025397262e-06, "loss": 1.2054, "step": 1802 }, { "epoch": 0.13833228904261985, "grad_norm": 0.24009758234024048, "learning_rate": 9.68349718368544e-06, "loss": 1.2097, "step": 1803 }, { "epoch": 0.13840901244197792, "grad_norm": 0.25348469614982605, "learning_rate": 9.683063054160136e-06, "loss": 1.1121, "step": 1804 }, { "epoch": 0.138485735841336, "grad_norm": 0.19994455575942993, "learning_rate": 9.682628636848024e-06, "loss": 1.1666, "step": 1805 }, { "epoch": 0.1385624592406941, "grad_norm": 0.22508297860622406, "learning_rate": 9.682193931775805e-06, "loss": 1.1604, "step": 1806 }, { "epoch": 0.13863918264005218, "grad_norm": 0.3765912353992462, "learning_rate": 9.681758938970189e-06, "loss": 1.0881, "step": 1807 }, { "epoch": 0.13871590603941025, "grad_norm": 0.2269248515367508, "learning_rate": 9.681323658457909e-06, "loss": 1.2084, "step": 1808 }, { "epoch": 0.13879262943876833, "grad_norm": 0.24340488016605377, "learning_rate": 9.680888090265714e-06, "loss": 1.1767, "step": 1809 }, { "epoch": 0.1388693528381264, "grad_norm": 0.2387782335281372, "learning_rate": 9.68045223442037e-06, "loss": 1.1607, "step": 1810 }, { "epoch": 0.1389460762374845, "grad_norm": 0.19317814707756042, "learning_rate": 9.680016090948663e-06, "loss": 1.1758, "step": 1811 }, { "epoch": 0.13902279963684258, "grad_norm": 0.26036176085472107, "learning_rate": 9.679579659877393e-06, "loss": 1.1211, "step": 1812 }, { "epoch": 0.13909952303620066, "grad_norm": 0.23442615568637848, "learning_rate": 9.679142941233382e-06, "loss": 1.1015, "step": 1813 }, { "epoch": 0.13917624643555873, "grad_norm": 0.2059893161058426, "learning_rate": 9.678705935043467e-06, "loss": 1.1103, "step": 1814 }, { "epoch": 0.1392529698349168, "grad_norm": 0.2176116406917572, "learning_rate": 9.678268641334502e-06, "loss": 1.1806, "step": 1815 }, { "epoch": 0.1393296932342749, "grad_norm": 0.21982449293136597, "learning_rate": 9.677831060133363e-06, "loss": 1.1484, "step": 1816 }, { "epoch": 0.13940641663363298, "grad_norm": 0.09677200764417648, "learning_rate": 9.677393191466936e-06, "loss": 1.2815, "step": 1817 }, { "epoch": 0.13948314003299106, "grad_norm": 0.2272997498512268, "learning_rate": 9.676955035362134e-06, "loss": 1.2192, "step": 1818 }, { "epoch": 0.13955986343234913, "grad_norm": 0.19748421013355255, "learning_rate": 9.67651659184588e-06, "loss": 1.0792, "step": 1819 }, { "epoch": 0.1396365868317072, "grad_norm": 0.16691778600215912, "learning_rate": 9.676077860945117e-06, "loss": 1.1612, "step": 1820 }, { "epoch": 0.1397133102310653, "grad_norm": 0.21080057322978973, "learning_rate": 9.675638842686808e-06, "loss": 1.1587, "step": 1821 }, { "epoch": 0.1397900336304234, "grad_norm": 0.2172919362783432, "learning_rate": 9.67519953709793e-06, "loss": 1.1983, "step": 1822 }, { "epoch": 0.13986675702978146, "grad_norm": 0.2589855194091797, "learning_rate": 9.67475994420548e-06, "loss": 1.1887, "step": 1823 }, { "epoch": 0.13994348042913954, "grad_norm": 0.2707909047603607, "learning_rate": 9.674320064036475e-06, "loss": 1.1529, "step": 1824 }, { "epoch": 0.14002020382849764, "grad_norm": 0.23772498965263367, "learning_rate": 9.673879896617944e-06, "loss": 1.2471, "step": 1825 }, { "epoch": 0.14009692722785572, "grad_norm": 0.18644247949123383, "learning_rate": 9.673439441976936e-06, "loss": 1.1654, "step": 1826 }, { "epoch": 0.1401736506272138, "grad_norm": 0.2069864422082901, "learning_rate": 9.672998700140519e-06, "loss": 1.1824, "step": 1827 }, { "epoch": 0.14025037402657187, "grad_norm": 0.2130579650402069, "learning_rate": 9.67255767113578e-06, "loss": 1.2163, "step": 1828 }, { "epoch": 0.14032709742592994, "grad_norm": 0.1797383427619934, "learning_rate": 9.672116354989818e-06, "loss": 1.0484, "step": 1829 }, { "epoch": 0.14040382082528804, "grad_norm": 0.25737640261650085, "learning_rate": 9.671674751729753e-06, "loss": 1.1551, "step": 1830 }, { "epoch": 0.14048054422464612, "grad_norm": 0.34888023138046265, "learning_rate": 9.671232861382726e-06, "loss": 1.1628, "step": 1831 }, { "epoch": 0.1405572676240042, "grad_norm": 0.21740210056304932, "learning_rate": 9.67079068397589e-06, "loss": 1.1066, "step": 1832 }, { "epoch": 0.14063399102336227, "grad_norm": 0.2655353844165802, "learning_rate": 9.67034821953642e-06, "loss": 1.1891, "step": 1833 }, { "epoch": 0.14071071442272035, "grad_norm": 0.2090390920639038, "learning_rate": 9.6699054680915e-06, "loss": 1.1924, "step": 1834 }, { "epoch": 0.14078743782207845, "grad_norm": 0.2107178270816803, "learning_rate": 9.669462429668347e-06, "loss": 1.1327, "step": 1835 }, { "epoch": 0.14086416122143652, "grad_norm": 0.3297303020954132, "learning_rate": 9.669019104294185e-06, "loss": 1.1901, "step": 1836 }, { "epoch": 0.1409408846207946, "grad_norm": 0.22163143754005432, "learning_rate": 9.668575491996252e-06, "loss": 1.138, "step": 1837 }, { "epoch": 0.14101760802015267, "grad_norm": 0.25213709473609924, "learning_rate": 9.668131592801815e-06, "loss": 1.1351, "step": 1838 }, { "epoch": 0.14109433141951075, "grad_norm": 0.29609689116477966, "learning_rate": 9.66768740673815e-06, "loss": 1.1686, "step": 1839 }, { "epoch": 0.14117105481886885, "grad_norm": 0.2330130636692047, "learning_rate": 9.667242933832555e-06, "loss": 1.1039, "step": 1840 }, { "epoch": 0.14124777821822693, "grad_norm": 0.29968807101249695, "learning_rate": 9.666798174112344e-06, "loss": 1.1981, "step": 1841 }, { "epoch": 0.141324501617585, "grad_norm": 0.09661325812339783, "learning_rate": 9.666353127604845e-06, "loss": 1.1875, "step": 1842 }, { "epoch": 0.14140122501694308, "grad_norm": 0.47213035821914673, "learning_rate": 9.665907794337412e-06, "loss": 1.1002, "step": 1843 }, { "epoch": 0.14147794841630115, "grad_norm": 0.21950265765190125, "learning_rate": 9.66546217433741e-06, "loss": 1.181, "step": 1844 }, { "epoch": 0.14155467181565926, "grad_norm": 0.1923253834247589, "learning_rate": 9.665016267632223e-06, "loss": 1.1162, "step": 1845 }, { "epoch": 0.14163139521501733, "grad_norm": 0.256465345621109, "learning_rate": 9.664570074249255e-06, "loss": 1.1505, "step": 1846 }, { "epoch": 0.1417081186143754, "grad_norm": 0.19888605177402496, "learning_rate": 9.664123594215924e-06, "loss": 1.0879, "step": 1847 }, { "epoch": 0.14178484201373348, "grad_norm": 0.23917777836322784, "learning_rate": 9.663676827559668e-06, "loss": 1.2414, "step": 1848 }, { "epoch": 0.14186156541309156, "grad_norm": 0.1820378601551056, "learning_rate": 9.663229774307942e-06, "loss": 1.151, "step": 1849 }, { "epoch": 0.14193828881244966, "grad_norm": 0.2629256248474121, "learning_rate": 9.66278243448822e-06, "loss": 1.1969, "step": 1850 }, { "epoch": 0.14201501221180773, "grad_norm": 0.20956537127494812, "learning_rate": 9.662334808127989e-06, "loss": 1.1979, "step": 1851 }, { "epoch": 0.1420917356111658, "grad_norm": 0.21996451914310455, "learning_rate": 9.66188689525476e-06, "loss": 1.2164, "step": 1852 }, { "epoch": 0.14216845901052388, "grad_norm": 0.2362685352563858, "learning_rate": 9.661438695896056e-06, "loss": 1.153, "step": 1853 }, { "epoch": 0.142245182409882, "grad_norm": 0.4252656400203705, "learning_rate": 9.660990210079421e-06, "loss": 1.0895, "step": 1854 }, { "epoch": 0.14232190580924006, "grad_norm": 0.30059385299682617, "learning_rate": 9.660541437832417e-06, "loss": 1.1271, "step": 1855 }, { "epoch": 0.14239862920859814, "grad_norm": 0.24281653761863708, "learning_rate": 9.660092379182622e-06, "loss": 1.1717, "step": 1856 }, { "epoch": 0.1424753526079562, "grad_norm": 0.2253628373146057, "learning_rate": 9.65964303415763e-06, "loss": 1.1655, "step": 1857 }, { "epoch": 0.1425520760073143, "grad_norm": 0.2650987207889557, "learning_rate": 9.659193402785054e-06, "loss": 1.1797, "step": 1858 }, { "epoch": 0.1426287994066724, "grad_norm": 0.2153782993555069, "learning_rate": 9.658743485092527e-06, "loss": 1.198, "step": 1859 }, { "epoch": 0.14270552280603047, "grad_norm": 0.20432838797569275, "learning_rate": 9.658293281107699e-06, "loss": 1.138, "step": 1860 }, { "epoch": 0.14278224620538854, "grad_norm": 0.18675659596920013, "learning_rate": 9.657842790858235e-06, "loss": 1.2295, "step": 1861 }, { "epoch": 0.14285896960474662, "grad_norm": 0.252824604511261, "learning_rate": 9.657392014371815e-06, "loss": 1.137, "step": 1862 }, { "epoch": 0.1429356930041047, "grad_norm": 0.3445465564727783, "learning_rate": 9.656940951676146e-06, "loss": 1.1607, "step": 1863 }, { "epoch": 0.1430124164034628, "grad_norm": 0.18658478558063507, "learning_rate": 9.656489602798943e-06, "loss": 1.2508, "step": 1864 }, { "epoch": 0.14308913980282087, "grad_norm": 0.22808711230754852, "learning_rate": 9.656037967767946e-06, "loss": 1.1382, "step": 1865 }, { "epoch": 0.14316586320217894, "grad_norm": 0.5660108923912048, "learning_rate": 9.655586046610906e-06, "loss": 1.1235, "step": 1866 }, { "epoch": 0.14324258660153702, "grad_norm": 0.22352419793605804, "learning_rate": 9.655133839355597e-06, "loss": 1.2059, "step": 1867 }, { "epoch": 0.1433193100008951, "grad_norm": 0.20893612504005432, "learning_rate": 9.654681346029809e-06, "loss": 1.1535, "step": 1868 }, { "epoch": 0.1433960334002532, "grad_norm": 0.18977123498916626, "learning_rate": 9.654228566661345e-06, "loss": 1.1895, "step": 1869 }, { "epoch": 0.14347275679961127, "grad_norm": 0.44855034351348877, "learning_rate": 9.653775501278032e-06, "loss": 1.1524, "step": 1870 }, { "epoch": 0.14354948019896935, "grad_norm": 0.2203122079372406, "learning_rate": 9.653322149907716e-06, "loss": 1.1679, "step": 1871 }, { "epoch": 0.14362620359832742, "grad_norm": 0.3707258999347687, "learning_rate": 9.652868512578249e-06, "loss": 1.1696, "step": 1872 }, { "epoch": 0.1437029269976855, "grad_norm": 0.20410504937171936, "learning_rate": 9.652414589317514e-06, "loss": 1.1745, "step": 1873 }, { "epoch": 0.1437796503970436, "grad_norm": 0.19676512479782104, "learning_rate": 9.651960380153402e-06, "loss": 1.1901, "step": 1874 }, { "epoch": 0.14385637379640168, "grad_norm": 0.25253909826278687, "learning_rate": 9.651505885113828e-06, "loss": 1.214, "step": 1875 }, { "epoch": 0.14393309719575975, "grad_norm": 0.3930768370628357, "learning_rate": 9.651051104226722e-06, "loss": 1.1301, "step": 1876 }, { "epoch": 0.14400982059511783, "grad_norm": 0.5648761987686157, "learning_rate": 9.65059603752003e-06, "loss": 1.1421, "step": 1877 }, { "epoch": 0.1440865439944759, "grad_norm": 0.24136203527450562, "learning_rate": 9.650140685021716e-06, "loss": 1.1277, "step": 1878 }, { "epoch": 0.144163267393834, "grad_norm": 0.19104556739330292, "learning_rate": 9.649685046759767e-06, "loss": 1.1965, "step": 1879 }, { "epoch": 0.14423999079319208, "grad_norm": 0.39848554134368896, "learning_rate": 9.64922912276218e-06, "loss": 1.2157, "step": 1880 }, { "epoch": 0.14431671419255016, "grad_norm": 0.31684666872024536, "learning_rate": 9.648772913056973e-06, "loss": 1.1687, "step": 1881 }, { "epoch": 0.14439343759190823, "grad_norm": 0.274878591299057, "learning_rate": 9.648316417672181e-06, "loss": 1.1495, "step": 1882 }, { "epoch": 0.14447016099126633, "grad_norm": 0.345380038022995, "learning_rate": 9.64785963663586e-06, "loss": 1.1907, "step": 1883 }, { "epoch": 0.1445468843906244, "grad_norm": 0.5121314525604248, "learning_rate": 9.647402569976074e-06, "loss": 1.1124, "step": 1884 }, { "epoch": 0.14462360778998248, "grad_norm": 0.2111993134021759, "learning_rate": 9.646945217720917e-06, "loss": 1.1675, "step": 1885 }, { "epoch": 0.14470033118934056, "grad_norm": 0.3049672544002533, "learning_rate": 9.646487579898491e-06, "loss": 1.0814, "step": 1886 }, { "epoch": 0.14477705458869863, "grad_norm": 0.2752722203731537, "learning_rate": 9.646029656536923e-06, "loss": 1.2382, "step": 1887 }, { "epoch": 0.14485377798805674, "grad_norm": 0.246255561709404, "learning_rate": 9.64557144766435e-06, "loss": 1.1855, "step": 1888 }, { "epoch": 0.1449305013874148, "grad_norm": 0.2722080647945404, "learning_rate": 9.64511295330893e-06, "loss": 1.1716, "step": 1889 }, { "epoch": 0.1450072247867729, "grad_norm": 0.2184867113828659, "learning_rate": 9.644654173498841e-06, "loss": 1.1054, "step": 1890 }, { "epoch": 0.14508394818613096, "grad_norm": 0.23272696137428284, "learning_rate": 9.644195108262276e-06, "loss": 1.0981, "step": 1891 }, { "epoch": 0.14516067158548904, "grad_norm": 0.1923963725566864, "learning_rate": 9.643735757627444e-06, "loss": 1.1613, "step": 1892 }, { "epoch": 0.14523739498484714, "grad_norm": 0.25026029348373413, "learning_rate": 9.643276121622575e-06, "loss": 1.2133, "step": 1893 }, { "epoch": 0.14531411838420522, "grad_norm": 0.20142152905464172, "learning_rate": 9.642816200275913e-06, "loss": 1.1531, "step": 1894 }, { "epoch": 0.1453908417835633, "grad_norm": 0.21276423335075378, "learning_rate": 9.642355993615724e-06, "loss": 1.1051, "step": 1895 }, { "epoch": 0.14546756518292137, "grad_norm": 0.20609600841999054, "learning_rate": 9.641895501670286e-06, "loss": 1.1763, "step": 1896 }, { "epoch": 0.14554428858227944, "grad_norm": 0.226213738322258, "learning_rate": 9.6414347244679e-06, "loss": 1.2086, "step": 1897 }, { "epoch": 0.14562101198163754, "grad_norm": 0.24762406945228577, "learning_rate": 9.640973662036882e-06, "loss": 1.1375, "step": 1898 }, { "epoch": 0.14569773538099562, "grad_norm": 0.1721472591161728, "learning_rate": 9.640512314405563e-06, "loss": 1.1365, "step": 1899 }, { "epoch": 0.1457744587803537, "grad_norm": 0.2121177762746811, "learning_rate": 9.640050681602296e-06, "loss": 1.188, "step": 1900 }, { "epoch": 0.14585118217971177, "grad_norm": 0.2065649926662445, "learning_rate": 9.63958876365545e-06, "loss": 1.2025, "step": 1901 }, { "epoch": 0.14592790557906984, "grad_norm": 0.20744867622852325, "learning_rate": 9.639126560593408e-06, "loss": 1.0906, "step": 1902 }, { "epoch": 0.14600462897842795, "grad_norm": 0.20311462879180908, "learning_rate": 9.638664072444578e-06, "loss": 1.1799, "step": 1903 }, { "epoch": 0.14608135237778602, "grad_norm": 0.2068684995174408, "learning_rate": 9.638201299237377e-06, "loss": 1.2028, "step": 1904 }, { "epoch": 0.1461580757771441, "grad_norm": 0.2590208053588867, "learning_rate": 9.637738241000248e-06, "loss": 1.1625, "step": 1905 }, { "epoch": 0.14623479917650217, "grad_norm": 0.20447653532028198, "learning_rate": 9.637274897761644e-06, "loss": 1.0983, "step": 1906 }, { "epoch": 0.14631152257586025, "grad_norm": 0.23349785804748535, "learning_rate": 9.636811269550039e-06, "loss": 1.0969, "step": 1907 }, { "epoch": 0.14638824597521835, "grad_norm": 0.21571043133735657, "learning_rate": 9.636347356393925e-06, "loss": 1.191, "step": 1908 }, { "epoch": 0.14646496937457643, "grad_norm": 0.2591140866279602, "learning_rate": 9.635883158321812e-06, "loss": 1.0715, "step": 1909 }, { "epoch": 0.1465416927739345, "grad_norm": 0.24733686447143555, "learning_rate": 9.635418675362222e-06, "loss": 1.1501, "step": 1910 }, { "epoch": 0.14661841617329258, "grad_norm": 0.31709152460098267, "learning_rate": 9.634953907543704e-06, "loss": 1.0815, "step": 1911 }, { "epoch": 0.14669513957265068, "grad_norm": 0.24998822808265686, "learning_rate": 9.634488854894815e-06, "loss": 1.0911, "step": 1912 }, { "epoch": 0.14677186297200875, "grad_norm": 0.10890856385231018, "learning_rate": 9.634023517444136e-06, "loss": 1.2244, "step": 1913 }, { "epoch": 0.14684858637136683, "grad_norm": 0.21896454691886902, "learning_rate": 9.633557895220263e-06, "loss": 1.1438, "step": 1914 }, { "epoch": 0.1469253097707249, "grad_norm": 0.27583059668540955, "learning_rate": 9.633091988251808e-06, "loss": 1.254, "step": 1915 }, { "epoch": 0.14700203317008298, "grad_norm": 0.2086028903722763, "learning_rate": 9.632625796567404e-06, "loss": 1.1038, "step": 1916 }, { "epoch": 0.14707875656944108, "grad_norm": 0.2407587617635727, "learning_rate": 9.6321593201957e-06, "loss": 1.1953, "step": 1917 }, { "epoch": 0.14715547996879916, "grad_norm": 0.2297658920288086, "learning_rate": 9.63169255916536e-06, "loss": 1.1947, "step": 1918 }, { "epoch": 0.14723220336815723, "grad_norm": 0.3973187506198883, "learning_rate": 9.631225513505071e-06, "loss": 1.0613, "step": 1919 }, { "epoch": 0.1473089267675153, "grad_norm": 0.18498894572257996, "learning_rate": 9.630758183243531e-06, "loss": 1.1445, "step": 1920 }, { "epoch": 0.14738565016687338, "grad_norm": 0.2351607382297516, "learning_rate": 9.630290568409461e-06, "loss": 1.0581, "step": 1921 }, { "epoch": 0.14746237356623149, "grad_norm": 0.23691241443157196, "learning_rate": 9.629822669031597e-06, "loss": 1.1351, "step": 1922 }, { "epoch": 0.14753909696558956, "grad_norm": 0.2368478924036026, "learning_rate": 9.62935448513869e-06, "loss": 1.2133, "step": 1923 }, { "epoch": 0.14761582036494764, "grad_norm": 0.21489359438419342, "learning_rate": 9.628886016759516e-06, "loss": 1.1453, "step": 1924 }, { "epoch": 0.1476925437643057, "grad_norm": 0.20065666735172272, "learning_rate": 9.62841726392286e-06, "loss": 1.1796, "step": 1925 }, { "epoch": 0.1477692671636638, "grad_norm": 0.20562860369682312, "learning_rate": 9.627948226657527e-06, "loss": 1.2068, "step": 1926 }, { "epoch": 0.1478459905630219, "grad_norm": 0.3094284236431122, "learning_rate": 9.627478904992344e-06, "loss": 1.1139, "step": 1927 }, { "epoch": 0.14792271396237996, "grad_norm": 0.19510014355182648, "learning_rate": 9.627009298956151e-06, "loss": 1.1353, "step": 1928 }, { "epoch": 0.14799943736173804, "grad_norm": 0.21252872049808502, "learning_rate": 9.626539408577804e-06, "loss": 1.1408, "step": 1929 }, { "epoch": 0.14807616076109612, "grad_norm": 0.27208587527275085, "learning_rate": 9.626069233886184e-06, "loss": 1.2177, "step": 1930 }, { "epoch": 0.1481528841604542, "grad_norm": 0.2517835795879364, "learning_rate": 9.62559877491018e-06, "loss": 1.1544, "step": 1931 }, { "epoch": 0.1482296075598123, "grad_norm": 0.24787671864032745, "learning_rate": 9.625128031678706e-06, "loss": 1.1099, "step": 1932 }, { "epoch": 0.14830633095917037, "grad_norm": 0.23053964972496033, "learning_rate": 9.624657004220687e-06, "loss": 1.0946, "step": 1933 }, { "epoch": 0.14838305435852844, "grad_norm": 0.6422882080078125, "learning_rate": 9.624185692565072e-06, "loss": 1.1578, "step": 1934 }, { "epoch": 0.14845977775788652, "grad_norm": 0.1961851418018341, "learning_rate": 9.623714096740825e-06, "loss": 1.1201, "step": 1935 }, { "epoch": 0.1485365011572446, "grad_norm": 0.18653340637683868, "learning_rate": 9.623242216776922e-06, "loss": 1.1381, "step": 1936 }, { "epoch": 0.1486132245566027, "grad_norm": 0.21689541637897491, "learning_rate": 9.622770052702366e-06, "loss": 1.0973, "step": 1937 }, { "epoch": 0.14868994795596077, "grad_norm": 0.22823300957679749, "learning_rate": 9.62229760454617e-06, "loss": 1.1932, "step": 1938 }, { "epoch": 0.14876667135531885, "grad_norm": 0.18046575784683228, "learning_rate": 9.621824872337373e-06, "loss": 1.1114, "step": 1939 }, { "epoch": 0.14884339475467692, "grad_norm": 0.290914386510849, "learning_rate": 9.621351856105017e-06, "loss": 1.0912, "step": 1940 }, { "epoch": 0.14892011815403502, "grad_norm": 0.25826966762542725, "learning_rate": 9.620878555878175e-06, "loss": 1.219, "step": 1941 }, { "epoch": 0.1489968415533931, "grad_norm": 0.18918626010417938, "learning_rate": 9.620404971685932e-06, "loss": 1.1504, "step": 1942 }, { "epoch": 0.14907356495275118, "grad_norm": 0.19549410045146942, "learning_rate": 9.61993110355739e-06, "loss": 1.1492, "step": 1943 }, { "epoch": 0.14915028835210925, "grad_norm": 0.21584084630012512, "learning_rate": 9.619456951521673e-06, "loss": 1.1278, "step": 1944 }, { "epoch": 0.14922701175146733, "grad_norm": 0.19806064665317535, "learning_rate": 9.618982515607913e-06, "loss": 1.2301, "step": 1945 }, { "epoch": 0.14930373515082543, "grad_norm": 0.7189170718193054, "learning_rate": 9.61850779584527e-06, "loss": 1.1605, "step": 1946 }, { "epoch": 0.1493804585501835, "grad_norm": 0.19098572432994843, "learning_rate": 9.618032792262915e-06, "loss": 1.0812, "step": 1947 }, { "epoch": 0.14945718194954158, "grad_norm": 0.2131974995136261, "learning_rate": 9.61755750489004e-06, "loss": 1.152, "step": 1948 }, { "epoch": 0.14953390534889965, "grad_norm": 0.2504991292953491, "learning_rate": 9.61708193375585e-06, "loss": 1.1052, "step": 1949 }, { "epoch": 0.14961062874825773, "grad_norm": 0.30051156878471375, "learning_rate": 9.61660607888957e-06, "loss": 1.1201, "step": 1950 }, { "epoch": 0.14968735214761583, "grad_norm": 0.27129486203193665, "learning_rate": 9.616129940320447e-06, "loss": 1.1331, "step": 1951 }, { "epoch": 0.1497640755469739, "grad_norm": 0.21534645557403564, "learning_rate": 9.615653518077736e-06, "loss": 1.1333, "step": 1952 }, { "epoch": 0.14984079894633198, "grad_norm": 0.22716452181339264, "learning_rate": 9.615176812190718e-06, "loss": 1.2211, "step": 1953 }, { "epoch": 0.14991752234569006, "grad_norm": 0.20769648253917694, "learning_rate": 9.614699822688685e-06, "loss": 1.1706, "step": 1954 }, { "epoch": 0.14999424574504813, "grad_norm": 0.2868845462799072, "learning_rate": 9.61422254960095e-06, "loss": 1.1078, "step": 1955 }, { "epoch": 0.15007096914440624, "grad_norm": 0.39177703857421875, "learning_rate": 9.613744992956844e-06, "loss": 1.2512, "step": 1956 }, { "epoch": 0.1501476925437643, "grad_norm": 0.2266407608985901, "learning_rate": 9.613267152785714e-06, "loss": 1.1506, "step": 1957 }, { "epoch": 0.15022441594312239, "grad_norm": 0.2966184616088867, "learning_rate": 9.612789029116922e-06, "loss": 1.0922, "step": 1958 }, { "epoch": 0.15030113934248046, "grad_norm": 0.19213813543319702, "learning_rate": 9.612310621979854e-06, "loss": 1.08, "step": 1959 }, { "epoch": 0.15037786274183854, "grad_norm": 0.20630623400211334, "learning_rate": 9.611831931403908e-06, "loss": 1.1681, "step": 1960 }, { "epoch": 0.15045458614119664, "grad_norm": 0.09913412481546402, "learning_rate": 9.611352957418499e-06, "loss": 1.2287, "step": 1961 }, { "epoch": 0.15053130954055471, "grad_norm": 0.1039334237575531, "learning_rate": 9.610873700053062e-06, "loss": 1.2241, "step": 1962 }, { "epoch": 0.1506080329399128, "grad_norm": 0.2354053407907486, "learning_rate": 9.610394159337049e-06, "loss": 1.196, "step": 1963 }, { "epoch": 0.15068475633927086, "grad_norm": 0.10296551138162613, "learning_rate": 9.60991433529993e-06, "loss": 1.3359, "step": 1964 }, { "epoch": 0.15076147973862894, "grad_norm": 0.208629310131073, "learning_rate": 9.609434227971189e-06, "loss": 1.1293, "step": 1965 }, { "epoch": 0.15083820313798704, "grad_norm": 0.2176009863615036, "learning_rate": 9.608953837380333e-06, "loss": 1.2111, "step": 1966 }, { "epoch": 0.15091492653734512, "grad_norm": 0.09331272542476654, "learning_rate": 9.608473163556882e-06, "loss": 1.2305, "step": 1967 }, { "epoch": 0.1509916499367032, "grad_norm": 0.1901782900094986, "learning_rate": 9.607992206530373e-06, "loss": 1.1037, "step": 1968 }, { "epoch": 0.15106837333606127, "grad_norm": 0.1783851683139801, "learning_rate": 9.607510966330365e-06, "loss": 1.1576, "step": 1969 }, { "epoch": 0.15114509673541937, "grad_norm": 0.23445037007331848, "learning_rate": 9.60702944298643e-06, "loss": 1.1497, "step": 1970 }, { "epoch": 0.15122182013477745, "grad_norm": 0.22430025041103363, "learning_rate": 9.606547636528159e-06, "loss": 1.1695, "step": 1971 }, { "epoch": 0.15129854353413552, "grad_norm": 0.23041030764579773, "learning_rate": 9.60606554698516e-06, "loss": 1.0916, "step": 1972 }, { "epoch": 0.1513752669334936, "grad_norm": 0.21771761775016785, "learning_rate": 9.605583174387059e-06, "loss": 1.0909, "step": 1973 }, { "epoch": 0.15145199033285167, "grad_norm": 0.20188051462173462, "learning_rate": 9.605100518763498e-06, "loss": 1.1791, "step": 1974 }, { "epoch": 0.15152871373220977, "grad_norm": 0.20875419676303864, "learning_rate": 9.60461758014414e-06, "loss": 1.163, "step": 1975 }, { "epoch": 0.15160543713156785, "grad_norm": 0.22320732474327087, "learning_rate": 9.60413435855866e-06, "loss": 1.2328, "step": 1976 }, { "epoch": 0.15168216053092592, "grad_norm": 0.24604742228984833, "learning_rate": 9.603650854036757e-06, "loss": 1.1201, "step": 1977 }, { "epoch": 0.151758883930284, "grad_norm": 0.19809429347515106, "learning_rate": 9.60316706660814e-06, "loss": 1.0935, "step": 1978 }, { "epoch": 0.15183560732964207, "grad_norm": 0.38930994272232056, "learning_rate": 9.602682996302539e-06, "loss": 1.1429, "step": 1979 }, { "epoch": 0.15191233072900018, "grad_norm": 0.2149508148431778, "learning_rate": 9.602198643149705e-06, "loss": 1.1464, "step": 1980 }, { "epoch": 0.15198905412835825, "grad_norm": 0.19251535832881927, "learning_rate": 9.601714007179399e-06, "loss": 1.1454, "step": 1981 }, { "epoch": 0.15206577752771633, "grad_norm": 0.21143852174282074, "learning_rate": 9.601229088421407e-06, "loss": 1.2155, "step": 1982 }, { "epoch": 0.1521425009270744, "grad_norm": 0.19191974401474, "learning_rate": 9.600743886905524e-06, "loss": 1.0882, "step": 1983 }, { "epoch": 0.15221922432643248, "grad_norm": 0.2143021523952484, "learning_rate": 9.60025840266157e-06, "loss": 1.1903, "step": 1984 }, { "epoch": 0.15229594772579058, "grad_norm": 0.20897223055362701, "learning_rate": 9.59977263571938e-06, "loss": 1.0739, "step": 1985 }, { "epoch": 0.15237267112514866, "grad_norm": 0.21814246475696564, "learning_rate": 9.599286586108803e-06, "loss": 1.1118, "step": 1986 }, { "epoch": 0.15244939452450673, "grad_norm": 0.28142112493515015, "learning_rate": 9.59880025385971e-06, "loss": 1.0774, "step": 1987 }, { "epoch": 0.1525261179238648, "grad_norm": 0.25347208976745605, "learning_rate": 9.598313639001986e-06, "loss": 1.1959, "step": 1988 }, { "epoch": 0.15260284132322288, "grad_norm": 0.26041752099990845, "learning_rate": 9.597826741565535e-06, "loss": 1.0899, "step": 1989 }, { "epoch": 0.15267956472258098, "grad_norm": 0.27077239751815796, "learning_rate": 9.59733956158028e-06, "loss": 1.1269, "step": 1990 }, { "epoch": 0.15275628812193906, "grad_norm": 0.2236669808626175, "learning_rate": 9.596852099076158e-06, "loss": 1.1899, "step": 1991 }, { "epoch": 0.15283301152129714, "grad_norm": 0.21696297824382782, "learning_rate": 9.596364354083125e-06, "loss": 1.1692, "step": 1992 }, { "epoch": 0.1529097349206552, "grad_norm": 0.26158788800239563, "learning_rate": 9.595876326631155e-06, "loss": 1.1845, "step": 1993 }, { "epoch": 0.15298645832001329, "grad_norm": 0.10245131701231003, "learning_rate": 9.595388016750236e-06, "loss": 1.2109, "step": 1994 }, { "epoch": 0.1530631817193714, "grad_norm": 0.09994842857122421, "learning_rate": 9.59489942447038e-06, "loss": 1.2728, "step": 1995 }, { "epoch": 0.15313990511872946, "grad_norm": 0.23218324780464172, "learning_rate": 9.594410549821608e-06, "loss": 1.1586, "step": 1996 }, { "epoch": 0.15321662851808754, "grad_norm": 0.20006409287452698, "learning_rate": 9.593921392833968e-06, "loss": 1.0635, "step": 1997 }, { "epoch": 0.1532933519174456, "grad_norm": 0.2538037598133087, "learning_rate": 9.593431953537513e-06, "loss": 1.1738, "step": 1998 }, { "epoch": 0.15337007531680372, "grad_norm": 0.21507582068443298, "learning_rate": 9.592942231962328e-06, "loss": 1.2067, "step": 1999 }, { "epoch": 0.1534467987161618, "grad_norm": 0.20709696412086487, "learning_rate": 9.592452228138501e-06, "loss": 1.1954, "step": 2000 }, { "epoch": 0.15352352211551987, "grad_norm": 0.19693197309970856, "learning_rate": 9.591961942096148e-06, "loss": 1.2076, "step": 2001 }, { "epoch": 0.15360024551487794, "grad_norm": 0.30203649401664734, "learning_rate": 9.591471373865398e-06, "loss": 1.2156, "step": 2002 }, { "epoch": 0.15367696891423602, "grad_norm": 0.24397410452365875, "learning_rate": 9.590980523476398e-06, "loss": 1.1754, "step": 2003 }, { "epoch": 0.15375369231359412, "grad_norm": 0.20230354368686676, "learning_rate": 9.59048939095931e-06, "loss": 1.2505, "step": 2004 }, { "epoch": 0.1538304157129522, "grad_norm": 0.23797915875911713, "learning_rate": 9.589997976344315e-06, "loss": 1.1231, "step": 2005 }, { "epoch": 0.15390713911231027, "grad_norm": 0.2897716462612152, "learning_rate": 9.589506279661616e-06, "loss": 1.1743, "step": 2006 }, { "epoch": 0.15398386251166835, "grad_norm": 0.1817731112241745, "learning_rate": 9.589014300941425e-06, "loss": 1.2546, "step": 2007 }, { "epoch": 0.15406058591102642, "grad_norm": 0.20441889762878418, "learning_rate": 9.588522040213978e-06, "loss": 1.2093, "step": 2008 }, { "epoch": 0.15413730931038452, "grad_norm": 0.19280493259429932, "learning_rate": 9.588029497509524e-06, "loss": 1.1122, "step": 2009 }, { "epoch": 0.1542140327097426, "grad_norm": 0.2254651039838791, "learning_rate": 9.587536672858333e-06, "loss": 1.2125, "step": 2010 }, { "epoch": 0.15429075610910067, "grad_norm": 0.2054474949836731, "learning_rate": 9.587043566290686e-06, "loss": 1.2406, "step": 2011 }, { "epoch": 0.15436747950845875, "grad_norm": 0.2563524544239044, "learning_rate": 9.58655017783689e-06, "loss": 1.2076, "step": 2012 }, { "epoch": 0.15444420290781682, "grad_norm": 0.1823742836713791, "learning_rate": 9.586056507527266e-06, "loss": 1.1844, "step": 2013 }, { "epoch": 0.15452092630717493, "grad_norm": 0.2035428136587143, "learning_rate": 9.585562555392147e-06, "loss": 1.0933, "step": 2014 }, { "epoch": 0.154597649706533, "grad_norm": 0.23156315088272095, "learning_rate": 9.585068321461892e-06, "loss": 1.1899, "step": 2015 }, { "epoch": 0.15467437310589108, "grad_norm": 0.22444723546504974, "learning_rate": 9.584573805766868e-06, "loss": 1.1603, "step": 2016 }, { "epoch": 0.15475109650524915, "grad_norm": 0.22137439250946045, "learning_rate": 9.584079008337468e-06, "loss": 1.123, "step": 2017 }, { "epoch": 0.15482781990460723, "grad_norm": 0.23636212944984436, "learning_rate": 9.5835839292041e-06, "loss": 1.2008, "step": 2018 }, { "epoch": 0.15490454330396533, "grad_norm": 0.09770432859659195, "learning_rate": 9.583088568397184e-06, "loss": 1.2383, "step": 2019 }, { "epoch": 0.1549812667033234, "grad_norm": 0.24273090064525604, "learning_rate": 9.582592925947163e-06, "loss": 1.127, "step": 2020 }, { "epoch": 0.15505799010268148, "grad_norm": 0.24665778875350952, "learning_rate": 9.582097001884495e-06, "loss": 1.113, "step": 2021 }, { "epoch": 0.15513471350203956, "grad_norm": 0.2181529402732849, "learning_rate": 9.581600796239658e-06, "loss": 1.1609, "step": 2022 }, { "epoch": 0.15521143690139763, "grad_norm": 0.2574891448020935, "learning_rate": 9.581104309043144e-06, "loss": 1.1615, "step": 2023 }, { "epoch": 0.15528816030075573, "grad_norm": 0.2374785840511322, "learning_rate": 9.580607540325461e-06, "loss": 1.0965, "step": 2024 }, { "epoch": 0.1553648837001138, "grad_norm": 0.3057141602039337, "learning_rate": 9.580110490117142e-06, "loss": 1.1341, "step": 2025 }, { "epoch": 0.15544160709947188, "grad_norm": 0.18170194327831268, "learning_rate": 9.579613158448727e-06, "loss": 1.1411, "step": 2026 }, { "epoch": 0.15551833049882996, "grad_norm": 0.233031764626503, "learning_rate": 9.57911554535078e-06, "loss": 1.2174, "step": 2027 }, { "epoch": 0.15559505389818806, "grad_norm": 0.1885393261909485, "learning_rate": 9.57861765085388e-06, "loss": 1.063, "step": 2028 }, { "epoch": 0.15567177729754614, "grad_norm": 0.30011457204818726, "learning_rate": 9.578119474988627e-06, "loss": 1.1496, "step": 2029 }, { "epoch": 0.1557485006969042, "grad_norm": 0.23142299056053162, "learning_rate": 9.577621017785634e-06, "loss": 1.072, "step": 2030 }, { "epoch": 0.1558252240962623, "grad_norm": 0.20868517458438873, "learning_rate": 9.57712227927553e-06, "loss": 1.224, "step": 2031 }, { "epoch": 0.15590194749562036, "grad_norm": 0.2642485201358795, "learning_rate": 9.576623259488966e-06, "loss": 1.145, "step": 2032 }, { "epoch": 0.15597867089497847, "grad_norm": 0.18220150470733643, "learning_rate": 9.576123958456607e-06, "loss": 1.1868, "step": 2033 }, { "epoch": 0.15605539429433654, "grad_norm": 0.2232791632413864, "learning_rate": 9.575624376209139e-06, "loss": 1.189, "step": 2034 }, { "epoch": 0.15613211769369462, "grad_norm": 0.2343621551990509, "learning_rate": 9.575124512777258e-06, "loss": 1.0755, "step": 2035 }, { "epoch": 0.1562088410930527, "grad_norm": 0.24991604685783386, "learning_rate": 9.574624368191685e-06, "loss": 1.1689, "step": 2036 }, { "epoch": 0.15628556449241077, "grad_norm": 0.18824012577533722, "learning_rate": 9.574123942483157e-06, "loss": 1.1248, "step": 2037 }, { "epoch": 0.15636228789176887, "grad_norm": 0.35677722096443176, "learning_rate": 9.573623235682424e-06, "loss": 1.1779, "step": 2038 }, { "epoch": 0.15643901129112694, "grad_norm": 0.21129262447357178, "learning_rate": 9.573122247820255e-06, "loss": 1.1637, "step": 2039 }, { "epoch": 0.15651573469048502, "grad_norm": 0.2012391835451126, "learning_rate": 9.572620978927438e-06, "loss": 1.1434, "step": 2040 }, { "epoch": 0.1565924580898431, "grad_norm": 0.20714183151721954, "learning_rate": 9.572119429034778e-06, "loss": 1.1037, "step": 2041 }, { "epoch": 0.15666918148920117, "grad_norm": 0.22453810274600983, "learning_rate": 9.571617598173097e-06, "loss": 1.0941, "step": 2042 }, { "epoch": 0.15674590488855927, "grad_norm": 0.1930866241455078, "learning_rate": 9.571115486373232e-06, "loss": 1.1395, "step": 2043 }, { "epoch": 0.15682262828791735, "grad_norm": 0.24701008200645447, "learning_rate": 9.57061309366604e-06, "loss": 1.1027, "step": 2044 }, { "epoch": 0.15689935168727542, "grad_norm": 0.19313572347164154, "learning_rate": 9.570110420082394e-06, "loss": 1.1284, "step": 2045 }, { "epoch": 0.1569760750866335, "grad_norm": 0.24622128903865814, "learning_rate": 9.569607465653186e-06, "loss": 1.1828, "step": 2046 }, { "epoch": 0.15705279848599157, "grad_norm": 0.22452202439308167, "learning_rate": 9.569104230409323e-06, "loss": 1.1436, "step": 2047 }, { "epoch": 0.15712952188534968, "grad_norm": 0.23727566003799438, "learning_rate": 9.56860071438173e-06, "loss": 1.1045, "step": 2048 }, { "epoch": 0.15720624528470775, "grad_norm": 0.3425019681453705, "learning_rate": 9.56809691760135e-06, "loss": 1.1653, "step": 2049 }, { "epoch": 0.15728296868406583, "grad_norm": 0.2278846949338913, "learning_rate": 9.567592840099142e-06, "loss": 1.1311, "step": 2050 }, { "epoch": 0.1573596920834239, "grad_norm": 0.2117299735546112, "learning_rate": 9.567088481906084e-06, "loss": 1.0887, "step": 2051 }, { "epoch": 0.15743641548278198, "grad_norm": 0.33694756031036377, "learning_rate": 9.566583843053169e-06, "loss": 1.1889, "step": 2052 }, { "epoch": 0.15751313888214008, "grad_norm": 0.24547486007213593, "learning_rate": 9.56607892357141e-06, "loss": 1.1477, "step": 2053 }, { "epoch": 0.15758986228149816, "grad_norm": 0.26392242312431335, "learning_rate": 9.565573723491831e-06, "loss": 1.1411, "step": 2054 }, { "epoch": 0.15766658568085623, "grad_norm": 0.24614481627941132, "learning_rate": 9.565068242845483e-06, "loss": 1.1569, "step": 2055 }, { "epoch": 0.1577433090802143, "grad_norm": 0.20265927910804749, "learning_rate": 9.564562481663428e-06, "loss": 1.1478, "step": 2056 }, { "epoch": 0.1578200324795724, "grad_norm": 0.18763500452041626, "learning_rate": 9.564056439976745e-06, "loss": 1.2376, "step": 2057 }, { "epoch": 0.15789675587893048, "grad_norm": 0.20923388004302979, "learning_rate": 9.563550117816534e-06, "loss": 1.1316, "step": 2058 }, { "epoch": 0.15797347927828856, "grad_norm": 0.22057418525218964, "learning_rate": 9.563043515213906e-06, "loss": 1.147, "step": 2059 }, { "epoch": 0.15805020267764663, "grad_norm": 0.33251771330833435, "learning_rate": 9.562536632199997e-06, "loss": 1.1864, "step": 2060 }, { "epoch": 0.1581269260770047, "grad_norm": 0.23729364573955536, "learning_rate": 9.562029468805955e-06, "loss": 1.1924, "step": 2061 }, { "epoch": 0.1582036494763628, "grad_norm": 0.1818159520626068, "learning_rate": 9.561522025062946e-06, "loss": 1.2166, "step": 2062 }, { "epoch": 0.1582803728757209, "grad_norm": 0.25044599175453186, "learning_rate": 9.561014301002154e-06, "loss": 1.1676, "step": 2063 }, { "epoch": 0.15835709627507896, "grad_norm": 0.18830016255378723, "learning_rate": 9.56050629665478e-06, "loss": 1.071, "step": 2064 }, { "epoch": 0.15843381967443704, "grad_norm": 0.20260435342788696, "learning_rate": 9.559998012052042e-06, "loss": 1.2182, "step": 2065 }, { "epoch": 0.1585105430737951, "grad_norm": 0.23603053390979767, "learning_rate": 9.559489447225175e-06, "loss": 1.1273, "step": 2066 }, { "epoch": 0.15858726647315322, "grad_norm": 0.21622511744499207, "learning_rate": 9.558980602205434e-06, "loss": 1.1337, "step": 2067 }, { "epoch": 0.1586639898725113, "grad_norm": 0.18918462097644806, "learning_rate": 9.558471477024088e-06, "loss": 1.1502, "step": 2068 }, { "epoch": 0.15874071327186937, "grad_norm": 0.09809857606887817, "learning_rate": 9.557962071712422e-06, "loss": 1.1776, "step": 2069 }, { "epoch": 0.15881743667122744, "grad_norm": 0.2507229745388031, "learning_rate": 9.55745238630174e-06, "loss": 1.1836, "step": 2070 }, { "epoch": 0.15889416007058552, "grad_norm": 0.19449305534362793, "learning_rate": 9.556942420823368e-06, "loss": 1.1815, "step": 2071 }, { "epoch": 0.15897088346994362, "grad_norm": 0.25919851660728455, "learning_rate": 9.55643217530864e-06, "loss": 1.1114, "step": 2072 }, { "epoch": 0.1590476068693017, "grad_norm": 0.2226218581199646, "learning_rate": 9.555921649788917e-06, "loss": 1.2002, "step": 2073 }, { "epoch": 0.15912433026865977, "grad_norm": 0.21056830883026123, "learning_rate": 9.555410844295568e-06, "loss": 1.1251, "step": 2074 }, { "epoch": 0.15920105366801784, "grad_norm": 0.414461612701416, "learning_rate": 9.554899758859983e-06, "loss": 1.2288, "step": 2075 }, { "epoch": 0.15927777706737592, "grad_norm": 0.18712544441223145, "learning_rate": 9.554388393513572e-06, "loss": 1.0804, "step": 2076 }, { "epoch": 0.15935450046673402, "grad_norm": 0.2156667709350586, "learning_rate": 9.55387674828776e-06, "loss": 1.1405, "step": 2077 }, { "epoch": 0.1594312238660921, "grad_norm": 0.31937262415885925, "learning_rate": 9.553364823213987e-06, "loss": 1.1835, "step": 2078 }, { "epoch": 0.15950794726545017, "grad_norm": 0.2515909969806671, "learning_rate": 9.552852618323714e-06, "loss": 1.0875, "step": 2079 }, { "epoch": 0.15958467066480825, "grad_norm": 0.21155846118927002, "learning_rate": 9.552340133648414e-06, "loss": 1.1789, "step": 2080 }, { "epoch": 0.15966139406416632, "grad_norm": 0.17414002120494843, "learning_rate": 9.551827369219584e-06, "loss": 1.1204, "step": 2081 }, { "epoch": 0.15973811746352443, "grad_norm": 0.3160047233104706, "learning_rate": 9.551314325068734e-06, "loss": 1.1797, "step": 2082 }, { "epoch": 0.1598148408628825, "grad_norm": 0.20656058192253113, "learning_rate": 9.550801001227393e-06, "loss": 1.1397, "step": 2083 }, { "epoch": 0.15989156426224058, "grad_norm": 0.24804997444152832, "learning_rate": 9.550287397727102e-06, "loss": 1.2056, "step": 2084 }, { "epoch": 0.15996828766159865, "grad_norm": 0.22514580190181732, "learning_rate": 9.54977351459943e-06, "loss": 1.2265, "step": 2085 }, { "epoch": 0.16004501106095675, "grad_norm": 0.18052972853183746, "learning_rate": 9.54925935187595e-06, "loss": 1.1895, "step": 2086 }, { "epoch": 0.16012173446031483, "grad_norm": 0.37201356887817383, "learning_rate": 9.548744909588261e-06, "loss": 1.0704, "step": 2087 }, { "epoch": 0.1601984578596729, "grad_norm": 0.09959478676319122, "learning_rate": 9.548230187767977e-06, "loss": 1.3276, "step": 2088 }, { "epoch": 0.16027518125903098, "grad_norm": 0.20743218064308167, "learning_rate": 9.547715186446732e-06, "loss": 1.1021, "step": 2089 }, { "epoch": 0.16035190465838906, "grad_norm": 0.1764477640390396, "learning_rate": 9.547199905656168e-06, "loss": 1.2106, "step": 2090 }, { "epoch": 0.16042862805774716, "grad_norm": 0.2483443319797516, "learning_rate": 9.546684345427955e-06, "loss": 1.026, "step": 2091 }, { "epoch": 0.16050535145710523, "grad_norm": 0.2307015359401703, "learning_rate": 9.546168505793776e-06, "loss": 1.1434, "step": 2092 }, { "epoch": 0.1605820748564633, "grad_norm": 0.16974154114723206, "learning_rate": 9.545652386785326e-06, "loss": 1.2054, "step": 2093 }, { "epoch": 0.16065879825582138, "grad_norm": 0.1993512660264969, "learning_rate": 9.545135988434327e-06, "loss": 1.2023, "step": 2094 }, { "epoch": 0.16073552165517946, "grad_norm": 0.36411920189857483, "learning_rate": 9.544619310772511e-06, "loss": 1.1722, "step": 2095 }, { "epoch": 0.16081224505453756, "grad_norm": 0.21457603573799133, "learning_rate": 9.544102353831628e-06, "loss": 1.1075, "step": 2096 }, { "epoch": 0.16088896845389564, "grad_norm": 0.20533545315265656, "learning_rate": 9.543585117643449e-06, "loss": 1.1752, "step": 2097 }, { "epoch": 0.1609656918532537, "grad_norm": 0.19450706243515015, "learning_rate": 9.543067602239758e-06, "loss": 1.1593, "step": 2098 }, { "epoch": 0.1610424152526118, "grad_norm": 0.2003222405910492, "learning_rate": 9.542549807652357e-06, "loss": 1.2266, "step": 2099 }, { "epoch": 0.16111913865196986, "grad_norm": 0.3374286890029907, "learning_rate": 9.542031733913069e-06, "loss": 1.1262, "step": 2100 }, { "epoch": 0.16119586205132796, "grad_norm": 0.18483766913414001, "learning_rate": 9.541513381053727e-06, "loss": 1.1239, "step": 2101 }, { "epoch": 0.16127258545068604, "grad_norm": 0.21147926151752472, "learning_rate": 9.540994749106187e-06, "loss": 1.1077, "step": 2102 }, { "epoch": 0.16134930885004412, "grad_norm": 0.4854583442211151, "learning_rate": 9.540475838102321e-06, "loss": 1.0441, "step": 2103 }, { "epoch": 0.1614260322494022, "grad_norm": 0.21921397745609283, "learning_rate": 9.539956648074015e-06, "loss": 1.1254, "step": 2104 }, { "epoch": 0.16150275564876027, "grad_norm": 0.23473750054836273, "learning_rate": 9.539437179053178e-06, "loss": 1.1953, "step": 2105 }, { "epoch": 0.16157947904811837, "grad_norm": 0.19573575258255005, "learning_rate": 9.538917431071732e-06, "loss": 1.1213, "step": 2106 }, { "epoch": 0.16165620244747644, "grad_norm": 0.31278982758522034, "learning_rate": 9.538397404161615e-06, "loss": 1.1992, "step": 2107 }, { "epoch": 0.16173292584683452, "grad_norm": 0.22348299622535706, "learning_rate": 9.537877098354787e-06, "loss": 1.1376, "step": 2108 }, { "epoch": 0.1618096492461926, "grad_norm": 0.23378513753414154, "learning_rate": 9.537356513683218e-06, "loss": 1.1799, "step": 2109 }, { "epoch": 0.16188637264555067, "grad_norm": 0.19391639530658722, "learning_rate": 9.536835650178901e-06, "loss": 1.0588, "step": 2110 }, { "epoch": 0.16196309604490877, "grad_norm": 0.19236041605472565, "learning_rate": 9.536314507873847e-06, "loss": 1.2312, "step": 2111 }, { "epoch": 0.16203981944426685, "grad_norm": 0.22567318379878998, "learning_rate": 9.535793086800081e-06, "loss": 1.1052, "step": 2112 }, { "epoch": 0.16211654284362492, "grad_norm": 0.20812644064426422, "learning_rate": 9.535271386989642e-06, "loss": 1.1581, "step": 2113 }, { "epoch": 0.162193266242983, "grad_norm": 0.33611151576042175, "learning_rate": 9.534749408474593e-06, "loss": 1.1518, "step": 2114 }, { "epoch": 0.1622699896423411, "grad_norm": 0.20488756895065308, "learning_rate": 9.53422715128701e-06, "loss": 1.1707, "step": 2115 }, { "epoch": 0.16234671304169918, "grad_norm": 0.2524953782558441, "learning_rate": 9.533704615458987e-06, "loss": 1.1371, "step": 2116 }, { "epoch": 0.16242343644105725, "grad_norm": 0.19166532158851624, "learning_rate": 9.533181801022638e-06, "loss": 1.1924, "step": 2117 }, { "epoch": 0.16250015984041533, "grad_norm": 0.24997901916503906, "learning_rate": 9.532658708010088e-06, "loss": 1.1833, "step": 2118 }, { "epoch": 0.1625768832397734, "grad_norm": 0.39434996247291565, "learning_rate": 9.532135336453481e-06, "loss": 1.1349, "step": 2119 }, { "epoch": 0.1626536066391315, "grad_norm": 0.25029027462005615, "learning_rate": 9.531611686384986e-06, "loss": 1.1107, "step": 2120 }, { "epoch": 0.16273033003848958, "grad_norm": 0.33847925066947937, "learning_rate": 9.531087757836775e-06, "loss": 1.2014, "step": 2121 }, { "epoch": 0.16280705343784765, "grad_norm": 0.2968520224094391, "learning_rate": 9.53056355084105e-06, "loss": 0.9999, "step": 2122 }, { "epoch": 0.16288377683720573, "grad_norm": 0.23385468125343323, "learning_rate": 9.530039065430024e-06, "loss": 1.0807, "step": 2123 }, { "epoch": 0.1629605002365638, "grad_norm": 0.24300432205200195, "learning_rate": 9.529514301635928e-06, "loss": 1.2442, "step": 2124 }, { "epoch": 0.1630372236359219, "grad_norm": 0.19020265340805054, "learning_rate": 9.528989259491008e-06, "loss": 1.1007, "step": 2125 }, { "epoch": 0.16311394703527998, "grad_norm": 0.1036483570933342, "learning_rate": 9.528463939027531e-06, "loss": 1.2842, "step": 2126 }, { "epoch": 0.16319067043463806, "grad_norm": 0.255843847990036, "learning_rate": 9.52793834027778e-06, "loss": 1.2026, "step": 2127 }, { "epoch": 0.16326739383399613, "grad_norm": 0.21262672543525696, "learning_rate": 9.527412463274055e-06, "loss": 1.1497, "step": 2128 }, { "epoch": 0.1633441172333542, "grad_norm": 0.21807974576950073, "learning_rate": 9.52688630804867e-06, "loss": 1.0955, "step": 2129 }, { "epoch": 0.1634208406327123, "grad_norm": 0.2768261432647705, "learning_rate": 9.52635987463396e-06, "loss": 1.1873, "step": 2130 }, { "epoch": 0.16349756403207039, "grad_norm": 0.19502860307693481, "learning_rate": 9.525833163062275e-06, "loss": 1.1118, "step": 2131 }, { "epoch": 0.16357428743142846, "grad_norm": 0.23477374017238617, "learning_rate": 9.525306173365984e-06, "loss": 1.0994, "step": 2132 }, { "epoch": 0.16365101083078654, "grad_norm": 0.24862033128738403, "learning_rate": 9.524778905577471e-06, "loss": 1.2381, "step": 2133 }, { "epoch": 0.1637277342301446, "grad_norm": 0.21838483214378357, "learning_rate": 9.52425135972914e-06, "loss": 1.1532, "step": 2134 }, { "epoch": 0.16380445762950271, "grad_norm": 1.5853451490402222, "learning_rate": 9.523723535853408e-06, "loss": 1.0483, "step": 2135 }, { "epoch": 0.1638811810288608, "grad_norm": 0.4500843584537506, "learning_rate": 9.523195433982711e-06, "loss": 1.0496, "step": 2136 }, { "epoch": 0.16395790442821886, "grad_norm": 0.20410193502902985, "learning_rate": 9.522667054149504e-06, "loss": 1.1843, "step": 2137 }, { "epoch": 0.16403462782757694, "grad_norm": 0.21866057813167572, "learning_rate": 9.522138396386256e-06, "loss": 1.2092, "step": 2138 }, { "epoch": 0.16411135122693501, "grad_norm": 0.2125825732946396, "learning_rate": 9.521609460725455e-06, "loss": 1.1337, "step": 2139 }, { "epoch": 0.16418807462629312, "grad_norm": 0.19987863302230835, "learning_rate": 9.521080247199606e-06, "loss": 1.0873, "step": 2140 }, { "epoch": 0.1642647980256512, "grad_norm": 0.44223955273628235, "learning_rate": 9.52055075584123e-06, "loss": 1.1656, "step": 2141 }, { "epoch": 0.16434152142500927, "grad_norm": 0.3164023160934448, "learning_rate": 9.520020986682863e-06, "loss": 1.0856, "step": 2142 }, { "epoch": 0.16441824482436734, "grad_norm": 0.6943272352218628, "learning_rate": 9.519490939757065e-06, "loss": 1.2088, "step": 2143 }, { "epoch": 0.16449496822372545, "grad_norm": 0.42794808745384216, "learning_rate": 9.518960615096407e-06, "loss": 1.081, "step": 2144 }, { "epoch": 0.16457169162308352, "grad_norm": 0.20396988093852997, "learning_rate": 9.51843001273348e-06, "loss": 1.1554, "step": 2145 }, { "epoch": 0.1646484150224416, "grad_norm": 0.22930631041526794, "learning_rate": 9.517899132700889e-06, "loss": 1.1636, "step": 2146 }, { "epoch": 0.16472513842179967, "grad_norm": 0.18801386654376984, "learning_rate": 9.51736797503126e-06, "loss": 1.1465, "step": 2147 }, { "epoch": 0.16480186182115775, "grad_norm": 0.19855280220508575, "learning_rate": 9.516836539757233e-06, "loss": 1.1962, "step": 2148 }, { "epoch": 0.16487858522051585, "grad_norm": 0.26976296305656433, "learning_rate": 9.516304826911466e-06, "loss": 1.152, "step": 2149 }, { "epoch": 0.16495530861987392, "grad_norm": 0.2148151695728302, "learning_rate": 9.515772836526633e-06, "loss": 1.1943, "step": 2150 }, { "epoch": 0.165032032019232, "grad_norm": 0.1911543309688568, "learning_rate": 9.51524056863543e-06, "loss": 1.0797, "step": 2151 }, { "epoch": 0.16510875541859008, "grad_norm": 0.22877611219882965, "learning_rate": 9.514708023270562e-06, "loss": 1.1077, "step": 2152 }, { "epoch": 0.16518547881794815, "grad_norm": 0.2842238247394562, "learning_rate": 9.514175200464758e-06, "loss": 1.0644, "step": 2153 }, { "epoch": 0.16526220221730625, "grad_norm": 0.30748894810676575, "learning_rate": 9.51364210025076e-06, "loss": 1.0647, "step": 2154 }, { "epoch": 0.16533892561666433, "grad_norm": 0.2948215901851654, "learning_rate": 9.51310872266133e-06, "loss": 1.146, "step": 2155 }, { "epoch": 0.1654156490160224, "grad_norm": 0.21288903057575226, "learning_rate": 9.512575067729243e-06, "loss": 1.1476, "step": 2156 }, { "epoch": 0.16549237241538048, "grad_norm": 0.28138405084609985, "learning_rate": 9.512041135487298e-06, "loss": 1.1936, "step": 2157 }, { "epoch": 0.16556909581473855, "grad_norm": 0.20611929893493652, "learning_rate": 9.511506925968302e-06, "loss": 1.1972, "step": 2158 }, { "epoch": 0.16564581921409666, "grad_norm": 0.23472869396209717, "learning_rate": 9.510972439205085e-06, "loss": 1.1436, "step": 2159 }, { "epoch": 0.16572254261345473, "grad_norm": 0.23776660859584808, "learning_rate": 9.510437675230492e-06, "loss": 1.0993, "step": 2160 }, { "epoch": 0.1657992660128128, "grad_norm": 0.22972682118415833, "learning_rate": 9.509902634077388e-06, "loss": 1.2076, "step": 2161 }, { "epoch": 0.16587598941217088, "grad_norm": 0.25331997871398926, "learning_rate": 9.509367315778652e-06, "loss": 1.1918, "step": 2162 }, { "epoch": 0.16595271281152896, "grad_norm": 0.20717839896678925, "learning_rate": 9.50883172036718e-06, "loss": 1.1719, "step": 2163 }, { "epoch": 0.16602943621088706, "grad_norm": 0.25136467814445496, "learning_rate": 9.508295847875885e-06, "loss": 1.0276, "step": 2164 }, { "epoch": 0.16610615961024514, "grad_norm": 0.21725061535835266, "learning_rate": 9.507759698337698e-06, "loss": 1.2076, "step": 2165 }, { "epoch": 0.1661828830096032, "grad_norm": 0.2496083825826645, "learning_rate": 9.507223271785568e-06, "loss": 1.1218, "step": 2166 }, { "epoch": 0.16625960640896129, "grad_norm": 0.3895926773548126, "learning_rate": 9.50668656825246e-06, "loss": 1.0806, "step": 2167 }, { "epoch": 0.16633632980831936, "grad_norm": 0.22803018987178802, "learning_rate": 9.506149587771356e-06, "loss": 1.1339, "step": 2168 }, { "epoch": 0.16641305320767746, "grad_norm": 0.1828988641500473, "learning_rate": 9.50561233037525e-06, "loss": 1.1102, "step": 2169 }, { "epoch": 0.16648977660703554, "grad_norm": 0.19422142207622528, "learning_rate": 9.505074796097167e-06, "loss": 1.0263, "step": 2170 }, { "epoch": 0.16656650000639361, "grad_norm": 0.19551660120487213, "learning_rate": 9.504536984970132e-06, "loss": 1.1005, "step": 2171 }, { "epoch": 0.1666432234057517, "grad_norm": 0.20192411541938782, "learning_rate": 9.503998897027199e-06, "loss": 1.2126, "step": 2172 }, { "epoch": 0.1667199468051098, "grad_norm": 0.19369912147521973, "learning_rate": 9.503460532301434e-06, "loss": 1.1369, "step": 2173 }, { "epoch": 0.16679667020446787, "grad_norm": 0.2055756002664566, "learning_rate": 9.50292189082592e-06, "loss": 1.2037, "step": 2174 }, { "epoch": 0.16687339360382594, "grad_norm": 0.18160320818424225, "learning_rate": 9.502382972633759e-06, "loss": 1.0908, "step": 2175 }, { "epoch": 0.16695011700318402, "grad_norm": 0.19459889829158783, "learning_rate": 9.501843777758068e-06, "loss": 1.2029, "step": 2176 }, { "epoch": 0.1670268404025421, "grad_norm": 0.19416548311710358, "learning_rate": 9.501304306231985e-06, "loss": 1.1479, "step": 2177 }, { "epoch": 0.1671035638019002, "grad_norm": 0.18875733017921448, "learning_rate": 9.500764558088657e-06, "loss": 1.1217, "step": 2178 }, { "epoch": 0.16718028720125827, "grad_norm": 0.2226128727197647, "learning_rate": 9.500224533361258e-06, "loss": 1.1519, "step": 2179 }, { "epoch": 0.16725701060061635, "grad_norm": 0.20497910678386688, "learning_rate": 9.499684232082971e-06, "loss": 1.156, "step": 2180 }, { "epoch": 0.16733373399997442, "grad_norm": 0.26705262064933777, "learning_rate": 9.499143654286998e-06, "loss": 1.1137, "step": 2181 }, { "epoch": 0.1674104573993325, "grad_norm": 0.26833343505859375, "learning_rate": 9.498602800006562e-06, "loss": 1.1561, "step": 2182 }, { "epoch": 0.1674871807986906, "grad_norm": 0.21096445620059967, "learning_rate": 9.498061669274899e-06, "loss": 1.1885, "step": 2183 }, { "epoch": 0.16756390419804867, "grad_norm": 0.2505190968513489, "learning_rate": 9.49752026212526e-06, "loss": 1.1364, "step": 2184 }, { "epoch": 0.16764062759740675, "grad_norm": 0.26950252056121826, "learning_rate": 9.49697857859092e-06, "loss": 1.116, "step": 2185 }, { "epoch": 0.16771735099676482, "grad_norm": 0.22410154342651367, "learning_rate": 9.496436618705165e-06, "loss": 1.1054, "step": 2186 }, { "epoch": 0.1677940743961229, "grad_norm": 0.2103593945503235, "learning_rate": 9.4958943825013e-06, "loss": 1.192, "step": 2187 }, { "epoch": 0.167870797795481, "grad_norm": 0.2381446659564972, "learning_rate": 9.495351870012648e-06, "loss": 1.1077, "step": 2188 }, { "epoch": 0.16794752119483908, "grad_norm": 0.20958901941776276, "learning_rate": 9.494809081272546e-06, "loss": 1.0528, "step": 2189 }, { "epoch": 0.16802424459419715, "grad_norm": 0.10032162815332413, "learning_rate": 9.494266016314351e-06, "loss": 1.2846, "step": 2190 }, { "epoch": 0.16810096799355523, "grad_norm": 0.24075327813625336, "learning_rate": 9.493722675171435e-06, "loss": 1.1265, "step": 2191 }, { "epoch": 0.1681776913929133, "grad_norm": 0.4734184443950653, "learning_rate": 9.493179057877189e-06, "loss": 1.1145, "step": 2192 }, { "epoch": 0.1682544147922714, "grad_norm": 0.2966945171356201, "learning_rate": 9.492635164465018e-06, "loss": 1.1355, "step": 2193 }, { "epoch": 0.16833113819162948, "grad_norm": 0.22833940386772156, "learning_rate": 9.492090994968349e-06, "loss": 1.1253, "step": 2194 }, { "epoch": 0.16840786159098756, "grad_norm": 0.42332908511161804, "learning_rate": 9.491546549420618e-06, "loss": 1.2067, "step": 2195 }, { "epoch": 0.16848458499034563, "grad_norm": 0.2542763650417328, "learning_rate": 9.491001827855285e-06, "loss": 1.1723, "step": 2196 }, { "epoch": 0.1685613083897037, "grad_norm": 0.25688502192497253, "learning_rate": 9.490456830305825e-06, "loss": 1.1007, "step": 2197 }, { "epoch": 0.1686380317890618, "grad_norm": 0.19626885652542114, "learning_rate": 9.489911556805731e-06, "loss": 1.1675, "step": 2198 }, { "epoch": 0.16871475518841988, "grad_norm": 0.23735469579696655, "learning_rate": 9.489366007388507e-06, "loss": 1.1197, "step": 2199 }, { "epoch": 0.16879147858777796, "grad_norm": 0.2051522135734558, "learning_rate": 9.488820182087683e-06, "loss": 1.2342, "step": 2200 }, { "epoch": 0.16886820198713604, "grad_norm": 0.19969405233860016, "learning_rate": 9.4882740809368e-06, "loss": 1.1504, "step": 2201 }, { "epoch": 0.16894492538649414, "grad_norm": 0.1875462830066681, "learning_rate": 9.487727703969416e-06, "loss": 1.1176, "step": 2202 }, { "epoch": 0.1690216487858522, "grad_norm": 0.3396759331226349, "learning_rate": 9.487181051219107e-06, "loss": 1.1817, "step": 2203 }, { "epoch": 0.1690983721852103, "grad_norm": 0.45657867193222046, "learning_rate": 9.486634122719471e-06, "loss": 1.1182, "step": 2204 }, { "epoch": 0.16917509558456836, "grad_norm": 0.3359649181365967, "learning_rate": 9.486086918504112e-06, "loss": 1.2392, "step": 2205 }, { "epoch": 0.16925181898392644, "grad_norm": 0.22778791189193726, "learning_rate": 9.485539438606661e-06, "loss": 1.1346, "step": 2206 }, { "epoch": 0.16932854238328454, "grad_norm": 0.18971261382102966, "learning_rate": 9.484991683060762e-06, "loss": 1.1771, "step": 2207 }, { "epoch": 0.16940526578264262, "grad_norm": 0.20428596436977386, "learning_rate": 9.484443651900074e-06, "loss": 1.1463, "step": 2208 }, { "epoch": 0.1694819891820007, "grad_norm": 0.21761702001094818, "learning_rate": 9.483895345158278e-06, "loss": 1.1853, "step": 2209 }, { "epoch": 0.16955871258135877, "grad_norm": 0.1935672163963318, "learning_rate": 9.483346762869065e-06, "loss": 1.1195, "step": 2210 }, { "epoch": 0.16963543598071684, "grad_norm": 0.22417443990707397, "learning_rate": 9.48279790506615e-06, "loss": 1.1608, "step": 2211 }, { "epoch": 0.16971215938007495, "grad_norm": 0.2057846188545227, "learning_rate": 9.48224877178326e-06, "loss": 1.1515, "step": 2212 }, { "epoch": 0.16978888277943302, "grad_norm": 0.20059747993946075, "learning_rate": 9.481699363054142e-06, "loss": 1.1781, "step": 2213 }, { "epoch": 0.1698656061787911, "grad_norm": 0.20704685151576996, "learning_rate": 9.481149678912557e-06, "loss": 1.1556, "step": 2214 }, { "epoch": 0.16994232957814917, "grad_norm": 0.18618303537368774, "learning_rate": 9.480599719392288e-06, "loss": 1.0592, "step": 2215 }, { "epoch": 0.17001905297750725, "grad_norm": 0.19959233701229095, "learning_rate": 9.480049484527127e-06, "loss": 1.1403, "step": 2216 }, { "epoch": 0.17009577637686535, "grad_norm": 0.40190064907073975, "learning_rate": 9.479498974350892e-06, "loss": 1.1262, "step": 2217 }, { "epoch": 0.17017249977622342, "grad_norm": 0.20349447429180145, "learning_rate": 9.478948188897408e-06, "loss": 1.1159, "step": 2218 }, { "epoch": 0.1702492231755815, "grad_norm": 0.18416503071784973, "learning_rate": 9.478397128200528e-06, "loss": 1.1197, "step": 2219 }, { "epoch": 0.17032594657493957, "grad_norm": 0.2353852391242981, "learning_rate": 9.477845792294111e-06, "loss": 1.1558, "step": 2220 }, { "epoch": 0.17040266997429765, "grad_norm": 0.25634971261024475, "learning_rate": 9.477294181212042e-06, "loss": 1.1323, "step": 2221 }, { "epoch": 0.17047939337365575, "grad_norm": 0.24557998776435852, "learning_rate": 9.476742294988214e-06, "loss": 1.1368, "step": 2222 }, { "epoch": 0.17055611677301383, "grad_norm": 0.23862715065479279, "learning_rate": 9.47619013365655e-06, "loss": 1.1261, "step": 2223 }, { "epoch": 0.1706328401723719, "grad_norm": 0.17180827260017395, "learning_rate": 9.475637697250975e-06, "loss": 1.1225, "step": 2224 }, { "epoch": 0.17070956357172998, "grad_norm": 0.26009756326675415, "learning_rate": 9.475084985805438e-06, "loss": 1.1336, "step": 2225 }, { "epoch": 0.17078628697108805, "grad_norm": 0.22904060781002045, "learning_rate": 9.474531999353908e-06, "loss": 1.1959, "step": 2226 }, { "epoch": 0.17086301037044616, "grad_norm": 0.23689502477645874, "learning_rate": 9.473978737930368e-06, "loss": 1.1267, "step": 2227 }, { "epoch": 0.17093973376980423, "grad_norm": 0.20051412284374237, "learning_rate": 9.473425201568811e-06, "loss": 1.1089, "step": 2228 }, { "epoch": 0.1710164571691623, "grad_norm": 0.1803937554359436, "learning_rate": 9.472871390303259e-06, "loss": 1.1174, "step": 2229 }, { "epoch": 0.17109318056852038, "grad_norm": 0.17332898080348969, "learning_rate": 9.472317304167745e-06, "loss": 1.1956, "step": 2230 }, { "epoch": 0.17116990396787848, "grad_norm": 0.21548639237880707, "learning_rate": 9.471762943196317e-06, "loss": 1.0706, "step": 2231 }, { "epoch": 0.17124662736723656, "grad_norm": 0.2405877560377121, "learning_rate": 9.471208307423043e-06, "loss": 1.1645, "step": 2232 }, { "epoch": 0.17132335076659463, "grad_norm": 0.32463350892066956, "learning_rate": 9.470653396882008e-06, "loss": 1.1503, "step": 2233 }, { "epoch": 0.1714000741659527, "grad_norm": 0.19975578784942627, "learning_rate": 9.470098211607308e-06, "loss": 1.2769, "step": 2234 }, { "epoch": 0.17147679756531078, "grad_norm": 0.21808740496635437, "learning_rate": 9.469542751633068e-06, "loss": 1.1121, "step": 2235 }, { "epoch": 0.1715535209646689, "grad_norm": 0.19147366285324097, "learning_rate": 9.468987016993417e-06, "loss": 1.176, "step": 2236 }, { "epoch": 0.17163024436402696, "grad_norm": 0.18565592169761658, "learning_rate": 9.468431007722507e-06, "loss": 1.1371, "step": 2237 }, { "epoch": 0.17170696776338504, "grad_norm": 0.24622434377670288, "learning_rate": 9.467874723854506e-06, "loss": 1.0405, "step": 2238 }, { "epoch": 0.1717836911627431, "grad_norm": 0.1730266660451889, "learning_rate": 9.467318165423603e-06, "loss": 1.0886, "step": 2239 }, { "epoch": 0.1718604145621012, "grad_norm": 0.2212134301662445, "learning_rate": 9.466761332463997e-06, "loss": 1.0848, "step": 2240 }, { "epoch": 0.1719371379614593, "grad_norm": 0.19561392068862915, "learning_rate": 9.466204225009905e-06, "loss": 1.177, "step": 2241 }, { "epoch": 0.17201386136081737, "grad_norm": 0.09859665483236313, "learning_rate": 9.465646843095566e-06, "loss": 1.185, "step": 2242 }, { "epoch": 0.17209058476017544, "grad_norm": 0.27210697531700134, "learning_rate": 9.465089186755232e-06, "loss": 1.1836, "step": 2243 }, { "epoch": 0.17216730815953352, "grad_norm": 0.19553600251674652, "learning_rate": 9.464531256023173e-06, "loss": 1.0887, "step": 2244 }, { "epoch": 0.1722440315588916, "grad_norm": 0.2750413417816162, "learning_rate": 9.463973050933674e-06, "loss": 1.0976, "step": 2245 }, { "epoch": 0.1723207549582497, "grad_norm": 0.40173277258872986, "learning_rate": 9.463414571521037e-06, "loss": 1.134, "step": 2246 }, { "epoch": 0.17239747835760777, "grad_norm": 0.20773018896579742, "learning_rate": 9.462855817819585e-06, "loss": 1.1175, "step": 2247 }, { "epoch": 0.17247420175696584, "grad_norm": 0.3306054174900055, "learning_rate": 9.462296789863654e-06, "loss": 1.1586, "step": 2248 }, { "epoch": 0.17255092515632392, "grad_norm": 0.2214864194393158, "learning_rate": 9.461737487687597e-06, "loss": 1.1892, "step": 2249 }, { "epoch": 0.172627648555682, "grad_norm": 0.35141053795814514, "learning_rate": 9.461177911325784e-06, "loss": 1.1018, "step": 2250 }, { "epoch": 0.1727043719550401, "grad_norm": 0.243432879447937, "learning_rate": 9.460618060812605e-06, "loss": 1.1748, "step": 2251 }, { "epoch": 0.17278109535439817, "grad_norm": 0.1984720230102539, "learning_rate": 9.46005793618246e-06, "loss": 1.1502, "step": 2252 }, { "epoch": 0.17285781875375625, "grad_norm": 0.09848660975694656, "learning_rate": 9.459497537469774e-06, "loss": 1.1999, "step": 2253 }, { "epoch": 0.17293454215311432, "grad_norm": 0.2322321981191635, "learning_rate": 9.458936864708987e-06, "loss": 1.1254, "step": 2254 }, { "epoch": 0.1730112655524724, "grad_norm": 0.18656785786151886, "learning_rate": 9.458375917934547e-06, "loss": 1.1545, "step": 2255 }, { "epoch": 0.1730879889518305, "grad_norm": 0.20691977441310883, "learning_rate": 9.457814697180931e-06, "loss": 1.2183, "step": 2256 }, { "epoch": 0.17316471235118858, "grad_norm": 0.2298673391342163, "learning_rate": 9.457253202482626e-06, "loss": 1.2105, "step": 2257 }, { "epoch": 0.17324143575054665, "grad_norm": 0.22239619493484497, "learning_rate": 9.456691433874137e-06, "loss": 1.0772, "step": 2258 }, { "epoch": 0.17331815914990473, "grad_norm": 0.1769251674413681, "learning_rate": 9.456129391389988e-06, "loss": 1.1569, "step": 2259 }, { "epoch": 0.17339488254926283, "grad_norm": 0.1990486979484558, "learning_rate": 9.455567075064715e-06, "loss": 1.181, "step": 2260 }, { "epoch": 0.1734716059486209, "grad_norm": 0.21717701852321625, "learning_rate": 9.455004484932875e-06, "loss": 1.1501, "step": 2261 }, { "epoch": 0.17354832934797898, "grad_norm": 0.22434483468532562, "learning_rate": 9.454441621029042e-06, "loss": 1.1453, "step": 2262 }, { "epoch": 0.17362505274733706, "grad_norm": 0.24732886254787445, "learning_rate": 9.453878483387806e-06, "loss": 1.1639, "step": 2263 }, { "epoch": 0.17370177614669513, "grad_norm": 0.1897663176059723, "learning_rate": 9.45331507204377e-06, "loss": 1.1422, "step": 2264 }, { "epoch": 0.17377849954605323, "grad_norm": 0.21556998789310455, "learning_rate": 9.452751387031557e-06, "loss": 1.1303, "step": 2265 }, { "epoch": 0.1738552229454113, "grad_norm": 0.23966392874717712, "learning_rate": 9.452187428385812e-06, "loss": 1.1704, "step": 2266 }, { "epoch": 0.17393194634476938, "grad_norm": 0.2667606770992279, "learning_rate": 9.451623196141188e-06, "loss": 1.1264, "step": 2267 }, { "epoch": 0.17400866974412746, "grad_norm": 0.18140365183353424, "learning_rate": 9.45105869033236e-06, "loss": 1.0877, "step": 2268 }, { "epoch": 0.17408539314348553, "grad_norm": 0.21300767362117767, "learning_rate": 9.450493910994019e-06, "loss": 1.1663, "step": 2269 }, { "epoch": 0.17416211654284364, "grad_norm": 0.20926786959171295, "learning_rate": 9.449928858160869e-06, "loss": 1.1788, "step": 2270 }, { "epoch": 0.1742388399422017, "grad_norm": 0.24936307966709137, "learning_rate": 9.449363531867634e-06, "loss": 1.1249, "step": 2271 }, { "epoch": 0.1743155633415598, "grad_norm": 0.4342026710510254, "learning_rate": 9.44879793214906e-06, "loss": 1.1858, "step": 2272 }, { "epoch": 0.17439228674091786, "grad_norm": 0.2214413285255432, "learning_rate": 9.448232059039902e-06, "loss": 1.1317, "step": 2273 }, { "epoch": 0.17446901014027594, "grad_norm": 0.2272646278142929, "learning_rate": 9.44766591257493e-06, "loss": 1.1624, "step": 2274 }, { "epoch": 0.17454573353963404, "grad_norm": 0.24615876376628876, "learning_rate": 9.447099492788943e-06, "loss": 1.1538, "step": 2275 }, { "epoch": 0.17462245693899212, "grad_norm": 0.20186364650726318, "learning_rate": 9.446532799716743e-06, "loss": 1.2775, "step": 2276 }, { "epoch": 0.1746991803383502, "grad_norm": 0.2002234309911728, "learning_rate": 9.445965833393155e-06, "loss": 1.2096, "step": 2277 }, { "epoch": 0.17477590373770827, "grad_norm": 0.25418147444725037, "learning_rate": 9.445398593853026e-06, "loss": 1.1912, "step": 2278 }, { "epoch": 0.17485262713706634, "grad_norm": 0.21623434126377106, "learning_rate": 9.444831081131209e-06, "loss": 1.1921, "step": 2279 }, { "epoch": 0.17492935053642444, "grad_norm": 0.2068912237882614, "learning_rate": 9.444263295262583e-06, "loss": 1.0679, "step": 2280 }, { "epoch": 0.17500607393578252, "grad_norm": 0.3313167095184326, "learning_rate": 9.443695236282037e-06, "loss": 1.1176, "step": 2281 }, { "epoch": 0.1750827973351406, "grad_norm": 0.19046823680400848, "learning_rate": 9.44312690422448e-06, "loss": 1.1344, "step": 2282 }, { "epoch": 0.17515952073449867, "grad_norm": 0.3890765905380249, "learning_rate": 9.44255829912484e-06, "loss": 1.0821, "step": 2283 }, { "epoch": 0.17523624413385674, "grad_norm": 0.29036301374435425, "learning_rate": 9.441989421018057e-06, "loss": 1.0992, "step": 2284 }, { "epoch": 0.17531296753321485, "grad_norm": 0.21359993517398834, "learning_rate": 9.441420269939089e-06, "loss": 1.1551, "step": 2285 }, { "epoch": 0.17538969093257292, "grad_norm": 1.6387380361557007, "learning_rate": 9.440850845922914e-06, "loss": 1.1884, "step": 2286 }, { "epoch": 0.175466414331931, "grad_norm": 0.2268790453672409, "learning_rate": 9.440281149004525e-06, "loss": 1.1579, "step": 2287 }, { "epoch": 0.17554313773128907, "grad_norm": 0.17945541441440582, "learning_rate": 9.439711179218931e-06, "loss": 1.1556, "step": 2288 }, { "epoch": 0.17561986113064718, "grad_norm": 0.22203391790390015, "learning_rate": 9.439140936601157e-06, "loss": 1.1471, "step": 2289 }, { "epoch": 0.17569658453000525, "grad_norm": 0.28237032890319824, "learning_rate": 9.438570421186248e-06, "loss": 1.1916, "step": 2290 }, { "epoch": 0.17577330792936333, "grad_norm": 0.19107875227928162, "learning_rate": 9.437999633009262e-06, "loss": 1.1655, "step": 2291 }, { "epoch": 0.1758500313287214, "grad_norm": 0.23173512518405914, "learning_rate": 9.437428572105276e-06, "loss": 1.1336, "step": 2292 }, { "epoch": 0.17592675472807948, "grad_norm": 0.22154854238033295, "learning_rate": 9.436857238509382e-06, "loss": 1.1189, "step": 2293 }, { "epoch": 0.17600347812743758, "grad_norm": 0.21880857646465302, "learning_rate": 9.436285632256692e-06, "loss": 1.1828, "step": 2294 }, { "epoch": 0.17608020152679565, "grad_norm": 0.26708757877349854, "learning_rate": 9.435713753382332e-06, "loss": 1.1636, "step": 2295 }, { "epoch": 0.17615692492615373, "grad_norm": 0.2850230038166046, "learning_rate": 9.435141601921444e-06, "loss": 1.1126, "step": 2296 }, { "epoch": 0.1762336483255118, "grad_norm": 0.24924278259277344, "learning_rate": 9.43456917790919e-06, "loss": 1.1124, "step": 2297 }, { "epoch": 0.17631037172486988, "grad_norm": 0.21796689927577972, "learning_rate": 9.433996481380747e-06, "loss": 1.1125, "step": 2298 }, { "epoch": 0.17638709512422798, "grad_norm": 0.2056092470884323, "learning_rate": 9.433423512371309e-06, "loss": 1.199, "step": 2299 }, { "epoch": 0.17646381852358606, "grad_norm": 0.10517267137765884, "learning_rate": 9.432850270916086e-06, "loss": 1.2837, "step": 2300 }, { "epoch": 0.17654054192294413, "grad_norm": 0.19695225358009338, "learning_rate": 9.432276757050305e-06, "loss": 1.1379, "step": 2301 }, { "epoch": 0.1766172653223022, "grad_norm": 0.5709760785102844, "learning_rate": 9.43170297080921e-06, "loss": 1.1679, "step": 2302 }, { "epoch": 0.17669398872166028, "grad_norm": 0.31763994693756104, "learning_rate": 9.43112891222806e-06, "loss": 1.1142, "step": 2303 }, { "epoch": 0.1767707121210184, "grad_norm": 0.2575581967830658, "learning_rate": 9.430554581342136e-06, "loss": 1.095, "step": 2304 }, { "epoch": 0.17684743552037646, "grad_norm": 0.5836862325668335, "learning_rate": 9.429979978186728e-06, "loss": 1.1853, "step": 2305 }, { "epoch": 0.17692415891973454, "grad_norm": 0.2193698137998581, "learning_rate": 9.429405102797152e-06, "loss": 1.14, "step": 2306 }, { "epoch": 0.1770008823190926, "grad_norm": 0.17472149431705475, "learning_rate": 9.42882995520873e-06, "loss": 1.0857, "step": 2307 }, { "epoch": 0.1770776057184507, "grad_norm": 0.2532162070274353, "learning_rate": 9.428254535456812e-06, "loss": 1.1644, "step": 2308 }, { "epoch": 0.1771543291178088, "grad_norm": 0.22910338640213013, "learning_rate": 9.427678843576756e-06, "loss": 1.121, "step": 2309 }, { "epoch": 0.17723105251716686, "grad_norm": 0.3407520353794098, "learning_rate": 9.42710287960394e-06, "loss": 1.1294, "step": 2310 }, { "epoch": 0.17730777591652494, "grad_norm": 0.32914409041404724, "learning_rate": 9.426526643573756e-06, "loss": 1.1912, "step": 2311 }, { "epoch": 0.17738449931588302, "grad_norm": 0.19462457299232483, "learning_rate": 9.425950135521622e-06, "loss": 1.1228, "step": 2312 }, { "epoch": 0.1774612227152411, "grad_norm": 0.31456682085990906, "learning_rate": 9.425373355482959e-06, "loss": 1.2375, "step": 2313 }, { "epoch": 0.1775379461145992, "grad_norm": 0.1816958785057068, "learning_rate": 9.424796303493215e-06, "loss": 1.1106, "step": 2314 }, { "epoch": 0.17761466951395727, "grad_norm": 0.20656925439834595, "learning_rate": 9.424218979587852e-06, "loss": 1.1057, "step": 2315 }, { "epoch": 0.17769139291331534, "grad_norm": 0.22406050562858582, "learning_rate": 9.423641383802346e-06, "loss": 1.1484, "step": 2316 }, { "epoch": 0.17776811631267342, "grad_norm": 0.2519122362136841, "learning_rate": 9.423063516172195e-06, "loss": 1.1912, "step": 2317 }, { "epoch": 0.1778448397120315, "grad_norm": 0.22727879881858826, "learning_rate": 9.422485376732907e-06, "loss": 1.0935, "step": 2318 }, { "epoch": 0.1779215631113896, "grad_norm": 0.2070193886756897, "learning_rate": 9.42190696552001e-06, "loss": 1.1828, "step": 2319 }, { "epoch": 0.17799828651074767, "grad_norm": 0.2702982723712921, "learning_rate": 9.421328282569053e-06, "loss": 1.1041, "step": 2320 }, { "epoch": 0.17807500991010575, "grad_norm": 0.250429630279541, "learning_rate": 9.420749327915595e-06, "loss": 1.1205, "step": 2321 }, { "epoch": 0.17815173330946382, "grad_norm": 0.25259333848953247, "learning_rate": 9.420170101595213e-06, "loss": 1.137, "step": 2322 }, { "epoch": 0.17822845670882193, "grad_norm": 0.5026772022247314, "learning_rate": 9.419590603643505e-06, "loss": 1.0882, "step": 2323 }, { "epoch": 0.17830518010818, "grad_norm": 0.21764060854911804, "learning_rate": 9.41901083409608e-06, "loss": 1.2139, "step": 2324 }, { "epoch": 0.17838190350753808, "grad_norm": 0.24241302907466888, "learning_rate": 9.418430792988569e-06, "loss": 1.1239, "step": 2325 }, { "epoch": 0.17845862690689615, "grad_norm": 0.28173160552978516, "learning_rate": 9.417850480356614e-06, "loss": 1.1626, "step": 2326 }, { "epoch": 0.17853535030625423, "grad_norm": 0.2119292914867401, "learning_rate": 9.41726989623588e-06, "loss": 1.0689, "step": 2327 }, { "epoch": 0.17861207370561233, "grad_norm": 0.21360523998737335, "learning_rate": 9.416689040662044e-06, "loss": 1.1013, "step": 2328 }, { "epoch": 0.1786887971049704, "grad_norm": 0.20058658719062805, "learning_rate": 9.4161079136708e-06, "loss": 1.1312, "step": 2329 }, { "epoch": 0.17876552050432848, "grad_norm": 0.2086380273103714, "learning_rate": 9.415526515297861e-06, "loss": 1.1397, "step": 2330 }, { "epoch": 0.17884224390368655, "grad_norm": 0.2287507951259613, "learning_rate": 9.414944845578955e-06, "loss": 1.1001, "step": 2331 }, { "epoch": 0.17891896730304463, "grad_norm": 0.18642880022525787, "learning_rate": 9.414362904549829e-06, "loss": 1.1382, "step": 2332 }, { "epoch": 0.17899569070240273, "grad_norm": 0.2357829213142395, "learning_rate": 9.413780692246241e-06, "loss": 1.2095, "step": 2333 }, { "epoch": 0.1790724141017608, "grad_norm": 0.09987955540418625, "learning_rate": 9.413198208703976e-06, "loss": 1.2668, "step": 2334 }, { "epoch": 0.17914913750111888, "grad_norm": 0.3159884512424469, "learning_rate": 9.41261545395882e-06, "loss": 1.1339, "step": 2335 }, { "epoch": 0.17922586090047696, "grad_norm": 0.1798710972070694, "learning_rate": 9.412032428046594e-06, "loss": 1.111, "step": 2336 }, { "epoch": 0.17930258429983503, "grad_norm": 0.368941992521286, "learning_rate": 9.41144913100312e-06, "loss": 1.1286, "step": 2337 }, { "epoch": 0.17937930769919314, "grad_norm": 0.09463492780923843, "learning_rate": 9.410865562864247e-06, "loss": 1.1878, "step": 2338 }, { "epoch": 0.1794560310985512, "grad_norm": 0.18612243235111237, "learning_rate": 9.410281723665834e-06, "loss": 1.1475, "step": 2339 }, { "epoch": 0.17953275449790929, "grad_norm": 0.2017063945531845, "learning_rate": 9.409697613443762e-06, "loss": 1.1977, "step": 2340 }, { "epoch": 0.17960947789726736, "grad_norm": 0.327810674905777, "learning_rate": 9.409113232233925e-06, "loss": 1.1158, "step": 2341 }, { "epoch": 0.17968620129662544, "grad_norm": 0.21462811529636383, "learning_rate": 9.408528580072233e-06, "loss": 1.1205, "step": 2342 }, { "epoch": 0.17976292469598354, "grad_norm": 0.24147255718708038, "learning_rate": 9.40794365699462e-06, "loss": 1.1484, "step": 2343 }, { "epoch": 0.17983964809534161, "grad_norm": 0.210632786154747, "learning_rate": 9.407358463037024e-06, "loss": 1.1741, "step": 2344 }, { "epoch": 0.1799163714946997, "grad_norm": 0.4359155297279358, "learning_rate": 9.406772998235412e-06, "loss": 1.153, "step": 2345 }, { "epoch": 0.17999309489405776, "grad_norm": 0.31301286816596985, "learning_rate": 9.40618726262576e-06, "loss": 1.0844, "step": 2346 }, { "epoch": 0.18006981829341584, "grad_norm": 0.5314606428146362, "learning_rate": 9.405601256244063e-06, "loss": 1.0167, "step": 2347 }, { "epoch": 0.18014654169277394, "grad_norm": 0.19321711361408234, "learning_rate": 9.405014979126335e-06, "loss": 1.1848, "step": 2348 }, { "epoch": 0.18022326509213202, "grad_norm": 0.1786811798810959, "learning_rate": 9.4044284313086e-06, "loss": 1.1703, "step": 2349 }, { "epoch": 0.1802999884914901, "grad_norm": 0.21417908370494843, "learning_rate": 9.403841612826908e-06, "loss": 1.0996, "step": 2350 }, { "epoch": 0.18037671189084817, "grad_norm": 0.3697952926158905, "learning_rate": 9.403254523717316e-06, "loss": 1.1948, "step": 2351 }, { "epoch": 0.18045343529020627, "grad_norm": 0.42355433106422424, "learning_rate": 9.402667164015904e-06, "loss": 1.1417, "step": 2352 }, { "epoch": 0.18053015868956435, "grad_norm": 0.22330224514007568, "learning_rate": 9.402079533758767e-06, "loss": 1.2396, "step": 2353 }, { "epoch": 0.18060688208892242, "grad_norm": 0.16782476007938385, "learning_rate": 9.401491632982018e-06, "loss": 1.1639, "step": 2354 }, { "epoch": 0.1806836054882805, "grad_norm": 0.20818805694580078, "learning_rate": 9.400903461721783e-06, "loss": 1.2554, "step": 2355 }, { "epoch": 0.18076032888763857, "grad_norm": 0.17683790624141693, "learning_rate": 9.400315020014208e-06, "loss": 1.1088, "step": 2356 }, { "epoch": 0.18083705228699667, "grad_norm": 0.2826659381389618, "learning_rate": 9.399726307895453e-06, "loss": 1.0898, "step": 2357 }, { "epoch": 0.18091377568635475, "grad_norm": 0.22912181913852692, "learning_rate": 9.399137325401695e-06, "loss": 1.1553, "step": 2358 }, { "epoch": 0.18099049908571282, "grad_norm": 0.18954841792583466, "learning_rate": 9.398548072569132e-06, "loss": 1.1153, "step": 2359 }, { "epoch": 0.1810672224850709, "grad_norm": 0.1938733458518982, "learning_rate": 9.397958549433973e-06, "loss": 1.1598, "step": 2360 }, { "epoch": 0.18114394588442898, "grad_norm": 0.2921582758426666, "learning_rate": 9.397368756032445e-06, "loss": 1.1079, "step": 2361 }, { "epoch": 0.18122066928378708, "grad_norm": 0.21375377476215363, "learning_rate": 9.396778692400795e-06, "loss": 1.1534, "step": 2362 }, { "epoch": 0.18129739268314515, "grad_norm": 0.19660167396068573, "learning_rate": 9.396188358575283e-06, "loss": 1.1358, "step": 2363 }, { "epoch": 0.18137411608250323, "grad_norm": 0.20714402198791504, "learning_rate": 9.395597754592183e-06, "loss": 1.1274, "step": 2364 }, { "epoch": 0.1814508394818613, "grad_norm": 0.19884182512760162, "learning_rate": 9.395006880487796e-06, "loss": 1.0949, "step": 2365 }, { "epoch": 0.18152756288121938, "grad_norm": 0.2518627643585205, "learning_rate": 9.394415736298426e-06, "loss": 1.1207, "step": 2366 }, { "epoch": 0.18160428628057748, "grad_norm": 0.21045829355716705, "learning_rate": 9.393824322060406e-06, "loss": 1.151, "step": 2367 }, { "epoch": 0.18168100967993556, "grad_norm": 0.1928771734237671, "learning_rate": 9.393232637810074e-06, "loss": 1.0329, "step": 2368 }, { "epoch": 0.18175773307929363, "grad_norm": 0.8645706176757812, "learning_rate": 9.392640683583798e-06, "loss": 1.1196, "step": 2369 }, { "epoch": 0.1818344564786517, "grad_norm": 0.23479972779750824, "learning_rate": 9.392048459417949e-06, "loss": 1.1242, "step": 2370 }, { "epoch": 0.18191117987800978, "grad_norm": 0.19704438745975494, "learning_rate": 9.391455965348923e-06, "loss": 1.1381, "step": 2371 }, { "epoch": 0.18198790327736789, "grad_norm": 0.24177175760269165, "learning_rate": 9.39086320141313e-06, "loss": 1.1435, "step": 2372 }, { "epoch": 0.18206462667672596, "grad_norm": 0.25409698486328125, "learning_rate": 9.390270167646997e-06, "loss": 1.0782, "step": 2373 }, { "epoch": 0.18214135007608404, "grad_norm": 0.21464750170707703, "learning_rate": 9.38967686408697e-06, "loss": 1.1978, "step": 2374 }, { "epoch": 0.1822180734754421, "grad_norm": 0.09886762499809265, "learning_rate": 9.389083290769505e-06, "loss": 1.2371, "step": 2375 }, { "epoch": 0.18229479687480019, "grad_norm": 0.10181647539138794, "learning_rate": 9.388489447731082e-06, "loss": 1.1705, "step": 2376 }, { "epoch": 0.1823715202741583, "grad_norm": 0.09788910299539566, "learning_rate": 9.387895335008192e-06, "loss": 1.2603, "step": 2377 }, { "epoch": 0.18244824367351636, "grad_norm": 0.20025131106376648, "learning_rate": 9.387300952637345e-06, "loss": 1.1173, "step": 2378 }, { "epoch": 0.18252496707287444, "grad_norm": 0.23149719834327698, "learning_rate": 9.386706300655069e-06, "loss": 1.2594, "step": 2379 }, { "epoch": 0.18260169047223251, "grad_norm": 0.2737790048122406, "learning_rate": 9.386111379097905e-06, "loss": 1.1499, "step": 2380 }, { "epoch": 0.18267841387159062, "grad_norm": 0.2521069645881653, "learning_rate": 9.385516188002414e-06, "loss": 1.2444, "step": 2381 }, { "epoch": 0.1827551372709487, "grad_norm": 0.19372665882110596, "learning_rate": 9.384920727405171e-06, "loss": 1.1097, "step": 2382 }, { "epoch": 0.18283186067030677, "grad_norm": 0.1934005469083786, "learning_rate": 9.38432499734277e-06, "loss": 1.192, "step": 2383 }, { "epoch": 0.18290858406966484, "grad_norm": 0.16101840138435364, "learning_rate": 9.38372899785182e-06, "loss": 1.0733, "step": 2384 }, { "epoch": 0.18298530746902292, "grad_norm": 0.19541218876838684, "learning_rate": 9.383132728968946e-06, "loss": 1.0817, "step": 2385 }, { "epoch": 0.18306203086838102, "grad_norm": 0.30020204186439514, "learning_rate": 9.38253619073079e-06, "loss": 1.1078, "step": 2386 }, { "epoch": 0.1831387542677391, "grad_norm": 0.19254373013973236, "learning_rate": 9.381939383174013e-06, "loss": 1.1474, "step": 2387 }, { "epoch": 0.18321547766709717, "grad_norm": 0.20799125730991364, "learning_rate": 9.381342306335286e-06, "loss": 1.1602, "step": 2388 }, { "epoch": 0.18329220106645525, "grad_norm": 0.17095202207565308, "learning_rate": 9.380744960251305e-06, "loss": 1.0992, "step": 2389 }, { "epoch": 0.18336892446581332, "grad_norm": 0.18986296653747559, "learning_rate": 9.380147344958778e-06, "loss": 1.0814, "step": 2390 }, { "epoch": 0.18344564786517142, "grad_norm": 0.214242622256279, "learning_rate": 9.37954946049443e-06, "loss": 1.1471, "step": 2391 }, { "epoch": 0.1835223712645295, "grad_norm": 0.20989304780960083, "learning_rate": 9.378951306895001e-06, "loss": 1.0537, "step": 2392 }, { "epoch": 0.18359909466388757, "grad_norm": 0.18013787269592285, "learning_rate": 9.37835288419725e-06, "loss": 1.1336, "step": 2393 }, { "epoch": 0.18367581806324565, "grad_norm": 0.7318264842033386, "learning_rate": 9.377754192437952e-06, "loss": 1.153, "step": 2394 }, { "epoch": 0.18375254146260372, "grad_norm": 0.2457684874534607, "learning_rate": 9.377155231653898e-06, "loss": 1.2449, "step": 2395 }, { "epoch": 0.18382926486196183, "grad_norm": 0.21996010839939117, "learning_rate": 9.376556001881896e-06, "loss": 1.2952, "step": 2396 }, { "epoch": 0.1839059882613199, "grad_norm": 0.20011165738105774, "learning_rate": 9.375956503158771e-06, "loss": 1.1202, "step": 2397 }, { "epoch": 0.18398271166067798, "grad_norm": 0.18757681548595428, "learning_rate": 9.375356735521361e-06, "loss": 1.115, "step": 2398 }, { "epoch": 0.18405943506003605, "grad_norm": 0.1891433745622635, "learning_rate": 9.374756699006527e-06, "loss": 1.1432, "step": 2399 }, { "epoch": 0.18413615845939413, "grad_norm": 0.4122011065483093, "learning_rate": 9.374156393651141e-06, "loss": 1.1181, "step": 2400 }, { "epoch": 0.18421288185875223, "grad_norm": 0.2267225682735443, "learning_rate": 9.373555819492093e-06, "loss": 1.1063, "step": 2401 }, { "epoch": 0.1842896052581103, "grad_norm": 0.2097996324300766, "learning_rate": 9.37295497656629e-06, "loss": 1.1252, "step": 2402 }, { "epoch": 0.18436632865746838, "grad_norm": 0.27888885140419006, "learning_rate": 9.372353864910657e-06, "loss": 1.1616, "step": 2403 }, { "epoch": 0.18444305205682646, "grad_norm": 0.19037437438964844, "learning_rate": 9.37175248456213e-06, "loss": 1.1419, "step": 2404 }, { "epoch": 0.18451977545618453, "grad_norm": 0.22991086542606354, "learning_rate": 9.371150835557671e-06, "loss": 1.0912, "step": 2405 }, { "epoch": 0.18459649885554263, "grad_norm": 0.33740538358688354, "learning_rate": 9.37054891793425e-06, "loss": 1.0964, "step": 2406 }, { "epoch": 0.1846732222549007, "grad_norm": 0.18935498595237732, "learning_rate": 9.369946731728855e-06, "loss": 1.1461, "step": 2407 }, { "epoch": 0.18474994565425878, "grad_norm": 0.2209906131029129, "learning_rate": 9.369344276978494e-06, "loss": 1.1389, "step": 2408 }, { "epoch": 0.18482666905361686, "grad_norm": 0.28039804100990295, "learning_rate": 9.368741553720189e-06, "loss": 1.1556, "step": 2409 }, { "epoch": 0.18490339245297496, "grad_norm": 0.18396000564098358, "learning_rate": 9.36813856199098e-06, "loss": 1.0868, "step": 2410 }, { "epoch": 0.18498011585233304, "grad_norm": 0.29609519243240356, "learning_rate": 9.36753530182792e-06, "loss": 1.0819, "step": 2411 }, { "epoch": 0.1850568392516911, "grad_norm": 0.2850872278213501, "learning_rate": 9.366931773268083e-06, "loss": 1.1715, "step": 2412 }, { "epoch": 0.1851335626510492, "grad_norm": 0.23130787909030914, "learning_rate": 9.366327976348557e-06, "loss": 1.1017, "step": 2413 }, { "epoch": 0.18521028605040726, "grad_norm": 0.3025303781032562, "learning_rate": 9.365723911106445e-06, "loss": 1.1307, "step": 2414 }, { "epoch": 0.18528700944976537, "grad_norm": 0.18974825739860535, "learning_rate": 9.365119577578873e-06, "loss": 1.206, "step": 2415 }, { "epoch": 0.18536373284912344, "grad_norm": 0.28872227668762207, "learning_rate": 9.364514975802973e-06, "loss": 1.1466, "step": 2416 }, { "epoch": 0.18544045624848152, "grad_norm": 0.17394046485424042, "learning_rate": 9.363910105815905e-06, "loss": 1.1388, "step": 2417 }, { "epoch": 0.1855171796478396, "grad_norm": 0.23120230436325073, "learning_rate": 9.363304967654838e-06, "loss": 1.1111, "step": 2418 }, { "epoch": 0.18559390304719767, "grad_norm": 0.20036065578460693, "learning_rate": 9.362699561356957e-06, "loss": 1.1467, "step": 2419 }, { "epoch": 0.18567062644655577, "grad_norm": 0.20037801563739777, "learning_rate": 9.362093886959469e-06, "loss": 1.1541, "step": 2420 }, { "epoch": 0.18574734984591385, "grad_norm": 0.19080275297164917, "learning_rate": 9.361487944499591e-06, "loss": 1.1588, "step": 2421 }, { "epoch": 0.18582407324527192, "grad_norm": 0.2699086666107178, "learning_rate": 9.360881734014563e-06, "loss": 1.1085, "step": 2422 }, { "epoch": 0.18590079664463, "grad_norm": 0.41403135657310486, "learning_rate": 9.360275255541637e-06, "loss": 1.1475, "step": 2423 }, { "epoch": 0.18597752004398807, "grad_norm": 0.17939577996730804, "learning_rate": 9.359668509118081e-06, "loss": 1.1012, "step": 2424 }, { "epoch": 0.18605424344334617, "grad_norm": 0.5334360003471375, "learning_rate": 9.359061494781186e-06, "loss": 1.0719, "step": 2425 }, { "epoch": 0.18613096684270425, "grad_norm": 0.19162875413894653, "learning_rate": 9.358454212568249e-06, "loss": 1.1409, "step": 2426 }, { "epoch": 0.18620769024206232, "grad_norm": 0.2746296525001526, "learning_rate": 9.357846662516592e-06, "loss": 1.1886, "step": 2427 }, { "epoch": 0.1862844136414204, "grad_norm": 0.22200383245944977, "learning_rate": 9.357238844663552e-06, "loss": 1.1193, "step": 2428 }, { "epoch": 0.18636113704077847, "grad_norm": 0.20646783709526062, "learning_rate": 9.356630759046478e-06, "loss": 1.1312, "step": 2429 }, { "epoch": 0.18643786044013658, "grad_norm": 0.23107363283634186, "learning_rate": 9.35602240570274e-06, "loss": 1.1844, "step": 2430 }, { "epoch": 0.18651458383949465, "grad_norm": 0.26555994153022766, "learning_rate": 9.355413784669722e-06, "loss": 1.2007, "step": 2431 }, { "epoch": 0.18659130723885273, "grad_norm": 0.0987178385257721, "learning_rate": 9.354804895984828e-06, "loss": 1.1885, "step": 2432 }, { "epoch": 0.1866680306382108, "grad_norm": 0.20292048156261444, "learning_rate": 9.354195739685472e-06, "loss": 1.2123, "step": 2433 }, { "epoch": 0.18674475403756888, "grad_norm": 0.22490976750850677, "learning_rate": 9.353586315809092e-06, "loss": 1.1042, "step": 2434 }, { "epoch": 0.18682147743692698, "grad_norm": 0.17571033537387848, "learning_rate": 9.352976624393135e-06, "loss": 1.1917, "step": 2435 }, { "epoch": 0.18689820083628506, "grad_norm": 0.19494062662124634, "learning_rate": 9.352366665475073e-06, "loss": 1.1855, "step": 2436 }, { "epoch": 0.18697492423564313, "grad_norm": 0.2306724339723587, "learning_rate": 9.351756439092385e-06, "loss": 1.1578, "step": 2437 }, { "epoch": 0.1870516476350012, "grad_norm": 0.17034202814102173, "learning_rate": 9.351145945282573e-06, "loss": 1.0498, "step": 2438 }, { "epoch": 0.1871283710343593, "grad_norm": 0.42072561383247375, "learning_rate": 9.350535184083153e-06, "loss": 1.175, "step": 2439 }, { "epoch": 0.18720509443371738, "grad_norm": 0.1870511919260025, "learning_rate": 9.349924155531659e-06, "loss": 1.1525, "step": 2440 }, { "epoch": 0.18728181783307546, "grad_norm": 0.20485630631446838, "learning_rate": 9.349312859665639e-06, "loss": 1.1526, "step": 2441 }, { "epoch": 0.18735854123243353, "grad_norm": 0.23674631118774414, "learning_rate": 9.34870129652266e-06, "loss": 1.1524, "step": 2442 }, { "epoch": 0.1874352646317916, "grad_norm": 0.17069941759109497, "learning_rate": 9.348089466140303e-06, "loss": 1.1161, "step": 2443 }, { "epoch": 0.1875119880311497, "grad_norm": 0.263435035943985, "learning_rate": 9.347477368556169e-06, "loss": 1.1339, "step": 2444 }, { "epoch": 0.1875887114305078, "grad_norm": 0.2824350595474243, "learning_rate": 9.346865003807868e-06, "loss": 1.0538, "step": 2445 }, { "epoch": 0.18766543482986586, "grad_norm": 0.2198401391506195, "learning_rate": 9.346252371933037e-06, "loss": 1.1405, "step": 2446 }, { "epoch": 0.18774215822922394, "grad_norm": 0.45627132058143616, "learning_rate": 9.34563947296932e-06, "loss": 1.081, "step": 2447 }, { "epoch": 0.187818881628582, "grad_norm": 0.23075175285339355, "learning_rate": 9.345026306954385e-06, "loss": 1.1931, "step": 2448 }, { "epoch": 0.18789560502794012, "grad_norm": 0.21118523180484772, "learning_rate": 9.34441287392591e-06, "loss": 1.1838, "step": 2449 }, { "epoch": 0.1879723284272982, "grad_norm": 0.2771071791648865, "learning_rate": 9.343799173921591e-06, "loss": 1.1125, "step": 2450 }, { "epoch": 0.18804905182665627, "grad_norm": 0.20014861226081848, "learning_rate": 9.343185206979144e-06, "loss": 1.1308, "step": 2451 }, { "epoch": 0.18812577522601434, "grad_norm": 0.20502130687236786, "learning_rate": 9.342570973136301e-06, "loss": 1.1796, "step": 2452 }, { "epoch": 0.18820249862537242, "grad_norm": 0.21130037307739258, "learning_rate": 9.341956472430803e-06, "loss": 1.1477, "step": 2453 }, { "epoch": 0.18827922202473052, "grad_norm": 0.17879191040992737, "learning_rate": 9.341341704900412e-06, "loss": 1.0798, "step": 2454 }, { "epoch": 0.1883559454240886, "grad_norm": 0.09793960303068161, "learning_rate": 9.340726670582916e-06, "loss": 1.1926, "step": 2455 }, { "epoch": 0.18843266882344667, "grad_norm": 0.2541601359844208, "learning_rate": 9.340111369516101e-06, "loss": 1.1693, "step": 2456 }, { "epoch": 0.18850939222280474, "grad_norm": 0.2067314237356186, "learning_rate": 9.339495801737783e-06, "loss": 1.1472, "step": 2457 }, { "epoch": 0.18858611562216282, "grad_norm": 0.23734678328037262, "learning_rate": 9.338879967285792e-06, "loss": 1.1138, "step": 2458 }, { "epoch": 0.18866283902152092, "grad_norm": 0.21949592232704163, "learning_rate": 9.33826386619797e-06, "loss": 1.1538, "step": 2459 }, { "epoch": 0.188739562420879, "grad_norm": 0.18662001192569733, "learning_rate": 9.337647498512177e-06, "loss": 1.1341, "step": 2460 }, { "epoch": 0.18881628582023707, "grad_norm": 0.2838782072067261, "learning_rate": 9.337030864266294e-06, "loss": 1.0805, "step": 2461 }, { "epoch": 0.18889300921959515, "grad_norm": 0.39552682638168335, "learning_rate": 9.336413963498212e-06, "loss": 1.1698, "step": 2462 }, { "epoch": 0.18896973261895322, "grad_norm": 0.21966984868049622, "learning_rate": 9.335796796245841e-06, "loss": 1.1523, "step": 2463 }, { "epoch": 0.18904645601831133, "grad_norm": 0.28044334053993225, "learning_rate": 9.335179362547112e-06, "loss": 1.2023, "step": 2464 }, { "epoch": 0.1891231794176694, "grad_norm": 0.21810971200466156, "learning_rate": 9.334561662439962e-06, "loss": 1.0789, "step": 2465 }, { "epoch": 0.18919990281702748, "grad_norm": 0.2297026664018631, "learning_rate": 9.333943695962356e-06, "loss": 1.1451, "step": 2466 }, { "epoch": 0.18927662621638555, "grad_norm": 0.21918229758739471, "learning_rate": 9.333325463152263e-06, "loss": 1.174, "step": 2467 }, { "epoch": 0.18935334961574365, "grad_norm": 0.3609507381916046, "learning_rate": 9.332706964047682e-06, "loss": 1.1012, "step": 2468 }, { "epoch": 0.18943007301510173, "grad_norm": 0.2762448489665985, "learning_rate": 9.332088198686618e-06, "loss": 1.1392, "step": 2469 }, { "epoch": 0.1895067964144598, "grad_norm": 0.21043172478675842, "learning_rate": 9.331469167107094e-06, "loss": 1.0658, "step": 2470 }, { "epoch": 0.18958351981381788, "grad_norm": 0.215960294008255, "learning_rate": 9.330849869347154e-06, "loss": 1.1735, "step": 2471 }, { "epoch": 0.18966024321317596, "grad_norm": 0.1918051540851593, "learning_rate": 9.330230305444856e-06, "loss": 1.107, "step": 2472 }, { "epoch": 0.18973696661253406, "grad_norm": 0.2292472869157791, "learning_rate": 9.32961047543827e-06, "loss": 1.1406, "step": 2473 }, { "epoch": 0.18981369001189213, "grad_norm": 0.21515332162380219, "learning_rate": 9.328990379365492e-06, "loss": 1.1246, "step": 2474 }, { "epoch": 0.1898904134112502, "grad_norm": 0.24942520260810852, "learning_rate": 9.328370017264624e-06, "loss": 1.0985, "step": 2475 }, { "epoch": 0.18996713681060828, "grad_norm": 0.19368977844715118, "learning_rate": 9.32774938917379e-06, "loss": 1.1915, "step": 2476 }, { "epoch": 0.19004386020996636, "grad_norm": 0.18712592124938965, "learning_rate": 9.32712849513113e-06, "loss": 1.1183, "step": 2477 }, { "epoch": 0.19012058360932446, "grad_norm": 0.21738122403621674, "learning_rate": 9.326507335174798e-06, "loss": 1.0742, "step": 2478 }, { "epoch": 0.19019730700868254, "grad_norm": 0.20409303903579712, "learning_rate": 9.325885909342968e-06, "loss": 1.1709, "step": 2479 }, { "epoch": 0.1902740304080406, "grad_norm": 0.3023408353328705, "learning_rate": 9.325264217673829e-06, "loss": 1.1918, "step": 2480 }, { "epoch": 0.1903507538073987, "grad_norm": 0.23928506672382355, "learning_rate": 9.324642260205583e-06, "loss": 1.05, "step": 2481 }, { "epoch": 0.19042747720675676, "grad_norm": 0.2281741052865982, "learning_rate": 9.324020036976451e-06, "loss": 1.0847, "step": 2482 }, { "epoch": 0.19050420060611487, "grad_norm": 0.27287134528160095, "learning_rate": 9.323397548024674e-06, "loss": 1.1373, "step": 2483 }, { "epoch": 0.19058092400547294, "grad_norm": 0.25296780467033386, "learning_rate": 9.3227747933885e-06, "loss": 1.1028, "step": 2484 }, { "epoch": 0.19065764740483102, "grad_norm": 0.19942763447761536, "learning_rate": 9.322151773106203e-06, "loss": 1.1599, "step": 2485 }, { "epoch": 0.1907343708041891, "grad_norm": 0.20653682947158813, "learning_rate": 9.321528487216067e-06, "loss": 1.2057, "step": 2486 }, { "epoch": 0.19081109420354717, "grad_norm": 0.20678238570690155, "learning_rate": 9.320904935756396e-06, "loss": 1.1356, "step": 2487 }, { "epoch": 0.19088781760290527, "grad_norm": 0.2872203290462494, "learning_rate": 9.32028111876551e-06, "loss": 1.2078, "step": 2488 }, { "epoch": 0.19096454100226334, "grad_norm": 0.17450734972953796, "learning_rate": 9.319657036281742e-06, "loss": 1.1006, "step": 2489 }, { "epoch": 0.19104126440162142, "grad_norm": 0.193325012922287, "learning_rate": 9.319032688343445e-06, "loss": 1.1362, "step": 2490 }, { "epoch": 0.1911179878009795, "grad_norm": 0.2814209759235382, "learning_rate": 9.318408074988985e-06, "loss": 1.1252, "step": 2491 }, { "epoch": 0.19119471120033757, "grad_norm": 0.18275415897369385, "learning_rate": 9.317783196256749e-06, "loss": 1.2358, "step": 2492 }, { "epoch": 0.19127143459969567, "grad_norm": 0.19886554777622223, "learning_rate": 9.317158052185134e-06, "loss": 1.136, "step": 2493 }, { "epoch": 0.19134815799905375, "grad_norm": 0.21910503506660461, "learning_rate": 9.31653264281256e-06, "loss": 1.1443, "step": 2494 }, { "epoch": 0.19142488139841182, "grad_norm": 0.2970143258571625, "learning_rate": 9.31590696817746e-06, "loss": 1.201, "step": 2495 }, { "epoch": 0.1915016047977699, "grad_norm": 0.20029577612876892, "learning_rate": 9.31528102831828e-06, "loss": 1.0736, "step": 2496 }, { "epoch": 0.191578328197128, "grad_norm": 0.2398151010274887, "learning_rate": 9.314654823273488e-06, "loss": 1.1025, "step": 2497 }, { "epoch": 0.19165505159648608, "grad_norm": 0.20355764031410217, "learning_rate": 9.314028353081565e-06, "loss": 1.1678, "step": 2498 }, { "epoch": 0.19173177499584415, "grad_norm": 0.19060446321964264, "learning_rate": 9.313401617781013e-06, "loss": 1.0609, "step": 2499 }, { "epoch": 0.19180849839520223, "grad_norm": 0.18154865503311157, "learning_rate": 9.312774617410342e-06, "loss": 1.0948, "step": 2500 }, { "epoch": 0.1918852217945603, "grad_norm": 0.21588993072509766, "learning_rate": 9.312147352008086e-06, "loss": 1.1838, "step": 2501 }, { "epoch": 0.1919619451939184, "grad_norm": 0.17698687314987183, "learning_rate": 9.31151982161279e-06, "loss": 1.1835, "step": 2502 }, { "epoch": 0.19203866859327648, "grad_norm": 0.19222405552864075, "learning_rate": 9.310892026263016e-06, "loss": 1.0744, "step": 2503 }, { "epoch": 0.19211539199263455, "grad_norm": 0.18522165715694427, "learning_rate": 9.310263965997347e-06, "loss": 1.0954, "step": 2504 }, { "epoch": 0.19219211539199263, "grad_norm": 0.18681468069553375, "learning_rate": 9.309635640854378e-06, "loss": 1.2272, "step": 2505 }, { "epoch": 0.1922688387913507, "grad_norm": 0.19989225268363953, "learning_rate": 9.309007050872722e-06, "loss": 1.0185, "step": 2506 }, { "epoch": 0.1923455621907088, "grad_norm": 0.23219139873981476, "learning_rate": 9.308378196091006e-06, "loss": 1.1267, "step": 2507 }, { "epoch": 0.19242228559006688, "grad_norm": 0.3544583320617676, "learning_rate": 9.307749076547873e-06, "loss": 1.2044, "step": 2508 }, { "epoch": 0.19249900898942496, "grad_norm": 0.22491201758384705, "learning_rate": 9.307119692281991e-06, "loss": 1.0907, "step": 2509 }, { "epoch": 0.19257573238878303, "grad_norm": 0.18774323165416718, "learning_rate": 9.306490043332031e-06, "loss": 1.1874, "step": 2510 }, { "epoch": 0.1926524557881411, "grad_norm": 0.8534430861473083, "learning_rate": 9.305860129736688e-06, "loss": 1.1503, "step": 2511 }, { "epoch": 0.1927291791874992, "grad_norm": 0.19699726998806, "learning_rate": 9.305229951534673e-06, "loss": 1.1387, "step": 2512 }, { "epoch": 0.1928059025868573, "grad_norm": 0.5184365510940552, "learning_rate": 9.30459950876471e-06, "loss": 1.1504, "step": 2513 }, { "epoch": 0.19288262598621536, "grad_norm": 0.2297804206609726, "learning_rate": 9.303968801465545e-06, "loss": 1.1026, "step": 2514 }, { "epoch": 0.19295934938557344, "grad_norm": 0.23855136334896088, "learning_rate": 9.303337829675932e-06, "loss": 1.1342, "step": 2515 }, { "epoch": 0.1930360727849315, "grad_norm": 0.19637566804885864, "learning_rate": 9.302706593434652e-06, "loss": 1.0435, "step": 2516 }, { "epoch": 0.19311279618428961, "grad_norm": 0.24353186786174774, "learning_rate": 9.30207509278049e-06, "loss": 1.1161, "step": 2517 }, { "epoch": 0.1931895195836477, "grad_norm": 0.23861853778362274, "learning_rate": 9.301443327752256e-06, "loss": 1.1539, "step": 2518 }, { "epoch": 0.19326624298300576, "grad_norm": 0.18293505907058716, "learning_rate": 9.300811298388775e-06, "loss": 1.1463, "step": 2519 }, { "epoch": 0.19334296638236384, "grad_norm": 0.09848205000162125, "learning_rate": 9.300179004728885e-06, "loss": 1.2551, "step": 2520 }, { "epoch": 0.19341968978172192, "grad_norm": 0.28046944737434387, "learning_rate": 9.299546446811442e-06, "loss": 1.1165, "step": 2521 }, { "epoch": 0.19349641318108002, "grad_norm": 0.28856360912323, "learning_rate": 9.29891362467532e-06, "loss": 1.143, "step": 2522 }, { "epoch": 0.1935731365804381, "grad_norm": 0.21269762516021729, "learning_rate": 9.298280538359407e-06, "loss": 1.1316, "step": 2523 }, { "epoch": 0.19364985997979617, "grad_norm": 0.607009768486023, "learning_rate": 9.297647187902608e-06, "loss": 1.063, "step": 2524 }, { "epoch": 0.19372658337915424, "grad_norm": 0.3764013350009918, "learning_rate": 9.297013573343844e-06, "loss": 1.0881, "step": 2525 }, { "epoch": 0.19380330677851235, "grad_norm": 0.2551335096359253, "learning_rate": 9.296379694722051e-06, "loss": 1.1221, "step": 2526 }, { "epoch": 0.19388003017787042, "grad_norm": 0.1988399475812912, "learning_rate": 9.295745552076183e-06, "loss": 1.1549, "step": 2527 }, { "epoch": 0.1939567535772285, "grad_norm": 0.1866539567708969, "learning_rate": 9.295111145445212e-06, "loss": 1.1171, "step": 2528 }, { "epoch": 0.19403347697658657, "grad_norm": 0.17660772800445557, "learning_rate": 9.294476474868122e-06, "loss": 1.0687, "step": 2529 }, { "epoch": 0.19411020037594465, "grad_norm": 0.20373564958572388, "learning_rate": 9.293841540383914e-06, "loss": 1.1337, "step": 2530 }, { "epoch": 0.19418692377530275, "grad_norm": 0.18286508321762085, "learning_rate": 9.293206342031608e-06, "loss": 1.0795, "step": 2531 }, { "epoch": 0.19426364717466083, "grad_norm": 0.35591283440589905, "learning_rate": 9.292570879850242e-06, "loss": 1.1267, "step": 2532 }, { "epoch": 0.1943403705740189, "grad_norm": 0.17500802874565125, "learning_rate": 9.29193515387886e-06, "loss": 1.1779, "step": 2533 }, { "epoch": 0.19441709397337698, "grad_norm": 0.18898549675941467, "learning_rate": 9.291299164156534e-06, "loss": 1.2293, "step": 2534 }, { "epoch": 0.19449381737273505, "grad_norm": 0.2579510509967804, "learning_rate": 9.290662910722346e-06, "loss": 1.2525, "step": 2535 }, { "epoch": 0.19457054077209315, "grad_norm": 0.38403651118278503, "learning_rate": 9.290026393615394e-06, "loss": 1.183, "step": 2536 }, { "epoch": 0.19464726417145123, "grad_norm": 0.20316669344902039, "learning_rate": 9.289389612874797e-06, "loss": 1.1314, "step": 2537 }, { "epoch": 0.1947239875708093, "grad_norm": 0.2110312283039093, "learning_rate": 9.288752568539683e-06, "loss": 1.1423, "step": 2538 }, { "epoch": 0.19480071097016738, "grad_norm": 0.21883869171142578, "learning_rate": 9.288115260649203e-06, "loss": 1.1764, "step": 2539 }, { "epoch": 0.19487743436952545, "grad_norm": 0.4189938008785248, "learning_rate": 9.28747768924252e-06, "loss": 1.0448, "step": 2540 }, { "epoch": 0.19495415776888356, "grad_norm": 0.21981891989707947, "learning_rate": 9.286839854358814e-06, "loss": 1.1416, "step": 2541 }, { "epoch": 0.19503088116824163, "grad_norm": 0.10143481194972992, "learning_rate": 9.286201756037284e-06, "loss": 1.2283, "step": 2542 }, { "epoch": 0.1951076045675997, "grad_norm": 0.293655127286911, "learning_rate": 9.28556339431714e-06, "loss": 1.048, "step": 2543 }, { "epoch": 0.19518432796695778, "grad_norm": 0.2092105597257614, "learning_rate": 9.284924769237613e-06, "loss": 1.1237, "step": 2544 }, { "epoch": 0.19526105136631586, "grad_norm": 0.23735636472702026, "learning_rate": 9.284285880837947e-06, "loss": 1.1822, "step": 2545 }, { "epoch": 0.19533777476567396, "grad_norm": 0.2138822078704834, "learning_rate": 9.283646729157404e-06, "loss": 1.1342, "step": 2546 }, { "epoch": 0.19541449816503204, "grad_norm": 0.47291967272758484, "learning_rate": 9.283007314235263e-06, "loss": 1.087, "step": 2547 }, { "epoch": 0.1954912215643901, "grad_norm": 0.18983684480190277, "learning_rate": 9.282367636110814e-06, "loss": 1.1606, "step": 2548 }, { "epoch": 0.19556794496374819, "grad_norm": 0.19232140481472015, "learning_rate": 9.28172769482337e-06, "loss": 1.2015, "step": 2549 }, { "epoch": 0.19564466836310626, "grad_norm": 0.20874521136283875, "learning_rate": 9.281087490412257e-06, "loss": 1.077, "step": 2550 }, { "epoch": 0.19572139176246436, "grad_norm": 0.27237895131111145, "learning_rate": 9.280447022916816e-06, "loss": 1.0388, "step": 2551 }, { "epoch": 0.19579811516182244, "grad_norm": 0.25707340240478516, "learning_rate": 9.279806292376405e-06, "loss": 1.1804, "step": 2552 }, { "epoch": 0.19587483856118051, "grad_norm": 0.28080105781555176, "learning_rate": 9.279165298830402e-06, "loss": 1.1291, "step": 2553 }, { "epoch": 0.1959515619605386, "grad_norm": 0.2180882841348648, "learning_rate": 9.278524042318193e-06, "loss": 1.1502, "step": 2554 }, { "epoch": 0.1960282853598967, "grad_norm": 0.37032899260520935, "learning_rate": 9.277882522879188e-06, "loss": 1.1618, "step": 2555 }, { "epoch": 0.19610500875925477, "grad_norm": 0.32004573941230774, "learning_rate": 9.27724074055281e-06, "loss": 1.0842, "step": 2556 }, { "epoch": 0.19618173215861284, "grad_norm": 0.24213822185993195, "learning_rate": 9.2765986953785e-06, "loss": 1.2179, "step": 2557 }, { "epoch": 0.19625845555797092, "grad_norm": 0.1872628927230835, "learning_rate": 9.275956387395708e-06, "loss": 1.1207, "step": 2558 }, { "epoch": 0.196335178957329, "grad_norm": 0.2750408947467804, "learning_rate": 9.27531381664391e-06, "loss": 1.1494, "step": 2559 }, { "epoch": 0.1964119023566871, "grad_norm": 0.21351969242095947, "learning_rate": 9.274670983162593e-06, "loss": 1.124, "step": 2560 }, { "epoch": 0.19648862575604517, "grad_norm": 0.20323333144187927, "learning_rate": 9.274027886991259e-06, "loss": 1.0965, "step": 2561 }, { "epoch": 0.19656534915540325, "grad_norm": 0.18214020133018494, "learning_rate": 9.273384528169428e-06, "loss": 1.1465, "step": 2562 }, { "epoch": 0.19664207255476132, "grad_norm": 0.19430948793888092, "learning_rate": 9.27274090673664e-06, "loss": 1.1311, "step": 2563 }, { "epoch": 0.1967187959541194, "grad_norm": 0.26256412267684937, "learning_rate": 9.272097022732444e-06, "loss": 1.1525, "step": 2564 }, { "epoch": 0.1967955193534775, "grad_norm": 0.1932523101568222, "learning_rate": 9.271452876196408e-06, "loss": 1.1398, "step": 2565 }, { "epoch": 0.19687224275283557, "grad_norm": 0.2034810185432434, "learning_rate": 9.270808467168118e-06, "loss": 1.181, "step": 2566 }, { "epoch": 0.19694896615219365, "grad_norm": 0.18785955011844635, "learning_rate": 9.270163795687176e-06, "loss": 1.2204, "step": 2567 }, { "epoch": 0.19702568955155172, "grad_norm": 0.30973392724990845, "learning_rate": 9.269518861793193e-06, "loss": 1.1452, "step": 2568 }, { "epoch": 0.1971024129509098, "grad_norm": 0.20814569294452667, "learning_rate": 9.268873665525808e-06, "loss": 1.1516, "step": 2569 }, { "epoch": 0.1971791363502679, "grad_norm": 0.20263445377349854, "learning_rate": 9.268228206924669e-06, "loss": 1.13, "step": 2570 }, { "epoch": 0.19725585974962598, "grad_norm": 0.1969517022371292, "learning_rate": 9.267582486029438e-06, "loss": 1.1536, "step": 2571 }, { "epoch": 0.19733258314898405, "grad_norm": 0.3571911156177521, "learning_rate": 9.266936502879801e-06, "loss": 1.0947, "step": 2572 }, { "epoch": 0.19740930654834213, "grad_norm": 0.27144184708595276, "learning_rate": 9.266290257515452e-06, "loss": 1.1757, "step": 2573 }, { "epoch": 0.1974860299477002, "grad_norm": 0.20909200608730316, "learning_rate": 9.265643749976105e-06, "loss": 1.127, "step": 2574 }, { "epoch": 0.1975627533470583, "grad_norm": 0.23880212008953094, "learning_rate": 9.264996980301489e-06, "loss": 1.1501, "step": 2575 }, { "epoch": 0.19763947674641638, "grad_norm": 0.20807580649852753, "learning_rate": 9.264349948531351e-06, "loss": 1.1864, "step": 2576 }, { "epoch": 0.19771620014577446, "grad_norm": 0.8263728618621826, "learning_rate": 9.263702654705453e-06, "loss": 1.1744, "step": 2577 }, { "epoch": 0.19779292354513253, "grad_norm": 0.23181487619876862, "learning_rate": 9.263055098863574e-06, "loss": 1.11, "step": 2578 }, { "epoch": 0.1978696469444906, "grad_norm": 0.18058878183364868, "learning_rate": 9.262407281045505e-06, "loss": 1.1577, "step": 2579 }, { "epoch": 0.1979463703438487, "grad_norm": 0.2783648669719696, "learning_rate": 9.261759201291055e-06, "loss": 1.1174, "step": 2580 }, { "epoch": 0.19802309374320679, "grad_norm": 0.22063246369361877, "learning_rate": 9.261110859640056e-06, "loss": 1.1407, "step": 2581 }, { "epoch": 0.19809981714256486, "grad_norm": 0.20966920256614685, "learning_rate": 9.260462256132348e-06, "loss": 1.2215, "step": 2582 }, { "epoch": 0.19817654054192294, "grad_norm": 0.18132494390010834, "learning_rate": 9.259813390807788e-06, "loss": 1.1176, "step": 2583 }, { "epoch": 0.19825326394128104, "grad_norm": 0.17695406079292297, "learning_rate": 9.25916426370625e-06, "loss": 1.1792, "step": 2584 }, { "epoch": 0.1983299873406391, "grad_norm": 0.21168336272239685, "learning_rate": 9.258514874867627e-06, "loss": 1.1356, "step": 2585 }, { "epoch": 0.1984067107399972, "grad_norm": 0.22268906235694885, "learning_rate": 9.257865224331823e-06, "loss": 1.1973, "step": 2586 }, { "epoch": 0.19848343413935526, "grad_norm": 0.20598824322223663, "learning_rate": 9.257215312138762e-06, "loss": 1.133, "step": 2587 }, { "epoch": 0.19856015753871334, "grad_norm": 0.36265307664871216, "learning_rate": 9.256565138328386e-06, "loss": 1.1457, "step": 2588 }, { "epoch": 0.19863688093807144, "grad_norm": 0.19116218388080597, "learning_rate": 9.255914702940644e-06, "loss": 1.1386, "step": 2589 }, { "epoch": 0.19871360433742952, "grad_norm": 0.19425593316555023, "learning_rate": 9.255264006015514e-06, "loss": 1.1397, "step": 2590 }, { "epoch": 0.1987903277367876, "grad_norm": 0.22791337966918945, "learning_rate": 9.254613047592975e-06, "loss": 1.0948, "step": 2591 }, { "epoch": 0.19886705113614567, "grad_norm": 0.242928609251976, "learning_rate": 9.253961827713035e-06, "loss": 1.0745, "step": 2592 }, { "epoch": 0.19894377453550374, "grad_norm": 0.21628640592098236, "learning_rate": 9.253310346415714e-06, "loss": 1.0818, "step": 2593 }, { "epoch": 0.19902049793486185, "grad_norm": 0.21397866308689117, "learning_rate": 9.252658603741045e-06, "loss": 1.1693, "step": 2594 }, { "epoch": 0.19909722133421992, "grad_norm": 0.2655959725379944, "learning_rate": 9.25200659972908e-06, "loss": 1.1256, "step": 2595 }, { "epoch": 0.199173944733578, "grad_norm": 0.24541404843330383, "learning_rate": 9.251354334419887e-06, "loss": 1.0943, "step": 2596 }, { "epoch": 0.19925066813293607, "grad_norm": 0.19006606936454773, "learning_rate": 9.250701807853549e-06, "loss": 1.1753, "step": 2597 }, { "epoch": 0.19932739153229415, "grad_norm": 0.22854457795619965, "learning_rate": 9.250049020070165e-06, "loss": 1.2383, "step": 2598 }, { "epoch": 0.19940411493165225, "grad_norm": 0.23415905237197876, "learning_rate": 9.24939597110985e-06, "loss": 1.1569, "step": 2599 }, { "epoch": 0.19948083833101032, "grad_norm": 0.20544971525669098, "learning_rate": 9.248742661012737e-06, "loss": 1.1314, "step": 2600 }, { "epoch": 0.1995575617303684, "grad_norm": 0.1937289535999298, "learning_rate": 9.248089089818974e-06, "loss": 1.1437, "step": 2601 }, { "epoch": 0.19963428512972647, "grad_norm": 0.20457544922828674, "learning_rate": 9.247435257568724e-06, "loss": 1.1887, "step": 2602 }, { "epoch": 0.19971100852908455, "grad_norm": 0.2574147582054138, "learning_rate": 9.246781164302166e-06, "loss": 1.0931, "step": 2603 }, { "epoch": 0.19978773192844265, "grad_norm": 0.17243705689907074, "learning_rate": 9.246126810059497e-06, "loss": 1.0666, "step": 2604 }, { "epoch": 0.19986445532780073, "grad_norm": 0.22510488331317902, "learning_rate": 9.245472194880929e-06, "loss": 1.0503, "step": 2605 }, { "epoch": 0.1999411787271588, "grad_norm": 0.26788872480392456, "learning_rate": 9.244817318806687e-06, "loss": 1.136, "step": 2606 }, { "epoch": 0.20001790212651688, "grad_norm": 0.4490772485733032, "learning_rate": 9.24416218187702e-06, "loss": 1.1172, "step": 2607 }, { "epoch": 0.20009462552587495, "grad_norm": 0.2959239184856415, "learning_rate": 9.243506784132184e-06, "loss": 1.1023, "step": 2608 }, { "epoch": 0.20017134892523306, "grad_norm": 0.20087051391601562, "learning_rate": 9.242851125612455e-06, "loss": 1.1421, "step": 2609 }, { "epoch": 0.20024807232459113, "grad_norm": 0.309501975774765, "learning_rate": 9.242195206358126e-06, "loss": 1.0876, "step": 2610 }, { "epoch": 0.2003247957239492, "grad_norm": 0.22486063838005066, "learning_rate": 9.241539026409506e-06, "loss": 1.1033, "step": 2611 }, { "epoch": 0.20040151912330728, "grad_norm": 0.1920100897550583, "learning_rate": 9.240882585806918e-06, "loss": 1.2024, "step": 2612 }, { "epoch": 0.20047824252266538, "grad_norm": 0.6634340286254883, "learning_rate": 9.240225884590702e-06, "loss": 1.0273, "step": 2613 }, { "epoch": 0.20055496592202346, "grad_norm": 0.24296753108501434, "learning_rate": 9.239568922801213e-06, "loss": 1.1853, "step": 2614 }, { "epoch": 0.20063168932138153, "grad_norm": 0.19506946206092834, "learning_rate": 9.238911700478825e-06, "loss": 1.0746, "step": 2615 }, { "epoch": 0.2007084127207396, "grad_norm": 0.2750384211540222, "learning_rate": 9.238254217663925e-06, "loss": 1.1529, "step": 2616 }, { "epoch": 0.20078513612009768, "grad_norm": 0.20466287434101105, "learning_rate": 9.237596474396916e-06, "loss": 1.1567, "step": 2617 }, { "epoch": 0.2008618595194558, "grad_norm": 0.21720682084560394, "learning_rate": 9.236938470718222e-06, "loss": 1.1436, "step": 2618 }, { "epoch": 0.20093858291881386, "grad_norm": 0.25886407494544983, "learning_rate": 9.236280206668276e-06, "loss": 1.1401, "step": 2619 }, { "epoch": 0.20101530631817194, "grad_norm": 0.19732670485973358, "learning_rate": 9.235621682287529e-06, "loss": 1.0936, "step": 2620 }, { "epoch": 0.20109202971753, "grad_norm": 0.2194903939962387, "learning_rate": 9.23496289761645e-06, "loss": 1.1499, "step": 2621 }, { "epoch": 0.2011687531168881, "grad_norm": 0.252194881439209, "learning_rate": 9.234303852695526e-06, "loss": 1.1001, "step": 2622 }, { "epoch": 0.2012454765162462, "grad_norm": 0.2873404920101166, "learning_rate": 9.233644547565255e-06, "loss": 1.1405, "step": 2623 }, { "epoch": 0.20132219991560427, "grad_norm": 0.4049360454082489, "learning_rate": 9.23298498226615e-06, "loss": 1.1099, "step": 2624 }, { "epoch": 0.20139892331496234, "grad_norm": 0.21035848557949066, "learning_rate": 9.232325156838748e-06, "loss": 1.0309, "step": 2625 }, { "epoch": 0.20147564671432042, "grad_norm": 0.18553276360034943, "learning_rate": 9.231665071323595e-06, "loss": 1.1248, "step": 2626 }, { "epoch": 0.2015523701136785, "grad_norm": 0.26643362641334534, "learning_rate": 9.231004725761256e-06, "loss": 1.1146, "step": 2627 }, { "epoch": 0.2016290935130366, "grad_norm": 0.3031284511089325, "learning_rate": 9.230344120192309e-06, "loss": 1.1091, "step": 2628 }, { "epoch": 0.20170581691239467, "grad_norm": 0.24159517884254456, "learning_rate": 9.229683254657351e-06, "loss": 1.145, "step": 2629 }, { "epoch": 0.20178254031175274, "grad_norm": 0.16513600945472717, "learning_rate": 9.229022129196995e-06, "loss": 1.1317, "step": 2630 }, { "epoch": 0.20185926371111082, "grad_norm": 0.20276682078838348, "learning_rate": 9.228360743851864e-06, "loss": 1.0637, "step": 2631 }, { "epoch": 0.2019359871104689, "grad_norm": 0.21850235760211945, "learning_rate": 9.22769909866261e-06, "loss": 1.1528, "step": 2632 }, { "epoch": 0.202012710509827, "grad_norm": 0.20218157768249512, "learning_rate": 9.227037193669888e-06, "loss": 1.161, "step": 2633 }, { "epoch": 0.20208943390918507, "grad_norm": 0.22403372824192047, "learning_rate": 9.226375028914375e-06, "loss": 1.1349, "step": 2634 }, { "epoch": 0.20216615730854315, "grad_norm": 0.21174845099449158, "learning_rate": 9.22571260443676e-06, "loss": 1.148, "step": 2635 }, { "epoch": 0.20224288070790122, "grad_norm": 0.19835425913333893, "learning_rate": 9.225049920277756e-06, "loss": 1.1584, "step": 2636 }, { "epoch": 0.2023196041072593, "grad_norm": 0.19570711255073547, "learning_rate": 9.224386976478082e-06, "loss": 1.1965, "step": 2637 }, { "epoch": 0.2023963275066174, "grad_norm": 0.2870130240917206, "learning_rate": 9.22372377307848e-06, "loss": 1.176, "step": 2638 }, { "epoch": 0.20247305090597548, "grad_norm": 0.2116210013628006, "learning_rate": 9.223060310119704e-06, "loss": 1.1329, "step": 2639 }, { "epoch": 0.20254977430533355, "grad_norm": 0.46388137340545654, "learning_rate": 9.222396587642528e-06, "loss": 1.1287, "step": 2640 }, { "epoch": 0.20262649770469163, "grad_norm": 0.20525678992271423, "learning_rate": 9.221732605687737e-06, "loss": 1.1355, "step": 2641 }, { "epoch": 0.20270322110404973, "grad_norm": 0.4250236749649048, "learning_rate": 9.221068364296138e-06, "loss": 1.1298, "step": 2642 }, { "epoch": 0.2027799445034078, "grad_norm": 0.2112552374601364, "learning_rate": 9.220403863508547e-06, "loss": 1.1988, "step": 2643 }, { "epoch": 0.20285666790276588, "grad_norm": 0.20039837062358856, "learning_rate": 9.2197391033658e-06, "loss": 1.1239, "step": 2644 }, { "epoch": 0.20293339130212396, "grad_norm": 0.22665783762931824, "learning_rate": 9.21907408390875e-06, "loss": 1.1541, "step": 2645 }, { "epoch": 0.20301011470148203, "grad_norm": 0.2282121777534485, "learning_rate": 9.21840880517826e-06, "loss": 1.1096, "step": 2646 }, { "epoch": 0.20308683810084013, "grad_norm": 0.20818491280078888, "learning_rate": 9.21774326721522e-06, "loss": 1.1349, "step": 2647 }, { "epoch": 0.2031635615001982, "grad_norm": 0.6065112948417664, "learning_rate": 9.217077470060525e-06, "loss": 1.1543, "step": 2648 }, { "epoch": 0.20324028489955628, "grad_norm": 0.23158849775791168, "learning_rate": 9.216411413755089e-06, "loss": 1.1495, "step": 2649 }, { "epoch": 0.20331700829891436, "grad_norm": 0.23077012598514557, "learning_rate": 9.215745098339843e-06, "loss": 1.1406, "step": 2650 }, { "epoch": 0.20339373169827243, "grad_norm": 0.24257968366146088, "learning_rate": 9.215078523855736e-06, "loss": 1.1616, "step": 2651 }, { "epoch": 0.20347045509763054, "grad_norm": 0.21442250907421112, "learning_rate": 9.214411690343732e-06, "loss": 1.1089, "step": 2652 }, { "epoch": 0.2035471784969886, "grad_norm": 0.10401368886232376, "learning_rate": 9.213744597844806e-06, "loss": 1.286, "step": 2653 }, { "epoch": 0.2036239018963467, "grad_norm": 0.6592430472373962, "learning_rate": 9.213077246399955e-06, "loss": 1.1196, "step": 2654 }, { "epoch": 0.20370062529570476, "grad_norm": 0.19946454465389252, "learning_rate": 9.212409636050187e-06, "loss": 1.1358, "step": 2655 }, { "epoch": 0.20377734869506284, "grad_norm": 0.236355721950531, "learning_rate": 9.211741766836533e-06, "loss": 1.1048, "step": 2656 }, { "epoch": 0.20385407209442094, "grad_norm": 0.2166309654712677, "learning_rate": 9.21107363880003e-06, "loss": 1.0897, "step": 2657 }, { "epoch": 0.20393079549377902, "grad_norm": 0.20093920826911926, "learning_rate": 9.21040525198174e-06, "loss": 1.0739, "step": 2658 }, { "epoch": 0.2040075188931371, "grad_norm": 0.2285643219947815, "learning_rate": 9.209736606422736e-06, "loss": 1.0896, "step": 2659 }, { "epoch": 0.20408424229249517, "grad_norm": 0.42286327481269836, "learning_rate": 9.209067702164109e-06, "loss": 1.1766, "step": 2660 }, { "epoch": 0.20416096569185324, "grad_norm": 0.18449077010154724, "learning_rate": 9.208398539246964e-06, "loss": 1.0869, "step": 2661 }, { "epoch": 0.20423768909121134, "grad_norm": 0.19393408298492432, "learning_rate": 9.207729117712422e-06, "loss": 1.118, "step": 2662 }, { "epoch": 0.20431441249056942, "grad_norm": 0.19877506792545319, "learning_rate": 9.207059437601622e-06, "loss": 1.0678, "step": 2663 }, { "epoch": 0.2043911358899275, "grad_norm": 0.21774089336395264, "learning_rate": 9.206389498955717e-06, "loss": 1.1813, "step": 2664 }, { "epoch": 0.20446785928928557, "grad_norm": 2.406858205795288, "learning_rate": 9.205719301815877e-06, "loss": 1.0918, "step": 2665 }, { "epoch": 0.20454458268864364, "grad_norm": 0.2420181781053543, "learning_rate": 9.205048846223288e-06, "loss": 1.1429, "step": 2666 }, { "epoch": 0.20462130608800175, "grad_norm": 0.20093071460723877, "learning_rate": 9.204378132219149e-06, "loss": 1.1146, "step": 2667 }, { "epoch": 0.20469802948735982, "grad_norm": 0.21142229437828064, "learning_rate": 9.20370715984468e-06, "loss": 1.0955, "step": 2668 }, { "epoch": 0.2047747528867179, "grad_norm": 0.1879918873310089, "learning_rate": 9.203035929141113e-06, "loss": 1.0367, "step": 2669 }, { "epoch": 0.20485147628607597, "grad_norm": 0.3109991252422333, "learning_rate": 9.202364440149695e-06, "loss": 1.1678, "step": 2670 }, { "epoch": 0.20492819968543408, "grad_norm": 0.2279961109161377, "learning_rate": 9.201692692911696e-06, "loss": 1.1953, "step": 2671 }, { "epoch": 0.20500492308479215, "grad_norm": 0.22924835979938507, "learning_rate": 9.201020687468391e-06, "loss": 1.1949, "step": 2672 }, { "epoch": 0.20508164648415023, "grad_norm": 0.25072014331817627, "learning_rate": 9.200348423861079e-06, "loss": 1.1446, "step": 2673 }, { "epoch": 0.2051583698835083, "grad_norm": 0.293570339679718, "learning_rate": 9.199675902131071e-06, "loss": 1.1242, "step": 2674 }, { "epoch": 0.20523509328286638, "grad_norm": 0.2756400406360626, "learning_rate": 9.199003122319699e-06, "loss": 1.1001, "step": 2675 }, { "epoch": 0.20531181668222448, "grad_norm": 0.09731018543243408, "learning_rate": 9.198330084468302e-06, "loss": 1.2636, "step": 2676 }, { "epoch": 0.20538854008158255, "grad_norm": 0.2234748750925064, "learning_rate": 9.197656788618243e-06, "loss": 1.1456, "step": 2677 }, { "epoch": 0.20546526348094063, "grad_norm": 0.19704729318618774, "learning_rate": 9.1969832348109e-06, "loss": 1.0547, "step": 2678 }, { "epoch": 0.2055419868802987, "grad_norm": 0.1886373609304428, "learning_rate": 9.19630942308766e-06, "loss": 1.135, "step": 2679 }, { "epoch": 0.20561871027965678, "grad_norm": 0.539902925491333, "learning_rate": 9.195635353489932e-06, "loss": 1.1471, "step": 2680 }, { "epoch": 0.20569543367901488, "grad_norm": 0.260736882686615, "learning_rate": 9.194961026059139e-06, "loss": 1.1703, "step": 2681 }, { "epoch": 0.20577215707837296, "grad_norm": 0.2728630304336548, "learning_rate": 9.194286440836723e-06, "loss": 1.1139, "step": 2682 }, { "epoch": 0.20584888047773103, "grad_norm": 0.2842792570590973, "learning_rate": 9.193611597864138e-06, "loss": 1.1524, "step": 2683 }, { "epoch": 0.2059256038770891, "grad_norm": 0.24690203368663788, "learning_rate": 9.192936497182853e-06, "loss": 1.0502, "step": 2684 }, { "epoch": 0.20600232727644718, "grad_norm": 0.29309582710266113, "learning_rate": 9.192261138834357e-06, "loss": 1.1368, "step": 2685 }, { "epoch": 0.2060790506758053, "grad_norm": 0.6340298652648926, "learning_rate": 9.191585522860151e-06, "loss": 1.0699, "step": 2686 }, { "epoch": 0.20615577407516336, "grad_norm": 0.20854444801807404, "learning_rate": 9.190909649301754e-06, "loss": 1.1933, "step": 2687 }, { "epoch": 0.20623249747452144, "grad_norm": 0.30726319551467896, "learning_rate": 9.190233518200699e-06, "loss": 1.088, "step": 2688 }, { "epoch": 0.2063092208738795, "grad_norm": 0.20215004682540894, "learning_rate": 9.189557129598538e-06, "loss": 1.095, "step": 2689 }, { "epoch": 0.2063859442732376, "grad_norm": 0.2013946920633316, "learning_rate": 9.188880483536837e-06, "loss": 1.1296, "step": 2690 }, { "epoch": 0.2064626676725957, "grad_norm": 0.20333589613437653, "learning_rate": 9.188203580057178e-06, "loss": 1.1476, "step": 2691 }, { "epoch": 0.20653939107195377, "grad_norm": 0.21958027780056, "learning_rate": 9.187526419201153e-06, "loss": 1.1644, "step": 2692 }, { "epoch": 0.20661611447131184, "grad_norm": 0.21318818628787994, "learning_rate": 9.186849001010384e-06, "loss": 1.1057, "step": 2693 }, { "epoch": 0.20669283787066992, "grad_norm": 0.10352859646081924, "learning_rate": 9.186171325526494e-06, "loss": 1.2814, "step": 2694 }, { "epoch": 0.206769561270028, "grad_norm": 0.27395349740982056, "learning_rate": 9.185493392791132e-06, "loss": 1.1163, "step": 2695 }, { "epoch": 0.2068462846693861, "grad_norm": 0.20655131340026855, "learning_rate": 9.184815202845954e-06, "loss": 1.1215, "step": 2696 }, { "epoch": 0.20692300806874417, "grad_norm": 0.21435342729091644, "learning_rate": 9.18413675573264e-06, "loss": 1.0792, "step": 2697 }, { "epoch": 0.20699973146810224, "grad_norm": 0.09716975688934326, "learning_rate": 9.183458051492884e-06, "loss": 1.2688, "step": 2698 }, { "epoch": 0.20707645486746032, "grad_norm": 0.1899331659078598, "learning_rate": 9.18277909016839e-06, "loss": 1.1364, "step": 2699 }, { "epoch": 0.20715317826681842, "grad_norm": 0.21329458057880402, "learning_rate": 9.182099871800884e-06, "loss": 1.1226, "step": 2700 }, { "epoch": 0.2072299016661765, "grad_norm": 0.19685281813144684, "learning_rate": 9.181420396432105e-06, "loss": 1.2019, "step": 2701 }, { "epoch": 0.20730662506553457, "grad_norm": 0.22558322548866272, "learning_rate": 9.180740664103812e-06, "loss": 1.1405, "step": 2702 }, { "epoch": 0.20738334846489265, "grad_norm": 0.27195265889167786, "learning_rate": 9.18006067485777e-06, "loss": 1.1921, "step": 2703 }, { "epoch": 0.20746007186425072, "grad_norm": 0.1855841428041458, "learning_rate": 9.179380428735773e-06, "loss": 1.1169, "step": 2704 }, { "epoch": 0.20753679526360883, "grad_norm": 0.2593628764152527, "learning_rate": 9.17869992577962e-06, "loss": 1.1169, "step": 2705 }, { "epoch": 0.2076135186629669, "grad_norm": 0.2848238945007324, "learning_rate": 9.178019166031129e-06, "loss": 1.1043, "step": 2706 }, { "epoch": 0.20769024206232498, "grad_norm": 0.24928425252437592, "learning_rate": 9.177338149532137e-06, "loss": 1.263, "step": 2707 }, { "epoch": 0.20776696546168305, "grad_norm": 0.22740525007247925, "learning_rate": 9.176656876324493e-06, "loss": 1.1424, "step": 2708 }, { "epoch": 0.20784368886104113, "grad_norm": 0.3799554109573364, "learning_rate": 9.175975346450063e-06, "loss": 1.1526, "step": 2709 }, { "epoch": 0.20792041226039923, "grad_norm": 0.20894905924797058, "learning_rate": 9.175293559950731e-06, "loss": 1.1046, "step": 2710 }, { "epoch": 0.2079971356597573, "grad_norm": 0.32691070437431335, "learning_rate": 9.174611516868392e-06, "loss": 1.0639, "step": 2711 }, { "epoch": 0.20807385905911538, "grad_norm": 0.21444830298423767, "learning_rate": 9.17392921724496e-06, "loss": 1.1108, "step": 2712 }, { "epoch": 0.20815058245847345, "grad_norm": 0.2465330958366394, "learning_rate": 9.173246661122364e-06, "loss": 1.1267, "step": 2713 }, { "epoch": 0.20822730585783153, "grad_norm": 0.23367005586624146, "learning_rate": 9.172563848542551e-06, "loss": 1.0773, "step": 2714 }, { "epoch": 0.20830402925718963, "grad_norm": 0.19375643134117126, "learning_rate": 9.171880779547479e-06, "loss": 1.1687, "step": 2715 }, { "epoch": 0.2083807526565477, "grad_norm": 0.1902383714914322, "learning_rate": 9.171197454179124e-06, "loss": 1.1573, "step": 2716 }, { "epoch": 0.20845747605590578, "grad_norm": 0.18960009515285492, "learning_rate": 9.17051387247948e-06, "loss": 1.2054, "step": 2717 }, { "epoch": 0.20853419945526386, "grad_norm": 0.1810789406299591, "learning_rate": 9.169830034490557e-06, "loss": 1.0671, "step": 2718 }, { "epoch": 0.20861092285462193, "grad_norm": 0.19419518113136292, "learning_rate": 9.169145940254374e-06, "loss": 1.109, "step": 2719 }, { "epoch": 0.20868764625398004, "grad_norm": 0.19084645807743073, "learning_rate": 9.168461589812972e-06, "loss": 1.1554, "step": 2720 }, { "epoch": 0.2087643696533381, "grad_norm": 0.3770262897014618, "learning_rate": 9.167776983208408e-06, "loss": 1.1483, "step": 2721 }, { "epoch": 0.2088410930526962, "grad_norm": 0.27327707409858704, "learning_rate": 9.167092120482753e-06, "loss": 1.1225, "step": 2722 }, { "epoch": 0.20891781645205426, "grad_norm": 0.19827616214752197, "learning_rate": 9.166407001678091e-06, "loss": 1.2379, "step": 2723 }, { "epoch": 0.20899453985141234, "grad_norm": 0.5661057233810425, "learning_rate": 9.165721626836523e-06, "loss": 1.0982, "step": 2724 }, { "epoch": 0.20907126325077044, "grad_norm": 0.18292362987995148, "learning_rate": 9.165035996000173e-06, "loss": 1.1823, "step": 2725 }, { "epoch": 0.20914798665012851, "grad_norm": 0.22452294826507568, "learning_rate": 9.164350109211171e-06, "loss": 1.1803, "step": 2726 }, { "epoch": 0.2092247100494866, "grad_norm": 0.22411797940731049, "learning_rate": 9.16366396651167e-06, "loss": 1.0689, "step": 2727 }, { "epoch": 0.20930143344884466, "grad_norm": 0.1890091449022293, "learning_rate": 9.162977567943828e-06, "loss": 1.127, "step": 2728 }, { "epoch": 0.20937815684820277, "grad_norm": 0.21168601512908936, "learning_rate": 9.162290913549833e-06, "loss": 1.0952, "step": 2729 }, { "epoch": 0.20945488024756084, "grad_norm": 0.2892916798591614, "learning_rate": 9.161604003371879e-06, "loss": 1.1813, "step": 2730 }, { "epoch": 0.20953160364691892, "grad_norm": 0.22272151708602905, "learning_rate": 9.160916837452179e-06, "loss": 1.1091, "step": 2731 }, { "epoch": 0.209608327046277, "grad_norm": 0.18746984004974365, "learning_rate": 9.16022941583296e-06, "loss": 1.106, "step": 2732 }, { "epoch": 0.20968505044563507, "grad_norm": 0.21152754127979279, "learning_rate": 9.159541738556468e-06, "loss": 1.2143, "step": 2733 }, { "epoch": 0.20976177384499317, "grad_norm": 0.21341870725154877, "learning_rate": 9.158853805664962e-06, "loss": 1.0711, "step": 2734 }, { "epoch": 0.20983849724435125, "grad_norm": 0.20185285806655884, "learning_rate": 9.158165617200717e-06, "loss": 1.1813, "step": 2735 }, { "epoch": 0.20991522064370932, "grad_norm": 0.22903046011924744, "learning_rate": 9.157477173206024e-06, "loss": 1.1193, "step": 2736 }, { "epoch": 0.2099919440430674, "grad_norm": 0.19198419153690338, "learning_rate": 9.156788473723188e-06, "loss": 1.084, "step": 2737 }, { "epoch": 0.21006866744242547, "grad_norm": 0.16555511951446533, "learning_rate": 9.156099518794535e-06, "loss": 1.1011, "step": 2738 }, { "epoch": 0.21014539084178357, "grad_norm": 0.23619763553142548, "learning_rate": 9.1554103084624e-06, "loss": 1.1132, "step": 2739 }, { "epoch": 0.21022211424114165, "grad_norm": 0.27828076481819153, "learning_rate": 9.15472084276914e-06, "loss": 1.1936, "step": 2740 }, { "epoch": 0.21029883764049973, "grad_norm": 0.18146122992038727, "learning_rate": 9.154031121757121e-06, "loss": 1.1995, "step": 2741 }, { "epoch": 0.2103755610398578, "grad_norm": 0.21266181766986847, "learning_rate": 9.15334114546873e-06, "loss": 1.0946, "step": 2742 }, { "epoch": 0.21045228443921588, "grad_norm": 0.2271893322467804, "learning_rate": 9.152650913946368e-06, "loss": 1.107, "step": 2743 }, { "epoch": 0.21052900783857398, "grad_norm": 0.24058455228805542, "learning_rate": 9.15196042723245e-06, "loss": 1.1307, "step": 2744 }, { "epoch": 0.21060573123793205, "grad_norm": 0.5376937389373779, "learning_rate": 9.15126968536941e-06, "loss": 1.1419, "step": 2745 }, { "epoch": 0.21068245463729013, "grad_norm": 0.19568675756454468, "learning_rate": 9.150578688399697e-06, "loss": 1.0887, "step": 2746 }, { "epoch": 0.2107591780366482, "grad_norm": 0.32640406489372253, "learning_rate": 9.149887436365772e-06, "loss": 1.1169, "step": 2747 }, { "epoch": 0.21083590143600628, "grad_norm": 0.19817675650119781, "learning_rate": 9.149195929310116e-06, "loss": 1.1637, "step": 2748 }, { "epoch": 0.21091262483536438, "grad_norm": 0.22412942349910736, "learning_rate": 9.148504167275222e-06, "loss": 1.1181, "step": 2749 }, { "epoch": 0.21098934823472246, "grad_norm": 0.1760866940021515, "learning_rate": 9.147812150303603e-06, "loss": 1.1317, "step": 2750 }, { "epoch": 0.21106607163408053, "grad_norm": 0.09167621284723282, "learning_rate": 9.147119878437784e-06, "loss": 1.1293, "step": 2751 }, { "epoch": 0.2111427950334386, "grad_norm": 0.1828722208738327, "learning_rate": 9.14642735172031e-06, "loss": 1.1196, "step": 2752 }, { "epoch": 0.21121951843279668, "grad_norm": 0.18752868473529816, "learning_rate": 9.145734570193731e-06, "loss": 1.2098, "step": 2753 }, { "epoch": 0.21129624183215479, "grad_norm": 0.213824063539505, "learning_rate": 9.14504153390063e-06, "loss": 1.0964, "step": 2754 }, { "epoch": 0.21137296523151286, "grad_norm": 0.23670907318592072, "learning_rate": 9.144348242883587e-06, "loss": 1.1744, "step": 2755 }, { "epoch": 0.21144968863087094, "grad_norm": 0.21236902475357056, "learning_rate": 9.143654697185214e-06, "loss": 1.145, "step": 2756 }, { "epoch": 0.211526412030229, "grad_norm": 0.6503284573554993, "learning_rate": 9.142960896848125e-06, "loss": 1.158, "step": 2757 }, { "epoch": 0.2116031354295871, "grad_norm": 0.24566712975502014, "learning_rate": 9.142266841914962e-06, "loss": 1.1711, "step": 2758 }, { "epoch": 0.2116798588289452, "grad_norm": 0.2360415905714035, "learning_rate": 9.14157253242837e-06, "loss": 1.1336, "step": 2759 }, { "epoch": 0.21175658222830326, "grad_norm": 0.102275051176548, "learning_rate": 9.140877968431022e-06, "loss": 1.1817, "step": 2760 }, { "epoch": 0.21183330562766134, "grad_norm": 0.17798258364200592, "learning_rate": 9.140183149965597e-06, "loss": 1.1059, "step": 2761 }, { "epoch": 0.21191002902701941, "grad_norm": 0.2842516303062439, "learning_rate": 9.139488077074796e-06, "loss": 1.1249, "step": 2762 }, { "epoch": 0.21198675242637752, "grad_norm": 0.32537153363227844, "learning_rate": 9.138792749801332e-06, "loss": 1.1197, "step": 2763 }, { "epoch": 0.2120634758257356, "grad_norm": 0.24423564970493317, "learning_rate": 9.138097168187934e-06, "loss": 1.1255, "step": 2764 }, { "epoch": 0.21214019922509367, "grad_norm": 0.31014952063560486, "learning_rate": 9.137401332277348e-06, "loss": 1.0539, "step": 2765 }, { "epoch": 0.21221692262445174, "grad_norm": 0.18016107380390167, "learning_rate": 9.136705242112335e-06, "loss": 1.1206, "step": 2766 }, { "epoch": 0.21229364602380982, "grad_norm": 0.26399552822113037, "learning_rate": 9.136008897735673e-06, "loss": 1.0742, "step": 2767 }, { "epoch": 0.21237036942316792, "grad_norm": 0.202462300658226, "learning_rate": 9.135312299190152e-06, "loss": 1.1419, "step": 2768 }, { "epoch": 0.212447092822526, "grad_norm": 0.19997349381446838, "learning_rate": 9.134615446518582e-06, "loss": 1.0966, "step": 2769 }, { "epoch": 0.21252381622188407, "grad_norm": 0.1917419284582138, "learning_rate": 9.133918339763784e-06, "loss": 1.2358, "step": 2770 }, { "epoch": 0.21260053962124215, "grad_norm": 0.09389626234769821, "learning_rate": 9.133220978968597e-06, "loss": 1.2555, "step": 2771 }, { "epoch": 0.21267726302060022, "grad_norm": 0.21205243468284607, "learning_rate": 9.132523364175879e-06, "loss": 1.0348, "step": 2772 }, { "epoch": 0.21275398641995832, "grad_norm": 0.23376494646072388, "learning_rate": 9.131825495428496e-06, "loss": 1.1678, "step": 2773 }, { "epoch": 0.2128307098193164, "grad_norm": 0.18937155604362488, "learning_rate": 9.13112737276934e-06, "loss": 1.1521, "step": 2774 }, { "epoch": 0.21290743321867447, "grad_norm": 0.35349729657173157, "learning_rate": 9.130428996241306e-06, "loss": 1.1498, "step": 2775 }, { "epoch": 0.21298415661803255, "grad_norm": 0.20081953704357147, "learning_rate": 9.129730365887312e-06, "loss": 1.1008, "step": 2776 }, { "epoch": 0.21306088001739062, "grad_norm": 0.29083263874053955, "learning_rate": 9.129031481750295e-06, "loss": 1.1359, "step": 2777 }, { "epoch": 0.21313760341674873, "grad_norm": 0.2275586575269699, "learning_rate": 9.1283323438732e-06, "loss": 1.0548, "step": 2778 }, { "epoch": 0.2132143268161068, "grad_norm": 0.426830530166626, "learning_rate": 9.127632952298993e-06, "loss": 1.1754, "step": 2779 }, { "epoch": 0.21329105021546488, "grad_norm": 0.29951515793800354, "learning_rate": 9.12693330707065e-06, "loss": 1.1149, "step": 2780 }, { "epoch": 0.21336777361482295, "grad_norm": 0.26107820868492126, "learning_rate": 9.126233408231172e-06, "loss": 1.1215, "step": 2781 }, { "epoch": 0.21344449701418103, "grad_norm": 0.30054405331611633, "learning_rate": 9.125533255823564e-06, "loss": 1.1203, "step": 2782 }, { "epoch": 0.21352122041353913, "grad_norm": 0.09664256125688553, "learning_rate": 9.124832849890854e-06, "loss": 1.2897, "step": 2783 }, { "epoch": 0.2135979438128972, "grad_norm": 0.24519817531108856, "learning_rate": 9.124132190476084e-06, "loss": 1.1281, "step": 2784 }, { "epoch": 0.21367466721225528, "grad_norm": 0.2176155298948288, "learning_rate": 9.123431277622312e-06, "loss": 1.1704, "step": 2785 }, { "epoch": 0.21375139061161336, "grad_norm": 0.19184601306915283, "learning_rate": 9.12273011137261e-06, "loss": 1.0873, "step": 2786 }, { "epoch": 0.21382811401097146, "grad_norm": 0.18873846530914307, "learning_rate": 9.122028691770067e-06, "loss": 1.1, "step": 2787 }, { "epoch": 0.21390483741032953, "grad_norm": 0.18164867162704468, "learning_rate": 9.121327018857787e-06, "loss": 1.144, "step": 2788 }, { "epoch": 0.2139815608096876, "grad_norm": 0.227892205119133, "learning_rate": 9.12062509267889e-06, "loss": 1.1436, "step": 2789 }, { "epoch": 0.21405828420904568, "grad_norm": 0.09991489350795746, "learning_rate": 9.119922913276511e-06, "loss": 1.1962, "step": 2790 }, { "epoch": 0.21413500760840376, "grad_norm": 0.23573528230190277, "learning_rate": 9.1192204806938e-06, "loss": 1.1431, "step": 2791 }, { "epoch": 0.21421173100776186, "grad_norm": 0.27823925018310547, "learning_rate": 9.118517794973925e-06, "loss": 1.2302, "step": 2792 }, { "epoch": 0.21428845440711994, "grad_norm": 0.19496271014213562, "learning_rate": 9.117814856160067e-06, "loss": 1.2074, "step": 2793 }, { "epoch": 0.214365177806478, "grad_norm": 0.2076965719461441, "learning_rate": 9.117111664295424e-06, "loss": 1.1301, "step": 2794 }, { "epoch": 0.2144419012058361, "grad_norm": 0.1729883849620819, "learning_rate": 9.116408219423209e-06, "loss": 1.0995, "step": 2795 }, { "epoch": 0.21451862460519416, "grad_norm": 0.25390321016311646, "learning_rate": 9.11570452158665e-06, "loss": 1.1366, "step": 2796 }, { "epoch": 0.21459534800455227, "grad_norm": 0.2636638879776001, "learning_rate": 9.115000570828991e-06, "loss": 1.0427, "step": 2797 }, { "epoch": 0.21467207140391034, "grad_norm": 0.31025099754333496, "learning_rate": 9.114296367193491e-06, "loss": 1.1195, "step": 2798 }, { "epoch": 0.21474879480326842, "grad_norm": 0.10059884190559387, "learning_rate": 9.113591910723427e-06, "loss": 1.2623, "step": 2799 }, { "epoch": 0.2148255182026265, "grad_norm": 0.21623069047927856, "learning_rate": 9.11288720146209e-06, "loss": 1.1228, "step": 2800 }, { "epoch": 0.21490224160198457, "grad_norm": 0.21218733489513397, "learning_rate": 9.112182239452785e-06, "loss": 1.1224, "step": 2801 }, { "epoch": 0.21497896500134267, "grad_norm": 0.21082603931427002, "learning_rate": 9.111477024738833e-06, "loss": 1.164, "step": 2802 }, { "epoch": 0.21505568840070075, "grad_norm": 0.20948457717895508, "learning_rate": 9.110771557363573e-06, "loss": 1.1217, "step": 2803 }, { "epoch": 0.21513241180005882, "grad_norm": 0.1902424693107605, "learning_rate": 9.110065837370357e-06, "loss": 1.2238, "step": 2804 }, { "epoch": 0.2152091351994169, "grad_norm": 0.22820733487606049, "learning_rate": 9.109359864802554e-06, "loss": 1.1202, "step": 2805 }, { "epoch": 0.21528585859877497, "grad_norm": 0.20820041000843048, "learning_rate": 9.108653639703547e-06, "loss": 1.0818, "step": 2806 }, { "epoch": 0.21536258199813307, "grad_norm": 0.2109760195016861, "learning_rate": 9.107947162116738e-06, "loss": 1.0622, "step": 2807 }, { "epoch": 0.21543930539749115, "grad_norm": 0.20599548518657684, "learning_rate": 9.107240432085538e-06, "loss": 1.0243, "step": 2808 }, { "epoch": 0.21551602879684922, "grad_norm": 0.20419928431510925, "learning_rate": 9.106533449653378e-06, "loss": 1.079, "step": 2809 }, { "epoch": 0.2155927521962073, "grad_norm": 0.24766573309898376, "learning_rate": 9.105826214863707e-06, "loss": 1.1381, "step": 2810 }, { "epoch": 0.21566947559556537, "grad_norm": 0.22349759936332703, "learning_rate": 9.105118727759984e-06, "loss": 1.0658, "step": 2811 }, { "epoch": 0.21574619899492348, "grad_norm": 0.10081872344017029, "learning_rate": 9.104410988385688e-06, "loss": 1.236, "step": 2812 }, { "epoch": 0.21582292239428155, "grad_norm": 0.1997845470905304, "learning_rate": 9.103702996784311e-06, "loss": 1.086, "step": 2813 }, { "epoch": 0.21589964579363963, "grad_norm": 0.19913095235824585, "learning_rate": 9.102994752999359e-06, "loss": 1.2147, "step": 2814 }, { "epoch": 0.2159763691929977, "grad_norm": 0.20597173273563385, "learning_rate": 9.102286257074358e-06, "loss": 1.2075, "step": 2815 }, { "epoch": 0.2160530925923558, "grad_norm": 0.24078767001628876, "learning_rate": 9.101577509052844e-06, "loss": 1.1391, "step": 2816 }, { "epoch": 0.21612981599171388, "grad_norm": 0.20113177597522736, "learning_rate": 9.100868508978373e-06, "loss": 1.2319, "step": 2817 }, { "epoch": 0.21620653939107196, "grad_norm": 0.29223114252090454, "learning_rate": 9.100159256894518e-06, "loss": 1.1919, "step": 2818 }, { "epoch": 0.21628326279043003, "grad_norm": 0.20297448337078094, "learning_rate": 9.09944975284486e-06, "loss": 1.1067, "step": 2819 }, { "epoch": 0.2163599861897881, "grad_norm": 0.26577088236808777, "learning_rate": 9.098739996873001e-06, "loss": 1.1327, "step": 2820 }, { "epoch": 0.2164367095891462, "grad_norm": 0.21472539007663727, "learning_rate": 9.098029989022558e-06, "loss": 1.2048, "step": 2821 }, { "epoch": 0.21651343298850428, "grad_norm": 0.5295758247375488, "learning_rate": 9.097319729337162e-06, "loss": 1.0561, "step": 2822 }, { "epoch": 0.21659015638786236, "grad_norm": 0.19604024291038513, "learning_rate": 9.096609217860464e-06, "loss": 1.0962, "step": 2823 }, { "epoch": 0.21666687978722043, "grad_norm": 0.16467368602752686, "learning_rate": 9.095898454636122e-06, "loss": 1.0956, "step": 2824 }, { "epoch": 0.2167436031865785, "grad_norm": 0.33182039856910706, "learning_rate": 9.095187439707817e-06, "loss": 1.1646, "step": 2825 }, { "epoch": 0.2168203265859366, "grad_norm": 0.19046808779239655, "learning_rate": 9.094476173119242e-06, "loss": 1.129, "step": 2826 }, { "epoch": 0.2168970499852947, "grad_norm": 0.2070869654417038, "learning_rate": 9.093764654914105e-06, "loss": 1.1019, "step": 2827 }, { "epoch": 0.21697377338465276, "grad_norm": 0.2704012393951416, "learning_rate": 9.093052885136135e-06, "loss": 1.0845, "step": 2828 }, { "epoch": 0.21705049678401084, "grad_norm": 0.2703877091407776, "learning_rate": 9.092340863829067e-06, "loss": 1.1234, "step": 2829 }, { "epoch": 0.2171272201833689, "grad_norm": 0.22302207350730896, "learning_rate": 9.09162859103666e-06, "loss": 1.1847, "step": 2830 }, { "epoch": 0.21720394358272702, "grad_norm": 0.31349220871925354, "learning_rate": 9.090916066802685e-06, "loss": 1.2028, "step": 2831 }, { "epoch": 0.2172806669820851, "grad_norm": 0.2042998969554901, "learning_rate": 9.090203291170924e-06, "loss": 1.0759, "step": 2832 }, { "epoch": 0.21735739038144317, "grad_norm": 0.21569761633872986, "learning_rate": 9.089490264185186e-06, "loss": 1.1731, "step": 2833 }, { "epoch": 0.21743411378080124, "grad_norm": 0.20052237808704376, "learning_rate": 9.088776985889282e-06, "loss": 1.0656, "step": 2834 }, { "epoch": 0.21751083718015932, "grad_norm": 0.1939418911933899, "learning_rate": 9.08806345632705e-06, "loss": 1.0637, "step": 2835 }, { "epoch": 0.21758756057951742, "grad_norm": 0.1887160986661911, "learning_rate": 9.087349675542336e-06, "loss": 1.1716, "step": 2836 }, { "epoch": 0.2176642839788755, "grad_norm": 0.21519996225833893, "learning_rate": 9.086635643579002e-06, "loss": 1.1068, "step": 2837 }, { "epoch": 0.21774100737823357, "grad_norm": 0.23157507181167603, "learning_rate": 9.08592136048093e-06, "loss": 1.2067, "step": 2838 }, { "epoch": 0.21781773077759164, "grad_norm": 0.1877855509519577, "learning_rate": 9.085206826292014e-06, "loss": 1.1079, "step": 2839 }, { "epoch": 0.21789445417694972, "grad_norm": 0.24115023016929626, "learning_rate": 9.084492041056164e-06, "loss": 1.0969, "step": 2840 }, { "epoch": 0.21797117757630782, "grad_norm": 0.23125039041042328, "learning_rate": 9.083777004817305e-06, "loss": 1.1084, "step": 2841 }, { "epoch": 0.2180479009756659, "grad_norm": 0.1667248010635376, "learning_rate": 9.083061717619377e-06, "loss": 1.1707, "step": 2842 }, { "epoch": 0.21812462437502397, "grad_norm": 0.20736923813819885, "learning_rate": 9.082346179506338e-06, "loss": 1.0756, "step": 2843 }, { "epoch": 0.21820134777438205, "grad_norm": 0.26795294880867004, "learning_rate": 9.081630390522158e-06, "loss": 1.1268, "step": 2844 }, { "epoch": 0.21827807117374015, "grad_norm": 0.1955413669347763, "learning_rate": 9.080914350710827e-06, "loss": 1.1274, "step": 2845 }, { "epoch": 0.21835479457309823, "grad_norm": 0.1815408617258072, "learning_rate": 9.080198060116345e-06, "loss": 1.1764, "step": 2846 }, { "epoch": 0.2184315179724563, "grad_norm": 0.21701550483703613, "learning_rate": 9.079481518782731e-06, "loss": 1.1293, "step": 2847 }, { "epoch": 0.21850824137181438, "grad_norm": 0.24820062518119812, "learning_rate": 9.078764726754018e-06, "loss": 1.1549, "step": 2848 }, { "epoch": 0.21858496477117245, "grad_norm": 0.17294038832187653, "learning_rate": 9.078047684074254e-06, "loss": 1.0975, "step": 2849 }, { "epoch": 0.21866168817053055, "grad_norm": 0.18989264965057373, "learning_rate": 9.077330390787506e-06, "loss": 1.1215, "step": 2850 }, { "epoch": 0.21873841156988863, "grad_norm": 0.20777565240859985, "learning_rate": 9.07661284693785e-06, "loss": 1.1262, "step": 2851 }, { "epoch": 0.2188151349692467, "grad_norm": 0.2140064835548401, "learning_rate": 9.075895052569384e-06, "loss": 1.0702, "step": 2852 }, { "epoch": 0.21889185836860478, "grad_norm": 0.2681368589401245, "learning_rate": 9.075177007726217e-06, "loss": 1.166, "step": 2853 }, { "epoch": 0.21896858176796286, "grad_norm": 0.20613166689872742, "learning_rate": 9.074458712452476e-06, "loss": 1.1274, "step": 2854 }, { "epoch": 0.21904530516732096, "grad_norm": 0.18205438554286957, "learning_rate": 9.0737401667923e-06, "loss": 1.0691, "step": 2855 }, { "epoch": 0.21912202856667903, "grad_norm": 0.09767748415470123, "learning_rate": 9.073021370789847e-06, "loss": 1.2302, "step": 2856 }, { "epoch": 0.2191987519660371, "grad_norm": 0.242942214012146, "learning_rate": 9.072302324489288e-06, "loss": 1.1127, "step": 2857 }, { "epoch": 0.21927547536539518, "grad_norm": 0.2725113034248352, "learning_rate": 9.071583027934811e-06, "loss": 1.1708, "step": 2858 }, { "epoch": 0.21935219876475326, "grad_norm": 0.1974530816078186, "learning_rate": 9.070863481170619e-06, "loss": 1.1266, "step": 2859 }, { "epoch": 0.21942892216411136, "grad_norm": 0.19790276885032654, "learning_rate": 9.07014368424093e-06, "loss": 1.0939, "step": 2860 }, { "epoch": 0.21950564556346944, "grad_norm": 0.21500754356384277, "learning_rate": 9.069423637189975e-06, "loss": 1.0846, "step": 2861 }, { "epoch": 0.2195823689628275, "grad_norm": 0.1798257678747177, "learning_rate": 9.068703340062009e-06, "loss": 1.1929, "step": 2862 }, { "epoch": 0.2196590923621856, "grad_norm": 0.22828415036201477, "learning_rate": 9.067982792901288e-06, "loss": 1.0651, "step": 2863 }, { "epoch": 0.21973581576154366, "grad_norm": 0.18638230860233307, "learning_rate": 9.067261995752099e-06, "loss": 1.1403, "step": 2864 }, { "epoch": 0.21981253916090177, "grad_norm": 0.22671005129814148, "learning_rate": 9.066540948658734e-06, "loss": 1.0694, "step": 2865 }, { "epoch": 0.21988926256025984, "grad_norm": 0.3176322281360626, "learning_rate": 9.0658196516655e-06, "loss": 1.1638, "step": 2866 }, { "epoch": 0.21996598595961792, "grad_norm": 0.29276779294013977, "learning_rate": 9.065098104816728e-06, "loss": 1.1232, "step": 2867 }, { "epoch": 0.220042709358976, "grad_norm": 0.20699474215507507, "learning_rate": 9.064376308156754e-06, "loss": 1.1868, "step": 2868 }, { "epoch": 0.22011943275833407, "grad_norm": 0.22257240116596222, "learning_rate": 9.06365426172994e-06, "loss": 1.086, "step": 2869 }, { "epoch": 0.22019615615769217, "grad_norm": 0.09354904294013977, "learning_rate": 9.062931965580655e-06, "loss": 1.2294, "step": 2870 }, { "epoch": 0.22027287955705024, "grad_norm": 0.21428751945495605, "learning_rate": 9.062209419753283e-06, "loss": 1.1546, "step": 2871 }, { "epoch": 0.22034960295640832, "grad_norm": 0.2516440153121948, "learning_rate": 9.06148662429223e-06, "loss": 1.0958, "step": 2872 }, { "epoch": 0.2204263263557664, "grad_norm": 0.18541495501995087, "learning_rate": 9.060763579241914e-06, "loss": 1.0579, "step": 2873 }, { "epoch": 0.2205030497551245, "grad_norm": 0.20200759172439575, "learning_rate": 9.060040284646764e-06, "loss": 1.1035, "step": 2874 }, { "epoch": 0.22057977315448257, "grad_norm": 0.23509924113750458, "learning_rate": 9.059316740551235e-06, "loss": 1.0835, "step": 2875 }, { "epoch": 0.22065649655384065, "grad_norm": 0.2440008670091629, "learning_rate": 9.058592946999784e-06, "loss": 1.1478, "step": 2876 }, { "epoch": 0.22073321995319872, "grad_norm": 0.39681276679039, "learning_rate": 9.057868904036895e-06, "loss": 1.1285, "step": 2877 }, { "epoch": 0.2208099433525568, "grad_norm": 0.09748224169015884, "learning_rate": 9.05714461170706e-06, "loss": 1.2213, "step": 2878 }, { "epoch": 0.2208866667519149, "grad_norm": 0.23445506393909454, "learning_rate": 9.056420070054788e-06, "loss": 1.0655, "step": 2879 }, { "epoch": 0.22096339015127298, "grad_norm": 0.203287273645401, "learning_rate": 9.055695279124605e-06, "loss": 1.1374, "step": 2880 }, { "epoch": 0.22104011355063105, "grad_norm": 0.2637001872062683, "learning_rate": 9.054970238961054e-06, "loss": 1.1689, "step": 2881 }, { "epoch": 0.22111683694998913, "grad_norm": 0.09268863499164581, "learning_rate": 9.054244949608685e-06, "loss": 1.1966, "step": 2882 }, { "epoch": 0.2211935603493472, "grad_norm": 0.3147300183773041, "learning_rate": 9.053519411112075e-06, "loss": 1.0728, "step": 2883 }, { "epoch": 0.2212702837487053, "grad_norm": 0.20582059025764465, "learning_rate": 9.052793623515806e-06, "loss": 1.0826, "step": 2884 }, { "epoch": 0.22134700714806338, "grad_norm": 0.19280825555324554, "learning_rate": 9.052067586864481e-06, "loss": 1.2493, "step": 2885 }, { "epoch": 0.22142373054742145, "grad_norm": 0.18987010419368744, "learning_rate": 9.051341301202717e-06, "loss": 1.1173, "step": 2886 }, { "epoch": 0.22150045394677953, "grad_norm": 0.0994253009557724, "learning_rate": 9.050614766575147e-06, "loss": 1.2093, "step": 2887 }, { "epoch": 0.2215771773461376, "grad_norm": 0.19545932114124298, "learning_rate": 9.049887983026417e-06, "loss": 1.1818, "step": 2888 }, { "epoch": 0.2216539007454957, "grad_norm": 0.20387399196624756, "learning_rate": 9.04916095060119e-06, "loss": 1.1376, "step": 2889 }, { "epoch": 0.22173062414485378, "grad_norm": 0.23608605563640594, "learning_rate": 9.048433669344146e-06, "loss": 1.0857, "step": 2890 }, { "epoch": 0.22180734754421186, "grad_norm": 0.19731798768043518, "learning_rate": 9.047706139299975e-06, "loss": 1.077, "step": 2891 }, { "epoch": 0.22188407094356993, "grad_norm": 0.23050788044929504, "learning_rate": 9.046978360513387e-06, "loss": 1.2344, "step": 2892 }, { "epoch": 0.221960794342928, "grad_norm": 0.30555176734924316, "learning_rate": 9.04625033302911e-06, "loss": 1.043, "step": 2893 }, { "epoch": 0.2220375177422861, "grad_norm": 0.24005313217639923, "learning_rate": 9.045522056891878e-06, "loss": 1.0616, "step": 2894 }, { "epoch": 0.2221142411416442, "grad_norm": 0.20872657001018524, "learning_rate": 9.044793532146448e-06, "loss": 1.1533, "step": 2895 }, { "epoch": 0.22219096454100226, "grad_norm": 0.09902860224246979, "learning_rate": 9.044064758837587e-06, "loss": 1.21, "step": 2896 }, { "epoch": 0.22226768794036034, "grad_norm": 0.21757780015468597, "learning_rate": 9.043335737010084e-06, "loss": 1.1258, "step": 2897 }, { "epoch": 0.2223444113397184, "grad_norm": 0.17836351692676544, "learning_rate": 9.042606466708737e-06, "loss": 1.1776, "step": 2898 }, { "epoch": 0.22242113473907651, "grad_norm": 0.09631787240505219, "learning_rate": 9.041876947978363e-06, "loss": 1.1792, "step": 2899 }, { "epoch": 0.2224978581384346, "grad_norm": 0.1742585152387619, "learning_rate": 9.041147180863792e-06, "loss": 1.2035, "step": 2900 }, { "epoch": 0.22257458153779267, "grad_norm": 0.2311485856771469, "learning_rate": 9.04041716540987e-06, "loss": 1.1648, "step": 2901 }, { "epoch": 0.22265130493715074, "grad_norm": 0.22758761048316956, "learning_rate": 9.039686901661457e-06, "loss": 1.1082, "step": 2902 }, { "epoch": 0.22272802833650884, "grad_norm": 0.2066960632801056, "learning_rate": 9.038956389663434e-06, "loss": 1.1143, "step": 2903 }, { "epoch": 0.22280475173586692, "grad_norm": 0.24794287979602814, "learning_rate": 9.03822562946069e-06, "loss": 1.1662, "step": 2904 }, { "epoch": 0.222881475135225, "grad_norm": 0.3211086392402649, "learning_rate": 9.03749462109813e-06, "loss": 1.1943, "step": 2905 }, { "epoch": 0.22295819853458307, "grad_norm": 0.26174288988113403, "learning_rate": 9.03676336462068e-06, "loss": 1.1627, "step": 2906 }, { "epoch": 0.22303492193394114, "grad_norm": 0.2018662989139557, "learning_rate": 9.036031860073276e-06, "loss": 1.1613, "step": 2907 }, { "epoch": 0.22311164533329925, "grad_norm": 0.18601837754249573, "learning_rate": 9.035300107500873e-06, "loss": 1.1278, "step": 2908 }, { "epoch": 0.22318836873265732, "grad_norm": 0.1818820983171463, "learning_rate": 9.034568106948437e-06, "loss": 1.0694, "step": 2909 }, { "epoch": 0.2232650921320154, "grad_norm": 0.29245051741600037, "learning_rate": 9.03383585846095e-06, "loss": 1.0294, "step": 2910 }, { "epoch": 0.22334181553137347, "grad_norm": 0.20215854048728943, "learning_rate": 9.033103362083415e-06, "loss": 1.1809, "step": 2911 }, { "epoch": 0.22341853893073155, "grad_norm": 0.22064417600631714, "learning_rate": 9.032370617860844e-06, "loss": 1.2011, "step": 2912 }, { "epoch": 0.22349526233008965, "grad_norm": 0.3808475732803345, "learning_rate": 9.031637625838265e-06, "loss": 1.0032, "step": 2913 }, { "epoch": 0.22357198572944773, "grad_norm": 0.2327088564634323, "learning_rate": 9.030904386060722e-06, "loss": 1.0772, "step": 2914 }, { "epoch": 0.2236487091288058, "grad_norm": 0.19182300567626953, "learning_rate": 9.030170898573276e-06, "loss": 1.0575, "step": 2915 }, { "epoch": 0.22372543252816388, "grad_norm": 0.18089759349822998, "learning_rate": 9.029437163421002e-06, "loss": 1.1644, "step": 2916 }, { "epoch": 0.22380215592752195, "grad_norm": 0.2703777551651001, "learning_rate": 9.028703180648987e-06, "loss": 1.224, "step": 2917 }, { "epoch": 0.22387887932688005, "grad_norm": 0.1840095818042755, "learning_rate": 9.027968950302342e-06, "loss": 1.1237, "step": 2918 }, { "epoch": 0.22395560272623813, "grad_norm": 0.5055450797080994, "learning_rate": 9.02723447242618e-06, "loss": 1.1159, "step": 2919 }, { "epoch": 0.2240323261255962, "grad_norm": 0.22248271107673645, "learning_rate": 9.026499747065644e-06, "loss": 1.0762, "step": 2920 }, { "epoch": 0.22410904952495428, "grad_norm": 0.2211061418056488, "learning_rate": 9.025764774265879e-06, "loss": 1.2476, "step": 2921 }, { "epoch": 0.22418577292431235, "grad_norm": 0.1994880586862564, "learning_rate": 9.025029554072054e-06, "loss": 1.1873, "step": 2922 }, { "epoch": 0.22426249632367046, "grad_norm": 0.20465262234210968, "learning_rate": 9.02429408652935e-06, "loss": 1.1272, "step": 2923 }, { "epoch": 0.22433921972302853, "grad_norm": 0.20958097279071808, "learning_rate": 9.023558371682963e-06, "loss": 1.1154, "step": 2924 }, { "epoch": 0.2244159431223866, "grad_norm": 0.2321946769952774, "learning_rate": 9.022822409578106e-06, "loss": 1.1303, "step": 2925 }, { "epoch": 0.22449266652174468, "grad_norm": 0.19498637318611145, "learning_rate": 9.022086200260003e-06, "loss": 1.095, "step": 2926 }, { "epoch": 0.22456938992110276, "grad_norm": 0.19043616950511932, "learning_rate": 9.0213497437739e-06, "loss": 1.1047, "step": 2927 }, { "epoch": 0.22464611332046086, "grad_norm": 0.26194557547569275, "learning_rate": 9.020613040165049e-06, "loss": 1.1131, "step": 2928 }, { "epoch": 0.22472283671981894, "grad_norm": 0.24376659095287323, "learning_rate": 9.019876089478727e-06, "loss": 1.1857, "step": 2929 }, { "epoch": 0.224799560119177, "grad_norm": 0.19982193410396576, "learning_rate": 9.01913889176022e-06, "loss": 1.0658, "step": 2930 }, { "epoch": 0.2248762835185351, "grad_norm": 0.17522262036800385, "learning_rate": 9.018401447054829e-06, "loss": 1.1324, "step": 2931 }, { "epoch": 0.2249530069178932, "grad_norm": 0.200003519654274, "learning_rate": 9.017663755407876e-06, "loss": 1.2082, "step": 2932 }, { "epoch": 0.22502973031725126, "grad_norm": 0.1674860119819641, "learning_rate": 9.01692581686469e-06, "loss": 1.1215, "step": 2933 }, { "epoch": 0.22510645371660934, "grad_norm": 0.18695619702339172, "learning_rate": 9.016187631470622e-06, "loss": 1.112, "step": 2934 }, { "epoch": 0.22518317711596741, "grad_norm": 0.25158947706222534, "learning_rate": 9.015449199271036e-06, "loss": 1.0762, "step": 2935 }, { "epoch": 0.2252599005153255, "grad_norm": 0.22878341376781464, "learning_rate": 9.014710520311307e-06, "loss": 1.087, "step": 2936 }, { "epoch": 0.2253366239146836, "grad_norm": 0.2105109691619873, "learning_rate": 9.013971594636832e-06, "loss": 1.0813, "step": 2937 }, { "epoch": 0.22541334731404167, "grad_norm": 0.2490585893392563, "learning_rate": 9.013232422293019e-06, "loss": 1.1956, "step": 2938 }, { "epoch": 0.22549007071339974, "grad_norm": 0.2103641778230667, "learning_rate": 9.012493003325292e-06, "loss": 1.1319, "step": 2939 }, { "epoch": 0.22556679411275782, "grad_norm": 0.2530089020729065, "learning_rate": 9.01175333777909e-06, "loss": 1.1243, "step": 2940 }, { "epoch": 0.2256435175121159, "grad_norm": 0.19230419397354126, "learning_rate": 9.011013425699868e-06, "loss": 1.1411, "step": 2941 }, { "epoch": 0.225720240911474, "grad_norm": 0.1727573424577713, "learning_rate": 9.010273267133096e-06, "loss": 1.1719, "step": 2942 }, { "epoch": 0.22579696431083207, "grad_norm": 0.1885506808757782, "learning_rate": 9.009532862124257e-06, "loss": 1.1477, "step": 2943 }, { "epoch": 0.22587368771019015, "grad_norm": 0.25045421719551086, "learning_rate": 9.008792210718854e-06, "loss": 1.2033, "step": 2944 }, { "epoch": 0.22595041110954822, "grad_norm": 0.23843522369861603, "learning_rate": 9.008051312962398e-06, "loss": 1.1378, "step": 2945 }, { "epoch": 0.2260271345089063, "grad_norm": 0.2504977881908417, "learning_rate": 9.007310168900423e-06, "loss": 1.1016, "step": 2946 }, { "epoch": 0.2261038579082644, "grad_norm": 0.1813415288925171, "learning_rate": 9.006568778578472e-06, "loss": 1.087, "step": 2947 }, { "epoch": 0.22618058130762247, "grad_norm": 0.1802094727754593, "learning_rate": 9.005827142042106e-06, "loss": 1.1433, "step": 2948 }, { "epoch": 0.22625730470698055, "grad_norm": 0.20932023227214813, "learning_rate": 9.0050852593369e-06, "loss": 1.1303, "step": 2949 }, { "epoch": 0.22633402810633863, "grad_norm": 0.24815188348293304, "learning_rate": 9.004343130508445e-06, "loss": 1.0654, "step": 2950 }, { "epoch": 0.2264107515056967, "grad_norm": 0.20622213184833527, "learning_rate": 9.003600755602347e-06, "loss": 1.1507, "step": 2951 }, { "epoch": 0.2264874749050548, "grad_norm": 0.18653319776058197, "learning_rate": 9.002858134664227e-06, "loss": 1.1585, "step": 2952 }, { "epoch": 0.22656419830441288, "grad_norm": 0.20047543942928314, "learning_rate": 9.00211526773972e-06, "loss": 1.1599, "step": 2953 }, { "epoch": 0.22664092170377095, "grad_norm": 0.28622788190841675, "learning_rate": 9.001372154874482e-06, "loss": 1.1819, "step": 2954 }, { "epoch": 0.22671764510312903, "grad_norm": 0.21789856255054474, "learning_rate": 9.000628796114173e-06, "loss": 1.0398, "step": 2955 }, { "epoch": 0.2267943685024871, "grad_norm": 0.21157017350196838, "learning_rate": 8.999885191504475e-06, "loss": 1.0855, "step": 2956 }, { "epoch": 0.2268710919018452, "grad_norm": 0.211798295378685, "learning_rate": 8.999141341091088e-06, "loss": 1.1706, "step": 2957 }, { "epoch": 0.22694781530120328, "grad_norm": 0.1031690165400505, "learning_rate": 8.998397244919723e-06, "loss": 1.23, "step": 2958 }, { "epoch": 0.22702453870056136, "grad_norm": 0.2251032143831253, "learning_rate": 8.997652903036105e-06, "loss": 1.1701, "step": 2959 }, { "epoch": 0.22710126209991943, "grad_norm": 0.17239785194396973, "learning_rate": 8.996908315485977e-06, "loss": 1.1272, "step": 2960 }, { "epoch": 0.22717798549927753, "grad_norm": 0.2146177440881729, "learning_rate": 8.996163482315094e-06, "loss": 1.0959, "step": 2961 }, { "epoch": 0.2272547088986356, "grad_norm": 0.20369353890419006, "learning_rate": 8.99541840356923e-06, "loss": 1.1381, "step": 2962 }, { "epoch": 0.22733143229799369, "grad_norm": 0.2697161138057709, "learning_rate": 8.994673079294171e-06, "loss": 1.1269, "step": 2963 }, { "epoch": 0.22740815569735176, "grad_norm": 0.19712994992733002, "learning_rate": 8.99392750953572e-06, "loss": 1.155, "step": 2964 }, { "epoch": 0.22748487909670984, "grad_norm": 0.09631528705358505, "learning_rate": 8.993181694339694e-06, "loss": 1.2243, "step": 2965 }, { "epoch": 0.22756160249606794, "grad_norm": 0.1924717128276825, "learning_rate": 8.992435633751924e-06, "loss": 1.1759, "step": 2966 }, { "epoch": 0.227638325895426, "grad_norm": 0.2397550791501999, "learning_rate": 8.99168932781826e-06, "loss": 1.1238, "step": 2967 }, { "epoch": 0.2277150492947841, "grad_norm": 0.22155293822288513, "learning_rate": 8.990942776584564e-06, "loss": 1.131, "step": 2968 }, { "epoch": 0.22779177269414216, "grad_norm": 0.21261616051197052, "learning_rate": 8.990195980096713e-06, "loss": 1.0361, "step": 2969 }, { "epoch": 0.22786849609350024, "grad_norm": 0.21846835315227509, "learning_rate": 8.989448938400596e-06, "loss": 1.1514, "step": 2970 }, { "epoch": 0.22794521949285834, "grad_norm": 0.19825242459774017, "learning_rate": 8.988701651542126e-06, "loss": 1.1109, "step": 2971 }, { "epoch": 0.22802194289221642, "grad_norm": 0.657435953617096, "learning_rate": 8.987954119567224e-06, "loss": 1.1723, "step": 2972 }, { "epoch": 0.2280986662915745, "grad_norm": 0.2795810103416443, "learning_rate": 8.987206342521829e-06, "loss": 1.0857, "step": 2973 }, { "epoch": 0.22817538969093257, "grad_norm": 0.19391269981861115, "learning_rate": 8.986458320451891e-06, "loss": 1.124, "step": 2974 }, { "epoch": 0.22825211309029064, "grad_norm": 0.21013474464416504, "learning_rate": 8.985710053403381e-06, "loss": 1.1589, "step": 2975 }, { "epoch": 0.22832883648964875, "grad_norm": 0.22342199087142944, "learning_rate": 8.984961541422279e-06, "loss": 1.0753, "step": 2976 }, { "epoch": 0.22840555988900682, "grad_norm": 0.2344118356704712, "learning_rate": 8.984212784554586e-06, "loss": 1.1987, "step": 2977 }, { "epoch": 0.2284822832883649, "grad_norm": 0.3844047486782074, "learning_rate": 8.983463782846314e-06, "loss": 1.0962, "step": 2978 }, { "epoch": 0.22855900668772297, "grad_norm": 0.27284014225006104, "learning_rate": 8.98271453634349e-06, "loss": 1.1351, "step": 2979 }, { "epoch": 0.22863573008708105, "grad_norm": 0.0932404026389122, "learning_rate": 8.981965045092159e-06, "loss": 1.2005, "step": 2980 }, { "epoch": 0.22871245348643915, "grad_norm": 0.27671346068382263, "learning_rate": 8.981215309138376e-06, "loss": 1.0719, "step": 2981 }, { "epoch": 0.22878917688579722, "grad_norm": 0.188067227602005, "learning_rate": 8.98046532852822e-06, "loss": 1.1589, "step": 2982 }, { "epoch": 0.2288659002851553, "grad_norm": 0.18259942531585693, "learning_rate": 8.979715103307775e-06, "loss": 1.1408, "step": 2983 }, { "epoch": 0.22894262368451337, "grad_norm": 0.31151503324508667, "learning_rate": 8.978964633523144e-06, "loss": 1.0619, "step": 2984 }, { "epoch": 0.22901934708387145, "grad_norm": 0.3160683512687683, "learning_rate": 8.978213919220448e-06, "loss": 1.1093, "step": 2985 }, { "epoch": 0.22909607048322955, "grad_norm": 0.20341584086418152, "learning_rate": 8.97746296044582e-06, "loss": 1.0775, "step": 2986 }, { "epoch": 0.22917279388258763, "grad_norm": 0.2537078261375427, "learning_rate": 8.976711757245408e-06, "loss": 1.0998, "step": 2987 }, { "epoch": 0.2292495172819457, "grad_norm": 0.230434849858284, "learning_rate": 8.975960309665373e-06, "loss": 1.0612, "step": 2988 }, { "epoch": 0.22932624068130378, "grad_norm": 0.19453571736812592, "learning_rate": 8.975208617751898e-06, "loss": 1.1185, "step": 2989 }, { "epoch": 0.22940296408066188, "grad_norm": 0.19725459814071655, "learning_rate": 8.974456681551171e-06, "loss": 1.1383, "step": 2990 }, { "epoch": 0.22947968748001996, "grad_norm": 0.3188723623752594, "learning_rate": 8.973704501109406e-06, "loss": 1.1083, "step": 2991 }, { "epoch": 0.22955641087937803, "grad_norm": 0.18403975665569305, "learning_rate": 8.972952076472824e-06, "loss": 1.0821, "step": 2992 }, { "epoch": 0.2296331342787361, "grad_norm": 0.23806758224964142, "learning_rate": 8.972199407687663e-06, "loss": 1.0993, "step": 2993 }, { "epoch": 0.22970985767809418, "grad_norm": 0.2357804775238037, "learning_rate": 8.971446494800177e-06, "loss": 1.14, "step": 2994 }, { "epoch": 0.22978658107745228, "grad_norm": 0.19988617300987244, "learning_rate": 8.970693337856636e-06, "loss": 1.0789, "step": 2995 }, { "epoch": 0.22986330447681036, "grad_norm": 0.20685966312885284, "learning_rate": 8.96993993690332e-06, "loss": 1.0965, "step": 2996 }, { "epoch": 0.22994002787616843, "grad_norm": 0.24573999643325806, "learning_rate": 8.969186291986532e-06, "loss": 1.1543, "step": 2997 }, { "epoch": 0.2300167512755265, "grad_norm": 0.2912411391735077, "learning_rate": 8.96843240315258e-06, "loss": 1.147, "step": 2998 }, { "epoch": 0.23009347467488458, "grad_norm": 0.1916811168193817, "learning_rate": 8.9676782704478e-06, "loss": 1.2052, "step": 2999 }, { "epoch": 0.2301701980742427, "grad_norm": 0.28606337308883667, "learning_rate": 8.966923893918527e-06, "loss": 1.1778, "step": 3000 }, { "epoch": 0.23024692147360076, "grad_norm": 0.17881014943122864, "learning_rate": 8.966169273611125e-06, "loss": 1.098, "step": 3001 }, { "epoch": 0.23032364487295884, "grad_norm": 0.21163836121559143, "learning_rate": 8.965414409571967e-06, "loss": 1.104, "step": 3002 }, { "epoch": 0.2304003682723169, "grad_norm": 0.21810795366764069, "learning_rate": 8.96465930184744e-06, "loss": 1.0939, "step": 3003 }, { "epoch": 0.230477091671675, "grad_norm": 0.2100108414888382, "learning_rate": 8.963903950483948e-06, "loss": 1.1655, "step": 3004 }, { "epoch": 0.2305538150710331, "grad_norm": 0.19686263799667358, "learning_rate": 8.963148355527909e-06, "loss": 1.0723, "step": 3005 }, { "epoch": 0.23063053847039117, "grad_norm": 0.1834029108285904, "learning_rate": 8.962392517025757e-06, "loss": 1.1959, "step": 3006 }, { "epoch": 0.23070726186974924, "grad_norm": 0.6597473621368408, "learning_rate": 8.961636435023939e-06, "loss": 1.1341, "step": 3007 }, { "epoch": 0.23078398526910732, "grad_norm": 0.20149481296539307, "learning_rate": 8.960880109568919e-06, "loss": 1.2036, "step": 3008 }, { "epoch": 0.2308607086684654, "grad_norm": 0.26604077219963074, "learning_rate": 8.960123540707177e-06, "loss": 1.1735, "step": 3009 }, { "epoch": 0.2309374320678235, "grad_norm": 0.1776905059814453, "learning_rate": 8.959366728485204e-06, "loss": 1.1136, "step": 3010 }, { "epoch": 0.23101415546718157, "grad_norm": 0.19112186133861542, "learning_rate": 8.958609672949508e-06, "loss": 1.0854, "step": 3011 }, { "epoch": 0.23109087886653965, "grad_norm": 0.26682719588279724, "learning_rate": 8.957852374146614e-06, "loss": 1.0659, "step": 3012 }, { "epoch": 0.23116760226589772, "grad_norm": 0.21434055268764496, "learning_rate": 8.957094832123058e-06, "loss": 1.1766, "step": 3013 }, { "epoch": 0.2312443256652558, "grad_norm": 0.1912633329629898, "learning_rate": 8.956337046925394e-06, "loss": 1.1059, "step": 3014 }, { "epoch": 0.2313210490646139, "grad_norm": 0.10094510763883591, "learning_rate": 8.955579018600188e-06, "loss": 1.2017, "step": 3015 }, { "epoch": 0.23139777246397197, "grad_norm": 0.2555466592311859, "learning_rate": 8.954820747194028e-06, "loss": 1.1362, "step": 3016 }, { "epoch": 0.23147449586333005, "grad_norm": 0.1987655609846115, "learning_rate": 8.954062232753506e-06, "loss": 1.0788, "step": 3017 }, { "epoch": 0.23155121926268812, "grad_norm": 0.20996488630771637, "learning_rate": 8.95330347532524e-06, "loss": 1.0918, "step": 3018 }, { "epoch": 0.23162794266204623, "grad_norm": 0.21382750570774078, "learning_rate": 8.952544474955853e-06, "loss": 1.1967, "step": 3019 }, { "epoch": 0.2317046660614043, "grad_norm": 0.20446056127548218, "learning_rate": 8.95178523169199e-06, "loss": 1.0815, "step": 3020 }, { "epoch": 0.23178138946076238, "grad_norm": 0.21688154339790344, "learning_rate": 8.951025745580307e-06, "loss": 1.1325, "step": 3021 }, { "epoch": 0.23185811286012045, "grad_norm": 0.1859772503376007, "learning_rate": 8.950266016667479e-06, "loss": 1.1365, "step": 3022 }, { "epoch": 0.23193483625947853, "grad_norm": 0.19437478482723236, "learning_rate": 8.949506045000192e-06, "loss": 1.1137, "step": 3023 }, { "epoch": 0.23201155965883663, "grad_norm": 0.19621722400188446, "learning_rate": 8.948745830625148e-06, "loss": 1.1708, "step": 3024 }, { "epoch": 0.2320882830581947, "grad_norm": 0.18992340564727783, "learning_rate": 8.947985373589062e-06, "loss": 1.1879, "step": 3025 }, { "epoch": 0.23216500645755278, "grad_norm": 0.20024503767490387, "learning_rate": 8.94722467393867e-06, "loss": 1.1458, "step": 3026 }, { "epoch": 0.23224172985691086, "grad_norm": 0.18371161818504333, "learning_rate": 8.94646373172072e-06, "loss": 1.0778, "step": 3027 }, { "epoch": 0.23231845325626893, "grad_norm": 0.4710817039012909, "learning_rate": 8.94570254698197e-06, "loss": 1.0419, "step": 3028 }, { "epoch": 0.23239517665562703, "grad_norm": 0.09767737984657288, "learning_rate": 8.944941119769197e-06, "loss": 1.1773, "step": 3029 }, { "epoch": 0.2324719000549851, "grad_norm": 0.5536109805107117, "learning_rate": 8.944179450129196e-06, "loss": 1.1163, "step": 3030 }, { "epoch": 0.23254862345434318, "grad_norm": 0.2075061947107315, "learning_rate": 8.943417538108769e-06, "loss": 1.0452, "step": 3031 }, { "epoch": 0.23262534685370126, "grad_norm": 0.1951213777065277, "learning_rate": 8.942655383754743e-06, "loss": 1.1319, "step": 3032 }, { "epoch": 0.23270207025305933, "grad_norm": 0.26740047335624695, "learning_rate": 8.941892987113951e-06, "loss": 1.1392, "step": 3033 }, { "epoch": 0.23277879365241744, "grad_norm": 0.18849168717861176, "learning_rate": 8.941130348233246e-06, "loss": 1.1118, "step": 3034 }, { "epoch": 0.2328555170517755, "grad_norm": 0.28410208225250244, "learning_rate": 8.940367467159492e-06, "loss": 1.169, "step": 3035 }, { "epoch": 0.2329322404511336, "grad_norm": 0.19898591935634613, "learning_rate": 8.939604343939572e-06, "loss": 1.1319, "step": 3036 }, { "epoch": 0.23300896385049166, "grad_norm": 0.2613164186477661, "learning_rate": 8.938840978620382e-06, "loss": 1.1761, "step": 3037 }, { "epoch": 0.23308568724984974, "grad_norm": 0.23351937532424927, "learning_rate": 8.93807737124883e-06, "loss": 1.0655, "step": 3038 }, { "epoch": 0.23316241064920784, "grad_norm": 0.18571597337722778, "learning_rate": 8.937313521871846e-06, "loss": 1.1488, "step": 3039 }, { "epoch": 0.23323913404856592, "grad_norm": 0.19310876727104187, "learning_rate": 8.936549430536368e-06, "loss": 1.1682, "step": 3040 }, { "epoch": 0.233315857447924, "grad_norm": 0.26538339257240295, "learning_rate": 8.935785097289353e-06, "loss": 1.1126, "step": 3041 }, { "epoch": 0.23339258084728207, "grad_norm": 0.21816451847553253, "learning_rate": 8.935020522177768e-06, "loss": 1.099, "step": 3042 }, { "epoch": 0.23346930424664014, "grad_norm": 0.23869265615940094, "learning_rate": 8.934255705248602e-06, "loss": 1.1194, "step": 3043 }, { "epoch": 0.23354602764599824, "grad_norm": 0.2779237627983093, "learning_rate": 8.933490646548854e-06, "loss": 1.1694, "step": 3044 }, { "epoch": 0.23362275104535632, "grad_norm": 0.1944165676832199, "learning_rate": 8.932725346125537e-06, "loss": 1.1433, "step": 3045 }, { "epoch": 0.2336994744447144, "grad_norm": 0.2171664834022522, "learning_rate": 8.931959804025684e-06, "loss": 1.2135, "step": 3046 }, { "epoch": 0.23377619784407247, "grad_norm": 0.3097567558288574, "learning_rate": 8.931194020296336e-06, "loss": 1.1259, "step": 3047 }, { "epoch": 0.23385292124343057, "grad_norm": 0.27596840262413025, "learning_rate": 8.930427994984557e-06, "loss": 1.1309, "step": 3048 }, { "epoch": 0.23392964464278865, "grad_norm": 0.19372649490833282, "learning_rate": 8.929661728137416e-06, "loss": 1.0819, "step": 3049 }, { "epoch": 0.23400636804214672, "grad_norm": 0.19692093133926392, "learning_rate": 8.928895219802005e-06, "loss": 1.1234, "step": 3050 }, { "epoch": 0.2340830914415048, "grad_norm": 0.17640149593353271, "learning_rate": 8.92812847002543e-06, "loss": 1.0431, "step": 3051 }, { "epoch": 0.23415981484086287, "grad_norm": 0.18751080334186554, "learning_rate": 8.927361478854805e-06, "loss": 1.1975, "step": 3052 }, { "epoch": 0.23423653824022098, "grad_norm": 0.20361767709255219, "learning_rate": 8.926594246337268e-06, "loss": 1.0442, "step": 3053 }, { "epoch": 0.23431326163957905, "grad_norm": 0.2353592962026596, "learning_rate": 8.925826772519965e-06, "loss": 1.0681, "step": 3054 }, { "epoch": 0.23438998503893713, "grad_norm": 0.26261216402053833, "learning_rate": 8.92505905745006e-06, "loss": 1.16, "step": 3055 }, { "epoch": 0.2344667084382952, "grad_norm": 0.1871473789215088, "learning_rate": 8.924291101174732e-06, "loss": 1.0817, "step": 3056 }, { "epoch": 0.23454343183765328, "grad_norm": 0.1877516359090805, "learning_rate": 8.923522903741173e-06, "loss": 1.1467, "step": 3057 }, { "epoch": 0.23462015523701138, "grad_norm": 0.3689831793308258, "learning_rate": 8.922754465196591e-06, "loss": 0.9749, "step": 3058 }, { "epoch": 0.23469687863636945, "grad_norm": 0.2693054676055908, "learning_rate": 8.92198578558821e-06, "loss": 1.0644, "step": 3059 }, { "epoch": 0.23477360203572753, "grad_norm": 0.8335785269737244, "learning_rate": 8.921216864963265e-06, "loss": 1.2289, "step": 3060 }, { "epoch": 0.2348503254350856, "grad_norm": 0.4799807369709015, "learning_rate": 8.92044770336901e-06, "loss": 1.0998, "step": 3061 }, { "epoch": 0.23492704883444368, "grad_norm": 0.2212795913219452, "learning_rate": 8.919678300852708e-06, "loss": 1.074, "step": 3062 }, { "epoch": 0.23500377223380178, "grad_norm": 0.20006023347377777, "learning_rate": 8.918908657461648e-06, "loss": 1.1915, "step": 3063 }, { "epoch": 0.23508049563315986, "grad_norm": 0.27834659814834595, "learning_rate": 8.91813877324312e-06, "loss": 1.1768, "step": 3064 }, { "epoch": 0.23515721903251793, "grad_norm": 0.09890912473201752, "learning_rate": 8.917368648244443e-06, "loss": 1.2011, "step": 3065 }, { "epoch": 0.235233942431876, "grad_norm": 0.3467099964618683, "learning_rate": 8.916598282512933e-06, "loss": 1.1169, "step": 3066 }, { "epoch": 0.23531066583123408, "grad_norm": 0.26515084505081177, "learning_rate": 8.915827676095941e-06, "loss": 1.1066, "step": 3067 }, { "epoch": 0.2353873892305922, "grad_norm": 0.2767142653465271, "learning_rate": 8.915056829040816e-06, "loss": 1.1681, "step": 3068 }, { "epoch": 0.23546411262995026, "grad_norm": 0.2493206262588501, "learning_rate": 8.914285741394933e-06, "loss": 1.106, "step": 3069 }, { "epoch": 0.23554083602930834, "grad_norm": 0.17353756725788116, "learning_rate": 8.913514413205672e-06, "loss": 1.1503, "step": 3070 }, { "epoch": 0.2356175594286664, "grad_norm": 0.19254586100578308, "learning_rate": 8.912742844520441e-06, "loss": 1.0943, "step": 3071 }, { "epoch": 0.2356942828280245, "grad_norm": 0.1827017217874527, "learning_rate": 8.911971035386649e-06, "loss": 1.1676, "step": 3072 }, { "epoch": 0.2357710062273826, "grad_norm": 0.25218337774276733, "learning_rate": 8.911198985851725e-06, "loss": 1.0446, "step": 3073 }, { "epoch": 0.23584772962674067, "grad_norm": 0.25323811173439026, "learning_rate": 8.91042669596312e-06, "loss": 1.1007, "step": 3074 }, { "epoch": 0.23592445302609874, "grad_norm": 0.2550209164619446, "learning_rate": 8.909654165768286e-06, "loss": 1.1516, "step": 3075 }, { "epoch": 0.23600117642545682, "grad_norm": 0.1878555864095688, "learning_rate": 8.908881395314703e-06, "loss": 1.1676, "step": 3076 }, { "epoch": 0.23607789982481492, "grad_norm": 0.21499967575073242, "learning_rate": 8.908108384649856e-06, "loss": 1.1298, "step": 3077 }, { "epoch": 0.236154623224173, "grad_norm": 0.28109103441238403, "learning_rate": 8.907335133821247e-06, "loss": 1.0581, "step": 3078 }, { "epoch": 0.23623134662353107, "grad_norm": 0.2014380544424057, "learning_rate": 8.9065616428764e-06, "loss": 1.0806, "step": 3079 }, { "epoch": 0.23630807002288914, "grad_norm": 0.20518188178539276, "learning_rate": 8.905787911862845e-06, "loss": 1.1296, "step": 3080 }, { "epoch": 0.23638479342224722, "grad_norm": 0.28208932280540466, "learning_rate": 8.905013940828128e-06, "loss": 1.1229, "step": 3081 }, { "epoch": 0.23646151682160532, "grad_norm": 0.37965017557144165, "learning_rate": 8.904239729819815e-06, "loss": 1.1441, "step": 3082 }, { "epoch": 0.2365382402209634, "grad_norm": 0.4890561103820801, "learning_rate": 8.903465278885482e-06, "loss": 1.1033, "step": 3083 }, { "epoch": 0.23661496362032147, "grad_norm": 0.27361729741096497, "learning_rate": 8.902690588072722e-06, "loss": 1.0646, "step": 3084 }, { "epoch": 0.23669168701967955, "grad_norm": 0.18120506405830383, "learning_rate": 8.901915657429138e-06, "loss": 1.0302, "step": 3085 }, { "epoch": 0.23676841041903762, "grad_norm": 0.19600729644298553, "learning_rate": 8.901140487002358e-06, "loss": 1.0901, "step": 3086 }, { "epoch": 0.23684513381839573, "grad_norm": 0.19134017825126648, "learning_rate": 8.900365076840011e-06, "loss": 1.1046, "step": 3087 }, { "epoch": 0.2369218572177538, "grad_norm": 0.2451462596654892, "learning_rate": 8.899589426989754e-06, "loss": 1.0845, "step": 3088 }, { "epoch": 0.23699858061711188, "grad_norm": 0.2374630719423294, "learning_rate": 8.89881353749925e-06, "loss": 1.1468, "step": 3089 }, { "epoch": 0.23707530401646995, "grad_norm": 0.23005342483520508, "learning_rate": 8.89803740841618e-06, "loss": 1.0737, "step": 3090 }, { "epoch": 0.23715202741582803, "grad_norm": 0.18772298097610474, "learning_rate": 8.897261039788238e-06, "loss": 1.0976, "step": 3091 }, { "epoch": 0.23722875081518613, "grad_norm": 0.20368260145187378, "learning_rate": 8.896484431663136e-06, "loss": 1.1598, "step": 3092 }, { "epoch": 0.2373054742145442, "grad_norm": 0.2509476840496063, "learning_rate": 8.8957075840886e-06, "loss": 1.1354, "step": 3093 }, { "epoch": 0.23738219761390228, "grad_norm": 0.20493832230567932, "learning_rate": 8.894930497112365e-06, "loss": 1.0392, "step": 3094 }, { "epoch": 0.23745892101326035, "grad_norm": 0.21845746040344238, "learning_rate": 8.894153170782186e-06, "loss": 1.0899, "step": 3095 }, { "epoch": 0.23753564441261843, "grad_norm": 0.1962783932685852, "learning_rate": 8.893375605145837e-06, "loss": 1.0551, "step": 3096 }, { "epoch": 0.23761236781197653, "grad_norm": 0.28934741020202637, "learning_rate": 8.892597800251094e-06, "loss": 1.1312, "step": 3097 }, { "epoch": 0.2376890912113346, "grad_norm": 0.20578144490718842, "learning_rate": 8.89181975614576e-06, "loss": 1.111, "step": 3098 }, { "epoch": 0.23776581461069268, "grad_norm": 0.34979984164237976, "learning_rate": 8.891041472877648e-06, "loss": 1.0507, "step": 3099 }, { "epoch": 0.23784253801005076, "grad_norm": 0.2568162977695465, "learning_rate": 8.89026295049458e-06, "loss": 1.1153, "step": 3100 }, { "epoch": 0.23791926140940883, "grad_norm": 0.19414275884628296, "learning_rate": 8.889484189044408e-06, "loss": 1.1473, "step": 3101 }, { "epoch": 0.23799598480876694, "grad_norm": 0.2716462314128876, "learning_rate": 8.88870518857498e-06, "loss": 1.1263, "step": 3102 }, { "epoch": 0.238072708208125, "grad_norm": 0.19256095588207245, "learning_rate": 8.887925949134173e-06, "loss": 1.1389, "step": 3103 }, { "epoch": 0.2381494316074831, "grad_norm": 0.0943199098110199, "learning_rate": 8.88714647076987e-06, "loss": 1.2459, "step": 3104 }, { "epoch": 0.23822615500684116, "grad_norm": 0.26760661602020264, "learning_rate": 8.886366753529973e-06, "loss": 1.1627, "step": 3105 }, { "epoch": 0.23830287840619926, "grad_norm": 0.27224114537239075, "learning_rate": 8.8855867974624e-06, "loss": 1.1511, "step": 3106 }, { "epoch": 0.23837960180555734, "grad_norm": 0.22146150469779968, "learning_rate": 8.88480660261508e-06, "loss": 1.1437, "step": 3107 }, { "epoch": 0.23845632520491541, "grad_norm": 0.18452772498130798, "learning_rate": 8.884026169035958e-06, "loss": 1.1026, "step": 3108 }, { "epoch": 0.2385330486042735, "grad_norm": 0.17835983633995056, "learning_rate": 8.883245496772992e-06, "loss": 1.0831, "step": 3109 }, { "epoch": 0.23860977200363157, "grad_norm": 0.21573488414287567, "learning_rate": 8.882464585874158e-06, "loss": 1.0898, "step": 3110 }, { "epoch": 0.23868649540298967, "grad_norm": 0.27177780866622925, "learning_rate": 8.881683436387444e-06, "loss": 1.0753, "step": 3111 }, { "epoch": 0.23876321880234774, "grad_norm": 0.26862633228302, "learning_rate": 8.880902048360857e-06, "loss": 1.2613, "step": 3112 }, { "epoch": 0.23883994220170582, "grad_norm": 0.179892435669899, "learning_rate": 8.880120421842413e-06, "loss": 1.125, "step": 3113 }, { "epoch": 0.2389166656010639, "grad_norm": 0.24357181787490845, "learning_rate": 8.879338556880145e-06, "loss": 1.1754, "step": 3114 }, { "epoch": 0.23899338900042197, "grad_norm": 0.24208304286003113, "learning_rate": 8.8785564535221e-06, "loss": 1.1644, "step": 3115 }, { "epoch": 0.23907011239978007, "grad_norm": 0.20730915665626526, "learning_rate": 8.877774111816342e-06, "loss": 1.1222, "step": 3116 }, { "epoch": 0.23914683579913815, "grad_norm": 0.27344000339508057, "learning_rate": 8.876991531810946e-06, "loss": 1.1557, "step": 3117 }, { "epoch": 0.23922355919849622, "grad_norm": 0.22529548406600952, "learning_rate": 8.876208713554006e-06, "loss": 1.0535, "step": 3118 }, { "epoch": 0.2393002825978543, "grad_norm": 0.18635255098342896, "learning_rate": 8.875425657093627e-06, "loss": 1.123, "step": 3119 }, { "epoch": 0.23937700599721237, "grad_norm": 0.19724583625793457, "learning_rate": 8.87464236247793e-06, "loss": 1.1593, "step": 3120 }, { "epoch": 0.23945372939657047, "grad_norm": 0.0981757640838623, "learning_rate": 8.87385882975505e-06, "loss": 1.2134, "step": 3121 }, { "epoch": 0.23953045279592855, "grad_norm": 0.23450730741024017, "learning_rate": 8.873075058973139e-06, "loss": 1.1125, "step": 3122 }, { "epoch": 0.23960717619528663, "grad_norm": 0.18615196645259857, "learning_rate": 8.87229105018036e-06, "loss": 1.0574, "step": 3123 }, { "epoch": 0.2396838995946447, "grad_norm": 0.2195225954055786, "learning_rate": 8.871506803424894e-06, "loss": 1.0486, "step": 3124 }, { "epoch": 0.23976062299400278, "grad_norm": 0.21369214355945587, "learning_rate": 8.870722318754932e-06, "loss": 0.9927, "step": 3125 }, { "epoch": 0.23983734639336088, "grad_norm": 0.22303864359855652, "learning_rate": 8.869937596218686e-06, "loss": 1.0554, "step": 3126 }, { "epoch": 0.23991406979271895, "grad_norm": 0.19951017200946808, "learning_rate": 8.86915263586438e-06, "loss": 1.0694, "step": 3127 }, { "epoch": 0.23999079319207703, "grad_norm": 0.20697957277297974, "learning_rate": 8.868367437740247e-06, "loss": 1.1829, "step": 3128 }, { "epoch": 0.2400675165914351, "grad_norm": 0.20241250097751617, "learning_rate": 8.867582001894544e-06, "loss": 1.0811, "step": 3129 }, { "epoch": 0.24014423999079318, "grad_norm": 0.222550630569458, "learning_rate": 8.866796328375538e-06, "loss": 1.1404, "step": 3130 }, { "epoch": 0.24022096339015128, "grad_norm": 0.09609407931566238, "learning_rate": 8.866010417231508e-06, "loss": 1.2313, "step": 3131 }, { "epoch": 0.24029768678950936, "grad_norm": 0.27119582891464233, "learning_rate": 8.865224268510752e-06, "loss": 1.074, "step": 3132 }, { "epoch": 0.24037441018886743, "grad_norm": 0.1942150890827179, "learning_rate": 8.86443788226158e-06, "loss": 1.0797, "step": 3133 }, { "epoch": 0.2404511335882255, "grad_norm": 0.20031793415546417, "learning_rate": 8.86365125853232e-06, "loss": 1.1725, "step": 3134 }, { "epoch": 0.2405278569875836, "grad_norm": 0.19643624126911163, "learning_rate": 8.862864397371309e-06, "loss": 1.1527, "step": 3135 }, { "epoch": 0.24060458038694169, "grad_norm": 0.2968432903289795, "learning_rate": 8.862077298826905e-06, "loss": 1.1974, "step": 3136 }, { "epoch": 0.24068130378629976, "grad_norm": 0.18695256114006042, "learning_rate": 8.861289962947474e-06, "loss": 1.1598, "step": 3137 }, { "epoch": 0.24075802718565784, "grad_norm": 0.3046567738056183, "learning_rate": 8.860502389781403e-06, "loss": 1.1463, "step": 3138 }, { "epoch": 0.2408347505850159, "grad_norm": 0.2033158540725708, "learning_rate": 8.859714579377088e-06, "loss": 1.1712, "step": 3139 }, { "epoch": 0.240911473984374, "grad_norm": 0.1937180608510971, "learning_rate": 8.858926531782943e-06, "loss": 1.1368, "step": 3140 }, { "epoch": 0.2409881973837321, "grad_norm": 0.24182717502117157, "learning_rate": 8.858138247047396e-06, "loss": 1.1189, "step": 3141 }, { "epoch": 0.24106492078309016, "grad_norm": 0.19105607271194458, "learning_rate": 8.857349725218889e-06, "loss": 1.1546, "step": 3142 }, { "epoch": 0.24114164418244824, "grad_norm": 0.2559698820114136, "learning_rate": 8.856560966345878e-06, "loss": 1.0999, "step": 3143 }, { "epoch": 0.24121836758180631, "grad_norm": 0.27550724148750305, "learning_rate": 8.855771970476834e-06, "loss": 1.0935, "step": 3144 }, { "epoch": 0.24129509098116442, "grad_norm": 0.18734005093574524, "learning_rate": 8.854982737660246e-06, "loss": 1.0969, "step": 3145 }, { "epoch": 0.2413718143805225, "grad_norm": 0.19519081711769104, "learning_rate": 8.85419326794461e-06, "loss": 1.1404, "step": 3146 }, { "epoch": 0.24144853777988057, "grad_norm": 0.2145737111568451, "learning_rate": 8.853403561378447e-06, "loss": 1.0602, "step": 3147 }, { "epoch": 0.24152526117923864, "grad_norm": 0.19059835374355316, "learning_rate": 8.852613618010281e-06, "loss": 1.1671, "step": 3148 }, { "epoch": 0.24160198457859672, "grad_norm": 0.22383108735084534, "learning_rate": 8.851823437888658e-06, "loss": 1.0768, "step": 3149 }, { "epoch": 0.24167870797795482, "grad_norm": 0.1660042703151703, "learning_rate": 8.851033021062136e-06, "loss": 1.1082, "step": 3150 }, { "epoch": 0.2417554313773129, "grad_norm": 0.2221231311559677, "learning_rate": 8.850242367579292e-06, "loss": 1.0961, "step": 3151 }, { "epoch": 0.24183215477667097, "grad_norm": 0.2067532241344452, "learning_rate": 8.849451477488708e-06, "loss": 1.1535, "step": 3152 }, { "epoch": 0.24190887817602905, "grad_norm": 0.19497857987880707, "learning_rate": 8.84866035083899e-06, "loss": 1.0944, "step": 3153 }, { "epoch": 0.24198560157538712, "grad_norm": 0.21094176173210144, "learning_rate": 8.847868987678754e-06, "loss": 1.0643, "step": 3154 }, { "epoch": 0.24206232497474522, "grad_norm": 0.21975024044513702, "learning_rate": 8.847077388056633e-06, "loss": 1.1071, "step": 3155 }, { "epoch": 0.2421390483741033, "grad_norm": 0.22429943084716797, "learning_rate": 8.84628555202127e-06, "loss": 1.1891, "step": 3156 }, { "epoch": 0.24221577177346137, "grad_norm": 0.17096877098083496, "learning_rate": 8.845493479621327e-06, "loss": 1.1685, "step": 3157 }, { "epoch": 0.24229249517281945, "grad_norm": 0.1955200433731079, "learning_rate": 8.844701170905478e-06, "loss": 1.1626, "step": 3158 }, { "epoch": 0.24236921857217752, "grad_norm": 0.17099334299564362, "learning_rate": 8.843908625922415e-06, "loss": 1.0407, "step": 3159 }, { "epoch": 0.24244594197153563, "grad_norm": 0.37946823239326477, "learning_rate": 8.843115844720838e-06, "loss": 1.0489, "step": 3160 }, { "epoch": 0.2425226653708937, "grad_norm": 0.17941956222057343, "learning_rate": 8.84232282734947e-06, "loss": 1.1111, "step": 3161 }, { "epoch": 0.24259938877025178, "grad_norm": 0.25202077627182007, "learning_rate": 8.841529573857042e-06, "loss": 1.076, "step": 3162 }, { "epoch": 0.24267611216960985, "grad_norm": 0.27420148253440857, "learning_rate": 8.840736084292299e-06, "loss": 1.1857, "step": 3163 }, { "epoch": 0.24275283556896796, "grad_norm": 0.23766924440860748, "learning_rate": 8.839942358704008e-06, "loss": 1.1605, "step": 3164 }, { "epoch": 0.24282955896832603, "grad_norm": 0.20421148836612701, "learning_rate": 8.839148397140943e-06, "loss": 1.1022, "step": 3165 }, { "epoch": 0.2429062823676841, "grad_norm": 0.2130688726902008, "learning_rate": 8.838354199651892e-06, "loss": 1.1603, "step": 3166 }, { "epoch": 0.24298300576704218, "grad_norm": 0.21191175282001495, "learning_rate": 8.837559766285667e-06, "loss": 1.0836, "step": 3167 }, { "epoch": 0.24305972916640026, "grad_norm": 0.19873195886611938, "learning_rate": 8.836765097091085e-06, "loss": 1.086, "step": 3168 }, { "epoch": 0.24313645256575836, "grad_norm": 0.17494037747383118, "learning_rate": 8.835970192116979e-06, "loss": 1.1678, "step": 3169 }, { "epoch": 0.24321317596511643, "grad_norm": 0.1886751502752304, "learning_rate": 8.8351750514122e-06, "loss": 1.1356, "step": 3170 }, { "epoch": 0.2432898993644745, "grad_norm": 0.24377325177192688, "learning_rate": 8.83437967502561e-06, "loss": 1.0815, "step": 3171 }, { "epoch": 0.24336662276383259, "grad_norm": 0.1898488700389862, "learning_rate": 8.833584063006088e-06, "loss": 1.112, "step": 3172 }, { "epoch": 0.24344334616319066, "grad_norm": 0.19579321146011353, "learning_rate": 8.832788215402527e-06, "loss": 1.1365, "step": 3173 }, { "epoch": 0.24352006956254876, "grad_norm": 0.2036602646112442, "learning_rate": 8.831992132263833e-06, "loss": 1.0549, "step": 3174 }, { "epoch": 0.24359679296190684, "grad_norm": 0.2041243612766266, "learning_rate": 8.831195813638928e-06, "loss": 1.1653, "step": 3175 }, { "epoch": 0.2436735163612649, "grad_norm": 0.1910407692193985, "learning_rate": 8.830399259576747e-06, "loss": 1.1783, "step": 3176 }, { "epoch": 0.243750239760623, "grad_norm": 0.2050318717956543, "learning_rate": 8.829602470126243e-06, "loss": 1.0779, "step": 3177 }, { "epoch": 0.24382696315998106, "grad_norm": 0.3043111562728882, "learning_rate": 8.828805445336379e-06, "loss": 1.1407, "step": 3178 }, { "epoch": 0.24390368655933917, "grad_norm": 0.24641147255897522, "learning_rate": 8.828008185256134e-06, "loss": 1.0581, "step": 3179 }, { "epoch": 0.24398040995869724, "grad_norm": 0.18456341326236725, "learning_rate": 8.827210689934502e-06, "loss": 1.1134, "step": 3180 }, { "epoch": 0.24405713335805532, "grad_norm": 0.6567121148109436, "learning_rate": 8.82641295942049e-06, "loss": 1.0892, "step": 3181 }, { "epoch": 0.2441338567574134, "grad_norm": 0.2244608998298645, "learning_rate": 8.825614993763124e-06, "loss": 1.1739, "step": 3182 }, { "epoch": 0.24421058015677147, "grad_norm": 0.22535578906536102, "learning_rate": 8.824816793011437e-06, "loss": 1.1312, "step": 3183 }, { "epoch": 0.24428730355612957, "grad_norm": 0.19180002808570862, "learning_rate": 8.824018357214483e-06, "loss": 1.1751, "step": 3184 }, { "epoch": 0.24436402695548765, "grad_norm": 0.22173650562763214, "learning_rate": 8.823219686421329e-06, "loss": 1.177, "step": 3185 }, { "epoch": 0.24444075035484572, "grad_norm": 0.18165314197540283, "learning_rate": 8.822420780681053e-06, "loss": 1.1329, "step": 3186 }, { "epoch": 0.2445174737542038, "grad_norm": 0.2609531879425049, "learning_rate": 8.821621640042753e-06, "loss": 1.1324, "step": 3187 }, { "epoch": 0.24459419715356187, "grad_norm": 0.24408814311027527, "learning_rate": 8.820822264555536e-06, "loss": 1.1188, "step": 3188 }, { "epoch": 0.24467092055291997, "grad_norm": 0.17565621435642242, "learning_rate": 8.820022654268525e-06, "loss": 1.2197, "step": 3189 }, { "epoch": 0.24474764395227805, "grad_norm": 0.6501249074935913, "learning_rate": 8.81922280923086e-06, "loss": 1.1261, "step": 3190 }, { "epoch": 0.24482436735163612, "grad_norm": 0.18881413340568542, "learning_rate": 8.818422729491693e-06, "loss": 1.0694, "step": 3191 }, { "epoch": 0.2449010907509942, "grad_norm": 0.18086007237434387, "learning_rate": 8.817622415100194e-06, "loss": 1.1731, "step": 3192 }, { "epoch": 0.2449778141503523, "grad_norm": 0.22387464344501495, "learning_rate": 8.816821866105538e-06, "loss": 1.0854, "step": 3193 }, { "epoch": 0.24505453754971038, "grad_norm": 0.2392778843641281, "learning_rate": 8.816021082556924e-06, "loss": 1.0987, "step": 3194 }, { "epoch": 0.24513126094906845, "grad_norm": 0.267693430185318, "learning_rate": 8.815220064503564e-06, "loss": 1.1007, "step": 3195 }, { "epoch": 0.24520798434842653, "grad_norm": 0.22108355164527893, "learning_rate": 8.814418811994683e-06, "loss": 1.0736, "step": 3196 }, { "epoch": 0.2452847077477846, "grad_norm": 0.18170516192913055, "learning_rate": 8.813617325079518e-06, "loss": 1.1345, "step": 3197 }, { "epoch": 0.2453614311471427, "grad_norm": 0.20258097350597382, "learning_rate": 8.81281560380732e-06, "loss": 1.1155, "step": 3198 }, { "epoch": 0.24543815454650078, "grad_norm": 0.23557636141777039, "learning_rate": 8.812013648227364e-06, "loss": 1.0958, "step": 3199 }, { "epoch": 0.24551487794585886, "grad_norm": 0.19822072982788086, "learning_rate": 8.811211458388926e-06, "loss": 1.1175, "step": 3200 }, { "epoch": 0.24559160134521693, "grad_norm": 0.22470001876354218, "learning_rate": 8.810409034341307e-06, "loss": 1.1257, "step": 3201 }, { "epoch": 0.245668324744575, "grad_norm": 1.5102607011795044, "learning_rate": 8.809606376133814e-06, "loss": 1.1329, "step": 3202 }, { "epoch": 0.2457450481439331, "grad_norm": 0.20421001315116882, "learning_rate": 8.808803483815778e-06, "loss": 1.1546, "step": 3203 }, { "epoch": 0.24582177154329118, "grad_norm": 0.21830229461193085, "learning_rate": 8.808000357436533e-06, "loss": 1.1102, "step": 3204 }, { "epoch": 0.24589849494264926, "grad_norm": 0.6162480115890503, "learning_rate": 8.807196997045437e-06, "loss": 1.0391, "step": 3205 }, { "epoch": 0.24597521834200733, "grad_norm": 0.24848900735378265, "learning_rate": 8.806393402691855e-06, "loss": 1.0988, "step": 3206 }, { "epoch": 0.2460519417413654, "grad_norm": 0.27537965774536133, "learning_rate": 8.805589574425175e-06, "loss": 1.1428, "step": 3207 }, { "epoch": 0.2461286651407235, "grad_norm": 0.24111807346343994, "learning_rate": 8.804785512294791e-06, "loss": 1.1197, "step": 3208 }, { "epoch": 0.2462053885400816, "grad_norm": 0.17649443447589874, "learning_rate": 8.803981216350116e-06, "loss": 1.1292, "step": 3209 }, { "epoch": 0.24628211193943966, "grad_norm": 0.4645230770111084, "learning_rate": 8.803176686640577e-06, "loss": 1.1877, "step": 3210 }, { "epoch": 0.24635883533879774, "grad_norm": 0.2177266776561737, "learning_rate": 8.802371923215612e-06, "loss": 1.1327, "step": 3211 }, { "epoch": 0.2464355587381558, "grad_norm": 0.16037562489509583, "learning_rate": 8.801566926124678e-06, "loss": 1.0507, "step": 3212 }, { "epoch": 0.24651228213751392, "grad_norm": 0.2730584442615509, "learning_rate": 8.800761695417244e-06, "loss": 1.1176, "step": 3213 }, { "epoch": 0.246589005536872, "grad_norm": 0.20598027110099792, "learning_rate": 8.799956231142793e-06, "loss": 1.0642, "step": 3214 }, { "epoch": 0.24666572893623007, "grad_norm": 0.17660798132419586, "learning_rate": 8.799150533350823e-06, "loss": 1.0718, "step": 3215 }, { "epoch": 0.24674245233558814, "grad_norm": 0.2360464185476303, "learning_rate": 8.798344602090845e-06, "loss": 1.0818, "step": 3216 }, { "epoch": 0.24681917573494622, "grad_norm": 0.18559563159942627, "learning_rate": 8.797538437412387e-06, "loss": 1.0575, "step": 3217 }, { "epoch": 0.24689589913430432, "grad_norm": 0.30193257331848145, "learning_rate": 8.796732039364991e-06, "loss": 1.1635, "step": 3218 }, { "epoch": 0.2469726225336624, "grad_norm": 0.2257184535264969, "learning_rate": 8.795925407998212e-06, "loss": 1.1196, "step": 3219 }, { "epoch": 0.24704934593302047, "grad_norm": 0.20661139488220215, "learning_rate": 8.795118543361618e-06, "loss": 1.1513, "step": 3220 }, { "epoch": 0.24712606933237855, "grad_norm": 0.21776294708251953, "learning_rate": 8.794311445504793e-06, "loss": 1.1091, "step": 3221 }, { "epoch": 0.24720279273173662, "grad_norm": 0.20314618945121765, "learning_rate": 8.793504114477339e-06, "loss": 1.0895, "step": 3222 }, { "epoch": 0.24727951613109472, "grad_norm": 0.4282088875770569, "learning_rate": 8.792696550328863e-06, "loss": 1.1618, "step": 3223 }, { "epoch": 0.2473562395304528, "grad_norm": 0.2550133168697357, "learning_rate": 8.791888753108998e-06, "loss": 1.152, "step": 3224 }, { "epoch": 0.24743296292981087, "grad_norm": 0.19113492965698242, "learning_rate": 8.791080722867378e-06, "loss": 1.1554, "step": 3225 }, { "epoch": 0.24750968632916895, "grad_norm": 0.17313116788864136, "learning_rate": 8.790272459653665e-06, "loss": 1.1732, "step": 3226 }, { "epoch": 0.24758640972852705, "grad_norm": 0.19492283463478088, "learning_rate": 8.789463963517528e-06, "loss": 1.1096, "step": 3227 }, { "epoch": 0.24766313312788513, "grad_norm": 1.1970646381378174, "learning_rate": 8.78865523450865e-06, "loss": 1.1568, "step": 3228 }, { "epoch": 0.2477398565272432, "grad_norm": 0.3001539707183838, "learning_rate": 8.787846272676728e-06, "loss": 1.1136, "step": 3229 }, { "epoch": 0.24781657992660128, "grad_norm": 0.18150702118873596, "learning_rate": 8.78703707807148e-06, "loss": 1.1386, "step": 3230 }, { "epoch": 0.24789330332595935, "grad_norm": 0.19266773760318756, "learning_rate": 8.786227650742624e-06, "loss": 1.0351, "step": 3231 }, { "epoch": 0.24797002672531746, "grad_norm": 0.099181167781353, "learning_rate": 8.785417990739912e-06, "loss": 1.1697, "step": 3232 }, { "epoch": 0.24804675012467553, "grad_norm": 0.21634741127490997, "learning_rate": 8.784608098113093e-06, "loss": 1.097, "step": 3233 }, { "epoch": 0.2481234735240336, "grad_norm": 0.18054254353046417, "learning_rate": 8.783797972911939e-06, "loss": 1.1405, "step": 3234 }, { "epoch": 0.24820019692339168, "grad_norm": 0.23342373967170715, "learning_rate": 8.782987615186236e-06, "loss": 0.9919, "step": 3235 }, { "epoch": 0.24827692032274976, "grad_norm": 0.23054271936416626, "learning_rate": 8.782177024985779e-06, "loss": 1.1052, "step": 3236 }, { "epoch": 0.24835364372210786, "grad_norm": 0.2110915184020996, "learning_rate": 8.781366202360385e-06, "loss": 1.1074, "step": 3237 }, { "epoch": 0.24843036712146593, "grad_norm": 0.2973988950252533, "learning_rate": 8.780555147359877e-06, "loss": 1.0908, "step": 3238 }, { "epoch": 0.248507090520824, "grad_norm": 0.25516605377197266, "learning_rate": 8.7797438600341e-06, "loss": 1.0856, "step": 3239 }, { "epoch": 0.24858381392018208, "grad_norm": 0.24669675529003143, "learning_rate": 8.778932340432908e-06, "loss": 1.1125, "step": 3240 }, { "epoch": 0.24866053731954016, "grad_norm": 0.1957927942276001, "learning_rate": 8.778120588606174e-06, "loss": 1.1345, "step": 3241 }, { "epoch": 0.24873726071889826, "grad_norm": 0.1919257789850235, "learning_rate": 8.777308604603776e-06, "loss": 1.1252, "step": 3242 }, { "epoch": 0.24881398411825634, "grad_norm": 0.1962677240371704, "learning_rate": 8.77649638847562e-06, "loss": 1.1507, "step": 3243 }, { "epoch": 0.2488907075176144, "grad_norm": 0.19177086651325226, "learning_rate": 8.775683940271615e-06, "loss": 1.1168, "step": 3244 }, { "epoch": 0.2489674309169725, "grad_norm": 0.19750574231147766, "learning_rate": 8.774871260041687e-06, "loss": 1.0933, "step": 3245 }, { "epoch": 0.24904415431633056, "grad_norm": 0.2562953233718872, "learning_rate": 8.774058347835779e-06, "loss": 1.1043, "step": 3246 }, { "epoch": 0.24912087771568867, "grad_norm": 0.19733823835849762, "learning_rate": 8.773245203703847e-06, "loss": 1.1277, "step": 3247 }, { "epoch": 0.24919760111504674, "grad_norm": 0.21290941536426544, "learning_rate": 8.772431827695862e-06, "loss": 1.1271, "step": 3248 }, { "epoch": 0.24927432451440482, "grad_norm": 0.2525458037853241, "learning_rate": 8.771618219861804e-06, "loss": 1.0718, "step": 3249 }, { "epoch": 0.2493510479137629, "grad_norm": 0.21535232663154602, "learning_rate": 8.770804380251676e-06, "loss": 1.0594, "step": 3250 }, { "epoch": 0.24942777131312097, "grad_norm": 0.24293218553066254, "learning_rate": 8.769990308915487e-06, "loss": 1.0857, "step": 3251 }, { "epoch": 0.24950449471247907, "grad_norm": 0.21301446855068207, "learning_rate": 8.769176005903266e-06, "loss": 1.0701, "step": 3252 }, { "epoch": 0.24958121811183714, "grad_norm": 0.25481218099594116, "learning_rate": 8.768361471265055e-06, "loss": 1.0694, "step": 3253 }, { "epoch": 0.24965794151119522, "grad_norm": 0.2322929948568344, "learning_rate": 8.767546705050906e-06, "loss": 1.1301, "step": 3254 }, { "epoch": 0.2497346649105533, "grad_norm": 0.17745572328567505, "learning_rate": 8.76673170731089e-06, "loss": 1.1706, "step": 3255 }, { "epoch": 0.2498113883099114, "grad_norm": 0.20972751080989838, "learning_rate": 8.765916478095093e-06, "loss": 1.166, "step": 3256 }, { "epoch": 0.24988811170926947, "grad_norm": 0.1959049105644226, "learning_rate": 8.765101017453611e-06, "loss": 1.0677, "step": 3257 }, { "epoch": 0.24996483510862755, "grad_norm": 0.22172142565250397, "learning_rate": 8.764285325436558e-06, "loss": 1.1033, "step": 3258 }, { "epoch": 0.25004155850798565, "grad_norm": 0.09906110912561417, "learning_rate": 8.763469402094055e-06, "loss": 1.1302, "step": 3259 }, { "epoch": 0.2501182819073437, "grad_norm": 0.22015102207660675, "learning_rate": 8.762653247476249e-06, "loss": 1.1324, "step": 3260 }, { "epoch": 0.2501950053067018, "grad_norm": 0.17378845810890198, "learning_rate": 8.761836861633295e-06, "loss": 1.1536, "step": 3261 }, { "epoch": 0.25027172870605985, "grad_norm": 0.1901717334985733, "learning_rate": 8.761020244615357e-06, "loss": 1.1639, "step": 3262 }, { "epoch": 0.25034845210541795, "grad_norm": 0.1838688850402832, "learning_rate": 8.76020339647262e-06, "loss": 1.1351, "step": 3263 }, { "epoch": 0.25042517550477605, "grad_norm": 0.24151207506656647, "learning_rate": 8.759386317255285e-06, "loss": 1.0885, "step": 3264 }, { "epoch": 0.2505018989041341, "grad_norm": 0.2057626098394394, "learning_rate": 8.75856900701356e-06, "loss": 1.0907, "step": 3265 }, { "epoch": 0.2505786223034922, "grad_norm": 0.21049009263515472, "learning_rate": 8.757751465797672e-06, "loss": 1.083, "step": 3266 }, { "epoch": 0.25065534570285025, "grad_norm": 0.39579975605010986, "learning_rate": 8.756933693657863e-06, "loss": 1.0522, "step": 3267 }, { "epoch": 0.25073206910220835, "grad_norm": 0.4709335267543793, "learning_rate": 8.756115690644384e-06, "loss": 1.1121, "step": 3268 }, { "epoch": 0.25080879250156646, "grad_norm": 0.21663713455200195, "learning_rate": 8.755297456807505e-06, "loss": 1.1851, "step": 3269 }, { "epoch": 0.2508855159009245, "grad_norm": 0.1948162317276001, "learning_rate": 8.75447899219751e-06, "loss": 1.0784, "step": 3270 }, { "epoch": 0.2509622393002826, "grad_norm": 0.27103570103645325, "learning_rate": 8.753660296864692e-06, "loss": 1.1528, "step": 3271 }, { "epoch": 0.25103896269964066, "grad_norm": 0.21837736666202545, "learning_rate": 8.752841370859367e-06, "loss": 1.0881, "step": 3272 }, { "epoch": 0.25111568609899876, "grad_norm": 0.28205519914627075, "learning_rate": 8.752022214231856e-06, "loss": 1.1102, "step": 3273 }, { "epoch": 0.25119240949835686, "grad_norm": 0.17861595749855042, "learning_rate": 8.751202827032501e-06, "loss": 1.118, "step": 3274 }, { "epoch": 0.2512691328977149, "grad_norm": 0.2804919183254242, "learning_rate": 8.750383209311655e-06, "loss": 1.1108, "step": 3275 }, { "epoch": 0.251345856297073, "grad_norm": 0.21565356850624084, "learning_rate": 8.749563361119685e-06, "loss": 1.2056, "step": 3276 }, { "epoch": 0.25142257969643106, "grad_norm": 0.19185201823711395, "learning_rate": 8.748743282506974e-06, "loss": 1.0773, "step": 3277 }, { "epoch": 0.25149930309578916, "grad_norm": 0.23424126207828522, "learning_rate": 8.747922973523918e-06, "loss": 1.1302, "step": 3278 }, { "epoch": 0.25157602649514726, "grad_norm": 0.20103546977043152, "learning_rate": 8.747102434220924e-06, "loss": 1.0845, "step": 3279 }, { "epoch": 0.2516527498945053, "grad_norm": 0.26706212759017944, "learning_rate": 8.746281664648422e-06, "loss": 1.0918, "step": 3280 }, { "epoch": 0.2517294732938634, "grad_norm": 0.21937629580497742, "learning_rate": 8.745460664856845e-06, "loss": 1.1178, "step": 3281 }, { "epoch": 0.25180619669322146, "grad_norm": 0.7036359310150146, "learning_rate": 8.744639434896649e-06, "loss": 1.094, "step": 3282 }, { "epoch": 0.25188292009257957, "grad_norm": 0.22693763673305511, "learning_rate": 8.7438179748183e-06, "loss": 1.0421, "step": 3283 }, { "epoch": 0.25195964349193767, "grad_norm": 0.23236829042434692, "learning_rate": 8.742996284672278e-06, "loss": 1.0776, "step": 3284 }, { "epoch": 0.2520363668912957, "grad_norm": 0.20519934594631195, "learning_rate": 8.742174364509077e-06, "loss": 1.0879, "step": 3285 }, { "epoch": 0.2521130902906538, "grad_norm": 0.24776625633239746, "learning_rate": 8.74135221437921e-06, "loss": 1.0959, "step": 3286 }, { "epoch": 0.25218981369001187, "grad_norm": 0.24181565642356873, "learning_rate": 8.7405298343332e-06, "loss": 1.1255, "step": 3287 }, { "epoch": 0.25226653708936997, "grad_norm": 0.17270499467849731, "learning_rate": 8.73970722442158e-06, "loss": 1.0753, "step": 3288 }, { "epoch": 0.25234326048872807, "grad_norm": 0.22728776931762695, "learning_rate": 8.738884384694905e-06, "loss": 1.1752, "step": 3289 }, { "epoch": 0.2524199838880861, "grad_norm": 0.1920216977596283, "learning_rate": 8.73806131520374e-06, "loss": 1.1824, "step": 3290 }, { "epoch": 0.2524967072874442, "grad_norm": 0.10134924948215485, "learning_rate": 8.737238015998667e-06, "loss": 1.1731, "step": 3291 }, { "epoch": 0.25257343068680227, "grad_norm": 0.2328861802816391, "learning_rate": 8.736414487130276e-06, "loss": 1.1787, "step": 3292 }, { "epoch": 0.2526501540861604, "grad_norm": 0.20750951766967773, "learning_rate": 8.735590728649177e-06, "loss": 1.0555, "step": 3293 }, { "epoch": 0.2527268774855185, "grad_norm": 0.266832560300827, "learning_rate": 8.734766740605992e-06, "loss": 1.0325, "step": 3294 }, { "epoch": 0.2528036008848765, "grad_norm": 0.2024351805448532, "learning_rate": 8.733942523051359e-06, "loss": 1.1386, "step": 3295 }, { "epoch": 0.2528803242842346, "grad_norm": 0.22732795774936676, "learning_rate": 8.733118076035925e-06, "loss": 1.1477, "step": 3296 }, { "epoch": 0.25295704768359273, "grad_norm": 0.20652078092098236, "learning_rate": 8.732293399610356e-06, "loss": 1.1627, "step": 3297 }, { "epoch": 0.2530337710829508, "grad_norm": 0.2727278769016266, "learning_rate": 8.731468493825333e-06, "loss": 1.1062, "step": 3298 }, { "epoch": 0.2531104944823089, "grad_norm": 0.2723252773284912, "learning_rate": 8.730643358731545e-06, "loss": 1.191, "step": 3299 }, { "epoch": 0.2531872178816669, "grad_norm": 0.25610458850860596, "learning_rate": 8.729817994379699e-06, "loss": 1.1023, "step": 3300 }, { "epoch": 0.25326394128102503, "grad_norm": 0.22378580272197723, "learning_rate": 8.72899240082052e-06, "loss": 1.1449, "step": 3301 }, { "epoch": 0.25334066468038313, "grad_norm": 0.2419857382774353, "learning_rate": 8.728166578104738e-06, "loss": 1.1152, "step": 3302 }, { "epoch": 0.2534173880797412, "grad_norm": 0.24730418622493744, "learning_rate": 8.727340526283103e-06, "loss": 1.0797, "step": 3303 }, { "epoch": 0.2534941114790993, "grad_norm": 0.2979305684566498, "learning_rate": 8.726514245406382e-06, "loss": 1.1085, "step": 3304 }, { "epoch": 0.25357083487845733, "grad_norm": 0.2160532921552658, "learning_rate": 8.725687735525347e-06, "loss": 1.0992, "step": 3305 }, { "epoch": 0.25364755827781543, "grad_norm": 0.19564171135425568, "learning_rate": 8.724860996690793e-06, "loss": 1.1748, "step": 3306 }, { "epoch": 0.25372428167717354, "grad_norm": 0.30569711327552795, "learning_rate": 8.724034028953524e-06, "loss": 1.051, "step": 3307 }, { "epoch": 0.2538010050765316, "grad_norm": 0.19476351141929626, "learning_rate": 8.723206832364359e-06, "loss": 1.0928, "step": 3308 }, { "epoch": 0.2538777284758897, "grad_norm": 0.2208857387304306, "learning_rate": 8.722379406974133e-06, "loss": 1.1227, "step": 3309 }, { "epoch": 0.25395445187524773, "grad_norm": 0.26436251401901245, "learning_rate": 8.72155175283369e-06, "loss": 1.0905, "step": 3310 }, { "epoch": 0.25403117527460584, "grad_norm": 0.21279166638851166, "learning_rate": 8.720723869993895e-06, "loss": 1.1128, "step": 3311 }, { "epoch": 0.25410789867396394, "grad_norm": 0.21293118596076965, "learning_rate": 8.719895758505623e-06, "loss": 1.0511, "step": 3312 }, { "epoch": 0.254184622073322, "grad_norm": 0.24847905337810516, "learning_rate": 8.719067418419762e-06, "loss": 1.153, "step": 3313 }, { "epoch": 0.2542613454726801, "grad_norm": 0.2710203230381012, "learning_rate": 8.718238849787218e-06, "loss": 1.1829, "step": 3314 }, { "epoch": 0.25433806887203814, "grad_norm": 0.22210325300693512, "learning_rate": 8.717410052658908e-06, "loss": 1.1283, "step": 3315 }, { "epoch": 0.25441479227139624, "grad_norm": 0.20681370794773102, "learning_rate": 8.716581027085763e-06, "loss": 1.1143, "step": 3316 }, { "epoch": 0.25449151567075434, "grad_norm": 0.1719503551721573, "learning_rate": 8.715751773118728e-06, "loss": 1.0845, "step": 3317 }, { "epoch": 0.2545682390701124, "grad_norm": 0.190375417470932, "learning_rate": 8.714922290808766e-06, "loss": 1.15, "step": 3318 }, { "epoch": 0.2546449624694705, "grad_norm": 0.17763078212738037, "learning_rate": 8.714092580206847e-06, "loss": 1.0807, "step": 3319 }, { "epoch": 0.25472168586882854, "grad_norm": 0.1913878619670868, "learning_rate": 8.713262641363964e-06, "loss": 1.1582, "step": 3320 }, { "epoch": 0.25479840926818664, "grad_norm": 0.28442931175231934, "learning_rate": 8.712432474331114e-06, "loss": 1.0254, "step": 3321 }, { "epoch": 0.25487513266754475, "grad_norm": 0.20785211026668549, "learning_rate": 8.711602079159315e-06, "loss": 1.1691, "step": 3322 }, { "epoch": 0.2549518560669028, "grad_norm": 0.20048221945762634, "learning_rate": 8.710771455899599e-06, "loss": 1.147, "step": 3323 }, { "epoch": 0.2550285794662609, "grad_norm": 0.5105294585227966, "learning_rate": 8.709940604603006e-06, "loss": 1.0715, "step": 3324 }, { "epoch": 0.25510530286561894, "grad_norm": 0.20909398794174194, "learning_rate": 8.709109525320598e-06, "loss": 1.0531, "step": 3325 }, { "epoch": 0.25518202626497705, "grad_norm": 0.20408642292022705, "learning_rate": 8.708278218103443e-06, "loss": 1.0526, "step": 3326 }, { "epoch": 0.25525874966433515, "grad_norm": 0.20903760194778442, "learning_rate": 8.707446683002632e-06, "loss": 1.1268, "step": 3327 }, { "epoch": 0.2553354730636932, "grad_norm": 0.3153960704803467, "learning_rate": 8.70661492006926e-06, "loss": 1.1382, "step": 3328 }, { "epoch": 0.2554121964630513, "grad_norm": 0.4486374855041504, "learning_rate": 8.705782929354443e-06, "loss": 1.0947, "step": 3329 }, { "epoch": 0.25548891986240935, "grad_norm": 0.19049833714962006, "learning_rate": 8.704950710909312e-06, "loss": 1.0757, "step": 3330 }, { "epoch": 0.25556564326176745, "grad_norm": 0.31807395815849304, "learning_rate": 8.704118264785004e-06, "loss": 1.1714, "step": 3331 }, { "epoch": 0.25564236666112555, "grad_norm": 0.18672670423984528, "learning_rate": 8.703285591032678e-06, "loss": 1.1074, "step": 3332 }, { "epoch": 0.2557190900604836, "grad_norm": 0.20020517706871033, "learning_rate": 8.702452689703505e-06, "loss": 1.1341, "step": 3333 }, { "epoch": 0.2557958134598417, "grad_norm": 0.26701006293296814, "learning_rate": 8.701619560848667e-06, "loss": 1.0765, "step": 3334 }, { "epoch": 0.25587253685919975, "grad_norm": 0.17117957770824432, "learning_rate": 8.700786204519363e-06, "loss": 1.1227, "step": 3335 }, { "epoch": 0.25594926025855785, "grad_norm": 0.22922687232494354, "learning_rate": 8.699952620766803e-06, "loss": 1.1769, "step": 3336 }, { "epoch": 0.25602598365791596, "grad_norm": 0.18570436537265778, "learning_rate": 8.699118809642215e-06, "loss": 1.1404, "step": 3337 }, { "epoch": 0.256102707057274, "grad_norm": 0.213525652885437, "learning_rate": 8.69828477119684e-06, "loss": 1.1541, "step": 3338 }, { "epoch": 0.2561794304566321, "grad_norm": 0.24073438346385956, "learning_rate": 8.697450505481928e-06, "loss": 1.1526, "step": 3339 }, { "epoch": 0.25625615385599015, "grad_norm": 0.1003086194396019, "learning_rate": 8.69661601254875e-06, "loss": 1.2608, "step": 3340 }, { "epoch": 0.25633287725534826, "grad_norm": 0.2863501012325287, "learning_rate": 8.695781292448588e-06, "loss": 1.1146, "step": 3341 }, { "epoch": 0.25640960065470636, "grad_norm": 0.23541176319122314, "learning_rate": 8.694946345232738e-06, "loss": 1.1485, "step": 3342 }, { "epoch": 0.2564863240540644, "grad_norm": 0.1750400960445404, "learning_rate": 8.694111170952508e-06, "loss": 1.0892, "step": 3343 }, { "epoch": 0.2565630474534225, "grad_norm": 0.20999406278133392, "learning_rate": 8.69327576965922e-06, "loss": 1.13, "step": 3344 }, { "epoch": 0.25663977085278056, "grad_norm": 0.2904253304004669, "learning_rate": 8.692440141404217e-06, "loss": 1.2026, "step": 3345 }, { "epoch": 0.25671649425213866, "grad_norm": 0.20894771814346313, "learning_rate": 8.691604286238846e-06, "loss": 1.0946, "step": 3346 }, { "epoch": 0.25679321765149676, "grad_norm": 0.18068401515483856, "learning_rate": 8.690768204214474e-06, "loss": 1.0987, "step": 3347 }, { "epoch": 0.2568699410508548, "grad_norm": 0.27155613899230957, "learning_rate": 8.689931895382482e-06, "loss": 1.0778, "step": 3348 }, { "epoch": 0.2569466644502129, "grad_norm": 0.20225955545902252, "learning_rate": 8.68909535979426e-06, "loss": 1.0697, "step": 3349 }, { "epoch": 0.25702338784957096, "grad_norm": 0.39212116599082947, "learning_rate": 8.68825859750122e-06, "loss": 1.0863, "step": 3350 }, { "epoch": 0.25710011124892906, "grad_norm": 0.20426258444786072, "learning_rate": 8.687421608554778e-06, "loss": 1.1716, "step": 3351 }, { "epoch": 0.25717683464828717, "grad_norm": 0.19010023772716522, "learning_rate": 8.686584393006374e-06, "loss": 1.1242, "step": 3352 }, { "epoch": 0.2572535580476452, "grad_norm": 0.25909584760665894, "learning_rate": 8.685746950907455e-06, "loss": 1.1428, "step": 3353 }, { "epoch": 0.2573302814470033, "grad_norm": 0.21726197004318237, "learning_rate": 8.684909282309483e-06, "loss": 1.13, "step": 3354 }, { "epoch": 0.2574070048463614, "grad_norm": 0.17564094066619873, "learning_rate": 8.684071387263937e-06, "loss": 1.1395, "step": 3355 }, { "epoch": 0.25748372824571947, "grad_norm": 0.1933431178331375, "learning_rate": 8.683233265822306e-06, "loss": 1.1301, "step": 3356 }, { "epoch": 0.25756045164507757, "grad_norm": 0.4115966856479645, "learning_rate": 8.682394918036096e-06, "loss": 1.0635, "step": 3357 }, { "epoch": 0.2576371750444356, "grad_norm": 0.19789892435073853, "learning_rate": 8.681556343956827e-06, "loss": 1.1033, "step": 3358 }, { "epoch": 0.2577138984437937, "grad_norm": 0.23925887048244476, "learning_rate": 8.68071754363603e-06, "loss": 1.1216, "step": 3359 }, { "epoch": 0.2577906218431518, "grad_norm": 0.17441168427467346, "learning_rate": 8.67987851712525e-06, "loss": 1.1236, "step": 3360 }, { "epoch": 0.25786734524250987, "grad_norm": 0.23978014290332794, "learning_rate": 8.67903926447605e-06, "loss": 1.0891, "step": 3361 }, { "epoch": 0.257944068641868, "grad_norm": 0.2605348825454712, "learning_rate": 8.678199785740003e-06, "loss": 1.1337, "step": 3362 }, { "epoch": 0.258020792041226, "grad_norm": 0.18448422849178314, "learning_rate": 8.6773600809687e-06, "loss": 1.0669, "step": 3363 }, { "epoch": 0.2580975154405841, "grad_norm": 0.2787679135799408, "learning_rate": 8.676520150213738e-06, "loss": 1.045, "step": 3364 }, { "epoch": 0.2581742388399422, "grad_norm": 0.18429240584373474, "learning_rate": 8.675679993526738e-06, "loss": 1.097, "step": 3365 }, { "epoch": 0.2582509622393003, "grad_norm": 0.2729732394218445, "learning_rate": 8.674839610959327e-06, "loss": 1.0769, "step": 3366 }, { "epoch": 0.2583276856386584, "grad_norm": 0.20149414241313934, "learning_rate": 8.673999002563148e-06, "loss": 1.0846, "step": 3367 }, { "epoch": 0.2584044090380164, "grad_norm": 0.2353196144104004, "learning_rate": 8.673158168389862e-06, "loss": 1.1105, "step": 3368 }, { "epoch": 0.25848113243737453, "grad_norm": 0.20244042575359344, "learning_rate": 8.672317108491137e-06, "loss": 1.0943, "step": 3369 }, { "epoch": 0.25855785583673263, "grad_norm": 0.22122590243816376, "learning_rate": 8.671475822918663e-06, "loss": 1.1459, "step": 3370 }, { "epoch": 0.2586345792360907, "grad_norm": 0.32661914825439453, "learning_rate": 8.670634311724133e-06, "loss": 1.1709, "step": 3371 }, { "epoch": 0.2587113026354488, "grad_norm": 0.19641704857349396, "learning_rate": 8.669792574959266e-06, "loss": 1.1312, "step": 3372 }, { "epoch": 0.25878802603480683, "grad_norm": 0.24381455779075623, "learning_rate": 8.668950612675784e-06, "loss": 1.0411, "step": 3373 }, { "epoch": 0.25886474943416493, "grad_norm": 0.32283738255500793, "learning_rate": 8.668108424925433e-06, "loss": 1.0511, "step": 3374 }, { "epoch": 0.25894147283352303, "grad_norm": 0.2331642359495163, "learning_rate": 8.667266011759963e-06, "loss": 1.1012, "step": 3375 }, { "epoch": 0.2590181962328811, "grad_norm": 0.4206712543964386, "learning_rate": 8.666423373231145e-06, "loss": 1.1157, "step": 3376 }, { "epoch": 0.2590949196322392, "grad_norm": 0.24915845692157745, "learning_rate": 8.66558050939076e-06, "loss": 1.0583, "step": 3377 }, { "epoch": 0.25917164303159723, "grad_norm": 0.20670805871486664, "learning_rate": 8.664737420290606e-06, "loss": 1.0688, "step": 3378 }, { "epoch": 0.25924836643095533, "grad_norm": 0.21986325085163116, "learning_rate": 8.663894105982492e-06, "loss": 1.1313, "step": 3379 }, { "epoch": 0.25932508983031344, "grad_norm": 0.1972992867231369, "learning_rate": 8.663050566518242e-06, "loss": 1.0951, "step": 3380 }, { "epoch": 0.2594018132296715, "grad_norm": 0.19920827448368073, "learning_rate": 8.662206801949694e-06, "loss": 1.1422, "step": 3381 }, { "epoch": 0.2594785366290296, "grad_norm": 0.2125539779663086, "learning_rate": 8.661362812328702e-06, "loss": 1.0938, "step": 3382 }, { "epoch": 0.25955526002838764, "grad_norm": 0.26222530007362366, "learning_rate": 8.660518597707126e-06, "loss": 1.0609, "step": 3383 }, { "epoch": 0.25963198342774574, "grad_norm": 0.760109543800354, "learning_rate": 8.65967415813685e-06, "loss": 1.0925, "step": 3384 }, { "epoch": 0.25970870682710384, "grad_norm": 0.20076289772987366, "learning_rate": 8.658829493669762e-06, "loss": 1.0983, "step": 3385 }, { "epoch": 0.2597854302264619, "grad_norm": 0.2145748734474182, "learning_rate": 8.657984604357776e-06, "loss": 1.0861, "step": 3386 }, { "epoch": 0.25986215362582, "grad_norm": 0.4595610499382019, "learning_rate": 8.657139490252808e-06, "loss": 1.1142, "step": 3387 }, { "epoch": 0.25993887702517804, "grad_norm": 0.19164417684078217, "learning_rate": 8.656294151406793e-06, "loss": 1.121, "step": 3388 }, { "epoch": 0.26001560042453614, "grad_norm": 0.2114851176738739, "learning_rate": 8.65544858787168e-06, "loss": 1.0972, "step": 3389 }, { "epoch": 0.26009232382389424, "grad_norm": 0.21160754561424255, "learning_rate": 8.65460279969943e-06, "loss": 1.1315, "step": 3390 }, { "epoch": 0.2601690472232523, "grad_norm": 0.2819449305534363, "learning_rate": 8.65375678694202e-06, "loss": 1.0761, "step": 3391 }, { "epoch": 0.2602457706226104, "grad_norm": 0.20370326936244965, "learning_rate": 8.65291054965144e-06, "loss": 1.1555, "step": 3392 }, { "epoch": 0.26032249402196844, "grad_norm": 0.21287685632705688, "learning_rate": 8.652064087879695e-06, "loss": 1.1257, "step": 3393 }, { "epoch": 0.26039921742132655, "grad_norm": 0.19752712547779083, "learning_rate": 8.651217401678798e-06, "loss": 1.1708, "step": 3394 }, { "epoch": 0.26047594082068465, "grad_norm": 0.21133394539356232, "learning_rate": 8.650370491100784e-06, "loss": 1.0741, "step": 3395 }, { "epoch": 0.2605526642200427, "grad_norm": 0.18718703091144562, "learning_rate": 8.649523356197697e-06, "loss": 1.0484, "step": 3396 }, { "epoch": 0.2606293876194008, "grad_norm": 0.1895519495010376, "learning_rate": 8.648675997021593e-06, "loss": 1.1044, "step": 3397 }, { "epoch": 0.26070611101875885, "grad_norm": 0.20367921888828278, "learning_rate": 8.64782841362455e-06, "loss": 1.1568, "step": 3398 }, { "epoch": 0.26078283441811695, "grad_norm": 0.19467343389987946, "learning_rate": 8.64698060605865e-06, "loss": 1.1405, "step": 3399 }, { "epoch": 0.26085955781747505, "grad_norm": 0.1942833811044693, "learning_rate": 8.646132574375994e-06, "loss": 1.1242, "step": 3400 }, { "epoch": 0.2609362812168331, "grad_norm": 0.27998673915863037, "learning_rate": 8.645284318628697e-06, "loss": 1.1028, "step": 3401 }, { "epoch": 0.2610130046161912, "grad_norm": 0.2241089940071106, "learning_rate": 8.644435838868883e-06, "loss": 1.102, "step": 3402 }, { "epoch": 0.26108972801554925, "grad_norm": 0.20882967114448547, "learning_rate": 8.643587135148698e-06, "loss": 1.0964, "step": 3403 }, { "epoch": 0.26116645141490735, "grad_norm": 0.1934651881456375, "learning_rate": 8.642738207520294e-06, "loss": 1.1088, "step": 3404 }, { "epoch": 0.26124317481426546, "grad_norm": 0.2215917706489563, "learning_rate": 8.641889056035842e-06, "loss": 1.055, "step": 3405 }, { "epoch": 0.2613198982136235, "grad_norm": 0.20415639877319336, "learning_rate": 8.641039680747523e-06, "loss": 1.0401, "step": 3406 }, { "epoch": 0.2613966216129816, "grad_norm": 0.17246733605861664, "learning_rate": 8.640190081707534e-06, "loss": 1.1196, "step": 3407 }, { "epoch": 0.26147334501233965, "grad_norm": 0.24058829247951508, "learning_rate": 8.639340258968086e-06, "loss": 1.1134, "step": 3408 }, { "epoch": 0.26155006841169776, "grad_norm": 0.33830371499061584, "learning_rate": 8.6384902125814e-06, "loss": 1.15, "step": 3409 }, { "epoch": 0.26162679181105586, "grad_norm": 0.2113913744688034, "learning_rate": 8.637639942599717e-06, "loss": 1.1283, "step": 3410 }, { "epoch": 0.2617035152104139, "grad_norm": 0.22153636813163757, "learning_rate": 8.63678944907529e-06, "loss": 1.0579, "step": 3411 }, { "epoch": 0.261780238609772, "grad_norm": 0.19418273866176605, "learning_rate": 8.635938732060377e-06, "loss": 1.0575, "step": 3412 }, { "epoch": 0.2618569620091301, "grad_norm": 0.2091265171766281, "learning_rate": 8.635087791607262e-06, "loss": 1.1107, "step": 3413 }, { "epoch": 0.26193368540848816, "grad_norm": 0.10660496354103088, "learning_rate": 8.634236627768236e-06, "loss": 1.2192, "step": 3414 }, { "epoch": 0.26201040880784626, "grad_norm": 0.20738203823566437, "learning_rate": 8.633385240595606e-06, "loss": 1.1709, "step": 3415 }, { "epoch": 0.2620871322072043, "grad_norm": 0.2390841841697693, "learning_rate": 8.632533630141694e-06, "loss": 1.1679, "step": 3416 }, { "epoch": 0.2621638556065624, "grad_norm": 0.20133665204048157, "learning_rate": 8.631681796458831e-06, "loss": 1.1418, "step": 3417 }, { "epoch": 0.2622405790059205, "grad_norm": 0.18655288219451904, "learning_rate": 8.630829739599364e-06, "loss": 1.0681, "step": 3418 }, { "epoch": 0.26231730240527856, "grad_norm": 0.19586682319641113, "learning_rate": 8.629977459615655e-06, "loss": 1.178, "step": 3419 }, { "epoch": 0.26239402580463667, "grad_norm": 0.22367100417613983, "learning_rate": 8.62912495656008e-06, "loss": 1.1379, "step": 3420 }, { "epoch": 0.2624707492039947, "grad_norm": 0.2546268105506897, "learning_rate": 8.628272230485026e-06, "loss": 1.1175, "step": 3421 }, { "epoch": 0.2625474726033528, "grad_norm": 0.415458619594574, "learning_rate": 8.627419281442897e-06, "loss": 1.1214, "step": 3422 }, { "epoch": 0.2626241960027109, "grad_norm": 0.21393214166164398, "learning_rate": 8.626566109486108e-06, "loss": 1.1054, "step": 3423 }, { "epoch": 0.26270091940206897, "grad_norm": 0.22395747900009155, "learning_rate": 8.625712714667089e-06, "loss": 1.1087, "step": 3424 }, { "epoch": 0.26277764280142707, "grad_norm": 0.21366067230701447, "learning_rate": 8.624859097038282e-06, "loss": 1.113, "step": 3425 }, { "epoch": 0.2628543662007851, "grad_norm": 0.274433434009552, "learning_rate": 8.624005256652148e-06, "loss": 1.0805, "step": 3426 }, { "epoch": 0.2629310896001432, "grad_norm": 0.20233535766601562, "learning_rate": 8.623151193561154e-06, "loss": 1.1901, "step": 3427 }, { "epoch": 0.2630078129995013, "grad_norm": 0.6379210948944092, "learning_rate": 8.622296907817784e-06, "loss": 1.1298, "step": 3428 }, { "epoch": 0.26308453639885937, "grad_norm": 0.30246493220329285, "learning_rate": 8.62144239947454e-06, "loss": 1.1768, "step": 3429 }, { "epoch": 0.2631612597982175, "grad_norm": 0.2956676185131073, "learning_rate": 8.62058766858393e-06, "loss": 0.9888, "step": 3430 }, { "epoch": 0.2632379831975755, "grad_norm": 0.19758222997188568, "learning_rate": 8.619732715198482e-06, "loss": 1.103, "step": 3431 }, { "epoch": 0.2633147065969336, "grad_norm": 0.24175316095352173, "learning_rate": 8.618877539370733e-06, "loss": 1.1422, "step": 3432 }, { "epoch": 0.2633914299962917, "grad_norm": 0.20681598782539368, "learning_rate": 8.618022141153238e-06, "loss": 1.1036, "step": 3433 }, { "epoch": 0.2634681533956498, "grad_norm": 0.2805899381637573, "learning_rate": 8.617166520598563e-06, "loss": 1.1051, "step": 3434 }, { "epoch": 0.2635448767950079, "grad_norm": 0.29356470704078674, "learning_rate": 8.616310677759288e-06, "loss": 1.0826, "step": 3435 }, { "epoch": 0.2636216001943659, "grad_norm": 0.2106916606426239, "learning_rate": 8.615454612688006e-06, "loss": 1.0789, "step": 3436 }, { "epoch": 0.263698323593724, "grad_norm": 0.23760594427585602, "learning_rate": 8.614598325437324e-06, "loss": 1.104, "step": 3437 }, { "epoch": 0.26377504699308213, "grad_norm": 0.18999692797660828, "learning_rate": 8.613741816059867e-06, "loss": 1.0892, "step": 3438 }, { "epoch": 0.2638517703924402, "grad_norm": 0.2316529005765915, "learning_rate": 8.612885084608265e-06, "loss": 1.1518, "step": 3439 }, { "epoch": 0.2639284937917983, "grad_norm": 0.23276208341121674, "learning_rate": 8.612028131135169e-06, "loss": 1.1023, "step": 3440 }, { "epoch": 0.2640052171911563, "grad_norm": 0.20410983264446259, "learning_rate": 8.611170955693242e-06, "loss": 1.129, "step": 3441 }, { "epoch": 0.26408194059051443, "grad_norm": 0.2030872106552124, "learning_rate": 8.610313558335157e-06, "loss": 1.1282, "step": 3442 }, { "epoch": 0.26415866398987253, "grad_norm": 1.0581409931182861, "learning_rate": 8.609455939113604e-06, "loss": 1.053, "step": 3443 }, { "epoch": 0.2642353873892306, "grad_norm": 0.240289568901062, "learning_rate": 8.60859809808129e-06, "loss": 1.0975, "step": 3444 }, { "epoch": 0.2643121107885887, "grad_norm": 0.20612776279449463, "learning_rate": 8.607740035290926e-06, "loss": 1.11, "step": 3445 }, { "epoch": 0.26438883418794673, "grad_norm": 2.219280958175659, "learning_rate": 8.606881750795244e-06, "loss": 1.1458, "step": 3446 }, { "epoch": 0.26446555758730483, "grad_norm": 0.16702154278755188, "learning_rate": 8.60602324464699e-06, "loss": 1.0812, "step": 3447 }, { "epoch": 0.26454228098666294, "grad_norm": 0.20515090227127075, "learning_rate": 8.60516451689892e-06, "loss": 1.1898, "step": 3448 }, { "epoch": 0.264619004386021, "grad_norm": 0.21103744208812714, "learning_rate": 8.604305567603804e-06, "loss": 1.1026, "step": 3449 }, { "epoch": 0.2646957277853791, "grad_norm": 0.19578084349632263, "learning_rate": 8.60344639681443e-06, "loss": 1.1063, "step": 3450 }, { "epoch": 0.26477245118473713, "grad_norm": 0.19638460874557495, "learning_rate": 8.602587004583592e-06, "loss": 1.0986, "step": 3451 }, { "epoch": 0.26484917458409524, "grad_norm": 0.5252426266670227, "learning_rate": 8.601727390964105e-06, "loss": 1.0936, "step": 3452 }, { "epoch": 0.26492589798345334, "grad_norm": 0.2613396942615509, "learning_rate": 8.600867556008794e-06, "loss": 1.1801, "step": 3453 }, { "epoch": 0.2650026213828114, "grad_norm": 0.22426921129226685, "learning_rate": 8.6000074997705e-06, "loss": 1.0745, "step": 3454 }, { "epoch": 0.2650793447821695, "grad_norm": 0.5095717906951904, "learning_rate": 8.599147222302071e-06, "loss": 1.1363, "step": 3455 }, { "epoch": 0.26515606818152754, "grad_norm": 0.1947138011455536, "learning_rate": 8.598286723656377e-06, "loss": 1.1535, "step": 3456 }, { "epoch": 0.26523279158088564, "grad_norm": 0.3179166316986084, "learning_rate": 8.597426003886295e-06, "loss": 1.1422, "step": 3457 }, { "epoch": 0.26530951498024374, "grad_norm": 0.24850782752037048, "learning_rate": 8.596565063044724e-06, "loss": 1.1264, "step": 3458 }, { "epoch": 0.2653862383796018, "grad_norm": 0.19920223951339722, "learning_rate": 8.595703901184566e-06, "loss": 1.1992, "step": 3459 }, { "epoch": 0.2654629617789599, "grad_norm": 0.22798465192317963, "learning_rate": 8.594842518358745e-06, "loss": 1.1282, "step": 3460 }, { "epoch": 0.26553968517831794, "grad_norm": 0.3543723523616791, "learning_rate": 8.593980914620193e-06, "loss": 1.1677, "step": 3461 }, { "epoch": 0.26561640857767604, "grad_norm": 0.20732367038726807, "learning_rate": 8.593119090021858e-06, "loss": 1.0685, "step": 3462 }, { "epoch": 0.26569313197703415, "grad_norm": 0.26152580976486206, "learning_rate": 8.592257044616701e-06, "loss": 1.1425, "step": 3463 }, { "epoch": 0.2657698553763922, "grad_norm": 0.20470227301120758, "learning_rate": 8.5913947784577e-06, "loss": 1.1209, "step": 3464 }, { "epoch": 0.2658465787757503, "grad_norm": 0.322763055562973, "learning_rate": 8.59053229159784e-06, "loss": 1.1577, "step": 3465 }, { "epoch": 0.26592330217510834, "grad_norm": 0.17879611253738403, "learning_rate": 8.589669584090128e-06, "loss": 1.1186, "step": 3466 }, { "epoch": 0.26600002557446645, "grad_norm": 0.1732025295495987, "learning_rate": 8.588806655987573e-06, "loss": 1.0869, "step": 3467 }, { "epoch": 0.26607674897382455, "grad_norm": 0.18527370691299438, "learning_rate": 8.587943507343208e-06, "loss": 1.1941, "step": 3468 }, { "epoch": 0.2661534723731826, "grad_norm": 0.4553345739841461, "learning_rate": 8.587080138210078e-06, "loss": 1.2085, "step": 3469 }, { "epoch": 0.2662301957725407, "grad_norm": 0.20418062806129456, "learning_rate": 8.586216548641234e-06, "loss": 1.0749, "step": 3470 }, { "epoch": 0.2663069191718988, "grad_norm": 0.19739890098571777, "learning_rate": 8.58535273868975e-06, "loss": 1.1607, "step": 3471 }, { "epoch": 0.26638364257125685, "grad_norm": 0.2093198448419571, "learning_rate": 8.584488708408708e-06, "loss": 1.1159, "step": 3472 }, { "epoch": 0.26646036597061495, "grad_norm": 0.2226068079471588, "learning_rate": 8.583624457851206e-06, "loss": 1.0617, "step": 3473 }, { "epoch": 0.266537089369973, "grad_norm": 0.28958824276924133, "learning_rate": 8.582759987070353e-06, "loss": 1.0932, "step": 3474 }, { "epoch": 0.2666138127693311, "grad_norm": 0.10703297704458237, "learning_rate": 8.581895296119274e-06, "loss": 1.271, "step": 3475 }, { "epoch": 0.2666905361686892, "grad_norm": 0.24694761633872986, "learning_rate": 8.581030385051105e-06, "loss": 1.1363, "step": 3476 }, { "epoch": 0.26676725956804725, "grad_norm": 0.610656201839447, "learning_rate": 8.580165253918999e-06, "loss": 1.1279, "step": 3477 }, { "epoch": 0.26684398296740536, "grad_norm": 0.20791390538215637, "learning_rate": 8.579299902776121e-06, "loss": 1.0834, "step": 3478 }, { "epoch": 0.2669207063667634, "grad_norm": 0.19199685752391815, "learning_rate": 8.578434331675648e-06, "loss": 1.0877, "step": 3479 }, { "epoch": 0.2669974297661215, "grad_norm": 0.1959000676870346, "learning_rate": 8.577568540670773e-06, "loss": 1.113, "step": 3480 }, { "epoch": 0.2670741531654796, "grad_norm": 0.25368139147758484, "learning_rate": 8.576702529814698e-06, "loss": 1.161, "step": 3481 }, { "epoch": 0.26715087656483766, "grad_norm": 0.23527130484580994, "learning_rate": 8.575836299160643e-06, "loss": 1.1543, "step": 3482 }, { "epoch": 0.26722759996419576, "grad_norm": 0.18419788777828217, "learning_rate": 8.574969848761843e-06, "loss": 1.1449, "step": 3483 }, { "epoch": 0.2673043233635538, "grad_norm": 0.21431516110897064, "learning_rate": 8.574103178671542e-06, "loss": 1.0613, "step": 3484 }, { "epoch": 0.2673810467629119, "grad_norm": 0.23953771591186523, "learning_rate": 8.573236288942997e-06, "loss": 1.1786, "step": 3485 }, { "epoch": 0.26745777016227, "grad_norm": 0.27940255403518677, "learning_rate": 8.572369179629486e-06, "loss": 1.1559, "step": 3486 }, { "epoch": 0.26753449356162806, "grad_norm": 0.26264768838882446, "learning_rate": 8.571501850784287e-06, "loss": 1.165, "step": 3487 }, { "epoch": 0.26761121696098616, "grad_norm": 0.20403552055358887, "learning_rate": 8.570634302460707e-06, "loss": 1.1587, "step": 3488 }, { "epoch": 0.2676879403603442, "grad_norm": 0.2038695514202118, "learning_rate": 8.569766534712058e-06, "loss": 1.103, "step": 3489 }, { "epoch": 0.2677646637597023, "grad_norm": 0.2284691333770752, "learning_rate": 8.568898547591664e-06, "loss": 1.171, "step": 3490 }, { "epoch": 0.2678413871590604, "grad_norm": 0.18468116223812103, "learning_rate": 8.568030341152866e-06, "loss": 1.1976, "step": 3491 }, { "epoch": 0.26791811055841847, "grad_norm": 0.3029477596282959, "learning_rate": 8.567161915449018e-06, "loss": 1.0613, "step": 3492 }, { "epoch": 0.26799483395777657, "grad_norm": 0.35427939891815186, "learning_rate": 8.566293270533488e-06, "loss": 1.0813, "step": 3493 }, { "epoch": 0.2680715573571346, "grad_norm": 0.1965276449918747, "learning_rate": 8.565424406459654e-06, "loss": 1.1211, "step": 3494 }, { "epoch": 0.2681482807564927, "grad_norm": 0.2009820193052292, "learning_rate": 8.564555323280913e-06, "loss": 1.0806, "step": 3495 }, { "epoch": 0.2682250041558508, "grad_norm": 0.466905802488327, "learning_rate": 8.563686021050672e-06, "loss": 1.0454, "step": 3496 }, { "epoch": 0.26830172755520887, "grad_norm": 0.27519315481185913, "learning_rate": 8.562816499822349e-06, "loss": 1.1276, "step": 3497 }, { "epoch": 0.26837845095456697, "grad_norm": 0.19907376170158386, "learning_rate": 8.561946759649382e-06, "loss": 1.0912, "step": 3498 }, { "epoch": 0.268455174353925, "grad_norm": 0.21354570984840393, "learning_rate": 8.561076800585219e-06, "loss": 1.0897, "step": 3499 }, { "epoch": 0.2685318977532831, "grad_norm": 0.18730784952640533, "learning_rate": 8.560206622683318e-06, "loss": 1.138, "step": 3500 }, { "epoch": 0.2686086211526412, "grad_norm": 0.22557756304740906, "learning_rate": 8.559336225997155e-06, "loss": 1.1772, "step": 3501 }, { "epoch": 0.2686853445519993, "grad_norm": 0.2511585056781769, "learning_rate": 8.558465610580219e-06, "loss": 1.143, "step": 3502 }, { "epoch": 0.2687620679513574, "grad_norm": 0.2447444051504135, "learning_rate": 8.557594776486014e-06, "loss": 1.1303, "step": 3503 }, { "epoch": 0.2688387913507154, "grad_norm": 0.22034355998039246, "learning_rate": 8.556723723768051e-06, "loss": 1.1361, "step": 3504 }, { "epoch": 0.2689155147500735, "grad_norm": 0.25529250502586365, "learning_rate": 8.555852452479858e-06, "loss": 1.0612, "step": 3505 }, { "epoch": 0.26899223814943163, "grad_norm": 0.18796075880527496, "learning_rate": 8.554980962674982e-06, "loss": 1.0566, "step": 3506 }, { "epoch": 0.2690689615487897, "grad_norm": 0.20042838156223297, "learning_rate": 8.554109254406972e-06, "loss": 1.0331, "step": 3507 }, { "epoch": 0.2691456849481478, "grad_norm": 0.19665668904781342, "learning_rate": 8.553237327729405e-06, "loss": 1.0481, "step": 3508 }, { "epoch": 0.2692224083475058, "grad_norm": 0.17533883452415466, "learning_rate": 8.552365182695854e-06, "loss": 1.1328, "step": 3509 }, { "epoch": 0.26929913174686393, "grad_norm": 0.19834080338478088, "learning_rate": 8.551492819359921e-06, "loss": 1.1257, "step": 3510 }, { "epoch": 0.26937585514622203, "grad_norm": 0.20807386934757233, "learning_rate": 8.550620237775213e-06, "loss": 1.1331, "step": 3511 }, { "epoch": 0.2694525785455801, "grad_norm": 0.3290281593799591, "learning_rate": 8.549747437995354e-06, "loss": 1.1435, "step": 3512 }, { "epoch": 0.2695293019449382, "grad_norm": 1.1064835786819458, "learning_rate": 8.548874420073977e-06, "loss": 1.0935, "step": 3513 }, { "epoch": 0.26960602534429623, "grad_norm": 0.27818313241004944, "learning_rate": 8.548001184064733e-06, "loss": 1.1159, "step": 3514 }, { "epoch": 0.26968274874365433, "grad_norm": 0.3269844055175781, "learning_rate": 8.547127730021285e-06, "loss": 1.1463, "step": 3515 }, { "epoch": 0.26975947214301244, "grad_norm": 0.2321031242609024, "learning_rate": 8.54625405799731e-06, "loss": 1.1345, "step": 3516 }, { "epoch": 0.2698361955423705, "grad_norm": 0.19790711998939514, "learning_rate": 8.545380168046495e-06, "loss": 1.0784, "step": 3517 }, { "epoch": 0.2699129189417286, "grad_norm": 0.09793002158403397, "learning_rate": 8.544506060222542e-06, "loss": 1.2152, "step": 3518 }, { "epoch": 0.26998964234108663, "grad_norm": 0.263976126909256, "learning_rate": 8.543631734579171e-06, "loss": 1.1288, "step": 3519 }, { "epoch": 0.27006636574044474, "grad_norm": 0.21712830662727356, "learning_rate": 8.542757191170111e-06, "loss": 1.1621, "step": 3520 }, { "epoch": 0.27014308913980284, "grad_norm": 0.19967712461948395, "learning_rate": 8.541882430049103e-06, "loss": 1.077, "step": 3521 }, { "epoch": 0.2702198125391609, "grad_norm": 0.20015589892864227, "learning_rate": 8.541007451269905e-06, "loss": 1.1849, "step": 3522 }, { "epoch": 0.270296535938519, "grad_norm": 0.19515205919742584, "learning_rate": 8.540132254886287e-06, "loss": 1.0952, "step": 3523 }, { "epoch": 0.27037325933787704, "grad_norm": 0.17785261571407318, "learning_rate": 8.539256840952028e-06, "loss": 1.0623, "step": 3524 }, { "epoch": 0.27044998273723514, "grad_norm": 0.26516327261924744, "learning_rate": 8.53838120952093e-06, "loss": 1.0697, "step": 3525 }, { "epoch": 0.27052670613659324, "grad_norm": 0.2035418003797531, "learning_rate": 8.537505360646801e-06, "loss": 1.1138, "step": 3526 }, { "epoch": 0.2706034295359513, "grad_norm": 0.22130133211612701, "learning_rate": 8.536629294383465e-06, "loss": 1.1193, "step": 3527 }, { "epoch": 0.2706801529353094, "grad_norm": 0.2838699519634247, "learning_rate": 8.535753010784757e-06, "loss": 1.1257, "step": 3528 }, { "epoch": 0.2707568763346675, "grad_norm": 0.19153793156147003, "learning_rate": 8.534876509904527e-06, "loss": 1.1159, "step": 3529 }, { "epoch": 0.27083359973402554, "grad_norm": 0.19593948125839233, "learning_rate": 8.53399979179664e-06, "loss": 1.1131, "step": 3530 }, { "epoch": 0.27091032313338365, "grad_norm": 0.24063630402088165, "learning_rate": 8.533122856514973e-06, "loss": 1.0735, "step": 3531 }, { "epoch": 0.2709870465327417, "grad_norm": 0.09851383417844772, "learning_rate": 8.532245704113411e-06, "loss": 1.2577, "step": 3532 }, { "epoch": 0.2710637699320998, "grad_norm": 0.22923950850963593, "learning_rate": 8.531368334645865e-06, "loss": 1.0397, "step": 3533 }, { "epoch": 0.2711404933314579, "grad_norm": 0.31795966625213623, "learning_rate": 8.530490748166245e-06, "loss": 1.1389, "step": 3534 }, { "epoch": 0.27121721673081595, "grad_norm": 0.26385483145713806, "learning_rate": 8.529612944728486e-06, "loss": 1.1028, "step": 3535 }, { "epoch": 0.27129394013017405, "grad_norm": 0.2028239667415619, "learning_rate": 8.528734924386528e-06, "loss": 1.0883, "step": 3536 }, { "epoch": 0.2713706635295321, "grad_norm": 0.20661146938800812, "learning_rate": 8.52785668719433e-06, "loss": 1.1207, "step": 3537 }, { "epoch": 0.2714473869288902, "grad_norm": 0.26282909512519836, "learning_rate": 8.52697823320586e-06, "loss": 1.0296, "step": 3538 }, { "epoch": 0.2715241103282483, "grad_norm": 0.23405848443508148, "learning_rate": 8.526099562475104e-06, "loss": 1.0996, "step": 3539 }, { "epoch": 0.27160083372760635, "grad_norm": 0.261021226644516, "learning_rate": 8.525220675056054e-06, "loss": 1.047, "step": 3540 }, { "epoch": 0.27167755712696445, "grad_norm": 0.31105804443359375, "learning_rate": 8.524341571002724e-06, "loss": 1.0678, "step": 3541 }, { "epoch": 0.2717542805263225, "grad_norm": 0.20019356906414032, "learning_rate": 8.523462250369136e-06, "loss": 1.1547, "step": 3542 }, { "epoch": 0.2718310039256806, "grad_norm": 0.2022862583398819, "learning_rate": 8.52258271320933e-06, "loss": 1.1846, "step": 3543 }, { "epoch": 0.2719077273250387, "grad_norm": 0.24155683815479279, "learning_rate": 8.521702959577352e-06, "loss": 1.1603, "step": 3544 }, { "epoch": 0.27198445072439675, "grad_norm": 0.19204463064670563, "learning_rate": 8.520822989527264e-06, "loss": 1.0761, "step": 3545 }, { "epoch": 0.27206117412375486, "grad_norm": 0.28474491834640503, "learning_rate": 8.519942803113147e-06, "loss": 1.118, "step": 3546 }, { "epoch": 0.2721378975231129, "grad_norm": 0.2917936146259308, "learning_rate": 8.519062400389087e-06, "loss": 1.0264, "step": 3547 }, { "epoch": 0.272214620922471, "grad_norm": 0.25245019793510437, "learning_rate": 8.518181781409187e-06, "loss": 1.0266, "step": 3548 }, { "epoch": 0.2722913443218291, "grad_norm": 0.27060022950172424, "learning_rate": 8.517300946227569e-06, "loss": 1.1669, "step": 3549 }, { "epoch": 0.27236806772118716, "grad_norm": 0.2151862382888794, "learning_rate": 8.516419894898356e-06, "loss": 1.0397, "step": 3550 }, { "epoch": 0.27244479112054526, "grad_norm": 0.18952809274196625, "learning_rate": 8.515538627475693e-06, "loss": 1.1117, "step": 3551 }, { "epoch": 0.2725215145199033, "grad_norm": 0.1817447990179062, "learning_rate": 8.514657144013738e-06, "loss": 1.0817, "step": 3552 }, { "epoch": 0.2725982379192614, "grad_norm": 0.2364649623632431, "learning_rate": 8.513775444566658e-06, "loss": 1.0443, "step": 3553 }, { "epoch": 0.2726749613186195, "grad_norm": 0.2007552683353424, "learning_rate": 8.51289352918864e-06, "loss": 1.1374, "step": 3554 }, { "epoch": 0.27275168471797756, "grad_norm": 0.2349356710910797, "learning_rate": 8.512011397933876e-06, "loss": 1.1368, "step": 3555 }, { "epoch": 0.27282840811733566, "grad_norm": 0.2247542142868042, "learning_rate": 8.511129050856576e-06, "loss": 1.0805, "step": 3556 }, { "epoch": 0.2729051315166937, "grad_norm": 0.19963595271110535, "learning_rate": 8.510246488010964e-06, "loss": 1.1838, "step": 3557 }, { "epoch": 0.2729818549160518, "grad_norm": 0.2036382555961609, "learning_rate": 8.509363709451275e-06, "loss": 1.0981, "step": 3558 }, { "epoch": 0.2730585783154099, "grad_norm": 0.20243658125400543, "learning_rate": 8.508480715231758e-06, "loss": 1.1481, "step": 3559 }, { "epoch": 0.27313530171476796, "grad_norm": 0.2105940878391266, "learning_rate": 8.507597505406675e-06, "loss": 1.1354, "step": 3560 }, { "epoch": 0.27321202511412607, "grad_norm": 0.23689740896224976, "learning_rate": 8.506714080030303e-06, "loss": 1.1367, "step": 3561 }, { "epoch": 0.2732887485134841, "grad_norm": 0.22528605163097382, "learning_rate": 8.50583043915693e-06, "loss": 1.1092, "step": 3562 }, { "epoch": 0.2733654719128422, "grad_norm": 0.22203993797302246, "learning_rate": 8.504946582840857e-06, "loss": 1.0839, "step": 3563 }, { "epoch": 0.2734421953122003, "grad_norm": 0.24675215780735016, "learning_rate": 8.504062511136402e-06, "loss": 1.0857, "step": 3564 }, { "epoch": 0.27351891871155837, "grad_norm": 0.2355632483959198, "learning_rate": 8.50317822409789e-06, "loss": 1.0887, "step": 3565 }, { "epoch": 0.27359564211091647, "grad_norm": 0.3631565272808075, "learning_rate": 8.502293721779667e-06, "loss": 1.1632, "step": 3566 }, { "epoch": 0.2736723655102745, "grad_norm": 0.2927640974521637, "learning_rate": 8.501409004236086e-06, "loss": 1.0713, "step": 3567 }, { "epoch": 0.2737490889096326, "grad_norm": 0.5251568555831909, "learning_rate": 8.500524071521514e-06, "loss": 1.0886, "step": 3568 }, { "epoch": 0.2738258123089907, "grad_norm": 0.2192053347826004, "learning_rate": 8.499638923690335e-06, "loss": 1.1149, "step": 3569 }, { "epoch": 0.27390253570834877, "grad_norm": 0.18476630747318268, "learning_rate": 8.49875356079694e-06, "loss": 1.0579, "step": 3570 }, { "epoch": 0.2739792591077069, "grad_norm": 0.1886129528284073, "learning_rate": 8.497867982895741e-06, "loss": 1.0777, "step": 3571 }, { "epoch": 0.2740559825070649, "grad_norm": 0.24824370443820953, "learning_rate": 8.496982190041158e-06, "loss": 1.0581, "step": 3572 }, { "epoch": 0.274132705906423, "grad_norm": 0.2088594287633896, "learning_rate": 8.496096182287623e-06, "loss": 1.1069, "step": 3573 }, { "epoch": 0.2742094293057811, "grad_norm": 0.22379624843597412, "learning_rate": 8.495209959689587e-06, "loss": 1.0711, "step": 3574 }, { "epoch": 0.2742861527051392, "grad_norm": 0.2191958725452423, "learning_rate": 8.494323522301509e-06, "loss": 1.0942, "step": 3575 }, { "epoch": 0.2743628761044973, "grad_norm": 0.24457460641860962, "learning_rate": 8.493436870177863e-06, "loss": 1.1496, "step": 3576 }, { "epoch": 0.2744395995038553, "grad_norm": 0.2243255376815796, "learning_rate": 8.492550003373135e-06, "loss": 1.1488, "step": 3577 }, { "epoch": 0.27451632290321343, "grad_norm": 0.23635464906692505, "learning_rate": 8.491662921941829e-06, "loss": 1.0765, "step": 3578 }, { "epoch": 0.27459304630257153, "grad_norm": 0.22058002650737762, "learning_rate": 8.490775625938452e-06, "loss": 1.1379, "step": 3579 }, { "epoch": 0.2746697697019296, "grad_norm": 0.19645296037197113, "learning_rate": 8.489888115417538e-06, "loss": 1.1212, "step": 3580 }, { "epoch": 0.2747464931012877, "grad_norm": 0.25272732973098755, "learning_rate": 8.489000390433624e-06, "loss": 1.1554, "step": 3581 }, { "epoch": 0.27482321650064573, "grad_norm": 0.24253089725971222, "learning_rate": 8.488112451041262e-06, "loss": 1.1232, "step": 3582 }, { "epoch": 0.27489993990000383, "grad_norm": 0.2671903371810913, "learning_rate": 8.487224297295018e-06, "loss": 1.0153, "step": 3583 }, { "epoch": 0.27497666329936193, "grad_norm": 0.24764394760131836, "learning_rate": 8.486335929249475e-06, "loss": 1.0883, "step": 3584 }, { "epoch": 0.27505338669872, "grad_norm": 0.22127731144428253, "learning_rate": 8.485447346959222e-06, "loss": 1.0307, "step": 3585 }, { "epoch": 0.2751301100980781, "grad_norm": 0.17403456568717957, "learning_rate": 8.484558550478866e-06, "loss": 1.0613, "step": 3586 }, { "epoch": 0.2752068334974362, "grad_norm": 0.20685654878616333, "learning_rate": 8.483669539863024e-06, "loss": 1.1885, "step": 3587 }, { "epoch": 0.27528355689679423, "grad_norm": 0.21034446358680725, "learning_rate": 8.482780315166333e-06, "loss": 1.0914, "step": 3588 }, { "epoch": 0.27536028029615234, "grad_norm": 0.23590399324893951, "learning_rate": 8.481890876443432e-06, "loss": 1.1423, "step": 3589 }, { "epoch": 0.2754370036955104, "grad_norm": 0.6712637543678284, "learning_rate": 8.481001223748986e-06, "loss": 1.1419, "step": 3590 }, { "epoch": 0.2755137270948685, "grad_norm": 0.21742235124111176, "learning_rate": 8.480111357137661e-06, "loss": 1.174, "step": 3591 }, { "epoch": 0.2755904504942266, "grad_norm": 0.18867290019989014, "learning_rate": 8.479221276664145e-06, "loss": 1.0371, "step": 3592 }, { "epoch": 0.27566717389358464, "grad_norm": 0.19920235872268677, "learning_rate": 8.478330982383132e-06, "loss": 1.0195, "step": 3593 }, { "epoch": 0.27574389729294274, "grad_norm": 0.24527950584888458, "learning_rate": 8.477440474349339e-06, "loss": 1.1351, "step": 3594 }, { "epoch": 0.2758206206923008, "grad_norm": 0.20322658121585846, "learning_rate": 8.476549752617485e-06, "loss": 1.1512, "step": 3595 }, { "epoch": 0.2758973440916589, "grad_norm": 0.32333850860595703, "learning_rate": 8.47565881724231e-06, "loss": 1.0352, "step": 3596 }, { "epoch": 0.275974067491017, "grad_norm": 0.3523447513580322, "learning_rate": 8.47476766827856e-06, "loss": 1.1099, "step": 3597 }, { "epoch": 0.27605079089037504, "grad_norm": 0.1780218631029129, "learning_rate": 8.473876305781006e-06, "loss": 1.1038, "step": 3598 }, { "epoch": 0.27612751428973314, "grad_norm": 0.2184314727783203, "learning_rate": 8.472984729804419e-06, "loss": 1.1008, "step": 3599 }, { "epoch": 0.2762042376890912, "grad_norm": 0.4550127387046814, "learning_rate": 8.47209294040359e-06, "loss": 1.0701, "step": 3600 }, { "epoch": 0.2762809610884493, "grad_norm": 0.19249394536018372, "learning_rate": 8.471200937633322e-06, "loss": 1.1963, "step": 3601 }, { "epoch": 0.2763576844878074, "grad_norm": 0.2054969221353531, "learning_rate": 8.47030872154843e-06, "loss": 1.1168, "step": 3602 }, { "epoch": 0.27643440788716545, "grad_norm": 0.1841030865907669, "learning_rate": 8.469416292203747e-06, "loss": 1.1214, "step": 3603 }, { "epoch": 0.27651113128652355, "grad_norm": 0.1975058764219284, "learning_rate": 8.46852364965411e-06, "loss": 1.0881, "step": 3604 }, { "epoch": 0.2765878546858816, "grad_norm": 0.2197147011756897, "learning_rate": 8.467630793954379e-06, "loss": 1.1546, "step": 3605 }, { "epoch": 0.2766645780852397, "grad_norm": 0.2586170434951782, "learning_rate": 8.466737725159417e-06, "loss": 1.1815, "step": 3606 }, { "epoch": 0.2767413014845978, "grad_norm": 0.29888302087783813, "learning_rate": 8.465844443324109e-06, "loss": 1.0549, "step": 3607 }, { "epoch": 0.27681802488395585, "grad_norm": 0.3907199501991272, "learning_rate": 8.46495094850335e-06, "loss": 1.1267, "step": 3608 }, { "epoch": 0.27689474828331395, "grad_norm": 0.18502087891101837, "learning_rate": 8.464057240752046e-06, "loss": 1.0938, "step": 3609 }, { "epoch": 0.276971471682672, "grad_norm": 0.2397070676088333, "learning_rate": 8.463163320125119e-06, "loss": 1.1183, "step": 3610 }, { "epoch": 0.2770481950820301, "grad_norm": 0.19027328491210938, "learning_rate": 8.462269186677502e-06, "loss": 1.2513, "step": 3611 }, { "epoch": 0.2771249184813882, "grad_norm": 0.20503169298171997, "learning_rate": 8.461374840464143e-06, "loss": 1.1174, "step": 3612 }, { "epoch": 0.27720164188074625, "grad_norm": 0.18345844745635986, "learning_rate": 8.46048028154e-06, "loss": 1.112, "step": 3613 }, { "epoch": 0.27727836528010436, "grad_norm": 0.20599354803562164, "learning_rate": 8.459585509960047e-06, "loss": 1.0087, "step": 3614 }, { "epoch": 0.2773550886794624, "grad_norm": 0.23476339876651764, "learning_rate": 8.45869052577927e-06, "loss": 1.0926, "step": 3615 }, { "epoch": 0.2774318120788205, "grad_norm": 0.22290177643299103, "learning_rate": 8.45779532905267e-06, "loss": 1.0602, "step": 3616 }, { "epoch": 0.2775085354781786, "grad_norm": 0.24948395788669586, "learning_rate": 8.456899919835257e-06, "loss": 1.1445, "step": 3617 }, { "epoch": 0.27758525887753666, "grad_norm": 0.3379741907119751, "learning_rate": 8.456004298182056e-06, "loss": 1.1897, "step": 3618 }, { "epoch": 0.27766198227689476, "grad_norm": 0.2842944860458374, "learning_rate": 8.455108464148107e-06, "loss": 1.1216, "step": 3619 }, { "epoch": 0.2777387056762528, "grad_norm": 0.27944108843803406, "learning_rate": 8.454212417788461e-06, "loss": 1.2255, "step": 3620 }, { "epoch": 0.2778154290756109, "grad_norm": 0.1813981533050537, "learning_rate": 8.453316159158183e-06, "loss": 1.164, "step": 3621 }, { "epoch": 0.277892152474969, "grad_norm": 0.224364772439003, "learning_rate": 8.452419688312348e-06, "loss": 1.199, "step": 3622 }, { "epoch": 0.27796887587432706, "grad_norm": 0.21271100640296936, "learning_rate": 8.451523005306048e-06, "loss": 1.115, "step": 3623 }, { "epoch": 0.27804559927368516, "grad_norm": 0.21473026275634766, "learning_rate": 8.450626110194388e-06, "loss": 1.0807, "step": 3624 }, { "epoch": 0.2781223226730432, "grad_norm": 0.20479796826839447, "learning_rate": 8.449729003032484e-06, "loss": 1.1492, "step": 3625 }, { "epoch": 0.2781990460724013, "grad_norm": 0.38026851415634155, "learning_rate": 8.448831683875465e-06, "loss": 1.083, "step": 3626 }, { "epoch": 0.2782757694717594, "grad_norm": 0.2222415953874588, "learning_rate": 8.447934152778473e-06, "loss": 1.1422, "step": 3627 }, { "epoch": 0.27835249287111746, "grad_norm": 0.09823190420866013, "learning_rate": 8.447036409796663e-06, "loss": 1.2006, "step": 3628 }, { "epoch": 0.27842921627047557, "grad_norm": 0.2293127328157425, "learning_rate": 8.446138454985209e-06, "loss": 1.1602, "step": 3629 }, { "epoch": 0.2785059396698336, "grad_norm": 0.18786199390888214, "learning_rate": 8.445240288399285e-06, "loss": 1.0541, "step": 3630 }, { "epoch": 0.2785826630691917, "grad_norm": 0.23794548213481903, "learning_rate": 8.444341910094093e-06, "loss": 1.0474, "step": 3631 }, { "epoch": 0.2786593864685498, "grad_norm": 0.19840769469738007, "learning_rate": 8.443443320124836e-06, "loss": 1.1898, "step": 3632 }, { "epoch": 0.27873610986790787, "grad_norm": 0.21489141881465912, "learning_rate": 8.442544518546736e-06, "loss": 1.0666, "step": 3633 }, { "epoch": 0.27881283326726597, "grad_norm": 0.20818382501602173, "learning_rate": 8.441645505415028e-06, "loss": 1.1034, "step": 3634 }, { "epoch": 0.278889556666624, "grad_norm": 0.1972523033618927, "learning_rate": 8.440746280784955e-06, "loss": 1.2196, "step": 3635 }, { "epoch": 0.2789662800659821, "grad_norm": 0.21865950524806976, "learning_rate": 8.439846844711781e-06, "loss": 1.1959, "step": 3636 }, { "epoch": 0.2790430034653402, "grad_norm": 0.38405853509902954, "learning_rate": 8.43894719725078e-06, "loss": 1.0903, "step": 3637 }, { "epoch": 0.27911972686469827, "grad_norm": 0.20364651083946228, "learning_rate": 8.438047338457232e-06, "loss": 1.1134, "step": 3638 }, { "epoch": 0.2791964502640564, "grad_norm": 0.2268204391002655, "learning_rate": 8.43714726838644e-06, "loss": 1.0664, "step": 3639 }, { "epoch": 0.2792731736634144, "grad_norm": 0.19320988655090332, "learning_rate": 8.436246987093712e-06, "loss": 1.0634, "step": 3640 }, { "epoch": 0.2793498970627725, "grad_norm": 0.21695910394191742, "learning_rate": 8.435346494634378e-06, "loss": 1.1863, "step": 3641 }, { "epoch": 0.2794266204621306, "grad_norm": 0.1889868974685669, "learning_rate": 8.434445791063773e-06, "loss": 1.1114, "step": 3642 }, { "epoch": 0.2795033438614887, "grad_norm": 0.22780074179172516, "learning_rate": 8.433544876437246e-06, "loss": 1.0552, "step": 3643 }, { "epoch": 0.2795800672608468, "grad_norm": 0.33458465337753296, "learning_rate": 8.432643750810162e-06, "loss": 1.1062, "step": 3644 }, { "epoch": 0.2796567906602049, "grad_norm": 0.3001900315284729, "learning_rate": 8.4317424142379e-06, "loss": 1.063, "step": 3645 }, { "epoch": 0.2797335140595629, "grad_norm": 0.30814576148986816, "learning_rate": 8.430840866775845e-06, "loss": 1.0284, "step": 3646 }, { "epoch": 0.27981023745892103, "grad_norm": 0.24219262599945068, "learning_rate": 8.429939108479403e-06, "loss": 1.1966, "step": 3647 }, { "epoch": 0.2798869608582791, "grad_norm": 0.22948099672794342, "learning_rate": 8.429037139403988e-06, "loss": 1.0591, "step": 3648 }, { "epoch": 0.2799636842576372, "grad_norm": 0.2456643134355545, "learning_rate": 8.428134959605028e-06, "loss": 1.1317, "step": 3649 }, { "epoch": 0.2800404076569953, "grad_norm": 0.2193591147661209, "learning_rate": 8.427232569137967e-06, "loss": 1.0842, "step": 3650 }, { "epoch": 0.28011713105635333, "grad_norm": 0.33807897567749023, "learning_rate": 8.426329968058255e-06, "loss": 1.1155, "step": 3651 }, { "epoch": 0.28019385445571143, "grad_norm": 0.20963524281978607, "learning_rate": 8.425427156421362e-06, "loss": 1.0724, "step": 3652 }, { "epoch": 0.2802705778550695, "grad_norm": 0.2166055142879486, "learning_rate": 8.424524134282768e-06, "loss": 1.0711, "step": 3653 }, { "epoch": 0.2803473012544276, "grad_norm": 0.21396303176879883, "learning_rate": 8.423620901697968e-06, "loss": 1.1279, "step": 3654 }, { "epoch": 0.2804240246537857, "grad_norm": 0.10759660601615906, "learning_rate": 8.422717458722464e-06, "loss": 1.2404, "step": 3655 }, { "epoch": 0.28050074805314373, "grad_norm": 0.40868079662323, "learning_rate": 8.421813805411778e-06, "loss": 1.0699, "step": 3656 }, { "epoch": 0.28057747145250184, "grad_norm": 0.2246435433626175, "learning_rate": 8.42090994182144e-06, "loss": 1.1026, "step": 3657 }, { "epoch": 0.2806541948518599, "grad_norm": 0.21009226143360138, "learning_rate": 8.420005868006997e-06, "loss": 1.0878, "step": 3658 }, { "epoch": 0.280730918251218, "grad_norm": 0.19939428567886353, "learning_rate": 8.419101584024006e-06, "loss": 1.1389, "step": 3659 }, { "epoch": 0.2808076416505761, "grad_norm": 0.22645945847034454, "learning_rate": 8.418197089928036e-06, "loss": 1.1193, "step": 3660 }, { "epoch": 0.28088436504993414, "grad_norm": 0.2075985074043274, "learning_rate": 8.417292385774672e-06, "loss": 1.0882, "step": 3661 }, { "epoch": 0.28096108844929224, "grad_norm": 0.2533717453479767, "learning_rate": 8.416387471619511e-06, "loss": 1.1017, "step": 3662 }, { "epoch": 0.2810378118486503, "grad_norm": 0.21394704282283783, "learning_rate": 8.415482347518162e-06, "loss": 1.0808, "step": 3663 }, { "epoch": 0.2811145352480084, "grad_norm": 0.26915350556373596, "learning_rate": 8.414577013526247e-06, "loss": 1.1027, "step": 3664 }, { "epoch": 0.2811912586473665, "grad_norm": 0.2086331844329834, "learning_rate": 8.413671469699401e-06, "loss": 1.094, "step": 3665 }, { "epoch": 0.28126798204672454, "grad_norm": 0.26121968030929565, "learning_rate": 8.412765716093273e-06, "loss": 1.0859, "step": 3666 }, { "epoch": 0.28134470544608264, "grad_norm": 0.09724880754947662, "learning_rate": 8.411859752763521e-06, "loss": 1.2752, "step": 3667 }, { "epoch": 0.2814214288454407, "grad_norm": 0.22577212750911713, "learning_rate": 8.410953579765822e-06, "loss": 1.1253, "step": 3668 }, { "epoch": 0.2814981522447988, "grad_norm": 0.2106754332780838, "learning_rate": 8.410047197155864e-06, "loss": 1.0685, "step": 3669 }, { "epoch": 0.2815748756441569, "grad_norm": 0.2179766744375229, "learning_rate": 8.409140604989344e-06, "loss": 1.0809, "step": 3670 }, { "epoch": 0.28165159904351494, "grad_norm": 0.3161863386631012, "learning_rate": 8.408233803321975e-06, "loss": 1.0622, "step": 3671 }, { "epoch": 0.28172832244287305, "grad_norm": 0.19756953418254852, "learning_rate": 8.407326792209483e-06, "loss": 1.0597, "step": 3672 }, { "epoch": 0.2818050458422311, "grad_norm": 0.23957811295986176, "learning_rate": 8.406419571707603e-06, "loss": 1.0824, "step": 3673 }, { "epoch": 0.2818817692415892, "grad_norm": 0.20178021490573883, "learning_rate": 8.405512141872092e-06, "loss": 1.062, "step": 3674 }, { "epoch": 0.2819584926409473, "grad_norm": 0.19945502281188965, "learning_rate": 8.404604502758712e-06, "loss": 1.1723, "step": 3675 }, { "epoch": 0.28203521604030535, "grad_norm": 0.2527840733528137, "learning_rate": 8.403696654423235e-06, "loss": 1.1183, "step": 3676 }, { "epoch": 0.28211193943966345, "grad_norm": 0.2374127209186554, "learning_rate": 8.40278859692146e-06, "loss": 1.1185, "step": 3677 }, { "epoch": 0.2821886628390215, "grad_norm": 0.19694364070892334, "learning_rate": 8.401880330309178e-06, "loss": 1.1201, "step": 3678 }, { "epoch": 0.2822653862383796, "grad_norm": 0.19708454608917236, "learning_rate": 8.400971854642216e-06, "loss": 1.0984, "step": 3679 }, { "epoch": 0.2823421096377377, "grad_norm": 0.281986266374588, "learning_rate": 8.400063169976392e-06, "loss": 1.1222, "step": 3680 }, { "epoch": 0.28241883303709575, "grad_norm": 0.19878032803535461, "learning_rate": 8.399154276367554e-06, "loss": 1.0918, "step": 3681 }, { "epoch": 0.28249555643645385, "grad_norm": 0.19130130112171173, "learning_rate": 8.398245173871556e-06, "loss": 1.0852, "step": 3682 }, { "epoch": 0.2825722798358119, "grad_norm": 0.1737290769815445, "learning_rate": 8.397335862544261e-06, "loss": 1.1277, "step": 3683 }, { "epoch": 0.28264900323517, "grad_norm": 0.09928449988365173, "learning_rate": 8.396426342441549e-06, "loss": 1.1684, "step": 3684 }, { "epoch": 0.2827257266345281, "grad_norm": 0.2521997392177582, "learning_rate": 8.395516613619315e-06, "loss": 1.1278, "step": 3685 }, { "epoch": 0.28280245003388615, "grad_norm": 0.32685545086860657, "learning_rate": 8.394606676133463e-06, "loss": 1.1382, "step": 3686 }, { "epoch": 0.28287917343324426, "grad_norm": 0.4494686722755432, "learning_rate": 8.393696530039913e-06, "loss": 0.962, "step": 3687 }, { "epoch": 0.2829558968326023, "grad_norm": 0.25361117720603943, "learning_rate": 8.392786175394591e-06, "loss": 1.0745, "step": 3688 }, { "epoch": 0.2830326202319604, "grad_norm": 0.7591424584388733, "learning_rate": 8.391875612253446e-06, "loss": 1.0345, "step": 3689 }, { "epoch": 0.2831093436313185, "grad_norm": 0.25131955742836, "learning_rate": 8.39096484067243e-06, "loss": 1.1076, "step": 3690 }, { "epoch": 0.28318606703067656, "grad_norm": 0.23645645380020142, "learning_rate": 8.390053860707516e-06, "loss": 1.1222, "step": 3691 }, { "epoch": 0.28326279043003466, "grad_norm": 0.22595319151878357, "learning_rate": 8.389142672414687e-06, "loss": 1.0886, "step": 3692 }, { "epoch": 0.2833395138293927, "grad_norm": 0.2945529818534851, "learning_rate": 8.388231275849934e-06, "loss": 1.0417, "step": 3693 }, { "epoch": 0.2834162372287508, "grad_norm": 0.22470223903656006, "learning_rate": 8.387319671069268e-06, "loss": 1.106, "step": 3694 }, { "epoch": 0.2834929606281089, "grad_norm": 0.28619006276130676, "learning_rate": 8.386407858128707e-06, "loss": 0.9918, "step": 3695 }, { "epoch": 0.28356968402746696, "grad_norm": 0.2133435159921646, "learning_rate": 8.385495837084284e-06, "loss": 1.1359, "step": 3696 }, { "epoch": 0.28364640742682506, "grad_norm": 0.2093123346567154, "learning_rate": 8.38458360799205e-06, "loss": 1.1775, "step": 3697 }, { "epoch": 0.2837231308261831, "grad_norm": 0.20598147809505463, "learning_rate": 8.383671170908058e-06, "loss": 1.0555, "step": 3698 }, { "epoch": 0.2837998542255412, "grad_norm": 0.29031142592430115, "learning_rate": 8.382758525888383e-06, "loss": 1.1676, "step": 3699 }, { "epoch": 0.2838765776248993, "grad_norm": 0.20153425633907318, "learning_rate": 8.381845672989111e-06, "loss": 1.0823, "step": 3700 }, { "epoch": 0.28395330102425737, "grad_norm": 0.2320547103881836, "learning_rate": 8.380932612266333e-06, "loss": 1.0215, "step": 3701 }, { "epoch": 0.28403002442361547, "grad_norm": 0.19677947461605072, "learning_rate": 8.380019343776167e-06, "loss": 1.1068, "step": 3702 }, { "epoch": 0.2841067478229735, "grad_norm": 0.218545064330101, "learning_rate": 8.37910586757473e-06, "loss": 1.0938, "step": 3703 }, { "epoch": 0.2841834712223316, "grad_norm": 0.22052587568759918, "learning_rate": 8.378192183718158e-06, "loss": 1.1383, "step": 3704 }, { "epoch": 0.2842601946216897, "grad_norm": 0.2125827670097351, "learning_rate": 8.377278292262605e-06, "loss": 1.0798, "step": 3705 }, { "epoch": 0.28433691802104777, "grad_norm": 0.2102038413286209, "learning_rate": 8.376364193264226e-06, "loss": 1.1385, "step": 3706 }, { "epoch": 0.28441364142040587, "grad_norm": 0.2288038730621338, "learning_rate": 8.375449886779196e-06, "loss": 1.0807, "step": 3707 }, { "epoch": 0.284490364819764, "grad_norm": 0.28393977880477905, "learning_rate": 8.374535372863702e-06, "loss": 1.1012, "step": 3708 }, { "epoch": 0.284567088219122, "grad_norm": 0.43466195464134216, "learning_rate": 8.373620651573944e-06, "loss": 1.0832, "step": 3709 }, { "epoch": 0.2846438116184801, "grad_norm": 0.17731408774852753, "learning_rate": 8.372705722966135e-06, "loss": 1.1437, "step": 3710 }, { "epoch": 0.28472053501783817, "grad_norm": 0.2134024202823639, "learning_rate": 8.371790587096497e-06, "loss": 1.0614, "step": 3711 }, { "epoch": 0.2847972584171963, "grad_norm": 0.17562244832515717, "learning_rate": 8.370875244021271e-06, "loss": 1.1228, "step": 3712 }, { "epoch": 0.2848739818165544, "grad_norm": 0.2520209848880768, "learning_rate": 8.369959693796704e-06, "loss": 1.0871, "step": 3713 }, { "epoch": 0.2849507052159124, "grad_norm": 0.19971424341201782, "learning_rate": 8.36904393647906e-06, "loss": 1.0656, "step": 3714 }, { "epoch": 0.28502742861527053, "grad_norm": 0.21018655598163605, "learning_rate": 8.368127972124617e-06, "loss": 1.0425, "step": 3715 }, { "epoch": 0.2851041520146286, "grad_norm": 0.228448748588562, "learning_rate": 8.367211800789661e-06, "loss": 1.1348, "step": 3716 }, { "epoch": 0.2851808754139867, "grad_norm": 0.21304932236671448, "learning_rate": 8.366295422530493e-06, "loss": 1.0743, "step": 3717 }, { "epoch": 0.2852575988133448, "grad_norm": 0.17591263353824615, "learning_rate": 8.365378837403429e-06, "loss": 1.1702, "step": 3718 }, { "epoch": 0.28533432221270283, "grad_norm": 0.25215601921081543, "learning_rate": 8.364462045464794e-06, "loss": 1.0296, "step": 3719 }, { "epoch": 0.28541104561206093, "grad_norm": 0.30458611249923706, "learning_rate": 8.363545046770927e-06, "loss": 1.0593, "step": 3720 }, { "epoch": 0.285487769011419, "grad_norm": 0.19731886684894562, "learning_rate": 8.36262784137818e-06, "loss": 1.0582, "step": 3721 }, { "epoch": 0.2855644924107771, "grad_norm": 0.907728374004364, "learning_rate": 8.361710429342919e-06, "loss": 1.1462, "step": 3722 }, { "epoch": 0.2856412158101352, "grad_norm": 0.21294377744197845, "learning_rate": 8.360792810721522e-06, "loss": 1.1327, "step": 3723 }, { "epoch": 0.28571793920949323, "grad_norm": 0.27676767110824585, "learning_rate": 8.359874985570378e-06, "loss": 1.142, "step": 3724 }, { "epoch": 0.28579466260885134, "grad_norm": 0.23847100138664246, "learning_rate": 8.358956953945888e-06, "loss": 1.0883, "step": 3725 }, { "epoch": 0.2858713860082094, "grad_norm": 0.1849750131368637, "learning_rate": 8.358038715904472e-06, "loss": 1.1146, "step": 3726 }, { "epoch": 0.2859481094075675, "grad_norm": 0.16702759265899658, "learning_rate": 8.357120271502555e-06, "loss": 1.1705, "step": 3727 }, { "epoch": 0.2860248328069256, "grad_norm": 0.33261099457740784, "learning_rate": 8.356201620796579e-06, "loss": 1.1361, "step": 3728 }, { "epoch": 0.28610155620628364, "grad_norm": 0.1931898593902588, "learning_rate": 8.355282763842994e-06, "loss": 1.0845, "step": 3729 }, { "epoch": 0.28617827960564174, "grad_norm": 0.2817465662956238, "learning_rate": 8.354363700698271e-06, "loss": 1.1704, "step": 3730 }, { "epoch": 0.2862550030049998, "grad_norm": 0.1898747831583023, "learning_rate": 8.353444431418887e-06, "loss": 1.158, "step": 3731 }, { "epoch": 0.2863317264043579, "grad_norm": 0.24601517617702484, "learning_rate": 8.352524956061335e-06, "loss": 1.0873, "step": 3732 }, { "epoch": 0.286408449803716, "grad_norm": 0.19335655868053436, "learning_rate": 8.351605274682116e-06, "loss": 1.1403, "step": 3733 }, { "epoch": 0.28648517320307404, "grad_norm": 0.2245582491159439, "learning_rate": 8.35068538733775e-06, "loss": 1.132, "step": 3734 }, { "epoch": 0.28656189660243214, "grad_norm": 0.20876339077949524, "learning_rate": 8.349765294084764e-06, "loss": 1.0771, "step": 3735 }, { "epoch": 0.2866386200017902, "grad_norm": 0.18295948207378387, "learning_rate": 8.348844994979704e-06, "loss": 1.0956, "step": 3736 }, { "epoch": 0.2867153434011483, "grad_norm": 0.2281235307455063, "learning_rate": 8.34792449007912e-06, "loss": 1.0928, "step": 3737 }, { "epoch": 0.2867920668005064, "grad_norm": 0.2831052541732788, "learning_rate": 8.347003779439584e-06, "loss": 1.1057, "step": 3738 }, { "epoch": 0.28686879019986444, "grad_norm": 0.2150048017501831, "learning_rate": 8.346082863117673e-06, "loss": 1.171, "step": 3739 }, { "epoch": 0.28694551359922255, "grad_norm": 0.25102174282073975, "learning_rate": 8.345161741169981e-06, "loss": 1.0613, "step": 3740 }, { "epoch": 0.2870222369985806, "grad_norm": 0.19596229493618011, "learning_rate": 8.344240413653112e-06, "loss": 1.0523, "step": 3741 }, { "epoch": 0.2870989603979387, "grad_norm": 0.19993025064468384, "learning_rate": 8.343318880623688e-06, "loss": 1.0684, "step": 3742 }, { "epoch": 0.2871756837972968, "grad_norm": 0.18658755719661713, "learning_rate": 8.342397142138334e-06, "loss": 1.1436, "step": 3743 }, { "epoch": 0.28725240719665485, "grad_norm": 0.21363230049610138, "learning_rate": 8.341475198253697e-06, "loss": 1.1834, "step": 3744 }, { "epoch": 0.28732913059601295, "grad_norm": 0.2248489409685135, "learning_rate": 8.340553049026434e-06, "loss": 1.071, "step": 3745 }, { "epoch": 0.287405853995371, "grad_norm": 0.1751261055469513, "learning_rate": 8.339630694513209e-06, "loss": 1.0782, "step": 3746 }, { "epoch": 0.2874825773947291, "grad_norm": 0.23154132068157196, "learning_rate": 8.33870813477071e-06, "loss": 1.1035, "step": 3747 }, { "epoch": 0.2875593007940872, "grad_norm": 0.21141913533210754, "learning_rate": 8.337785369855622e-06, "loss": 1.0995, "step": 3748 }, { "epoch": 0.28763602419344525, "grad_norm": 0.2679730951786041, "learning_rate": 8.33686239982466e-06, "loss": 1.08, "step": 3749 }, { "epoch": 0.28771274759280335, "grad_norm": 0.17662377655506134, "learning_rate": 8.335939224734537e-06, "loss": 1.1274, "step": 3750 }, { "epoch": 0.2877894709921614, "grad_norm": 0.22424587607383728, "learning_rate": 8.335015844641987e-06, "loss": 1.0908, "step": 3751 }, { "epoch": 0.2878661943915195, "grad_norm": 0.2413048893213272, "learning_rate": 8.334092259603754e-06, "loss": 1.1438, "step": 3752 }, { "epoch": 0.2879429177908776, "grad_norm": 0.1838156282901764, "learning_rate": 8.333168469676595e-06, "loss": 1.0596, "step": 3753 }, { "epoch": 0.28801964119023565, "grad_norm": 0.2975185215473175, "learning_rate": 8.332244474917279e-06, "loss": 1.0965, "step": 3754 }, { "epoch": 0.28809636458959376, "grad_norm": 0.21674378216266632, "learning_rate": 8.331320275382589e-06, "loss": 1.1345, "step": 3755 }, { "epoch": 0.2881730879889518, "grad_norm": 0.18859441578388214, "learning_rate": 8.330395871129316e-06, "loss": 1.1897, "step": 3756 }, { "epoch": 0.2882498113883099, "grad_norm": 0.2009044885635376, "learning_rate": 8.329471262214271e-06, "loss": 1.1631, "step": 3757 }, { "epoch": 0.288326534787668, "grad_norm": 0.18849918246269226, "learning_rate": 8.328546448694273e-06, "loss": 1.1711, "step": 3758 }, { "epoch": 0.28840325818702606, "grad_norm": 0.2522987127304077, "learning_rate": 8.327621430626152e-06, "loss": 1.099, "step": 3759 }, { "epoch": 0.28847998158638416, "grad_norm": 0.21228890120983124, "learning_rate": 8.326696208066756e-06, "loss": 1.1168, "step": 3760 }, { "epoch": 0.2885567049857422, "grad_norm": 0.21851591765880585, "learning_rate": 8.325770781072939e-06, "loss": 1.131, "step": 3761 }, { "epoch": 0.2886334283851003, "grad_norm": 0.2218645066022873, "learning_rate": 8.324845149701574e-06, "loss": 1.1795, "step": 3762 }, { "epoch": 0.2887101517844584, "grad_norm": 0.3287045359611511, "learning_rate": 8.323919314009544e-06, "loss": 1.1803, "step": 3763 }, { "epoch": 0.28878687518381646, "grad_norm": 0.18719208240509033, "learning_rate": 8.32299327405374e-06, "loss": 1.0687, "step": 3764 }, { "epoch": 0.28886359858317456, "grad_norm": 0.20639391243457794, "learning_rate": 8.322067029891072e-06, "loss": 1.1692, "step": 3765 }, { "epoch": 0.28894032198253267, "grad_norm": 0.19418184459209442, "learning_rate": 8.321140581578461e-06, "loss": 1.0304, "step": 3766 }, { "epoch": 0.2890170453818907, "grad_norm": 0.2229248434305191, "learning_rate": 8.32021392917284e-06, "loss": 1.0461, "step": 3767 }, { "epoch": 0.2890937687812488, "grad_norm": 0.24352501332759857, "learning_rate": 8.319287072731153e-06, "loss": 1.1458, "step": 3768 }, { "epoch": 0.28917049218060686, "grad_norm": 0.2475014328956604, "learning_rate": 8.318360012310359e-06, "loss": 1.0352, "step": 3769 }, { "epoch": 0.28924721557996497, "grad_norm": 0.18792057037353516, "learning_rate": 8.317432747967425e-06, "loss": 1.117, "step": 3770 }, { "epoch": 0.28932393897932307, "grad_norm": 0.19967584311962128, "learning_rate": 8.316505279759339e-06, "loss": 1.0988, "step": 3771 }, { "epoch": 0.2894006623786811, "grad_norm": 0.19132503867149353, "learning_rate": 8.315577607743091e-06, "loss": 1.0694, "step": 3772 }, { "epoch": 0.2894773857780392, "grad_norm": 0.21872927248477936, "learning_rate": 8.314649731975694e-06, "loss": 1.1387, "step": 3773 }, { "epoch": 0.28955410917739727, "grad_norm": 0.1840377151966095, "learning_rate": 8.313721652514166e-06, "loss": 1.1083, "step": 3774 }, { "epoch": 0.28963083257675537, "grad_norm": 0.22854673862457275, "learning_rate": 8.31279336941554e-06, "loss": 1.1391, "step": 3775 }, { "epoch": 0.2897075559761135, "grad_norm": 0.20508398115634918, "learning_rate": 8.31186488273686e-06, "loss": 1.0626, "step": 3776 }, { "epoch": 0.2897842793754715, "grad_norm": 0.25101372599601746, "learning_rate": 8.310936192535187e-06, "loss": 1.079, "step": 3777 }, { "epoch": 0.2898610027748296, "grad_norm": 0.1953643560409546, "learning_rate": 8.31000729886759e-06, "loss": 1.093, "step": 3778 }, { "epoch": 0.28993772617418767, "grad_norm": 0.1967104971408844, "learning_rate": 8.309078201791152e-06, "loss": 1.0551, "step": 3779 }, { "epoch": 0.2900144495735458, "grad_norm": 0.09718634933233261, "learning_rate": 8.30814890136297e-06, "loss": 1.1832, "step": 3780 }, { "epoch": 0.2900911729729039, "grad_norm": 0.23312842845916748, "learning_rate": 8.30721939764015e-06, "loss": 1.0738, "step": 3781 }, { "epoch": 0.2901678963722619, "grad_norm": 0.6514925956726074, "learning_rate": 8.306289690679812e-06, "loss": 1.1887, "step": 3782 }, { "epoch": 0.29024461977162, "grad_norm": 0.09770411252975464, "learning_rate": 8.305359780539092e-06, "loss": 1.1796, "step": 3783 }, { "epoch": 0.2903213431709781, "grad_norm": 0.21582193672657013, "learning_rate": 8.304429667275133e-06, "loss": 1.0992, "step": 3784 }, { "epoch": 0.2903980665703362, "grad_norm": 0.18806371092796326, "learning_rate": 8.303499350945093e-06, "loss": 1.0478, "step": 3785 }, { "epoch": 0.2904747899696943, "grad_norm": 0.18487636744976044, "learning_rate": 8.302568831606144e-06, "loss": 1.1109, "step": 3786 }, { "epoch": 0.2905515133690523, "grad_norm": 0.2417554259300232, "learning_rate": 8.301638109315466e-06, "loss": 1.1751, "step": 3787 }, { "epoch": 0.29062823676841043, "grad_norm": 0.19610819220542908, "learning_rate": 8.300707184130259e-06, "loss": 1.1756, "step": 3788 }, { "epoch": 0.2907049601677685, "grad_norm": 0.19038741290569305, "learning_rate": 8.299776056107727e-06, "loss": 1.1581, "step": 3789 }, { "epoch": 0.2907816835671266, "grad_norm": 0.5155816674232483, "learning_rate": 8.298844725305092e-06, "loss": 1.06, "step": 3790 }, { "epoch": 0.2908584069664847, "grad_norm": 0.21824371814727783, "learning_rate": 8.297913191779585e-06, "loss": 1.2447, "step": 3791 }, { "epoch": 0.29093513036584273, "grad_norm": 0.1882278323173523, "learning_rate": 8.296981455588453e-06, "loss": 1.0919, "step": 3792 }, { "epoch": 0.29101185376520083, "grad_norm": 0.22942133247852325, "learning_rate": 8.296049516788956e-06, "loss": 1.095, "step": 3793 }, { "epoch": 0.2910885771645589, "grad_norm": 0.24241961538791656, "learning_rate": 8.295117375438358e-06, "loss": 0.9663, "step": 3794 }, { "epoch": 0.291165300563917, "grad_norm": 0.1982918530702591, "learning_rate": 8.294185031593946e-06, "loss": 1.1283, "step": 3795 }, { "epoch": 0.2912420239632751, "grad_norm": 0.2456391304731369, "learning_rate": 8.293252485313014e-06, "loss": 1.0803, "step": 3796 }, { "epoch": 0.29131874736263313, "grad_norm": 0.20202863216400146, "learning_rate": 8.292319736652869e-06, "loss": 1.2023, "step": 3797 }, { "epoch": 0.29139547076199124, "grad_norm": 0.25864696502685547, "learning_rate": 8.291386785670831e-06, "loss": 1.1069, "step": 3798 }, { "epoch": 0.2914721941613493, "grad_norm": 0.3714875280857086, "learning_rate": 8.290453632424236e-06, "loss": 1.0873, "step": 3799 }, { "epoch": 0.2915489175607074, "grad_norm": 0.24229831993579865, "learning_rate": 8.289520276970421e-06, "loss": 1.1541, "step": 3800 }, { "epoch": 0.2916256409600655, "grad_norm": 0.3304937779903412, "learning_rate": 8.288586719366751e-06, "loss": 0.9751, "step": 3801 }, { "epoch": 0.29170236435942354, "grad_norm": 0.35480356216430664, "learning_rate": 8.287652959670593e-06, "loss": 1.1041, "step": 3802 }, { "epoch": 0.29177908775878164, "grad_norm": 0.2612221837043762, "learning_rate": 8.286718997939326e-06, "loss": 1.1279, "step": 3803 }, { "epoch": 0.2918558111581397, "grad_norm": 0.22340965270996094, "learning_rate": 8.285784834230347e-06, "loss": 1.095, "step": 3804 }, { "epoch": 0.2919325345574978, "grad_norm": 0.18766437470912933, "learning_rate": 8.284850468601065e-06, "loss": 1.1387, "step": 3805 }, { "epoch": 0.2920092579568559, "grad_norm": 0.0968589186668396, "learning_rate": 8.283915901108892e-06, "loss": 1.1877, "step": 3806 }, { "epoch": 0.29208598135621394, "grad_norm": 0.2210705578327179, "learning_rate": 8.282981131811269e-06, "loss": 1.1682, "step": 3807 }, { "epoch": 0.29216270475557204, "grad_norm": 0.280536949634552, "learning_rate": 8.282046160765636e-06, "loss": 1.0481, "step": 3808 }, { "epoch": 0.2922394281549301, "grad_norm": 0.21533912420272827, "learning_rate": 8.281110988029447e-06, "loss": 1.1369, "step": 3809 }, { "epoch": 0.2923161515542882, "grad_norm": 0.18924959003925323, "learning_rate": 8.280175613660175e-06, "loss": 1.1445, "step": 3810 }, { "epoch": 0.2923928749536463, "grad_norm": 0.2798130214214325, "learning_rate": 8.279240037715297e-06, "loss": 1.1778, "step": 3811 }, { "epoch": 0.29246959835300435, "grad_norm": 0.30717161297798157, "learning_rate": 8.27830426025231e-06, "loss": 1.0762, "step": 3812 }, { "epoch": 0.29254632175236245, "grad_norm": 0.20868884027004242, "learning_rate": 8.277368281328721e-06, "loss": 1.0957, "step": 3813 }, { "epoch": 0.2926230451517205, "grad_norm": 0.19083918631076813, "learning_rate": 8.276432101002045e-06, "loss": 1.135, "step": 3814 }, { "epoch": 0.2926997685510786, "grad_norm": 0.23071140050888062, "learning_rate": 8.275495719329813e-06, "loss": 1.113, "step": 3815 }, { "epoch": 0.2927764919504367, "grad_norm": 0.21547861397266388, "learning_rate": 8.27455913636957e-06, "loss": 1.0999, "step": 3816 }, { "epoch": 0.29285321534979475, "grad_norm": 0.2138332724571228, "learning_rate": 8.273622352178874e-06, "loss": 1.1469, "step": 3817 }, { "epoch": 0.29292993874915285, "grad_norm": 0.18188145756721497, "learning_rate": 8.272685366815287e-06, "loss": 1.0859, "step": 3818 }, { "epoch": 0.2930066621485109, "grad_norm": 0.2080245167016983, "learning_rate": 8.271748180336393e-06, "loss": 1.1316, "step": 3819 }, { "epoch": 0.293083385547869, "grad_norm": 0.2123948484659195, "learning_rate": 8.270810792799784e-06, "loss": 1.1465, "step": 3820 }, { "epoch": 0.2931601089472271, "grad_norm": 0.19041724503040314, "learning_rate": 8.269873204263064e-06, "loss": 1.0598, "step": 3821 }, { "epoch": 0.29323683234658515, "grad_norm": 0.22251498699188232, "learning_rate": 8.268935414783852e-06, "loss": 1.1339, "step": 3822 }, { "epoch": 0.29331355574594326, "grad_norm": 0.17771509289741516, "learning_rate": 8.267997424419776e-06, "loss": 1.2564, "step": 3823 }, { "epoch": 0.29339027914530136, "grad_norm": 0.2597888708114624, "learning_rate": 8.267059233228478e-06, "loss": 1.2506, "step": 3824 }, { "epoch": 0.2934670025446594, "grad_norm": 0.24584807455539703, "learning_rate": 8.266120841267616e-06, "loss": 1.2137, "step": 3825 }, { "epoch": 0.2935437259440175, "grad_norm": 0.1701756864786148, "learning_rate": 8.265182248594852e-06, "loss": 1.0915, "step": 3826 }, { "epoch": 0.29362044934337556, "grad_norm": 0.18588493764400482, "learning_rate": 8.264243455267867e-06, "loss": 1.1241, "step": 3827 }, { "epoch": 0.29369717274273366, "grad_norm": 0.30028030276298523, "learning_rate": 8.263304461344352e-06, "loss": 1.141, "step": 3828 }, { "epoch": 0.29377389614209176, "grad_norm": 0.3277975916862488, "learning_rate": 8.262365266882013e-06, "loss": 1.1039, "step": 3829 }, { "epoch": 0.2938506195414498, "grad_norm": 0.4898519515991211, "learning_rate": 8.261425871938562e-06, "loss": 1.0451, "step": 3830 }, { "epoch": 0.2939273429408079, "grad_norm": 0.2104838341474533, "learning_rate": 8.26048627657173e-06, "loss": 1.1553, "step": 3831 }, { "epoch": 0.29400406634016596, "grad_norm": 0.28333738446235657, "learning_rate": 8.259546480839258e-06, "loss": 1.0897, "step": 3832 }, { "epoch": 0.29408078973952406, "grad_norm": 0.1905307173728943, "learning_rate": 8.258606484798896e-06, "loss": 1.1085, "step": 3833 }, { "epoch": 0.29415751313888217, "grad_norm": 0.2474374622106552, "learning_rate": 8.257666288508413e-06, "loss": 1.0522, "step": 3834 }, { "epoch": 0.2942342365382402, "grad_norm": 0.197981595993042, "learning_rate": 8.256725892025582e-06, "loss": 1.1407, "step": 3835 }, { "epoch": 0.2943109599375983, "grad_norm": 0.20384477078914642, "learning_rate": 8.255785295408199e-06, "loss": 1.0441, "step": 3836 }, { "epoch": 0.29438768333695636, "grad_norm": 0.22225767374038696, "learning_rate": 8.254844498714063e-06, "loss": 1.1091, "step": 3837 }, { "epoch": 0.29446440673631447, "grad_norm": 0.20073580741882324, "learning_rate": 8.253903502000987e-06, "loss": 1.091, "step": 3838 }, { "epoch": 0.29454113013567257, "grad_norm": 0.1919577419757843, "learning_rate": 8.2529623053268e-06, "loss": 1.1436, "step": 3839 }, { "epoch": 0.2946178535350306, "grad_norm": 0.18158072233200073, "learning_rate": 8.252020908749338e-06, "loss": 1.075, "step": 3840 }, { "epoch": 0.2946945769343887, "grad_norm": 0.19060185551643372, "learning_rate": 8.251079312326458e-06, "loss": 1.1419, "step": 3841 }, { "epoch": 0.29477130033374677, "grad_norm": 0.25890034437179565, "learning_rate": 8.25013751611602e-06, "loss": 1.1048, "step": 3842 }, { "epoch": 0.29484802373310487, "grad_norm": 0.19811205565929413, "learning_rate": 8.249195520175898e-06, "loss": 1.0502, "step": 3843 }, { "epoch": 0.29492474713246297, "grad_norm": 0.17880356311798096, "learning_rate": 8.248253324563984e-06, "loss": 1.084, "step": 3844 }, { "epoch": 0.295001470531821, "grad_norm": 0.18067120015621185, "learning_rate": 8.247310929338175e-06, "loss": 1.0782, "step": 3845 }, { "epoch": 0.2950781939311791, "grad_norm": 0.21518902480602264, "learning_rate": 8.246368334556386e-06, "loss": 1.1287, "step": 3846 }, { "epoch": 0.29515491733053717, "grad_norm": 0.2049192637205124, "learning_rate": 8.245425540276542e-06, "loss": 1.0688, "step": 3847 }, { "epoch": 0.2952316407298953, "grad_norm": 0.2087092101573944, "learning_rate": 8.24448254655658e-06, "loss": 1.1452, "step": 3848 }, { "epoch": 0.2953083641292534, "grad_norm": 0.36390796303749084, "learning_rate": 8.24353935345445e-06, "loss": 1.1572, "step": 3849 }, { "epoch": 0.2953850875286114, "grad_norm": 0.3574431240558624, "learning_rate": 8.242595961028113e-06, "loss": 1.0898, "step": 3850 }, { "epoch": 0.2954618109279695, "grad_norm": 0.2619244456291199, "learning_rate": 8.241652369335542e-06, "loss": 1.1407, "step": 3851 }, { "epoch": 0.2955385343273276, "grad_norm": 0.2311657816171646, "learning_rate": 8.240708578434725e-06, "loss": 1.0526, "step": 3852 }, { "epoch": 0.2956152577266857, "grad_norm": 0.24692046642303467, "learning_rate": 8.239764588383658e-06, "loss": 1.124, "step": 3853 }, { "epoch": 0.2956919811260438, "grad_norm": 0.196799173951149, "learning_rate": 8.238820399240355e-06, "loss": 1.0962, "step": 3854 }, { "epoch": 0.2957687045254018, "grad_norm": 0.2669820189476013, "learning_rate": 8.237876011062837e-06, "loss": 1.1509, "step": 3855 }, { "epoch": 0.29584542792475993, "grad_norm": 0.20663899183273315, "learning_rate": 8.23693142390914e-06, "loss": 1.1049, "step": 3856 }, { "epoch": 0.295922151324118, "grad_norm": 0.23704101145267487, "learning_rate": 8.235986637837309e-06, "loss": 1.1519, "step": 3857 }, { "epoch": 0.2959988747234761, "grad_norm": 0.2244967371225357, "learning_rate": 8.235041652905409e-06, "loss": 1.0355, "step": 3858 }, { "epoch": 0.2960755981228342, "grad_norm": 0.4065455496311188, "learning_rate": 8.234096469171506e-06, "loss": 1.1519, "step": 3859 }, { "epoch": 0.29615232152219223, "grad_norm": 0.1876799613237381, "learning_rate": 8.233151086693687e-06, "loss": 1.1146, "step": 3860 }, { "epoch": 0.29622904492155033, "grad_norm": 0.0917532667517662, "learning_rate": 8.232205505530048e-06, "loss": 1.1536, "step": 3861 }, { "epoch": 0.2963057683209084, "grad_norm": 0.21154895424842834, "learning_rate": 8.231259725738698e-06, "loss": 1.1508, "step": 3862 }, { "epoch": 0.2963824917202665, "grad_norm": 0.1760980784893036, "learning_rate": 8.230313747377756e-06, "loss": 1.0951, "step": 3863 }, { "epoch": 0.2964592151196246, "grad_norm": 0.36697134375572205, "learning_rate": 8.229367570505358e-06, "loss": 1.1842, "step": 3864 }, { "epoch": 0.29653593851898263, "grad_norm": 0.20566296577453613, "learning_rate": 8.228421195179645e-06, "loss": 1.1848, "step": 3865 }, { "epoch": 0.29661266191834074, "grad_norm": 0.09389882534742355, "learning_rate": 8.227474621458777e-06, "loss": 1.2075, "step": 3866 }, { "epoch": 0.2966893853176988, "grad_norm": 0.19334961473941803, "learning_rate": 8.226527849400925e-06, "loss": 1.0933, "step": 3867 }, { "epoch": 0.2967661087170569, "grad_norm": 0.29406043887138367, "learning_rate": 8.225580879064266e-06, "loss": 1.0818, "step": 3868 }, { "epoch": 0.296842832116415, "grad_norm": 0.2890907824039459, "learning_rate": 8.224633710506997e-06, "loss": 1.1231, "step": 3869 }, { "epoch": 0.29691955551577304, "grad_norm": 0.22382570803165436, "learning_rate": 8.223686343787325e-06, "loss": 1.1666, "step": 3870 }, { "epoch": 0.29699627891513114, "grad_norm": 0.7301306128501892, "learning_rate": 8.222738778963466e-06, "loss": 1.0985, "step": 3871 }, { "epoch": 0.2970730023144892, "grad_norm": 0.22236260771751404, "learning_rate": 8.22179101609365e-06, "loss": 1.1207, "step": 3872 }, { "epoch": 0.2971497257138473, "grad_norm": 0.23953722417354584, "learning_rate": 8.220843055236122e-06, "loss": 1.0731, "step": 3873 }, { "epoch": 0.2972264491132054, "grad_norm": 0.2674197256565094, "learning_rate": 8.219894896449135e-06, "loss": 1.212, "step": 3874 }, { "epoch": 0.29730317251256344, "grad_norm": 0.19370295107364655, "learning_rate": 8.218946539790957e-06, "loss": 1.1786, "step": 3875 }, { "epoch": 0.29737989591192154, "grad_norm": 0.23635898530483246, "learning_rate": 8.217997985319865e-06, "loss": 1.0969, "step": 3876 }, { "epoch": 0.2974566193112796, "grad_norm": 0.2949928939342499, "learning_rate": 8.217049233094153e-06, "loss": 1.1559, "step": 3877 }, { "epoch": 0.2975333427106377, "grad_norm": 0.2652623653411865, "learning_rate": 8.216100283172122e-06, "loss": 1.1353, "step": 3878 }, { "epoch": 0.2976100661099958, "grad_norm": 0.20331279933452606, "learning_rate": 8.215151135612089e-06, "loss": 1.0837, "step": 3879 }, { "epoch": 0.29768678950935384, "grad_norm": 0.19739395380020142, "learning_rate": 8.214201790472383e-06, "loss": 1.0734, "step": 3880 }, { "epoch": 0.29776351290871195, "grad_norm": 0.3071683645248413, "learning_rate": 8.21325224781134e-06, "loss": 1.0754, "step": 3881 }, { "epoch": 0.29784023630807005, "grad_norm": 0.2609206736087799, "learning_rate": 8.212302507687315e-06, "loss": 1.1057, "step": 3882 }, { "epoch": 0.2979169597074281, "grad_norm": 0.2700245976448059, "learning_rate": 8.21135257015867e-06, "loss": 1.1109, "step": 3883 }, { "epoch": 0.2979936831067862, "grad_norm": 0.18970169126987457, "learning_rate": 8.210402435283785e-06, "loss": 1.0351, "step": 3884 }, { "epoch": 0.29807040650614425, "grad_norm": 0.3306960165500641, "learning_rate": 8.209452103121042e-06, "loss": 1.0629, "step": 3885 }, { "epoch": 0.29814712990550235, "grad_norm": 0.20895330607891083, "learning_rate": 8.208501573728846e-06, "loss": 1.0366, "step": 3886 }, { "epoch": 0.29822385330486045, "grad_norm": 0.20507292449474335, "learning_rate": 8.20755084716561e-06, "loss": 1.0603, "step": 3887 }, { "epoch": 0.2983005767042185, "grad_norm": 0.4782136380672455, "learning_rate": 8.206599923489757e-06, "loss": 1.0882, "step": 3888 }, { "epoch": 0.2983773001035766, "grad_norm": 0.26803821325302124, "learning_rate": 8.205648802759725e-06, "loss": 1.073, "step": 3889 }, { "epoch": 0.29845402350293465, "grad_norm": 0.22043073177337646, "learning_rate": 8.20469748503396e-06, "loss": 1.0982, "step": 3890 }, { "epoch": 0.29853074690229275, "grad_norm": 0.20960448682308197, "learning_rate": 8.203745970370926e-06, "loss": 1.0268, "step": 3891 }, { "epoch": 0.29860747030165086, "grad_norm": 0.38504934310913086, "learning_rate": 8.202794258829097e-06, "loss": 1.0779, "step": 3892 }, { "epoch": 0.2986841937010089, "grad_norm": 0.225070521235466, "learning_rate": 8.201842350466955e-06, "loss": 1.1426, "step": 3893 }, { "epoch": 0.298760917100367, "grad_norm": 0.19546279311180115, "learning_rate": 8.200890245342999e-06, "loss": 1.1334, "step": 3894 }, { "epoch": 0.29883764049972505, "grad_norm": 0.36011919379234314, "learning_rate": 8.199937943515738e-06, "loss": 1.1049, "step": 3895 }, { "epoch": 0.29891436389908316, "grad_norm": 0.17596594989299774, "learning_rate": 8.198985445043695e-06, "loss": 1.1488, "step": 3896 }, { "epoch": 0.29899108729844126, "grad_norm": 0.17375950515270233, "learning_rate": 8.1980327499854e-06, "loss": 1.0872, "step": 3897 }, { "epoch": 0.2990678106977993, "grad_norm": 0.2762269675731659, "learning_rate": 8.197079858399403e-06, "loss": 1.0879, "step": 3898 }, { "epoch": 0.2991445340971574, "grad_norm": 0.2179732769727707, "learning_rate": 8.19612677034426e-06, "loss": 1.1718, "step": 3899 }, { "epoch": 0.29922125749651546, "grad_norm": 0.20949165523052216, "learning_rate": 8.19517348587854e-06, "loss": 1.034, "step": 3900 }, { "epoch": 0.29929798089587356, "grad_norm": 0.1900671124458313, "learning_rate": 8.194220005060825e-06, "loss": 1.149, "step": 3901 }, { "epoch": 0.29937470429523166, "grad_norm": 0.18745273351669312, "learning_rate": 8.193266327949709e-06, "loss": 1.0555, "step": 3902 }, { "epoch": 0.2994514276945897, "grad_norm": 0.23035821318626404, "learning_rate": 8.1923124546038e-06, "loss": 1.0319, "step": 3903 }, { "epoch": 0.2995281510939478, "grad_norm": 0.2162892371416092, "learning_rate": 8.191358385081714e-06, "loss": 1.1767, "step": 3904 }, { "epoch": 0.29960487449330586, "grad_norm": 0.1997142881155014, "learning_rate": 8.19040411944208e-06, "loss": 1.0162, "step": 3905 }, { "epoch": 0.29968159789266396, "grad_norm": 0.2502029538154602, "learning_rate": 8.189449657743544e-06, "loss": 1.124, "step": 3906 }, { "epoch": 0.29975832129202207, "grad_norm": 0.1956532597541809, "learning_rate": 8.188495000044756e-06, "loss": 1.0816, "step": 3907 }, { "epoch": 0.2998350446913801, "grad_norm": 0.20867270231246948, "learning_rate": 8.187540146404387e-06, "loss": 1.0455, "step": 3908 }, { "epoch": 0.2999117680907382, "grad_norm": 0.21728914976119995, "learning_rate": 8.186585096881112e-06, "loss": 1.1954, "step": 3909 }, { "epoch": 0.29998849149009627, "grad_norm": 0.22440561652183533, "learning_rate": 8.18562985153362e-06, "loss": 1.0955, "step": 3910 }, { "epoch": 0.30006521488945437, "grad_norm": 0.2099321037530899, "learning_rate": 8.18467441042062e-06, "loss": 1.1713, "step": 3911 }, { "epoch": 0.30014193828881247, "grad_norm": 0.18662476539611816, "learning_rate": 8.183718773600817e-06, "loss": 1.0548, "step": 3912 }, { "epoch": 0.3002186616881705, "grad_norm": 0.21245455741882324, "learning_rate": 8.182762941132944e-06, "loss": 1.028, "step": 3913 }, { "epoch": 0.3002953850875286, "grad_norm": 0.21521416306495667, "learning_rate": 8.181806913075738e-06, "loss": 1.03, "step": 3914 }, { "epoch": 0.30037210848688667, "grad_norm": 0.10013094544410706, "learning_rate": 8.18085068948795e-06, "loss": 1.1795, "step": 3915 }, { "epoch": 0.30044883188624477, "grad_norm": 0.18347851932048798, "learning_rate": 8.179894270428341e-06, "loss": 1.1026, "step": 3916 }, { "epoch": 0.3005255552856029, "grad_norm": 0.27251511812210083, "learning_rate": 8.178937655955687e-06, "loss": 1.0816, "step": 3917 }, { "epoch": 0.3006022786849609, "grad_norm": 0.1849866509437561, "learning_rate": 8.177980846128772e-06, "loss": 1.1545, "step": 3918 }, { "epoch": 0.300679002084319, "grad_norm": 0.09765039384365082, "learning_rate": 8.1770238410064e-06, "loss": 1.2487, "step": 3919 }, { "epoch": 0.30075572548367707, "grad_norm": 0.21391303837299347, "learning_rate": 8.176066640647374e-06, "loss": 1.0285, "step": 3920 }, { "epoch": 0.3008324488830352, "grad_norm": 0.21639315783977509, "learning_rate": 8.175109245110524e-06, "loss": 1.1051, "step": 3921 }, { "epoch": 0.3009091722823933, "grad_norm": 0.22535598278045654, "learning_rate": 8.174151654454679e-06, "loss": 1.0505, "step": 3922 }, { "epoch": 0.3009858956817513, "grad_norm": 0.21676214039325714, "learning_rate": 8.173193868738687e-06, "loss": 1.0606, "step": 3923 }, { "epoch": 0.30106261908110943, "grad_norm": 0.2725915312767029, "learning_rate": 8.172235888021407e-06, "loss": 1.138, "step": 3924 }, { "epoch": 0.3011393424804675, "grad_norm": 0.184920996427536, "learning_rate": 8.17127771236171e-06, "loss": 1.0716, "step": 3925 }, { "epoch": 0.3012160658798256, "grad_norm": 0.1747226119041443, "learning_rate": 8.170319341818478e-06, "loss": 1.1072, "step": 3926 }, { "epoch": 0.3012927892791837, "grad_norm": 0.22696667909622192, "learning_rate": 8.169360776450606e-06, "loss": 1.0616, "step": 3927 }, { "epoch": 0.30136951267854173, "grad_norm": 0.23911763727664948, "learning_rate": 8.168402016316999e-06, "loss": 1.1059, "step": 3928 }, { "epoch": 0.30144623607789983, "grad_norm": 0.22358787059783936, "learning_rate": 8.167443061476577e-06, "loss": 1.1077, "step": 3929 }, { "epoch": 0.3015229594772579, "grad_norm": 0.43789634108543396, "learning_rate": 8.16648391198827e-06, "loss": 1.0162, "step": 3930 }, { "epoch": 0.301599682876616, "grad_norm": 0.18875829875469208, "learning_rate": 8.165524567911018e-06, "loss": 1.0604, "step": 3931 }, { "epoch": 0.3016764062759741, "grad_norm": 0.24659110605716705, "learning_rate": 8.16456502930378e-06, "loss": 1.1107, "step": 3932 }, { "epoch": 0.30175312967533213, "grad_norm": 0.29284417629241943, "learning_rate": 8.163605296225516e-06, "loss": 1.0204, "step": 3933 }, { "epoch": 0.30182985307469024, "grad_norm": 0.20278404653072357, "learning_rate": 8.16264536873521e-06, "loss": 1.0895, "step": 3934 }, { "epoch": 0.3019065764740483, "grad_norm": 0.2352941334247589, "learning_rate": 8.161685246891848e-06, "loss": 1.0648, "step": 3935 }, { "epoch": 0.3019832998734064, "grad_norm": 0.34650784730911255, "learning_rate": 8.160724930754435e-06, "loss": 1.0894, "step": 3936 }, { "epoch": 0.3020600232727645, "grad_norm": 0.19472993910312653, "learning_rate": 8.159764420381983e-06, "loss": 1.0429, "step": 3937 }, { "epoch": 0.30213674667212254, "grad_norm": 0.20070721209049225, "learning_rate": 8.158803715833519e-06, "loss": 1.0747, "step": 3938 }, { "epoch": 0.30221347007148064, "grad_norm": 0.21290375292301178, "learning_rate": 8.15784281716808e-06, "loss": 1.1553, "step": 3939 }, { "epoch": 0.30229019347083874, "grad_norm": 0.24990025162696838, "learning_rate": 8.156881724444718e-06, "loss": 1.1493, "step": 3940 }, { "epoch": 0.3023669168701968, "grad_norm": 0.23822090029716492, "learning_rate": 8.15592043772249e-06, "loss": 1.1032, "step": 3941 }, { "epoch": 0.3024436402695549, "grad_norm": 0.2045547515153885, "learning_rate": 8.154958957060476e-06, "loss": 1.0672, "step": 3942 }, { "epoch": 0.30252036366891294, "grad_norm": 0.2072467803955078, "learning_rate": 8.153997282517758e-06, "loss": 1.0608, "step": 3943 }, { "epoch": 0.30259708706827104, "grad_norm": 0.1706719994544983, "learning_rate": 8.153035414153431e-06, "loss": 1.1675, "step": 3944 }, { "epoch": 0.30267381046762915, "grad_norm": 0.1865326166152954, "learning_rate": 8.15207335202661e-06, "loss": 1.0693, "step": 3945 }, { "epoch": 0.3027505338669872, "grad_norm": 0.2699199616909027, "learning_rate": 8.151111096196413e-06, "loss": 1.1473, "step": 3946 }, { "epoch": 0.3028272572663453, "grad_norm": 0.21781158447265625, "learning_rate": 8.150148646721974e-06, "loss": 1.2394, "step": 3947 }, { "epoch": 0.30290398066570334, "grad_norm": 0.16891039907932281, "learning_rate": 8.149186003662437e-06, "loss": 1.1167, "step": 3948 }, { "epoch": 0.30298070406506145, "grad_norm": 0.46643057465553284, "learning_rate": 8.14822316707696e-06, "loss": 1.0735, "step": 3949 }, { "epoch": 0.30305742746441955, "grad_norm": 0.21298249065876007, "learning_rate": 8.147260137024711e-06, "loss": 1.1391, "step": 3950 }, { "epoch": 0.3031341508637776, "grad_norm": 0.4137958288192749, "learning_rate": 8.146296913564872e-06, "loss": 1.134, "step": 3951 }, { "epoch": 0.3032108742631357, "grad_norm": 0.25261110067367554, "learning_rate": 8.145333496756636e-06, "loss": 1.0634, "step": 3952 }, { "epoch": 0.30328759766249375, "grad_norm": 0.2999952733516693, "learning_rate": 8.144369886659206e-06, "loss": 1.1238, "step": 3953 }, { "epoch": 0.30336432106185185, "grad_norm": 0.18594397604465485, "learning_rate": 8.1434060833318e-06, "loss": 1.1251, "step": 3954 }, { "epoch": 0.30344104446120995, "grad_norm": 0.2186991572380066, "learning_rate": 8.142442086833644e-06, "loss": 1.2021, "step": 3955 }, { "epoch": 0.303517767860568, "grad_norm": 0.22676776349544525, "learning_rate": 8.14147789722398e-06, "loss": 1.0588, "step": 3956 }, { "epoch": 0.3035944912599261, "grad_norm": 0.19508051872253418, "learning_rate": 8.14051351456206e-06, "loss": 1.0727, "step": 3957 }, { "epoch": 0.30367121465928415, "grad_norm": 0.19465959072113037, "learning_rate": 8.139548938907147e-06, "loss": 1.0911, "step": 3958 }, { "epoch": 0.30374793805864225, "grad_norm": 0.1899416595697403, "learning_rate": 8.138584170318517e-06, "loss": 1.1243, "step": 3959 }, { "epoch": 0.30382466145800036, "grad_norm": 0.2532894015312195, "learning_rate": 8.13761920885546e-06, "loss": 1.0794, "step": 3960 }, { "epoch": 0.3039013848573584, "grad_norm": 0.1999843716621399, "learning_rate": 8.136654054577272e-06, "loss": 1.1459, "step": 3961 }, { "epoch": 0.3039781082567165, "grad_norm": 0.2611137628555298, "learning_rate": 8.135688707543266e-06, "loss": 1.1045, "step": 3962 }, { "epoch": 0.30405483165607455, "grad_norm": 0.21078890562057495, "learning_rate": 8.134723167812766e-06, "loss": 1.1587, "step": 3963 }, { "epoch": 0.30413155505543266, "grad_norm": 0.19844074547290802, "learning_rate": 8.133757435445104e-06, "loss": 1.0977, "step": 3964 }, { "epoch": 0.30420827845479076, "grad_norm": 0.1858065277338028, "learning_rate": 8.132791510499629e-06, "loss": 1.0997, "step": 3965 }, { "epoch": 0.3042850018541488, "grad_norm": 0.29998207092285156, "learning_rate": 8.131825393035702e-06, "loss": 1.142, "step": 3966 }, { "epoch": 0.3043617252535069, "grad_norm": 0.21378467977046967, "learning_rate": 8.130859083112687e-06, "loss": 1.0925, "step": 3967 }, { "epoch": 0.30443844865286496, "grad_norm": 0.16896182298660278, "learning_rate": 8.129892580789973e-06, "loss": 1.1107, "step": 3968 }, { "epoch": 0.30451517205222306, "grad_norm": 0.21405291557312012, "learning_rate": 8.128925886126953e-06, "loss": 1.049, "step": 3969 }, { "epoch": 0.30459189545158116, "grad_norm": 0.32231900095939636, "learning_rate": 8.127958999183027e-06, "loss": 1.1532, "step": 3970 }, { "epoch": 0.3046686188509392, "grad_norm": 0.17346259951591492, "learning_rate": 8.126991920017621e-06, "loss": 1.0472, "step": 3971 }, { "epoch": 0.3047453422502973, "grad_norm": 0.19376301765441895, "learning_rate": 8.126024648690158e-06, "loss": 1.1277, "step": 3972 }, { "epoch": 0.30482206564965536, "grad_norm": 0.2209787219762802, "learning_rate": 8.125057185260085e-06, "loss": 1.0472, "step": 3973 }, { "epoch": 0.30489878904901346, "grad_norm": 0.21043750643730164, "learning_rate": 8.124089529786852e-06, "loss": 1.1155, "step": 3974 }, { "epoch": 0.30497551244837157, "grad_norm": 0.4043278992176056, "learning_rate": 8.123121682329923e-06, "loss": 1.1576, "step": 3975 }, { "epoch": 0.3050522358477296, "grad_norm": 0.0967136025428772, "learning_rate": 8.122153642948778e-06, "loss": 1.2306, "step": 3976 }, { "epoch": 0.3051289592470877, "grad_norm": 0.19939693808555603, "learning_rate": 8.121185411702903e-06, "loss": 1.0899, "step": 3977 }, { "epoch": 0.30520568264644576, "grad_norm": 0.21149060130119324, "learning_rate": 8.1202169886518e-06, "loss": 1.1058, "step": 3978 }, { "epoch": 0.30528240604580387, "grad_norm": 0.2307398021221161, "learning_rate": 8.119248373854979e-06, "loss": 1.0349, "step": 3979 }, { "epoch": 0.30535912944516197, "grad_norm": 0.22601927816867828, "learning_rate": 8.118279567371968e-06, "loss": 1.1524, "step": 3980 }, { "epoch": 0.30543585284452, "grad_norm": 0.21074903011322021, "learning_rate": 8.1173105692623e-06, "loss": 1.2134, "step": 3981 }, { "epoch": 0.3055125762438781, "grad_norm": 0.21471288800239563, "learning_rate": 8.116341379585523e-06, "loss": 1.134, "step": 3982 }, { "epoch": 0.30558929964323617, "grad_norm": 0.19363877177238464, "learning_rate": 8.115371998401193e-06, "loss": 1.0549, "step": 3983 }, { "epoch": 0.30566602304259427, "grad_norm": 0.29252979159355164, "learning_rate": 8.114402425768889e-06, "loss": 1.1151, "step": 3984 }, { "epoch": 0.3057427464419524, "grad_norm": 0.206162229180336, "learning_rate": 8.113432661748187e-06, "loss": 1.0513, "step": 3985 }, { "epoch": 0.3058194698413104, "grad_norm": 0.23076024651527405, "learning_rate": 8.112462706398683e-06, "loss": 1.0198, "step": 3986 }, { "epoch": 0.3058961932406685, "grad_norm": 0.19034051895141602, "learning_rate": 8.111492559779983e-06, "loss": 1.0586, "step": 3987 }, { "epoch": 0.30597291664002657, "grad_norm": 0.28003406524658203, "learning_rate": 8.110522221951709e-06, "loss": 1.1455, "step": 3988 }, { "epoch": 0.3060496400393847, "grad_norm": 0.23019973933696747, "learning_rate": 8.109551692973487e-06, "loss": 1.1083, "step": 3989 }, { "epoch": 0.3061263634387428, "grad_norm": 0.2655048966407776, "learning_rate": 8.10858097290496e-06, "loss": 1.0318, "step": 3990 }, { "epoch": 0.3062030868381008, "grad_norm": 0.16959445178508759, "learning_rate": 8.10761006180578e-06, "loss": 1.0356, "step": 3991 }, { "epoch": 0.3062798102374589, "grad_norm": 0.23690569400787354, "learning_rate": 8.106638959735614e-06, "loss": 1.0611, "step": 3992 }, { "epoch": 0.306356533636817, "grad_norm": 0.24119111895561218, "learning_rate": 8.105667666754138e-06, "loss": 1.1827, "step": 3993 }, { "epoch": 0.3064332570361751, "grad_norm": 0.21831583976745605, "learning_rate": 8.10469618292104e-06, "loss": 1.035, "step": 3994 }, { "epoch": 0.3065099804355332, "grad_norm": 0.19004173576831818, "learning_rate": 8.103724508296022e-06, "loss": 1.0963, "step": 3995 }, { "epoch": 0.3065867038348912, "grad_norm": 0.2290993183851242, "learning_rate": 8.102752642938793e-06, "loss": 1.1235, "step": 3996 }, { "epoch": 0.30666342723424933, "grad_norm": 0.20333538949489594, "learning_rate": 8.101780586909081e-06, "loss": 1.0333, "step": 3997 }, { "epoch": 0.30674015063360743, "grad_norm": 0.3121757507324219, "learning_rate": 8.100808340266617e-06, "loss": 1.2087, "step": 3998 }, { "epoch": 0.3068168740329655, "grad_norm": 0.18107415735721588, "learning_rate": 8.099835903071152e-06, "loss": 1.1433, "step": 3999 }, { "epoch": 0.3068935974323236, "grad_norm": 0.19541813433170319, "learning_rate": 8.09886327538244e-06, "loss": 1.0557, "step": 4000 }, { "epoch": 0.30697032083168163, "grad_norm": 0.18835750222206116, "learning_rate": 8.097890457260257e-06, "loss": 1.1079, "step": 4001 }, { "epoch": 0.30704704423103973, "grad_norm": 0.29370251297950745, "learning_rate": 8.096917448764383e-06, "loss": 1.0837, "step": 4002 }, { "epoch": 0.30712376763039784, "grad_norm": 0.18396082520484924, "learning_rate": 8.09594424995461e-06, "loss": 1.0709, "step": 4003 }, { "epoch": 0.3072004910297559, "grad_norm": 0.30864936113357544, "learning_rate": 8.094970860890748e-06, "loss": 1.0573, "step": 4004 }, { "epoch": 0.307277214429114, "grad_norm": 0.25134602189064026, "learning_rate": 8.093997281632611e-06, "loss": 1.0916, "step": 4005 }, { "epoch": 0.30735393782847203, "grad_norm": 0.24799376726150513, "learning_rate": 8.093023512240029e-06, "loss": 1.1264, "step": 4006 }, { "epoch": 0.30743066122783014, "grad_norm": 0.09675345569849014, "learning_rate": 8.092049552772843e-06, "loss": 1.1573, "step": 4007 }, { "epoch": 0.30750738462718824, "grad_norm": 0.7952577471733093, "learning_rate": 8.091075403290905e-06, "loss": 1.0834, "step": 4008 }, { "epoch": 0.3075841080265463, "grad_norm": 0.19962851703166962, "learning_rate": 8.090101063854079e-06, "loss": 1.0915, "step": 4009 }, { "epoch": 0.3076608314259044, "grad_norm": 0.2821514308452606, "learning_rate": 8.089126534522243e-06, "loss": 1.1575, "step": 4010 }, { "epoch": 0.30773755482526244, "grad_norm": 0.20795677602291107, "learning_rate": 8.088151815355281e-06, "loss": 1.056, "step": 4011 }, { "epoch": 0.30781427822462054, "grad_norm": 0.24710752069950104, "learning_rate": 8.087176906413093e-06, "loss": 1.152, "step": 4012 }, { "epoch": 0.30789100162397864, "grad_norm": 0.26656076312065125, "learning_rate": 8.08620180775559e-06, "loss": 1.1044, "step": 4013 }, { "epoch": 0.3079677250233367, "grad_norm": 0.2133619636297226, "learning_rate": 8.085226519442697e-06, "loss": 1.0242, "step": 4014 }, { "epoch": 0.3080444484226948, "grad_norm": 0.20977383852005005, "learning_rate": 8.084251041534346e-06, "loss": 1.0635, "step": 4015 }, { "epoch": 0.30812117182205284, "grad_norm": 0.42412856221199036, "learning_rate": 8.083275374090481e-06, "loss": 1.0844, "step": 4016 }, { "epoch": 0.30819789522141094, "grad_norm": 0.21325916051864624, "learning_rate": 8.082299517171061e-06, "loss": 1.1979, "step": 4017 }, { "epoch": 0.30827461862076905, "grad_norm": 0.18907903134822845, "learning_rate": 8.081323470836057e-06, "loss": 1.0367, "step": 4018 }, { "epoch": 0.3083513420201271, "grad_norm": 0.24047473073005676, "learning_rate": 8.080347235145446e-06, "loss": 1.0787, "step": 4019 }, { "epoch": 0.3084280654194852, "grad_norm": 0.19936834275722504, "learning_rate": 8.079370810159223e-06, "loss": 1.0314, "step": 4020 }, { "epoch": 0.30850478881884325, "grad_norm": 0.19919560849666595, "learning_rate": 8.078394195937392e-06, "loss": 1.106, "step": 4021 }, { "epoch": 0.30858151221820135, "grad_norm": 0.28615015745162964, "learning_rate": 8.077417392539965e-06, "loss": 1.0803, "step": 4022 }, { "epoch": 0.30865823561755945, "grad_norm": 0.39007100462913513, "learning_rate": 8.076440400026973e-06, "loss": 1.1284, "step": 4023 }, { "epoch": 0.3087349590169175, "grad_norm": 0.22537191212177277, "learning_rate": 8.075463218458453e-06, "loss": 1.0316, "step": 4024 }, { "epoch": 0.3088116824162756, "grad_norm": 0.2540743947029114, "learning_rate": 8.074485847894457e-06, "loss": 1.066, "step": 4025 }, { "epoch": 0.30888840581563365, "grad_norm": 0.210642471909523, "learning_rate": 8.073508288395047e-06, "loss": 1.1089, "step": 4026 }, { "epoch": 0.30896512921499175, "grad_norm": 0.22697433829307556, "learning_rate": 8.072530540020294e-06, "loss": 1.1401, "step": 4027 }, { "epoch": 0.30904185261434985, "grad_norm": 0.20073731243610382, "learning_rate": 8.071552602830285e-06, "loss": 1.1561, "step": 4028 }, { "epoch": 0.3091185760137079, "grad_norm": 0.21140259504318237, "learning_rate": 8.070574476885119e-06, "loss": 1.0294, "step": 4029 }, { "epoch": 0.309195299413066, "grad_norm": 0.20623968541622162, "learning_rate": 8.0695961622449e-06, "loss": 1.1462, "step": 4030 }, { "epoch": 0.30927202281242405, "grad_norm": 0.1726132482290268, "learning_rate": 8.068617658969752e-06, "loss": 1.0849, "step": 4031 }, { "epoch": 0.30934874621178216, "grad_norm": 0.24129274487495422, "learning_rate": 8.067638967119803e-06, "loss": 1.1006, "step": 4032 }, { "epoch": 0.30942546961114026, "grad_norm": 0.25975728034973145, "learning_rate": 8.066660086755199e-06, "loss": 1.1629, "step": 4033 }, { "epoch": 0.3095021930104983, "grad_norm": 0.31884270906448364, "learning_rate": 8.065681017936095e-06, "loss": 1.0959, "step": 4034 }, { "epoch": 0.3095789164098564, "grad_norm": 0.17304176092147827, "learning_rate": 8.064701760722655e-06, "loss": 1.1613, "step": 4035 }, { "epoch": 0.30965563980921446, "grad_norm": 0.19369865953922272, "learning_rate": 8.06372231517506e-06, "loss": 1.1011, "step": 4036 }, { "epoch": 0.30973236320857256, "grad_norm": 0.1955728530883789, "learning_rate": 8.062742681353497e-06, "loss": 1.103, "step": 4037 }, { "epoch": 0.30980908660793066, "grad_norm": 0.2253711074590683, "learning_rate": 8.06176285931817e-06, "loss": 1.1841, "step": 4038 }, { "epoch": 0.3098858100072887, "grad_norm": 0.21528400480747223, "learning_rate": 8.06078284912929e-06, "loss": 1.1454, "step": 4039 }, { "epoch": 0.3099625334066468, "grad_norm": 0.20774273574352264, "learning_rate": 8.059802650847078e-06, "loss": 1.0785, "step": 4040 }, { "epoch": 0.31003925680600486, "grad_norm": 0.17705054581165314, "learning_rate": 8.058822264531776e-06, "loss": 1.0608, "step": 4041 }, { "epoch": 0.31011598020536296, "grad_norm": 0.20422299206256866, "learning_rate": 8.057841690243627e-06, "loss": 1.1552, "step": 4042 }, { "epoch": 0.31019270360472107, "grad_norm": 0.19730888307094574, "learning_rate": 8.056860928042892e-06, "loss": 1.1268, "step": 4043 }, { "epoch": 0.3102694270040791, "grad_norm": 0.1824883073568344, "learning_rate": 8.05587997798984e-06, "loss": 1.1913, "step": 4044 }, { "epoch": 0.3103461504034372, "grad_norm": 0.2193903923034668, "learning_rate": 8.054898840144754e-06, "loss": 1.1566, "step": 4045 }, { "epoch": 0.31042287380279526, "grad_norm": 0.19285468757152557, "learning_rate": 8.053917514567927e-06, "loss": 1.1833, "step": 4046 }, { "epoch": 0.31049959720215337, "grad_norm": 0.45346003770828247, "learning_rate": 8.052936001319664e-06, "loss": 1.1271, "step": 4047 }, { "epoch": 0.31057632060151147, "grad_norm": 0.20366071164608002, "learning_rate": 8.051954300460284e-06, "loss": 1.0974, "step": 4048 }, { "epoch": 0.3106530440008695, "grad_norm": 0.19971510767936707, "learning_rate": 8.05097241205011e-06, "loss": 1.076, "step": 4049 }, { "epoch": 0.3107297674002276, "grad_norm": 0.28544047474861145, "learning_rate": 8.049990336149487e-06, "loss": 1.0389, "step": 4050 }, { "epoch": 0.31080649079958567, "grad_norm": 0.17269569635391235, "learning_rate": 8.049008072818764e-06, "loss": 1.0957, "step": 4051 }, { "epoch": 0.31088321419894377, "grad_norm": 0.2211022675037384, "learning_rate": 8.048025622118305e-06, "loss": 1.1522, "step": 4052 }, { "epoch": 0.31095993759830187, "grad_norm": 0.18817070126533508, "learning_rate": 8.047042984108481e-06, "loss": 1.1405, "step": 4053 }, { "epoch": 0.3110366609976599, "grad_norm": 0.17962311208248138, "learning_rate": 8.046060158849682e-06, "loss": 1.1552, "step": 4054 }, { "epoch": 0.311113384397018, "grad_norm": 0.20901194214820862, "learning_rate": 8.045077146402302e-06, "loss": 1.1283, "step": 4055 }, { "epoch": 0.3111901077963761, "grad_norm": 0.2245478630065918, "learning_rate": 8.04409394682675e-06, "loss": 1.1451, "step": 4056 }, { "epoch": 0.3112668311957342, "grad_norm": 0.24038518965244293, "learning_rate": 8.043110560183447e-06, "loss": 1.0559, "step": 4057 }, { "epoch": 0.3113435545950923, "grad_norm": 0.2440531849861145, "learning_rate": 8.042126986532824e-06, "loss": 1.1086, "step": 4058 }, { "epoch": 0.3114202779944503, "grad_norm": 0.466129869222641, "learning_rate": 8.041143225935328e-06, "loss": 1.0935, "step": 4059 }, { "epoch": 0.3114970013938084, "grad_norm": 0.21442067623138428, "learning_rate": 8.040159278451408e-06, "loss": 1.0645, "step": 4060 }, { "epoch": 0.31157372479316653, "grad_norm": 0.20228303968906403, "learning_rate": 8.039175144141534e-06, "loss": 1.0789, "step": 4061 }, { "epoch": 0.3116504481925246, "grad_norm": 0.23047888278961182, "learning_rate": 8.038190823066184e-06, "loss": 1.0959, "step": 4062 }, { "epoch": 0.3117271715918827, "grad_norm": 0.2720075845718384, "learning_rate": 8.037206315285842e-06, "loss": 1.1097, "step": 4063 }, { "epoch": 0.3118038949912407, "grad_norm": 0.274628609418869, "learning_rate": 8.036221620861015e-06, "loss": 1.0661, "step": 4064 }, { "epoch": 0.31188061839059883, "grad_norm": 0.22573453187942505, "learning_rate": 8.035236739852214e-06, "loss": 1.0742, "step": 4065 }, { "epoch": 0.31195734178995693, "grad_norm": 0.19041751325130463, "learning_rate": 8.034251672319959e-06, "loss": 1.0994, "step": 4066 }, { "epoch": 0.312034065189315, "grad_norm": 0.2024129033088684, "learning_rate": 8.033266418324789e-06, "loss": 1.0752, "step": 4067 }, { "epoch": 0.3121107885886731, "grad_norm": 0.7770058512687683, "learning_rate": 8.032280977927249e-06, "loss": 1.0718, "step": 4068 }, { "epoch": 0.31218751198803113, "grad_norm": 0.2467866837978363, "learning_rate": 8.031295351187895e-06, "loss": 1.0974, "step": 4069 }, { "epoch": 0.31226423538738923, "grad_norm": 0.2510092258453369, "learning_rate": 8.030309538167297e-06, "loss": 1.1203, "step": 4070 }, { "epoch": 0.31234095878674734, "grad_norm": 0.32793909311294556, "learning_rate": 8.02932353892604e-06, "loss": 1.0548, "step": 4071 }, { "epoch": 0.3124176821861054, "grad_norm": 0.2034921646118164, "learning_rate": 8.028337353524712e-06, "loss": 1.1772, "step": 4072 }, { "epoch": 0.3124944055854635, "grad_norm": 0.26773741841316223, "learning_rate": 8.027350982023918e-06, "loss": 1.0786, "step": 4073 }, { "epoch": 0.31257112898482153, "grad_norm": 0.2910991609096527, "learning_rate": 8.026364424484271e-06, "loss": 1.0827, "step": 4074 }, { "epoch": 0.31264785238417964, "grad_norm": 0.20623035728931427, "learning_rate": 8.025377680966403e-06, "loss": 1.0344, "step": 4075 }, { "epoch": 0.31272457578353774, "grad_norm": 0.293478399515152, "learning_rate": 8.024390751530948e-06, "loss": 1.0791, "step": 4076 }, { "epoch": 0.3128012991828958, "grad_norm": 0.20233534276485443, "learning_rate": 8.023403636238556e-06, "loss": 1.1343, "step": 4077 }, { "epoch": 0.3128780225822539, "grad_norm": 0.09566259384155273, "learning_rate": 8.02241633514989e-06, "loss": 1.163, "step": 4078 }, { "epoch": 0.31295474598161194, "grad_norm": 0.1748170703649521, "learning_rate": 8.021428848325619e-06, "loss": 1.0469, "step": 4079 }, { "epoch": 0.31303146938097004, "grad_norm": 0.40322577953338623, "learning_rate": 8.020441175826427e-06, "loss": 1.1546, "step": 4080 }, { "epoch": 0.31310819278032814, "grad_norm": 0.2928348481655121, "learning_rate": 8.019453317713014e-06, "loss": 1.0725, "step": 4081 }, { "epoch": 0.3131849161796862, "grad_norm": 0.21045631170272827, "learning_rate": 8.018465274046079e-06, "loss": 1.14, "step": 4082 }, { "epoch": 0.3132616395790443, "grad_norm": 0.19844964146614075, "learning_rate": 8.017477044886345e-06, "loss": 1.1018, "step": 4083 }, { "epoch": 0.31333836297840234, "grad_norm": 0.255286306142807, "learning_rate": 8.016488630294539e-06, "loss": 1.0491, "step": 4084 }, { "epoch": 0.31341508637776044, "grad_norm": 0.224631205201149, "learning_rate": 8.015500030331404e-06, "loss": 1.1082, "step": 4085 }, { "epoch": 0.31349180977711855, "grad_norm": 0.21224907040596008, "learning_rate": 8.014511245057692e-06, "loss": 1.1189, "step": 4086 }, { "epoch": 0.3135685331764766, "grad_norm": 0.1797514110803604, "learning_rate": 8.013522274534165e-06, "loss": 1.0985, "step": 4087 }, { "epoch": 0.3136452565758347, "grad_norm": 0.20449618995189667, "learning_rate": 8.012533118821599e-06, "loss": 1.091, "step": 4088 }, { "epoch": 0.31372197997519274, "grad_norm": 0.247007817029953, "learning_rate": 8.011543777980779e-06, "loss": 1.1245, "step": 4089 }, { "epoch": 0.31379870337455085, "grad_norm": 0.2025526612997055, "learning_rate": 8.010554252072502e-06, "loss": 1.1828, "step": 4090 }, { "epoch": 0.31387542677390895, "grad_norm": 0.21340273320674896, "learning_rate": 8.009564541157582e-06, "loss": 1.1066, "step": 4091 }, { "epoch": 0.313952150173267, "grad_norm": 0.27904486656188965, "learning_rate": 8.008574645296832e-06, "loss": 1.09, "step": 4092 }, { "epoch": 0.3140288735726251, "grad_norm": 0.1774921864271164, "learning_rate": 8.00758456455109e-06, "loss": 1.0226, "step": 4093 }, { "epoch": 0.31410559697198315, "grad_norm": 0.3274863362312317, "learning_rate": 8.006594298981194e-06, "loss": 1.1705, "step": 4094 }, { "epoch": 0.31418232037134125, "grad_norm": 0.22448603808879852, "learning_rate": 8.005603848648004e-06, "loss": 0.9896, "step": 4095 }, { "epoch": 0.31425904377069935, "grad_norm": 0.19878602027893066, "learning_rate": 8.004613213612382e-06, "loss": 1.1, "step": 4096 }, { "epoch": 0.3143357671700574, "grad_norm": 0.10121391713619232, "learning_rate": 8.003622393935206e-06, "loss": 1.2535, "step": 4097 }, { "epoch": 0.3144124905694155, "grad_norm": 0.16698797047138214, "learning_rate": 8.002631389677365e-06, "loss": 1.0968, "step": 4098 }, { "epoch": 0.31448921396877355, "grad_norm": 0.4990605115890503, "learning_rate": 8.001640200899758e-06, "loss": 1.086, "step": 4099 }, { "epoch": 0.31456593736813165, "grad_norm": 0.18879558145999908, "learning_rate": 8.000648827663297e-06, "loss": 1.0931, "step": 4100 }, { "epoch": 0.31464266076748976, "grad_norm": 0.1799258589744568, "learning_rate": 7.999657270028904e-06, "loss": 1.0789, "step": 4101 }, { "epoch": 0.3147193841668478, "grad_norm": 0.19627097249031067, "learning_rate": 7.998665528057515e-06, "loss": 1.0343, "step": 4102 }, { "epoch": 0.3147961075662059, "grad_norm": 0.2701769471168518, "learning_rate": 7.997673601810071e-06, "loss": 1.1335, "step": 4103 }, { "epoch": 0.31487283096556395, "grad_norm": 0.21501660346984863, "learning_rate": 7.996681491347533e-06, "loss": 1.1434, "step": 4104 }, { "epoch": 0.31494955436492206, "grad_norm": 0.3924688696861267, "learning_rate": 7.995689196730865e-06, "loss": 1.1064, "step": 4105 }, { "epoch": 0.31502627776428016, "grad_norm": 0.20852577686309814, "learning_rate": 7.99469671802105e-06, "loss": 1.1038, "step": 4106 }, { "epoch": 0.3151030011636382, "grad_norm": 0.1838654726743698, "learning_rate": 7.993704055279075e-06, "loss": 1.0714, "step": 4107 }, { "epoch": 0.3151797245629963, "grad_norm": 0.24308934807777405, "learning_rate": 7.992711208565941e-06, "loss": 1.1034, "step": 4108 }, { "epoch": 0.31525644796235436, "grad_norm": 0.23602759838104248, "learning_rate": 7.991718177942667e-06, "loss": 1.0304, "step": 4109 }, { "epoch": 0.31533317136171246, "grad_norm": 0.1849546879529953, "learning_rate": 7.990724963470272e-06, "loss": 1.1429, "step": 4110 }, { "epoch": 0.31540989476107056, "grad_norm": 0.2497934103012085, "learning_rate": 7.989731565209796e-06, "loss": 1.1228, "step": 4111 }, { "epoch": 0.3154866181604286, "grad_norm": 0.23578429222106934, "learning_rate": 7.98873798322228e-06, "loss": 1.0748, "step": 4112 }, { "epoch": 0.3155633415597867, "grad_norm": 0.18911711871623993, "learning_rate": 7.987744217568786e-06, "loss": 1.0549, "step": 4113 }, { "epoch": 0.3156400649591448, "grad_norm": 0.24009114503860474, "learning_rate": 7.986750268310386e-06, "loss": 1.1218, "step": 4114 }, { "epoch": 0.31571678835850286, "grad_norm": 0.2327776998281479, "learning_rate": 7.985756135508155e-06, "loss": 1.0999, "step": 4115 }, { "epoch": 0.31579351175786097, "grad_norm": 0.21296441555023193, "learning_rate": 7.98476181922319e-06, "loss": 1.2221, "step": 4116 }, { "epoch": 0.315870235157219, "grad_norm": 0.18049269914627075, "learning_rate": 7.983767319516594e-06, "loss": 1.0881, "step": 4117 }, { "epoch": 0.3159469585565771, "grad_norm": 0.09281114488840103, "learning_rate": 7.982772636449478e-06, "loss": 1.1447, "step": 4118 }, { "epoch": 0.3160236819559352, "grad_norm": 0.2194802463054657, "learning_rate": 7.981777770082973e-06, "loss": 1.111, "step": 4119 }, { "epoch": 0.31610040535529327, "grad_norm": 0.23831824958324432, "learning_rate": 7.980782720478211e-06, "loss": 1.0689, "step": 4120 }, { "epoch": 0.31617712875465137, "grad_norm": 0.2028980404138565, "learning_rate": 7.979787487696342e-06, "loss": 1.1364, "step": 4121 }, { "epoch": 0.3162538521540094, "grad_norm": 0.2489023059606552, "learning_rate": 7.97879207179853e-06, "loss": 1.0826, "step": 4122 }, { "epoch": 0.3163305755533675, "grad_norm": 0.20518487691879272, "learning_rate": 7.97779647284594e-06, "loss": 1.1131, "step": 4123 }, { "epoch": 0.3164072989527256, "grad_norm": 0.24229483306407928, "learning_rate": 7.976800690899758e-06, "loss": 1.1038, "step": 4124 }, { "epoch": 0.31648402235208367, "grad_norm": 0.21547269821166992, "learning_rate": 7.975804726021177e-06, "loss": 1.1263, "step": 4125 }, { "epoch": 0.3165607457514418, "grad_norm": 0.2006065547466278, "learning_rate": 7.9748085782714e-06, "loss": 1.0639, "step": 4126 }, { "epoch": 0.3166374691507998, "grad_norm": 0.2328047901391983, "learning_rate": 7.973812247711646e-06, "loss": 1.1376, "step": 4127 }, { "epoch": 0.3167141925501579, "grad_norm": 0.18583767116069794, "learning_rate": 7.97281573440314e-06, "loss": 1.0511, "step": 4128 }, { "epoch": 0.316790915949516, "grad_norm": 0.2034512758255005, "learning_rate": 7.971819038407117e-06, "loss": 1.1295, "step": 4129 }, { "epoch": 0.3168676393488741, "grad_norm": 0.3049301207065582, "learning_rate": 7.970822159784832e-06, "loss": 0.9915, "step": 4130 }, { "epoch": 0.3169443627482322, "grad_norm": 0.19453644752502441, "learning_rate": 7.969825098597545e-06, "loss": 1.1612, "step": 4131 }, { "epoch": 0.3170210861475902, "grad_norm": 0.2249792516231537, "learning_rate": 7.96882785490653e-06, "loss": 1.0688, "step": 4132 }, { "epoch": 0.31709780954694833, "grad_norm": 0.22948502004146576, "learning_rate": 7.967830428773064e-06, "loss": 1.0454, "step": 4133 }, { "epoch": 0.31717453294630643, "grad_norm": 0.17853449285030365, "learning_rate": 7.966832820258445e-06, "loss": 1.1636, "step": 4134 }, { "epoch": 0.3172512563456645, "grad_norm": 0.20627132058143616, "learning_rate": 7.96583502942398e-06, "loss": 1.1092, "step": 4135 }, { "epoch": 0.3173279797450226, "grad_norm": 0.20152364671230316, "learning_rate": 7.964837056330986e-06, "loss": 1.1069, "step": 4136 }, { "epoch": 0.31740470314438063, "grad_norm": 0.36260703206062317, "learning_rate": 7.963838901040788e-06, "loss": 1.1144, "step": 4137 }, { "epoch": 0.31748142654373873, "grad_norm": 0.21216176450252533, "learning_rate": 7.962840563614726e-06, "loss": 1.038, "step": 4138 }, { "epoch": 0.31755814994309683, "grad_norm": 0.0898488312959671, "learning_rate": 7.961842044114154e-06, "loss": 1.1172, "step": 4139 }, { "epoch": 0.3176348733424549, "grad_norm": 0.24743933975696564, "learning_rate": 7.96084334260043e-06, "loss": 1.0297, "step": 4140 }, { "epoch": 0.317711596741813, "grad_norm": 0.24268916249275208, "learning_rate": 7.95984445913493e-06, "loss": 1.0894, "step": 4141 }, { "epoch": 0.31778832014117103, "grad_norm": 0.21374699473381042, "learning_rate": 7.958845393779036e-06, "loss": 1.0892, "step": 4142 }, { "epoch": 0.31786504354052914, "grad_norm": 0.24895504117012024, "learning_rate": 7.957846146594143e-06, "loss": 1.1955, "step": 4143 }, { "epoch": 0.31794176693988724, "grad_norm": 0.2958773672580719, "learning_rate": 7.956846717641659e-06, "loss": 1.0246, "step": 4144 }, { "epoch": 0.3180184903392453, "grad_norm": 0.27608972787857056, "learning_rate": 7.955847106982998e-06, "loss": 1.0809, "step": 4145 }, { "epoch": 0.3180952137386034, "grad_norm": 0.22720375657081604, "learning_rate": 7.954847314679595e-06, "loss": 1.1135, "step": 4146 }, { "epoch": 0.31817193713796144, "grad_norm": 0.213415265083313, "learning_rate": 7.953847340792885e-06, "loss": 1.0579, "step": 4147 }, { "epoch": 0.31824866053731954, "grad_norm": 0.22880832850933075, "learning_rate": 7.95284718538432e-06, "loss": 1.0506, "step": 4148 }, { "epoch": 0.31832538393667764, "grad_norm": 0.21941418945789337, "learning_rate": 7.951846848515361e-06, "loss": 1.1393, "step": 4149 }, { "epoch": 0.3184021073360357, "grad_norm": 0.2571694254875183, "learning_rate": 7.950846330247486e-06, "loss": 1.1086, "step": 4150 }, { "epoch": 0.3184788307353938, "grad_norm": 0.20589686930179596, "learning_rate": 7.949845630642177e-06, "loss": 1.0899, "step": 4151 }, { "epoch": 0.31855555413475184, "grad_norm": 0.24066634476184845, "learning_rate": 7.948844749760927e-06, "loss": 1.0585, "step": 4152 }, { "epoch": 0.31863227753410994, "grad_norm": 0.21955206990242004, "learning_rate": 7.947843687665245e-06, "loss": 1.0653, "step": 4153 }, { "epoch": 0.31870900093346805, "grad_norm": 0.09656322747468948, "learning_rate": 7.946842444416652e-06, "loss": 1.2304, "step": 4154 }, { "epoch": 0.3187857243328261, "grad_norm": 0.4782160818576813, "learning_rate": 7.94584102007667e-06, "loss": 1.0853, "step": 4155 }, { "epoch": 0.3188624477321842, "grad_norm": 0.2162877768278122, "learning_rate": 7.944839414706847e-06, "loss": 1.0508, "step": 4156 }, { "epoch": 0.31893917113154224, "grad_norm": 0.20142365992069244, "learning_rate": 7.943837628368727e-06, "loss": 1.1291, "step": 4157 }, { "epoch": 0.31901589453090035, "grad_norm": 0.1902233213186264, "learning_rate": 7.94283566112388e-06, "loss": 1.1078, "step": 4158 }, { "epoch": 0.31909261793025845, "grad_norm": 0.1984141767024994, "learning_rate": 7.941833513033873e-06, "loss": 1.097, "step": 4159 }, { "epoch": 0.3191693413296165, "grad_norm": 0.2507762312889099, "learning_rate": 7.940831184160294e-06, "loss": 1.0943, "step": 4160 }, { "epoch": 0.3192460647289746, "grad_norm": 0.18035180866718292, "learning_rate": 7.939828674564738e-06, "loss": 1.0958, "step": 4161 }, { "epoch": 0.31932278812833265, "grad_norm": 0.4635048508644104, "learning_rate": 7.938825984308813e-06, "loss": 1.1181, "step": 4162 }, { "epoch": 0.31939951152769075, "grad_norm": 0.21980272233486176, "learning_rate": 7.937823113454136e-06, "loss": 1.1341, "step": 4163 }, { "epoch": 0.31947623492704885, "grad_norm": 0.21516160666942596, "learning_rate": 7.936820062062336e-06, "loss": 1.1594, "step": 4164 }, { "epoch": 0.3195529583264069, "grad_norm": 0.2118409425020218, "learning_rate": 7.935816830195054e-06, "loss": 1.1506, "step": 4165 }, { "epoch": 0.319629681725765, "grad_norm": 0.24145780503749847, "learning_rate": 7.93481341791394e-06, "loss": 1.1318, "step": 4166 }, { "epoch": 0.31970640512512305, "grad_norm": 0.25508740544319153, "learning_rate": 7.933809825280656e-06, "loss": 1.1343, "step": 4167 }, { "epoch": 0.31978312852448115, "grad_norm": 0.22783073782920837, "learning_rate": 7.932806052356878e-06, "loss": 1.0506, "step": 4168 }, { "epoch": 0.31985985192383926, "grad_norm": 0.2518661618232727, "learning_rate": 7.931802099204292e-06, "loss": 1.0345, "step": 4169 }, { "epoch": 0.3199365753231973, "grad_norm": 0.193417027592659, "learning_rate": 7.930797965884586e-06, "loss": 1.0833, "step": 4170 }, { "epoch": 0.3200132987225554, "grad_norm": 0.17242054641246796, "learning_rate": 7.929793652459473e-06, "loss": 1.1743, "step": 4171 }, { "epoch": 0.3200900221219135, "grad_norm": 0.23546448349952698, "learning_rate": 7.92878915899067e-06, "loss": 1.1846, "step": 4172 }, { "epoch": 0.32016674552127156, "grad_norm": 0.20566043257713318, "learning_rate": 7.927784485539904e-06, "loss": 1.1044, "step": 4173 }, { "epoch": 0.32024346892062966, "grad_norm": 0.17862367630004883, "learning_rate": 7.926779632168918e-06, "loss": 1.1006, "step": 4174 }, { "epoch": 0.3203201923199877, "grad_norm": 0.24266092479228973, "learning_rate": 7.925774598939457e-06, "loss": 1.1554, "step": 4175 }, { "epoch": 0.3203969157193458, "grad_norm": 0.1843085139989853, "learning_rate": 7.924769385913289e-06, "loss": 1.1168, "step": 4176 }, { "epoch": 0.3204736391187039, "grad_norm": 0.23491865396499634, "learning_rate": 7.923763993152186e-06, "loss": 1.123, "step": 4177 }, { "epoch": 0.32055036251806196, "grad_norm": 0.2242911458015442, "learning_rate": 7.92275842071793e-06, "loss": 1.0769, "step": 4178 }, { "epoch": 0.32062708591742006, "grad_norm": 0.09424395859241486, "learning_rate": 7.921752668672316e-06, "loss": 1.1978, "step": 4179 }, { "epoch": 0.3207038093167781, "grad_norm": 0.1846321076154709, "learning_rate": 7.920746737077154e-06, "loss": 1.0495, "step": 4180 }, { "epoch": 0.3207805327161362, "grad_norm": 0.22178959846496582, "learning_rate": 7.919740625994256e-06, "loss": 1.0914, "step": 4181 }, { "epoch": 0.3208572561154943, "grad_norm": 0.25166118144989014, "learning_rate": 7.918734335485453e-06, "loss": 1.1177, "step": 4182 }, { "epoch": 0.32093397951485236, "grad_norm": 0.20718638598918915, "learning_rate": 7.917727865612584e-06, "loss": 1.056, "step": 4183 }, { "epoch": 0.32101070291421047, "grad_norm": 0.18554070591926575, "learning_rate": 7.9167212164375e-06, "loss": 1.1191, "step": 4184 }, { "epoch": 0.3210874263135685, "grad_norm": 0.206452876329422, "learning_rate": 7.91571438802206e-06, "loss": 1.1312, "step": 4185 }, { "epoch": 0.3211641497129266, "grad_norm": 0.19376784563064575, "learning_rate": 7.91470738042814e-06, "loss": 1.0875, "step": 4186 }, { "epoch": 0.3212408731122847, "grad_norm": 0.09703758358955383, "learning_rate": 7.91370019371762e-06, "loss": 1.1444, "step": 4187 }, { "epoch": 0.32131759651164277, "grad_norm": 0.2114633172750473, "learning_rate": 7.912692827952395e-06, "loss": 1.1258, "step": 4188 }, { "epoch": 0.32139431991100087, "grad_norm": 0.20117484033107758, "learning_rate": 7.91168528319437e-06, "loss": 1.0422, "step": 4189 }, { "epoch": 0.3214710433103589, "grad_norm": 0.17936678230762482, "learning_rate": 7.910677559505464e-06, "loss": 1.1333, "step": 4190 }, { "epoch": 0.321547766709717, "grad_norm": 0.3687213063240051, "learning_rate": 7.909669656947602e-06, "loss": 1.0726, "step": 4191 }, { "epoch": 0.3216244901090751, "grad_norm": 0.26504752039909363, "learning_rate": 7.908661575582724e-06, "loss": 1.0971, "step": 4192 }, { "epoch": 0.32170121350843317, "grad_norm": 0.2591072916984558, "learning_rate": 7.907653315472776e-06, "loss": 1.0397, "step": 4193 }, { "epoch": 0.3217779369077913, "grad_norm": 0.20848174393177032, "learning_rate": 7.90664487667972e-06, "loss": 1.0469, "step": 4194 }, { "epoch": 0.3218546603071493, "grad_norm": 0.3399973213672638, "learning_rate": 7.90563625926553e-06, "loss": 1.1028, "step": 4195 }, { "epoch": 0.3219313837065074, "grad_norm": 1.1735575199127197, "learning_rate": 7.904627463292183e-06, "loss": 1.037, "step": 4196 }, { "epoch": 0.3220081071058655, "grad_norm": 0.24764320254325867, "learning_rate": 7.903618488821678e-06, "loss": 1.1037, "step": 4197 }, { "epoch": 0.3220848305052236, "grad_norm": 0.6460142731666565, "learning_rate": 7.902609335916015e-06, "loss": 1.1411, "step": 4198 }, { "epoch": 0.3221615539045817, "grad_norm": 0.2254372090101242, "learning_rate": 7.901600004637211e-06, "loss": 1.1419, "step": 4199 }, { "epoch": 0.3222382773039397, "grad_norm": 0.2397749274969101, "learning_rate": 7.90059049504729e-06, "loss": 1.1211, "step": 4200 }, { "epoch": 0.3223150007032978, "grad_norm": 0.2012224793434143, "learning_rate": 7.89958080720829e-06, "loss": 1.0974, "step": 4201 }, { "epoch": 0.32239172410265593, "grad_norm": 0.19000719487667084, "learning_rate": 7.898570941182264e-06, "loss": 1.0925, "step": 4202 }, { "epoch": 0.322468447502014, "grad_norm": 0.20350266993045807, "learning_rate": 7.897560897031265e-06, "loss": 1.1736, "step": 4203 }, { "epoch": 0.3225451709013721, "grad_norm": 0.19178277254104614, "learning_rate": 7.896550674817363e-06, "loss": 1.0433, "step": 4204 }, { "epoch": 0.3226218943007301, "grad_norm": 0.20823772251605988, "learning_rate": 7.89554027460264e-06, "loss": 1.1228, "step": 4205 }, { "epoch": 0.32269861770008823, "grad_norm": 0.19924332201480865, "learning_rate": 7.894529696449191e-06, "loss": 1.0236, "step": 4206 }, { "epoch": 0.32277534109944633, "grad_norm": 0.20168055593967438, "learning_rate": 7.893518940419113e-06, "loss": 1.0519, "step": 4207 }, { "epoch": 0.3228520644988044, "grad_norm": 0.2505527138710022, "learning_rate": 7.892508006574527e-06, "loss": 1.0791, "step": 4208 }, { "epoch": 0.3229287878981625, "grad_norm": 0.1960727572441101, "learning_rate": 7.89149689497755e-06, "loss": 1.1145, "step": 4209 }, { "epoch": 0.32300551129752053, "grad_norm": 0.17807714641094208, "learning_rate": 7.890485605690322e-06, "loss": 1.0684, "step": 4210 }, { "epoch": 0.32308223469687863, "grad_norm": 0.20852044224739075, "learning_rate": 7.889474138774986e-06, "loss": 1.036, "step": 4211 }, { "epoch": 0.32315895809623674, "grad_norm": 0.34412992000579834, "learning_rate": 7.888462494293703e-06, "loss": 1.0385, "step": 4212 }, { "epoch": 0.3232356814955948, "grad_norm": 0.256382018327713, "learning_rate": 7.887450672308641e-06, "loss": 1.099, "step": 4213 }, { "epoch": 0.3233124048949529, "grad_norm": 0.1964324563741684, "learning_rate": 7.886438672881977e-06, "loss": 1.1301, "step": 4214 }, { "epoch": 0.32338912829431093, "grad_norm": 0.2091486155986786, "learning_rate": 7.885426496075903e-06, "loss": 1.1123, "step": 4215 }, { "epoch": 0.32346585169366904, "grad_norm": 0.19078503549098969, "learning_rate": 7.884414141952619e-06, "loss": 1.105, "step": 4216 }, { "epoch": 0.32354257509302714, "grad_norm": 0.18413777649402618, "learning_rate": 7.883401610574338e-06, "loss": 1.0302, "step": 4217 }, { "epoch": 0.3236192984923852, "grad_norm": 0.23652216792106628, "learning_rate": 7.88238890200328e-06, "loss": 1.1103, "step": 4218 }, { "epoch": 0.3236960218917433, "grad_norm": 0.342189222574234, "learning_rate": 7.88137601630168e-06, "loss": 1.0226, "step": 4219 }, { "epoch": 0.32377274529110134, "grad_norm": 0.1881849318742752, "learning_rate": 7.880362953531783e-06, "loss": 1.0966, "step": 4220 }, { "epoch": 0.32384946869045944, "grad_norm": 0.2541825473308563, "learning_rate": 7.879349713755847e-06, "loss": 1.0332, "step": 4221 }, { "epoch": 0.32392619208981754, "grad_norm": 0.22311942279338837, "learning_rate": 7.878336297036134e-06, "loss": 1.1076, "step": 4222 }, { "epoch": 0.3240029154891756, "grad_norm": 0.22911541163921356, "learning_rate": 7.877322703434922e-06, "loss": 1.1178, "step": 4223 }, { "epoch": 0.3240796388885337, "grad_norm": 0.29142430424690247, "learning_rate": 7.876308933014502e-06, "loss": 1.0826, "step": 4224 }, { "epoch": 0.32415636228789174, "grad_norm": 0.3505755662918091, "learning_rate": 7.875294985837169e-06, "loss": 1.0595, "step": 4225 }, { "epoch": 0.32423308568724984, "grad_norm": 0.2765863239765167, "learning_rate": 7.874280861965236e-06, "loss": 1.1073, "step": 4226 }, { "epoch": 0.32430980908660795, "grad_norm": 0.18587997555732727, "learning_rate": 7.873266561461022e-06, "loss": 1.0852, "step": 4227 }, { "epoch": 0.324386532485966, "grad_norm": 0.19765245914459229, "learning_rate": 7.872252084386858e-06, "loss": 1.1361, "step": 4228 }, { "epoch": 0.3244632558853241, "grad_norm": 0.20642505586147308, "learning_rate": 7.871237430805088e-06, "loss": 1.0833, "step": 4229 }, { "epoch": 0.3245399792846822, "grad_norm": 0.27174827456474304, "learning_rate": 7.870222600778065e-06, "loss": 1.0771, "step": 4230 }, { "epoch": 0.32461670268404025, "grad_norm": 0.24040530622005463, "learning_rate": 7.86920759436815e-06, "loss": 1.041, "step": 4231 }, { "epoch": 0.32469342608339835, "grad_norm": 0.22887836396694183, "learning_rate": 7.868192411637723e-06, "loss": 1.0637, "step": 4232 }, { "epoch": 0.3247701494827564, "grad_norm": 0.20268231630325317, "learning_rate": 7.867177052649166e-06, "loss": 1.0646, "step": 4233 }, { "epoch": 0.3248468728821145, "grad_norm": 0.18693293631076813, "learning_rate": 7.866161517464876e-06, "loss": 1.2123, "step": 4234 }, { "epoch": 0.3249235962814726, "grad_norm": 0.2584936022758484, "learning_rate": 7.865145806147262e-06, "loss": 1.0738, "step": 4235 }, { "epoch": 0.32500031968083065, "grad_norm": 0.22003574669361115, "learning_rate": 7.864129918758738e-06, "loss": 1.053, "step": 4236 }, { "epoch": 0.32507704308018875, "grad_norm": 0.18968817591667175, "learning_rate": 7.863113855361739e-06, "loss": 1.1706, "step": 4237 }, { "epoch": 0.3251537664795468, "grad_norm": 0.18520215153694153, "learning_rate": 7.8620976160187e-06, "loss": 1.1164, "step": 4238 }, { "epoch": 0.3252304898789049, "grad_norm": 0.20501524209976196, "learning_rate": 7.861081200792075e-06, "loss": 1.0895, "step": 4239 }, { "epoch": 0.325307213278263, "grad_norm": 0.2990799844264984, "learning_rate": 7.860064609744325e-06, "loss": 1.1576, "step": 4240 }, { "epoch": 0.32538393667762106, "grad_norm": 0.09355003386735916, "learning_rate": 7.859047842937918e-06, "loss": 1.2111, "step": 4241 }, { "epoch": 0.32546066007697916, "grad_norm": 0.20543140172958374, "learning_rate": 7.858030900435343e-06, "loss": 1.0887, "step": 4242 }, { "epoch": 0.3255373834763372, "grad_norm": 0.21211740374565125, "learning_rate": 7.85701378229909e-06, "loss": 1.0709, "step": 4243 }, { "epoch": 0.3256141068756953, "grad_norm": 0.1938391774892807, "learning_rate": 7.855996488591663e-06, "loss": 1.107, "step": 4244 }, { "epoch": 0.3256908302750534, "grad_norm": 0.17971056699752808, "learning_rate": 7.854979019375582e-06, "loss": 1.1215, "step": 4245 }, { "epoch": 0.32576755367441146, "grad_norm": 0.21083985269069672, "learning_rate": 7.853961374713367e-06, "loss": 1.0153, "step": 4246 }, { "epoch": 0.32584427707376956, "grad_norm": 0.26434871554374695, "learning_rate": 7.852943554667562e-06, "loss": 1.2126, "step": 4247 }, { "epoch": 0.3259210004731276, "grad_norm": 0.19504792988300323, "learning_rate": 7.85192555930071e-06, "loss": 1.0461, "step": 4248 }, { "epoch": 0.3259977238724857, "grad_norm": 0.2031756341457367, "learning_rate": 7.850907388675371e-06, "loss": 1.104, "step": 4249 }, { "epoch": 0.3260744472718438, "grad_norm": 0.19740545749664307, "learning_rate": 7.849889042854113e-06, "loss": 0.9531, "step": 4250 }, { "epoch": 0.32615117067120186, "grad_norm": 0.1943281888961792, "learning_rate": 7.848870521899518e-06, "loss": 1.0792, "step": 4251 }, { "epoch": 0.32622789407055997, "grad_norm": 0.23808801174163818, "learning_rate": 7.847851825874175e-06, "loss": 1.1396, "step": 4252 }, { "epoch": 0.326304617469918, "grad_norm": 0.1986899971961975, "learning_rate": 7.846832954840689e-06, "loss": 1.1255, "step": 4253 }, { "epoch": 0.3263813408692761, "grad_norm": 0.20457187294960022, "learning_rate": 7.845813908861668e-06, "loss": 1.0723, "step": 4254 }, { "epoch": 0.3264580642686342, "grad_norm": 0.22374294698238373, "learning_rate": 7.844794687999737e-06, "loss": 1.097, "step": 4255 }, { "epoch": 0.32653478766799227, "grad_norm": 0.26558080315589905, "learning_rate": 7.843775292317529e-06, "loss": 1.1146, "step": 4256 }, { "epoch": 0.32661151106735037, "grad_norm": 0.2117437720298767, "learning_rate": 7.84275572187769e-06, "loss": 1.094, "step": 4257 }, { "epoch": 0.3266882344667084, "grad_norm": 0.09563012421131134, "learning_rate": 7.841735976742877e-06, "loss": 1.1717, "step": 4258 }, { "epoch": 0.3267649578660665, "grad_norm": 0.22695598006248474, "learning_rate": 7.84071605697575e-06, "loss": 1.1097, "step": 4259 }, { "epoch": 0.3268416812654246, "grad_norm": 0.2102193683385849, "learning_rate": 7.839695962638994e-06, "loss": 1.1536, "step": 4260 }, { "epoch": 0.32691840466478267, "grad_norm": 0.18182507157325745, "learning_rate": 7.83867569379529e-06, "loss": 1.1092, "step": 4261 }, { "epoch": 0.32699512806414077, "grad_norm": 0.20693828165531158, "learning_rate": 7.83765525050734e-06, "loss": 0.9455, "step": 4262 }, { "epoch": 0.3270718514634988, "grad_norm": 0.17758572101593018, "learning_rate": 7.83663463283785e-06, "loss": 1.1342, "step": 4263 }, { "epoch": 0.3271485748628569, "grad_norm": 0.21114657819271088, "learning_rate": 7.835613840849541e-06, "loss": 1.1722, "step": 4264 }, { "epoch": 0.327225298262215, "grad_norm": 0.2089896947145462, "learning_rate": 7.834592874605145e-06, "loss": 1.1413, "step": 4265 }, { "epoch": 0.3273020216615731, "grad_norm": 0.20823827385902405, "learning_rate": 7.8335717341674e-06, "loss": 1.0981, "step": 4266 }, { "epoch": 0.3273787450609312, "grad_norm": 0.20183666050434113, "learning_rate": 7.832550419599062e-06, "loss": 1.2056, "step": 4267 }, { "epoch": 0.3274554684602892, "grad_norm": 0.270102858543396, "learning_rate": 7.831528930962888e-06, "loss": 1.1217, "step": 4268 }, { "epoch": 0.3275321918596473, "grad_norm": 0.19207756221294403, "learning_rate": 7.830507268321657e-06, "loss": 1.0981, "step": 4269 }, { "epoch": 0.32760891525900543, "grad_norm": 0.19178816676139832, "learning_rate": 7.829485431738149e-06, "loss": 1.0981, "step": 4270 }, { "epoch": 0.3276856386583635, "grad_norm": 0.195746511220932, "learning_rate": 7.82846342127516e-06, "loss": 1.0324, "step": 4271 }, { "epoch": 0.3277623620577216, "grad_norm": 0.21920306980609894, "learning_rate": 7.827441236995495e-06, "loss": 1.1831, "step": 4272 }, { "epoch": 0.3278390854570796, "grad_norm": 0.2163018137216568, "learning_rate": 7.82641887896197e-06, "loss": 1.1229, "step": 4273 }, { "epoch": 0.32791580885643773, "grad_norm": 0.2207801192998886, "learning_rate": 7.825396347237413e-06, "loss": 0.9611, "step": 4274 }, { "epoch": 0.32799253225579583, "grad_norm": 0.17686425149440765, "learning_rate": 7.82437364188466e-06, "loss": 1.0439, "step": 4275 }, { "epoch": 0.3280692556551539, "grad_norm": 0.18026815354824066, "learning_rate": 7.823350762966557e-06, "loss": 1.1481, "step": 4276 }, { "epoch": 0.328145979054512, "grad_norm": 0.24122700095176697, "learning_rate": 7.822327710545967e-06, "loss": 1.0927, "step": 4277 }, { "epoch": 0.32822270245387003, "grad_norm": 0.2099127620458603, "learning_rate": 7.821304484685756e-06, "loss": 1.064, "step": 4278 }, { "epoch": 0.32829942585322813, "grad_norm": 0.24784858524799347, "learning_rate": 7.820281085448804e-06, "loss": 1.1418, "step": 4279 }, { "epoch": 0.32837614925258624, "grad_norm": 0.5914435982704163, "learning_rate": 7.819257512898005e-06, "loss": 1.0773, "step": 4280 }, { "epoch": 0.3284528726519443, "grad_norm": 0.19005610048770905, "learning_rate": 7.818233767096255e-06, "loss": 1.0511, "step": 4281 }, { "epoch": 0.3285295960513024, "grad_norm": 0.20332428812980652, "learning_rate": 7.81720984810647e-06, "loss": 1.1345, "step": 4282 }, { "epoch": 0.32860631945066043, "grad_norm": 0.19345012307167053, "learning_rate": 7.816185755991571e-06, "loss": 1.0774, "step": 4283 }, { "epoch": 0.32868304285001854, "grad_norm": 0.44008058309555054, "learning_rate": 7.815161490814494e-06, "loss": 1.1696, "step": 4284 }, { "epoch": 0.32875976624937664, "grad_norm": 0.3535563349723816, "learning_rate": 7.81413705263818e-06, "loss": 1.0528, "step": 4285 }, { "epoch": 0.3288364896487347, "grad_norm": 0.34385401010513306, "learning_rate": 7.813112441525583e-06, "loss": 1.0594, "step": 4286 }, { "epoch": 0.3289132130480928, "grad_norm": 0.5044721961021423, "learning_rate": 7.812087657539668e-06, "loss": 1.1194, "step": 4287 }, { "epoch": 0.3289899364474509, "grad_norm": 0.1957246959209442, "learning_rate": 7.811062700743413e-06, "loss": 1.1701, "step": 4288 }, { "epoch": 0.32906665984680894, "grad_norm": 0.26042768359184265, "learning_rate": 7.810037571199803e-06, "loss": 1.0979, "step": 4289 }, { "epoch": 0.32914338324616704, "grad_norm": 0.2060677707195282, "learning_rate": 7.809012268971835e-06, "loss": 1.0218, "step": 4290 }, { "epoch": 0.3292201066455251, "grad_norm": 0.2037869542837143, "learning_rate": 7.807986794122518e-06, "loss": 1.0227, "step": 4291 }, { "epoch": 0.3292968300448832, "grad_norm": 0.23603031039237976, "learning_rate": 7.806961146714866e-06, "loss": 1.1617, "step": 4292 }, { "epoch": 0.3293735534442413, "grad_norm": 0.20914402604103088, "learning_rate": 7.805935326811913e-06, "loss": 1.0725, "step": 4293 }, { "epoch": 0.32945027684359934, "grad_norm": 0.2123742401599884, "learning_rate": 7.804909334476696e-06, "loss": 1.0893, "step": 4294 }, { "epoch": 0.32952700024295745, "grad_norm": 0.19787491858005524, "learning_rate": 7.803883169772264e-06, "loss": 1.0889, "step": 4295 }, { "epoch": 0.3296037236423155, "grad_norm": 0.18657365441322327, "learning_rate": 7.802856832761678e-06, "loss": 1.1238, "step": 4296 }, { "epoch": 0.3296804470416736, "grad_norm": 0.19995389878749847, "learning_rate": 7.801830323508011e-06, "loss": 1.0757, "step": 4297 }, { "epoch": 0.3297571704410317, "grad_norm": 0.22366903722286224, "learning_rate": 7.800803642074343e-06, "loss": 1.0928, "step": 4298 }, { "epoch": 0.32983389384038975, "grad_norm": 0.624474048614502, "learning_rate": 7.799776788523767e-06, "loss": 1.069, "step": 4299 }, { "epoch": 0.32991061723974785, "grad_norm": 0.2093268632888794, "learning_rate": 7.798749762919387e-06, "loss": 1.1638, "step": 4300 }, { "epoch": 0.3299873406391059, "grad_norm": 0.22862370312213898, "learning_rate": 7.797722565324312e-06, "loss": 1.1242, "step": 4301 }, { "epoch": 0.330064064038464, "grad_norm": 0.2410593181848526, "learning_rate": 7.796695195801673e-06, "loss": 1.1088, "step": 4302 }, { "epoch": 0.3301407874378221, "grad_norm": 0.23520183563232422, "learning_rate": 7.7956676544146e-06, "loss": 1.1585, "step": 4303 }, { "epoch": 0.33021751083718015, "grad_norm": 0.2548092007637024, "learning_rate": 7.794639941226238e-06, "loss": 1.0881, "step": 4304 }, { "epoch": 0.33029423423653825, "grad_norm": 0.18967053294181824, "learning_rate": 7.793612056299745e-06, "loss": 1.0642, "step": 4305 }, { "epoch": 0.3303709576358963, "grad_norm": 0.1804339438676834, "learning_rate": 7.792583999698284e-06, "loss": 1.0661, "step": 4306 }, { "epoch": 0.3304476810352544, "grad_norm": 0.22054065763950348, "learning_rate": 7.791555771485037e-06, "loss": 1.0472, "step": 4307 }, { "epoch": 0.3305244044346125, "grad_norm": 0.3337841331958771, "learning_rate": 7.790527371723187e-06, "loss": 1.1352, "step": 4308 }, { "epoch": 0.33060112783397055, "grad_norm": 0.23290467262268066, "learning_rate": 7.789498800475932e-06, "loss": 1.1475, "step": 4309 }, { "epoch": 0.33067785123332866, "grad_norm": 0.6280738711357117, "learning_rate": 7.788470057806483e-06, "loss": 1.068, "step": 4310 }, { "epoch": 0.3307545746326867, "grad_norm": 0.23242361843585968, "learning_rate": 7.787441143778057e-06, "loss": 1.1429, "step": 4311 }, { "epoch": 0.3308312980320448, "grad_norm": 0.20421858131885529, "learning_rate": 7.786412058453886e-06, "loss": 1.0431, "step": 4312 }, { "epoch": 0.3309080214314029, "grad_norm": 0.26334115862846375, "learning_rate": 7.785382801897207e-06, "loss": 1.1074, "step": 4313 }, { "epoch": 0.33098474483076096, "grad_norm": 0.18168847262859344, "learning_rate": 7.784353374171273e-06, "loss": 1.1064, "step": 4314 }, { "epoch": 0.33106146823011906, "grad_norm": 0.1875869333744049, "learning_rate": 7.783323775339342e-06, "loss": 1.1218, "step": 4315 }, { "epoch": 0.3311381916294771, "grad_norm": 0.2442728877067566, "learning_rate": 7.782294005464688e-06, "loss": 1.0609, "step": 4316 }, { "epoch": 0.3312149150288352, "grad_norm": 0.22718161344528198, "learning_rate": 7.781264064610593e-06, "loss": 1.0031, "step": 4317 }, { "epoch": 0.3312916384281933, "grad_norm": 0.24658071994781494, "learning_rate": 7.780233952840353e-06, "loss": 1.1335, "step": 4318 }, { "epoch": 0.33136836182755136, "grad_norm": 0.30214691162109375, "learning_rate": 7.779203670217264e-06, "loss": 1.092, "step": 4319 }, { "epoch": 0.33144508522690946, "grad_norm": 0.20186389982700348, "learning_rate": 7.778173216804643e-06, "loss": 1.1087, "step": 4320 }, { "epoch": 0.3315218086262675, "grad_norm": 0.3071085810661316, "learning_rate": 7.777142592665814e-06, "loss": 1.0925, "step": 4321 }, { "epoch": 0.3315985320256256, "grad_norm": 0.2582693099975586, "learning_rate": 7.776111797864115e-06, "loss": 1.1755, "step": 4322 }, { "epoch": 0.3316752554249837, "grad_norm": 0.28547757863998413, "learning_rate": 7.775080832462884e-06, "loss": 1.0745, "step": 4323 }, { "epoch": 0.33175197882434176, "grad_norm": 0.22265851497650146, "learning_rate": 7.774049696525483e-06, "loss": 1.0691, "step": 4324 }, { "epoch": 0.33182870222369987, "grad_norm": 0.18340538442134857, "learning_rate": 7.773018390115275e-06, "loss": 1.082, "step": 4325 }, { "epoch": 0.3319054256230579, "grad_norm": 0.1972285509109497, "learning_rate": 7.771986913295638e-06, "loss": 1.118, "step": 4326 }, { "epoch": 0.331982149022416, "grad_norm": 0.2813330888748169, "learning_rate": 7.770955266129957e-06, "loss": 1.1311, "step": 4327 }, { "epoch": 0.3320588724217741, "grad_norm": 0.1842583268880844, "learning_rate": 7.76992344868163e-06, "loss": 1.0604, "step": 4328 }, { "epoch": 0.33213559582113217, "grad_norm": 0.25073498487472534, "learning_rate": 7.768891461014068e-06, "loss": 1.0903, "step": 4329 }, { "epoch": 0.33221231922049027, "grad_norm": 0.18995170295238495, "learning_rate": 7.767859303190688e-06, "loss": 1.0667, "step": 4330 }, { "epoch": 0.3322890426198483, "grad_norm": 0.20243431627750397, "learning_rate": 7.766826975274916e-06, "loss": 1.1092, "step": 4331 }, { "epoch": 0.3323657660192064, "grad_norm": 0.2827082574367523, "learning_rate": 7.765794477330193e-06, "loss": 1.0494, "step": 4332 }, { "epoch": 0.3324424894185645, "grad_norm": 0.29779767990112305, "learning_rate": 7.764761809419969e-06, "loss": 1.1406, "step": 4333 }, { "epoch": 0.33251921281792257, "grad_norm": 0.18436099588871002, "learning_rate": 7.763728971607704e-06, "loss": 1.1146, "step": 4334 }, { "epoch": 0.3325959362172807, "grad_norm": 0.2028699666261673, "learning_rate": 7.762695963956873e-06, "loss": 1.1352, "step": 4335 }, { "epoch": 0.3326726596166387, "grad_norm": 0.3456462025642395, "learning_rate": 7.761662786530947e-06, "loss": 1.2908, "step": 4336 }, { "epoch": 0.3327493830159968, "grad_norm": 0.20974914729595184, "learning_rate": 7.760629439393428e-06, "loss": 1.0968, "step": 4337 }, { "epoch": 0.3328261064153549, "grad_norm": 0.20121772587299347, "learning_rate": 7.759595922607813e-06, "loss": 1.122, "step": 4338 }, { "epoch": 0.332902829814713, "grad_norm": 0.22403591871261597, "learning_rate": 7.758562236237614e-06, "loss": 1.0688, "step": 4339 }, { "epoch": 0.3329795532140711, "grad_norm": 0.21409717202186584, "learning_rate": 7.757528380346353e-06, "loss": 1.1728, "step": 4340 }, { "epoch": 0.3330562766134291, "grad_norm": 0.20400665700435638, "learning_rate": 7.756494354997567e-06, "loss": 1.1798, "step": 4341 }, { "epoch": 0.33313300001278723, "grad_norm": 0.24412192404270172, "learning_rate": 7.755460160254798e-06, "loss": 1.1396, "step": 4342 }, { "epoch": 0.33320972341214533, "grad_norm": 0.19742631912231445, "learning_rate": 7.754425796181599e-06, "loss": 1.0655, "step": 4343 }, { "epoch": 0.3332864468115034, "grad_norm": 0.17014731466770172, "learning_rate": 7.753391262841537e-06, "loss": 1.0439, "step": 4344 }, { "epoch": 0.3333631702108615, "grad_norm": 0.27667370438575745, "learning_rate": 7.752356560298181e-06, "loss": 1.0726, "step": 4345 }, { "epoch": 0.3334398936102196, "grad_norm": 0.26478931307792664, "learning_rate": 7.751321688615123e-06, "loss": 1.0125, "step": 4346 }, { "epoch": 0.33351661700957763, "grad_norm": 0.19899839162826538, "learning_rate": 7.750286647855954e-06, "loss": 1.11, "step": 4347 }, { "epoch": 0.33359334040893573, "grad_norm": 0.2141726166009903, "learning_rate": 7.749251438084283e-06, "loss": 1.0528, "step": 4348 }, { "epoch": 0.3336700638082938, "grad_norm": 0.2194036841392517, "learning_rate": 7.748216059363727e-06, "loss": 1.078, "step": 4349 }, { "epoch": 0.3337467872076519, "grad_norm": 0.10553374886512756, "learning_rate": 7.747180511757908e-06, "loss": 1.2163, "step": 4350 }, { "epoch": 0.33382351060701, "grad_norm": 0.6213448643684387, "learning_rate": 7.746144795330468e-06, "loss": 1.0897, "step": 4351 }, { "epoch": 0.33390023400636804, "grad_norm": 0.22815661132335663, "learning_rate": 7.745108910145052e-06, "loss": 1.1134, "step": 4352 }, { "epoch": 0.33397695740572614, "grad_norm": 0.3143658936023712, "learning_rate": 7.74407285626532e-06, "loss": 1.1505, "step": 4353 }, { "epoch": 0.3340536808050842, "grad_norm": 0.2005816400051117, "learning_rate": 7.743036633754937e-06, "loss": 1.0973, "step": 4354 }, { "epoch": 0.3341304042044423, "grad_norm": 0.2039792686700821, "learning_rate": 7.742000242677585e-06, "loss": 1.0941, "step": 4355 }, { "epoch": 0.3342071276038004, "grad_norm": 0.26748594641685486, "learning_rate": 7.740963683096952e-06, "loss": 1.1746, "step": 4356 }, { "epoch": 0.33428385100315844, "grad_norm": 0.20046889781951904, "learning_rate": 7.739926955076738e-06, "loss": 1.0767, "step": 4357 }, { "epoch": 0.33436057440251654, "grad_norm": 0.24151107668876648, "learning_rate": 7.738890058680651e-06, "loss": 1.0571, "step": 4358 }, { "epoch": 0.3344372978018746, "grad_norm": 0.20823849737644196, "learning_rate": 7.737852993972411e-06, "loss": 1.0398, "step": 4359 }, { "epoch": 0.3345140212012327, "grad_norm": 0.2763756513595581, "learning_rate": 7.73681576101575e-06, "loss": 0.9812, "step": 4360 }, { "epoch": 0.3345907446005908, "grad_norm": 0.240657240152359, "learning_rate": 7.735778359874407e-06, "loss": 1.0541, "step": 4361 }, { "epoch": 0.33466746799994884, "grad_norm": 0.21364325284957886, "learning_rate": 7.734740790612137e-06, "loss": 1.1451, "step": 4362 }, { "epoch": 0.33474419139930695, "grad_norm": 0.1875043660402298, "learning_rate": 7.733703053292694e-06, "loss": 1.0429, "step": 4363 }, { "epoch": 0.334820914798665, "grad_norm": 0.19921629130840302, "learning_rate": 7.732665147979858e-06, "loss": 1.1277, "step": 4364 }, { "epoch": 0.3348976381980231, "grad_norm": 0.22289440035820007, "learning_rate": 7.731627074737406e-06, "loss": 1.1416, "step": 4365 }, { "epoch": 0.3349743615973812, "grad_norm": 0.20110173523426056, "learning_rate": 7.730588833629131e-06, "loss": 1.0704, "step": 4366 }, { "epoch": 0.33505108499673925, "grad_norm": 0.24562713503837585, "learning_rate": 7.72955042471884e-06, "loss": 1.0302, "step": 4367 }, { "epoch": 0.33512780839609735, "grad_norm": 0.2279452234506607, "learning_rate": 7.72851184807034e-06, "loss": 1.1022, "step": 4368 }, { "epoch": 0.3352045317954554, "grad_norm": 0.3575749099254608, "learning_rate": 7.727473103747456e-06, "loss": 1.0828, "step": 4369 }, { "epoch": 0.3352812551948135, "grad_norm": 0.3267897367477417, "learning_rate": 7.726434191814024e-06, "loss": 1.0663, "step": 4370 }, { "epoch": 0.3353579785941716, "grad_norm": 0.2045108526945114, "learning_rate": 7.725395112333884e-06, "loss": 1.1192, "step": 4371 }, { "epoch": 0.33543470199352965, "grad_norm": 0.17831432819366455, "learning_rate": 7.724355865370896e-06, "loss": 1.0998, "step": 4372 }, { "epoch": 0.33551142539288775, "grad_norm": 0.18031257390975952, "learning_rate": 7.723316450988918e-06, "loss": 1.1109, "step": 4373 }, { "epoch": 0.3355881487922458, "grad_norm": 0.3427230715751648, "learning_rate": 7.72227686925183e-06, "loss": 1.0933, "step": 4374 }, { "epoch": 0.3356648721916039, "grad_norm": 0.17582085728645325, "learning_rate": 7.721237120223514e-06, "loss": 1.1666, "step": 4375 }, { "epoch": 0.335741595590962, "grad_norm": 0.19065463542938232, "learning_rate": 7.720197203967866e-06, "loss": 1.0632, "step": 4376 }, { "epoch": 0.33581831899032005, "grad_norm": 0.1643633246421814, "learning_rate": 7.719157120548792e-06, "loss": 1.0635, "step": 4377 }, { "epoch": 0.33589504238967816, "grad_norm": 0.38454288244247437, "learning_rate": 7.71811687003021e-06, "loss": 1.1105, "step": 4378 }, { "epoch": 0.3359717657890362, "grad_norm": 0.2739012837409973, "learning_rate": 7.717076452476043e-06, "loss": 1.0953, "step": 4379 }, { "epoch": 0.3360484891883943, "grad_norm": 0.5633780360221863, "learning_rate": 7.716035867950229e-06, "loss": 1.087, "step": 4380 }, { "epoch": 0.3361252125877524, "grad_norm": 0.23206546902656555, "learning_rate": 7.714995116516713e-06, "loss": 1.0976, "step": 4381 }, { "epoch": 0.33620193598711046, "grad_norm": 0.2549842298030853, "learning_rate": 7.713954198239454e-06, "loss": 1.1041, "step": 4382 }, { "epoch": 0.33627865938646856, "grad_norm": 0.3287334442138672, "learning_rate": 7.712913113182419e-06, "loss": 1.0868, "step": 4383 }, { "epoch": 0.3363553827858266, "grad_norm": 0.25521162152290344, "learning_rate": 7.711871861409584e-06, "loss": 1.1133, "step": 4384 }, { "epoch": 0.3364321061851847, "grad_norm": 0.26066070795059204, "learning_rate": 7.710830442984938e-06, "loss": 1.0698, "step": 4385 }, { "epoch": 0.3365088295845428, "grad_norm": 0.21165673434734344, "learning_rate": 7.709788857972479e-06, "loss": 1.0264, "step": 4386 }, { "epoch": 0.33658555298390086, "grad_norm": 0.2012885957956314, "learning_rate": 7.708747106436213e-06, "loss": 1.0675, "step": 4387 }, { "epoch": 0.33666227638325896, "grad_norm": 0.28468137979507446, "learning_rate": 7.707705188440165e-06, "loss": 1.1157, "step": 4388 }, { "epoch": 0.336738999782617, "grad_norm": 0.19967928528785706, "learning_rate": 7.706663104048356e-06, "loss": 1.1105, "step": 4389 }, { "epoch": 0.3368157231819751, "grad_norm": 0.2967929244041443, "learning_rate": 7.705620853324828e-06, "loss": 1.153, "step": 4390 }, { "epoch": 0.3368924465813332, "grad_norm": 0.21954898536205292, "learning_rate": 7.70457843633363e-06, "loss": 1.1146, "step": 4391 }, { "epoch": 0.33696916998069126, "grad_norm": 0.21740666031837463, "learning_rate": 7.703535853138819e-06, "loss": 1.0714, "step": 4392 }, { "epoch": 0.33704589338004937, "grad_norm": 0.20511320233345032, "learning_rate": 7.702493103804469e-06, "loss": 1.0932, "step": 4393 }, { "epoch": 0.3371226167794074, "grad_norm": 0.19710053503513336, "learning_rate": 7.701450188394658e-06, "loss": 1.1489, "step": 4394 }, { "epoch": 0.3371993401787655, "grad_norm": 0.22723346948623657, "learning_rate": 7.700407106973474e-06, "loss": 1.0235, "step": 4395 }, { "epoch": 0.3372760635781236, "grad_norm": 0.18342722952365875, "learning_rate": 7.69936385960502e-06, "loss": 1.1018, "step": 4396 }, { "epoch": 0.33735278697748167, "grad_norm": 0.20478181540966034, "learning_rate": 7.698320446353404e-06, "loss": 1.0788, "step": 4397 }, { "epoch": 0.33742951037683977, "grad_norm": 0.21519355475902557, "learning_rate": 7.69727686728275e-06, "loss": 1.0362, "step": 4398 }, { "epoch": 0.3375062337761978, "grad_norm": 0.25912851095199585, "learning_rate": 7.696233122457184e-06, "loss": 1.0143, "step": 4399 }, { "epoch": 0.3375829571755559, "grad_norm": 0.21965886652469635, "learning_rate": 7.695189211940847e-06, "loss": 1.1182, "step": 4400 }, { "epoch": 0.337659680574914, "grad_norm": 0.22326864302158356, "learning_rate": 7.694145135797897e-06, "loss": 1.0812, "step": 4401 }, { "epoch": 0.33773640397427207, "grad_norm": 0.3539496064186096, "learning_rate": 7.69310089409249e-06, "loss": 1.0474, "step": 4402 }, { "epoch": 0.3378131273736302, "grad_norm": 0.23316620290279388, "learning_rate": 7.692056486888795e-06, "loss": 1.1443, "step": 4403 }, { "epoch": 0.3378898507729883, "grad_norm": 0.24022291600704193, "learning_rate": 7.691011914250998e-06, "loss": 1.0943, "step": 4404 }, { "epoch": 0.3379665741723463, "grad_norm": 0.3625507056713104, "learning_rate": 7.68996717624329e-06, "loss": 1.0326, "step": 4405 }, { "epoch": 0.3380432975717044, "grad_norm": 0.18112747371196747, "learning_rate": 7.688922272929872e-06, "loss": 1.1725, "step": 4406 }, { "epoch": 0.3381200209710625, "grad_norm": 0.2038773149251938, "learning_rate": 7.687877204374957e-06, "loss": 1.1229, "step": 4407 }, { "epoch": 0.3381967443704206, "grad_norm": 0.30803269147872925, "learning_rate": 7.686831970642768e-06, "loss": 1.097, "step": 4408 }, { "epoch": 0.3382734677697787, "grad_norm": 0.184206023812294, "learning_rate": 7.685786571797535e-06, "loss": 1.0168, "step": 4409 }, { "epoch": 0.3383501911691367, "grad_norm": 0.23060999810695648, "learning_rate": 7.6847410079035e-06, "loss": 1.1448, "step": 4410 }, { "epoch": 0.33842691456849483, "grad_norm": 0.23226453363895416, "learning_rate": 7.683695279024922e-06, "loss": 1.1094, "step": 4411 }, { "epoch": 0.3385036379678529, "grad_norm": 0.19597500562667847, "learning_rate": 7.682649385226053e-06, "loss": 1.0648, "step": 4412 }, { "epoch": 0.338580361367211, "grad_norm": 0.3043712377548218, "learning_rate": 7.681603326571179e-06, "loss": 1.0254, "step": 4413 }, { "epoch": 0.3386570847665691, "grad_norm": 0.22888313233852386, "learning_rate": 7.680557103124574e-06, "loss": 1.0899, "step": 4414 }, { "epoch": 0.33873380816592713, "grad_norm": 0.22895382344722748, "learning_rate": 7.679510714950532e-06, "loss": 1.0222, "step": 4415 }, { "epoch": 0.33881053156528523, "grad_norm": 0.22106440365314484, "learning_rate": 7.678464162113359e-06, "loss": 1.1134, "step": 4416 }, { "epoch": 0.3388872549646433, "grad_norm": 0.24628964066505432, "learning_rate": 7.67741744467737e-06, "loss": 1.0533, "step": 4417 }, { "epoch": 0.3389639783640014, "grad_norm": 0.2835063338279724, "learning_rate": 7.676370562706882e-06, "loss": 1.1001, "step": 4418 }, { "epoch": 0.3390407017633595, "grad_norm": 0.19643914699554443, "learning_rate": 7.675323516266236e-06, "loss": 1.0855, "step": 4419 }, { "epoch": 0.33911742516271753, "grad_norm": 0.1988016813993454, "learning_rate": 7.67427630541977e-06, "loss": 1.0803, "step": 4420 }, { "epoch": 0.33919414856207564, "grad_norm": 0.2218898981809616, "learning_rate": 7.673228930231843e-06, "loss": 1.1116, "step": 4421 }, { "epoch": 0.3392708719614337, "grad_norm": 0.20337671041488647, "learning_rate": 7.672181390766816e-06, "loss": 1.1422, "step": 4422 }, { "epoch": 0.3393475953607918, "grad_norm": 0.20267756283283234, "learning_rate": 7.671133687089063e-06, "loss": 1.0599, "step": 4423 }, { "epoch": 0.3394243187601499, "grad_norm": 0.17388254404067993, "learning_rate": 7.67008581926297e-06, "loss": 1.1511, "step": 4424 }, { "epoch": 0.33950104215950794, "grad_norm": 0.23057690262794495, "learning_rate": 7.66903778735293e-06, "loss": 1.1556, "step": 4425 }, { "epoch": 0.33957776555886604, "grad_norm": 0.21103981137275696, "learning_rate": 7.667989591423349e-06, "loss": 1.0981, "step": 4426 }, { "epoch": 0.3396544889582241, "grad_norm": 0.20167729258537292, "learning_rate": 7.666941231538638e-06, "loss": 1.1553, "step": 4427 }, { "epoch": 0.3397312123575822, "grad_norm": 0.22025631368160248, "learning_rate": 7.665892707763223e-06, "loss": 1.0778, "step": 4428 }, { "epoch": 0.3398079357569403, "grad_norm": 0.2058657556772232, "learning_rate": 7.664844020161541e-06, "loss": 1.0798, "step": 4429 }, { "epoch": 0.33988465915629834, "grad_norm": 0.36063268780708313, "learning_rate": 7.663795168798034e-06, "loss": 1.1219, "step": 4430 }, { "epoch": 0.33996138255565644, "grad_norm": 0.09906791895627975, "learning_rate": 7.662746153737157e-06, "loss": 1.2453, "step": 4431 }, { "epoch": 0.3400381059550145, "grad_norm": 0.1966705173254013, "learning_rate": 7.661696975043379e-06, "loss": 1.0629, "step": 4432 }, { "epoch": 0.3401148293543726, "grad_norm": 0.21923980116844177, "learning_rate": 7.660647632781167e-06, "loss": 1.2258, "step": 4433 }, { "epoch": 0.3401915527537307, "grad_norm": 0.18630893528461456, "learning_rate": 7.659598127015013e-06, "loss": 1.1052, "step": 4434 }, { "epoch": 0.34026827615308874, "grad_norm": 0.17917895317077637, "learning_rate": 7.658548457809406e-06, "loss": 1.0393, "step": 4435 }, { "epoch": 0.34034499955244685, "grad_norm": 0.19264379143714905, "learning_rate": 7.657498625228856e-06, "loss": 1.1264, "step": 4436 }, { "epoch": 0.3404217229518049, "grad_norm": 0.21829956769943237, "learning_rate": 7.656448629337877e-06, "loss": 1.1504, "step": 4437 }, { "epoch": 0.340498446351163, "grad_norm": 0.3454574942588806, "learning_rate": 7.655398470200992e-06, "loss": 1.0334, "step": 4438 }, { "epoch": 0.3405751697505211, "grad_norm": 0.390773743391037, "learning_rate": 7.654348147882737e-06, "loss": 1.0972, "step": 4439 }, { "epoch": 0.34065189314987915, "grad_norm": 0.20400148630142212, "learning_rate": 7.653297662447658e-06, "loss": 1.0758, "step": 4440 }, { "epoch": 0.34072861654923725, "grad_norm": 0.37569019198417664, "learning_rate": 7.65224701396031e-06, "loss": 1.1454, "step": 4441 }, { "epoch": 0.3408053399485953, "grad_norm": 0.28027644753456116, "learning_rate": 7.651196202485259e-06, "loss": 1.1669, "step": 4442 }, { "epoch": 0.3408820633479534, "grad_norm": 0.25308361649513245, "learning_rate": 7.650145228087077e-06, "loss": 1.1319, "step": 4443 }, { "epoch": 0.3409587867473115, "grad_norm": 0.3533119261264801, "learning_rate": 7.649094090830351e-06, "loss": 1.0943, "step": 4444 }, { "epoch": 0.34103551014666955, "grad_norm": 0.3891519010066986, "learning_rate": 7.648042790779677e-06, "loss": 1.1181, "step": 4445 }, { "epoch": 0.34111223354602765, "grad_norm": 0.21554067730903625, "learning_rate": 7.646991327999658e-06, "loss": 1.0139, "step": 4446 }, { "epoch": 0.3411889569453857, "grad_norm": 0.22746993601322174, "learning_rate": 7.645939702554913e-06, "loss": 1.103, "step": 4447 }, { "epoch": 0.3412656803447438, "grad_norm": 0.09607256948947906, "learning_rate": 7.644887914510065e-06, "loss": 1.1594, "step": 4448 }, { "epoch": 0.3413424037441019, "grad_norm": 0.2187839150428772, "learning_rate": 7.643835963929747e-06, "loss": 1.1028, "step": 4449 }, { "epoch": 0.34141912714345996, "grad_norm": 0.2760956585407257, "learning_rate": 7.642783850878608e-06, "loss": 1.1211, "step": 4450 }, { "epoch": 0.34149585054281806, "grad_norm": 0.18838605284690857, "learning_rate": 7.641731575421302e-06, "loss": 1.1433, "step": 4451 }, { "epoch": 0.3415725739421761, "grad_norm": 0.32338106632232666, "learning_rate": 7.640679137622493e-06, "loss": 1.1268, "step": 4452 }, { "epoch": 0.3416492973415342, "grad_norm": 0.27477481961250305, "learning_rate": 7.639626537546856e-06, "loss": 1.0011, "step": 4453 }, { "epoch": 0.3417260207408923, "grad_norm": 0.2807546854019165, "learning_rate": 7.638573775259078e-06, "loss": 1.1516, "step": 4454 }, { "epoch": 0.34180274414025036, "grad_norm": 0.3791057765483856, "learning_rate": 7.637520850823852e-06, "loss": 1.0831, "step": 4455 }, { "epoch": 0.34187946753960846, "grad_norm": 0.19465422630310059, "learning_rate": 7.636467764305886e-06, "loss": 1.133, "step": 4456 }, { "epoch": 0.3419561909389665, "grad_norm": 0.1875925064086914, "learning_rate": 7.635414515769891e-06, "loss": 1.0735, "step": 4457 }, { "epoch": 0.3420329143383246, "grad_norm": 0.30849310755729675, "learning_rate": 7.634361105280596e-06, "loss": 1.1318, "step": 4458 }, { "epoch": 0.3421096377376827, "grad_norm": 0.22811925411224365, "learning_rate": 7.633307532902731e-06, "loss": 1.1117, "step": 4459 }, { "epoch": 0.34218636113704076, "grad_norm": 0.19910702109336853, "learning_rate": 7.632253798701045e-06, "loss": 1.1594, "step": 4460 }, { "epoch": 0.34226308453639886, "grad_norm": 0.18769991397857666, "learning_rate": 7.631199902740295e-06, "loss": 1.0696, "step": 4461 }, { "epoch": 0.34233980793575697, "grad_norm": 0.19627360999584198, "learning_rate": 7.630145845085237e-06, "loss": 1.1372, "step": 4462 }, { "epoch": 0.342416531335115, "grad_norm": 0.1743745505809784, "learning_rate": 7.629091625800655e-06, "loss": 1.074, "step": 4463 }, { "epoch": 0.3424932547344731, "grad_norm": 0.17929545044898987, "learning_rate": 7.628037244951328e-06, "loss": 1.1159, "step": 4464 }, { "epoch": 0.34256997813383117, "grad_norm": 0.32549217343330383, "learning_rate": 7.626982702602051e-06, "loss": 1.0467, "step": 4465 }, { "epoch": 0.34264670153318927, "grad_norm": 0.24128416180610657, "learning_rate": 7.62592799881763e-06, "loss": 1.0093, "step": 4466 }, { "epoch": 0.34272342493254737, "grad_norm": 0.23270419239997864, "learning_rate": 7.62487313366288e-06, "loss": 1.1072, "step": 4467 }, { "epoch": 0.3428001483319054, "grad_norm": 0.22746771574020386, "learning_rate": 7.623818107202624e-06, "loss": 1.1262, "step": 4468 }, { "epoch": 0.3428768717312635, "grad_norm": 0.22211460769176483, "learning_rate": 7.622762919501696e-06, "loss": 1.0102, "step": 4469 }, { "epoch": 0.34295359513062157, "grad_norm": 0.5291275978088379, "learning_rate": 7.62170757062494e-06, "loss": 1.0989, "step": 4470 }, { "epoch": 0.34303031852997967, "grad_norm": 0.21686603128910065, "learning_rate": 7.620652060637213e-06, "loss": 1.0235, "step": 4471 }, { "epoch": 0.3431070419293378, "grad_norm": 0.3388325870037079, "learning_rate": 7.619596389603374e-06, "loss": 1.0154, "step": 4472 }, { "epoch": 0.3431837653286958, "grad_norm": 0.335867702960968, "learning_rate": 7.618540557588301e-06, "loss": 1.1557, "step": 4473 }, { "epoch": 0.3432604887280539, "grad_norm": 0.24225403368473053, "learning_rate": 7.617484564656875e-06, "loss": 1.1505, "step": 4474 }, { "epoch": 0.343337212127412, "grad_norm": 0.20273272693157196, "learning_rate": 7.616428410873991e-06, "loss": 1.1323, "step": 4475 }, { "epoch": 0.3434139355267701, "grad_norm": 0.3220592141151428, "learning_rate": 7.615372096304552e-06, "loss": 1.0406, "step": 4476 }, { "epoch": 0.3434906589261282, "grad_norm": 0.24948260188102722, "learning_rate": 7.61431562101347e-06, "loss": 1.0817, "step": 4477 }, { "epoch": 0.3435673823254862, "grad_norm": 0.21169434487819672, "learning_rate": 7.613258985065672e-06, "loss": 1.2235, "step": 4478 }, { "epoch": 0.34364410572484433, "grad_norm": 0.314932256937027, "learning_rate": 7.612202188526089e-06, "loss": 1.052, "step": 4479 }, { "epoch": 0.3437208291242024, "grad_norm": 0.28982457518577576, "learning_rate": 7.611145231459662e-06, "loss": 1.1413, "step": 4480 }, { "epoch": 0.3437975525235605, "grad_norm": 0.2816953659057617, "learning_rate": 7.610088113931346e-06, "loss": 1.1218, "step": 4481 }, { "epoch": 0.3438742759229186, "grad_norm": 0.18885482847690582, "learning_rate": 7.609030836006106e-06, "loss": 1.1002, "step": 4482 }, { "epoch": 0.34395099932227663, "grad_norm": 0.21883706748485565, "learning_rate": 7.607973397748909e-06, "loss": 1.0511, "step": 4483 }, { "epoch": 0.34402772272163473, "grad_norm": 0.4413966238498688, "learning_rate": 7.6069157992247435e-06, "loss": 1.1476, "step": 4484 }, { "epoch": 0.3441044461209928, "grad_norm": 0.2254551202058792, "learning_rate": 7.605858040498595e-06, "loss": 1.1966, "step": 4485 }, { "epoch": 0.3441811695203509, "grad_norm": 0.22248272597789764, "learning_rate": 7.6048001216354716e-06, "loss": 1.187, "step": 4486 }, { "epoch": 0.344257892919709, "grad_norm": 0.23992112278938293, "learning_rate": 7.603742042700383e-06, "loss": 1.1496, "step": 4487 }, { "epoch": 0.34433461631906703, "grad_norm": 0.2564258873462677, "learning_rate": 7.602683803758349e-06, "loss": 1.0744, "step": 4488 }, { "epoch": 0.34441133971842514, "grad_norm": 0.21511250734329224, "learning_rate": 7.601625404874405e-06, "loss": 1.0367, "step": 4489 }, { "epoch": 0.3444880631177832, "grad_norm": 0.33280646800994873, "learning_rate": 7.6005668461135904e-06, "loss": 1.135, "step": 4490 }, { "epoch": 0.3445647865171413, "grad_norm": 0.21541006863117218, "learning_rate": 7.599508127540955e-06, "loss": 1.1374, "step": 4491 }, { "epoch": 0.3446415099164994, "grad_norm": 0.19511187076568604, "learning_rate": 7.5984492492215645e-06, "loss": 1.0743, "step": 4492 }, { "epoch": 0.34471823331585744, "grad_norm": 0.24889340996742249, "learning_rate": 7.597390211220486e-06, "loss": 1.1169, "step": 4493 }, { "epoch": 0.34479495671521554, "grad_norm": 0.17710712552070618, "learning_rate": 7.5963310136028005e-06, "loss": 1.1008, "step": 4494 }, { "epoch": 0.3448716801145736, "grad_norm": 0.2204282134771347, "learning_rate": 7.595271656433599e-06, "loss": 1.1035, "step": 4495 }, { "epoch": 0.3449484035139317, "grad_norm": 0.19828277826309204, "learning_rate": 7.594212139777982e-06, "loss": 1.0694, "step": 4496 }, { "epoch": 0.3450251269132898, "grad_norm": 0.10129273682832718, "learning_rate": 7.593152463701061e-06, "loss": 1.2052, "step": 4497 }, { "epoch": 0.34510185031264784, "grad_norm": 0.23020556569099426, "learning_rate": 7.5920926282679534e-06, "loss": 1.1016, "step": 4498 }, { "epoch": 0.34517857371200594, "grad_norm": 0.19413451850414276, "learning_rate": 7.591032633543791e-06, "loss": 1.1442, "step": 4499 }, { "epoch": 0.345255297111364, "grad_norm": 0.1972353756427765, "learning_rate": 7.589972479593712e-06, "loss": 1.0698, "step": 4500 } ], "logging_steps": 1.0, "max_steps": 13033, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9630134517018132e+18, "train_batch_size": 3, "trial_name": null, "trial_params": null }